Skip to content

Introduction

DesmondTools have a set of command-line scripts and a python library written to make setting up the molecular dynamics simulations easier for Desmond.

Install

$ pip install desmondtools

What is Multisim Class?

The following code examples from test/test_multisim.py demonstrate the functionality of the Multisim class.

from desmondtools import Multisim

def test_Multisim_variable():
    # parse_string returns ['barostat.tau']
    o1 = Multisim.variable.parse_string("barostat.tau")[0]
    o2 = Multisim.variable.parse_string("barostat  .tau")[0]
    assert o1 == o2


def test_Multisim_expression_1():
    i = "task {} simulate { meta = [{a=1 b=3 c=[7 8 9]}] f = {} } simulate {n=2} simulate {n = 3}"
    D = Multisim(string=i).to_dot()
    assert D.simulate[0].meta[0].b == str(3)
    assert D.simulate[0].meta[0].c[1] == str(8)
    assert D.simulate[-1].n == str(3)


def test_Multisim_expression_2():
    i = "simulate { effect_if = [[ a ] b ] }"
    D = Multisim(string=i).to_dot()
    assert D.simulate.effect_if[0] == ['a']
    assert D.simulate.effect_if[1] == 'b'


def test_Multisim_expression_3():
    i = """ensemble = {
        brownie = {
            delta_max = 0.1
        }
        class = NVT
        method = Brownie
        thermostat = {
            tau = 1.0
        }
    }"""
    D = Multisim(string=i)
    assert D.dot.ensemble.brownie.delta_max == str(0.1)

    # assert D.dot.ensemble.class == 'NVT'
    #                       ^^^^^
    # SyntaxError: invalid syntax
    # 'class' is reserved word in python and raises SyntaxError
    # use instead .['class']
    assert D.dot.ensemble['class'] == 'NVT' # <--- it does not raise SyntaxError

    assert D.dot.ensemble.method == 'Brownie'
    assert D.dot.ensemble.thermostat.tau == str(1.0)
    D.write()


def test_Multisim_expression_4():
    i = """task {
        task = "desmond:auto"
    }
    simulate {
        title = "NPT and no restraints, 24ps"
        effect_if = [[ "@*.*.annealing" ] 'annealing = off temperature = "@*.*.temperature[0][0]"' ]
        time = 24
        ensemble = {
            class = NPT
            method = Langevin
            thermostat.tau = 0.1
            barostat.tau = 2.0
        }
        eneseq.interval = 0.3
        trajectory.center = solute
    }
    simulate {
        meta = {
            cv = [
                { 
                    atom = [0 0 0 0] 
                    type = dihedral 
                    wall = 0  
                    width = 5.0 
                }
            ]
            cv_name = "$JOBNAME$[_replica$REPLICA$].cvseq"
            first = 0.0
            height = 0.03
            interval = 0.09
            name = "$JOBNAME$[_replica$REPLICA$].kerseq"
        }
    }
    """

    D = Multisim(string=i)

    assert len(D.dot.simulate[0].effect_if) == 2
    assert len(D.dot.simulate[0].effect_if[0]) == 1
    assert D.dot.simulate[0].ensemble.method == 'Langevin'
    assert D.dot.simulate[0].ensemble.barostat.tau == str(2.0)
    assert D.dot.simulate[0].eneseq.interval == str(0.3)
    assert D.dot.simulate[0].trajectory.center == 'solute' # <---

    assert len(D.dot.simulate[1].meta.cv) == 1
    assert len(D.dot.simulate[1].meta.cv[0].atom) == 4
    assert D.dot.simulate[1].meta.height == str(0.03)
    assert D.dot.simulate[-1].meta.cv[0].wall == str(0)

    D.dot.simulate[0].ensemble.barostat.tau = 2.1
    assert D.dot.simulate[0].ensemble.barostat.tau == 2.1

    D.dot.simulate[1].meta.height = 0.06
    assert D.dot.simulate[1].meta.height == 0.06

    D.dot.simulate[-1].meta.cv[0].wall = 50.0
    assert D.dot.simulate[-1].meta.cv[0].wall == 50.0


def test_Multisim_expression_5():
    D= Multisim(template="desmond-md.msj")
    D.write()

Caution

Some keywords in the .msj or .cfg files may conflict with the reserved python syntax. In the below example, accessing the class variable by directly using the .dot expression such as D.dot.ensemble.class = 'NVT' would raise a SyntaxError because class is a reserved word in Python. However, you can avoid the error by using a dictionary type access such as D.dot.ensemble['class'] = 'NVT'.

    ensemble = {
        brownie = {
            delta_max = 0.1
        }
        class = NVT
        method = Brownie
        thermostat = {
            tau = 1.0
        }
    }

Use of Multisim Class

The desmondtools.Multisim class facilitates reading and modifying Schrodinger Desmond .msj and .cfg files, which are typically generated by Schrodinger Maestro GUI task panels. By using a simple Python script with desmondtools, you can efficiently prepare and set up multiple MD simulations without repeatedly launching Maestro or manually editing files. The example below demonstrates how to modify or create the contents of .msj and .cfg files using the .dot attribute of a Multisim instance.

from desmondtools import Multisim

# read template .msj and .cfg
md_msj = Multisim(template="desmond-md.msj")
md_cfg = Multisim(template="desmond-md.cfg")

with open(msj_file,"w") as msj:
    # modify desmond msj template
    md_msj.dot.simulate[-1].cfg_file = cfg_file_basename

    # Setting up restraints using the restraints keyword:
    # https://www.schrodinger.com/kb/332119
    if args.posres_force > 0.0:
        # print the restraints in the multisim log file
        md_msj.dot.simulate[-1].print_restraint = 'true'

        # add the new terms defined in "restraints.new" to existing restraints.
        # The default is restraints.existing = ignore which will 
        # delete existing terms before adding any new ones.
        # md_msj.dot.simulate[-1].restraints.existing = 'retain'

        md_msj.dot.simulate[-1].restraints.new = [
            {
                'name'              : 'posre_harm',
                'atoms'             : [ f'"{args.posres}"' ],
                'force_constants'   : [ args.posres_force, ] * 3,
            }
            ]
        # force constants in the x, y, and z direction

    # writing modified msj
    md_msj.write(msj)

with open(cfg_file,"w") as cfg:
    # read and modify desmond cfg template
    md_cfg.dot.randomize_velocity.seed = random.randint(1000, 9999)
    md_cfg.dot.time = total_simulation_time
    md_cfg.dot.temperature = t_schedule
    md_cfg.dot.trajectory.interval = args.interval
    # wring modified cfg
    md_cfg.write(cfg)

Below are examples of .msj and .cfg files:

# .msj file example (part)

task {
   task = "desmond:auto"
   set_family = {
      desmond = {
         checkpt.write_last_step = no
      }
   }
}

simulate {
   title       = "Brownian Dynamics NVT, T = 10 K, small timesteps, ..."
   annealing   = off
   time        = 100
   timestep    = [0.001 0.001 0.003 ]
   temperature = 10.0
   ensemble = {
      class = "NVT"
      method = "Brownie"
      brownie = {
         delta_max = 0.1
      }
   }
   restrain = {
      atom = "solute_heavy_atom"
      force_constant = 50.0
   }
}

simulate {
   effect_if   = [["==" "-gpu" "@*.*.jlaunch_opt[-1]"] 'ensemble.method = Langevin']
   title       = "NVT, T = 10 K, small timesteps, ..."
   annealing   = off
   time        = 12
   timestep    = [0.001 0.001 0.003]
   temperature = 10.0
   restrain    = { atom = solute_heavy_atom force_constant = 50.0 }
   ensemble    = {
      class  = NVT
      method = Berendsen
      thermostat.tau = 0.1
   }

   randomize_velocity.interval = 1.0
   eneseq.interval             = 0.3
   trajectory.center           = []
}
# .cfg file example (part)

randomize_velocity = {
   first = 0.0
   interval = inf
   seed = 2967
   temperature = "@*.temperature"
}
restrain = none
simbox = {
   first = 0.0
   interval = 1.2
   name = "$JOBNAME$[_replica$REPLICA$]_simbox.dat"
}
surface_tension = 0.0
taper = false
temperature = [
   [300.0 0 ]
]
time = 100000.0
timestep = [0.002 0.002 0.006 ]

Event and Interaction Classes

Schrodinger event analysis format or .eaf files use the same syntax as the .cfg or .msj files. So, the desmondtools.Multisim class is reused in the desmondtools.Event and desmondtools.Interaction classes to process Schrodinger provided trajectory analysis output ...-out.eaf files. For example, below codes from desmondtools.Interaction class shows how the class uses the .dot attributes to process .eaf output files.

for section in self.dot.Keywords:
    try:
        assert section.ProtLigInter.HBondResult
        self.num_frames = len(section.ProtLigInter.HBondResult)
        for frame in section.ProtLigInter.HBondResult:
            # [[3 "_:ARG_143:HH22" d-s "L-FRAG_0:N6" ]]
            for (frameno, prot, hbond_type, lig) in frame:
                prot = prot.strip('\"')
                (_, resid, atom) = prot.split(":")
                (resName, resSeq) = resid.split("_")
                resSeq = int(resSeq)
                if resSeq in self.HBond:
                    self.HBond[resSeq]['count'] += 1
                else:
                    self.HBond[resSeq] = {'resName': resName, 'count':1 }
        for resSeq in sorted(self.HBond):
            fraction = float(self.HBond[resSeq]['count'])/self.num_frames
            if verbose:
                print(f"HBond {self.HBond[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
    except:
        pass

Command-Line Interface

Command-Line Interfaces (CLI) are built on the Multisim class. Some CLIs require Schrodinger Python API.

Command-line interface Description
batch-desmond-cif Convert .mae/.maegz to .cif
batch-desmond-mdinfo Check running MD simulations
batch-desmond-maeinfo Show info of .mae/.maegz file(s)
batch-desmond-cmsinfo Show info of .cms file(s)
batch-desmond-setup Batch Prepare MD systems
batch-desmond-min Batch Setup energy minimizations
batch-desmond-md Batch Setup MD simulations
batch-desmond-metad Batch Setup metadynamics
batch-desmond-report Batch Generate reports
batch-desmond-pli Batch Analyze protein-ligand interactions
batch-desmond-distance Batch Analyze distance
batch-desmond-dihedral Batch Analyze dihedral angles
batch-desmond-ligrmsd Batch Analyze ligand rmsd
batch-desmond-rg Batch Analyze radius of gyration
batch-desmond-rmsx Batch Analyze RMSD and ligand RMSF
batch-desmond-mmgbsa Batch Analyze trajectory MMGBSA

For more helps, $ command --help.

Metadynamics

Types of Collective Variables

type Description Default Width Wall Floor Atom sites
dist Distance 0.05 Å yes yes 2
angle Angle 2.5° yes yes 3
dihedral Dihedral 5.0° no no 4
rgyr Radius of gyration 0.1 Å no no 1
rgyr_mass Mass-weighted radius of gyration 0.1 Å no no 1
rmsd RMSD from aligned starting structure 0.1 Å no no 1
rmsd_symm Symmetry aware RMSD 0.1 Å no no 1
zdist Distance along the z axis 0.05 Å yes yes 1
zdist0 Absolute distance along the z axis 0.1 Å yes yes 1
whim1 WHIM1 - first principal moment [35] 0.5 Å2 no no 1
whim2 WHIM2 - second principal moment [35] 0.25 Å2 no no 1

Examples of Collective Variables

distance:
    cv = [
        {atom = ["res. UNK" "res. 849" ]
        type = dist
        wall = 40
        floor = 10
        width = 0.05
        }
    ]

dihedral:
    cv = [
        {atom = [404 406 407 415 ]
        type = dihedral
        width = 5.0
        }
        {atom = [406 407 415 417 ]
        type = dihedral
        width = 5.0
        }
    ]

zdist(membrane):
    cv = [
        {atom = ["res. UNK"]
        type = zdist
        width = 0.05
        wall = 20
        floor = 5
        }
        ]

rmsd:
    cv = [
        {atom = ["res. UNK" "res. 849" ]
        type = rmsd
        width = 0.1
        }
    ]

Well-Tempered Metadynamics

In well-tempered metadynamics, the height of the deployed Gaussians are rescaled (decreased) during simulation time by: omega_0 * exp(-V/(kB * ΔT)) Where omega_0 is the initial hill height, V is the bias potential and the denominator in the exponential, kB * ΔT,
is the bias factor (kTemp). During well-tempered metadynamics, the dynamics of the system are effectively accelerated (without heating up the system) up to T + ΔT, where T is the chosen MD simulation temperature. The choice of the bias factor value is guided by the highest barrier in the simulation system which the well-tempered metadynamics run should overcome. Here are some suggestions, assuming that the initial hill height ω0 (height parameter in the Metadynamics panel) has been set to 0.3 kcal/mol:

Max. barrier height (kcal/mol) kTemp (kcal/mol)
3 1.7
6 3.4
10 5.6
15 8.4
20 11.2

Extending Simulations

See https://www.schrodinger.com/kb/788642 for extending metadynamics simulation.

Python Classes

desmondtools.Multisim

Parsing Desmond multisim .cfg and .msj expressions

Source code in src/desmondtools/multisim.py
class Multisim:
    """Parsing Desmond multisim .cfg and .msj expressions"""
    # variable, value, array, expr
    # pyparsing module’s default behavior is to ignore whitespace.
    # +: AND, |: MatchFirst, left-to-right, ^: Or(longest match)
    # Group: --> list
    # Dict: --> dict
    # Forward: --> recursive

    EQ = pp.Suppress('=')
    LBRACKET, RBRACKET, LBRACE, RBRACE = map(pp.Literal, "[]{}")
    variable = (pp.Word(pp.alphanums + "._/?-@") + 
                pp.Opt("." + pp.Word(pp.alphanums))).set_parse_action(''.join)
    _string1 = pp.Word(pp.alphanums + "._/?-@*")
    _string2 = pp.quoted_string()
    _number  = ppc.number()
    value   = (_string1 | _string2 | _number)
    array   = pp.Forward()
    array   <<= pp.Group(LBRACKET + (pp.ZeroOrMore(value | array)) + RBRACKET)
    expr    = pp.Forward()
    _expr_0  = (variable + EQ + value)
    _expr_1  = (variable + EQ + array)
    _expr_2  = (variable + EQ + pp.Group(
        LBRACE + pp.ZeroOrMore(expr) + RBRACE ))
    _expr_3  = (variable + EQ + pp.Group(
        LBRACKET + pp.ZeroOrMore(pp.Group(LBRACE + expr + RBRACE)) + RBRACKET))
    _expr_4  = pp.Group(variable + pp.Group(
        LBRACE + pp.ZeroOrMore( expr ) + RBRACE))
    expr    <<= pp.OneOrMore(pp.Dict(pp.Group(
        _expr_0 | _expr_1 | _expr_2 | _expr_3 | _expr_4)))
    expr.ignore("#" + pp.restOfLine)


    def __init__(self, **kwargs):
        self.template_path = None
        self.ParseResults = None
        self.dict = {}
        self.dot = DotMap()
        self.output = sys.stdout # do not attempt to close
        self.indent = 4

        if 'template' in kwargs:
            template = kwargs['template']
            template_path = pathlib.Path(template)
            if template_path.is_file():
                self.template_path = template_path
            else:
                template_path = importlib.resources.files('desmondtools')
                self.template_path = template_path / template
            if self.template_path is None:
                raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), template)

            self.ParseResults = Multisim.expr.parse_file(self.template_path)
            self.decode()

        elif 'string' in kwargs:
            string = kwargs['string']
            try:
                self.ParseResults = Multisim.expr.parse_string(string)
                self.decode()
            except:
                raise RuntimeError("Multisim: cannot parse the input string.")
        else:
            raise RuntimeError("Multisim: template filename or string is required.")



    @staticmethod
    def unfold_dict_key(d) -> None:
        """Unfold '.' in the keys of a dictionary

        Args:
            d (dict): dictionary (to be modified)
        """
        ks = [ k for k in d ]
        for k in ks:
            v = d[k]
            if '.' in k:
                # d[k] = v --> d[kk[0]] = { kk[1] : v }
                _u = None
                _k = k.split('.')[0]
                for kk in k.split('.')[-1:0:-1]:
                    if _u is None:
                        _u = {kk : v }
                    else:
                        _u = {kk : _u}
                d[_k] = _u
                del d[k]
            else:
                pass
            if isinstance(v, dict):
                Multisim.unfold_dict_key(v)


    @staticmethod
    def traverse_dict(d) -> None:
        """Recursively traverse a nested dictionary/list

        Args:
            d (dict): dictionary (to be modified)
        """
        if isinstance(d, dict):
            for k,v in d.items():
                if isinstance(v, dict):
                    Multisim.traverse_dict(v)            
                elif isinstance(v, list):
                    if v == ['{','}']:
                        d[k] = {}
                    elif v == ['[',']']:
                        d[k] = []
                    elif v[0] == '[' and v[-1] == ']':
                        d[k] = v[1:-1]
                        Multisim.traverse_dict(d[k])
                    elif v[0] == '{' and v[-1] == '}':
                        d[k] = dict(v[1:-1])
                        Multisim.traverse_dict(d[k])
        elif isinstance(d, list):
            for k,v in enumerate(d):
                if isinstance(v, dict):
                    Multisim.traverse_dict(v)
                elif isinstance(v, list):
                    if v == ['{','}']:
                        d[k] = {}
                    elif v == ['[',']']:
                        d[k] = []
                    elif v[0] == '[' and v[-1] == ']':
                        d[k] = v[1:-1]
                        Multisim.traverse_dict(d[k])
                    elif v[0] == '{' and v[-1] == '}':
                        d[k] = dict(v[1:-1])
                        Multisim.traverse_dict(d[k])


    def decode(self) -> None:
        """decode the parsed results into a dictionary and its dotmap"""
        # create .dict
        if isinstance(self.ParseResults.as_list()[0][0], str): # key
            self.dict = self.ParseResults.as_dict()
            Multisim.traverse_dict(self.dict)
            # handle the case where key has '.'
            Multisim.unfold_dict_key(self.dict)

        elif isinstance(self.ParseResults.as_list()[0][0], list):
            self.dict = [] # now self.dict is a list of dictionary
            for section in self.ParseResults:
                dict_ = dict(section.as_list())
                Multisim.traverse_dict(dict_)
                # handle the case where key has '.'
                Multisim.unfold_dict_key(dict_)
                self.dict.append(dict_)

        # create .dot
        if isinstance(self.dict, list):
            self.dot = {}
            for d in self.dict:
                for k,v in d.items():
                    if k in self.dot:
                        if isinstance(self.dot[k], list):
                            self.dot[k].append(v)
                        else:
                            self.dot[k] = [self.dot[k], v]
                    else:
                        self.dot[k] = v
            self.dot = DotMap(self.dot)
        else:
            self.dot = DotMap(self.dict)



    def write(self, output=None):
        """Writes DOT object"""
        if isinstance(output, io.IOBase):
            self.output = output
        elif isinstance(output, str):
            self.output = open(output, "w")

        if isinstance(self.dict, list):
            blocks = []
            for k, v in self.dot.items():
                if isinstance(v, list):
                    for vv in v:
                        blocks.append({k:vv})
                else:
                    blocks.append({k:v})
            for block in blocks:
                self._write_dict(block, block=True)
                self.output.write("\n")
        else:
            self._write_dict(self.dot)


    def _write_dict(self, d, block=False, depth=0):
        """subroutine of .write() method"""
        spc = ' ' * self.indent * depth
        if isinstance(d, dict) or isinstance(d, DotMap):
            for k, v in d.items():
                k = str(k)
                if v:
                    if isinstance(v, dict) or isinstance(v, DotMap):
                        if depth == 0 and block:
                            self.output.write(spc + k + " {\n")
                        else:
                            self.output.write(spc + k + " = {\n")
                        self._write_dict(v, depth=depth+1)
                        self.output.write(spc + "}\n")
                    elif isinstance(v, list):
                        self.output.write(spc + k + " = [")
                        for vv in v:
                            if isinstance(vv, dict) or isinstance(vv, DotMap): 
                                self.output.write("{\n")
                            elif isinstance(vv, list):
                                self.output.write("[")
                            self._write_dict(vv, depth=depth+1)
                            if isinstance(vv, dict) or isinstance(vv, DotMap):
                                self.output.write(spc + "}")
                            elif isinstance(vv, list):
                                self.output.write("]")                
                        self.output.write("]\n")
                    else:
                        self.output.write(spc + k + " = " + str(v) +"\n")
                else:
                    if isinstance(v, list) and (not bool(v)):
                        self.output.write(spc + k + " = []\n")
                    elif (isinstance(v, dict) or isinstance(v, DotMap))and (not bool(v)):
                        self.output.write(spc + k + " = {\n}\n")
                    else:
                        self.output.write(spc + k + " =   \n")
        elif isinstance(d, list):
            for v in d:
                self._write_dict(v, depth=depth+1)
        else:
            self.output.write(" " + str(d) + " ")


    def to_dot(self) -> DotMap:
        """Returns parsed results as a DotMap object

        Returns:
            DotMap : DotMap object
        """
        return self.dot


    def to_list(self) -> list:
        """Returns parsed results as a list

        Returns:
            list : list
        """
        return self.ParseResults.as_list()


    def to_dict(self) -> dict:
        """Returns parsed results as a dictionary

        Returns:
            dict : dictionary
        """
        return self.dict

Attributes

EQ = pp.Suppress('=') class-attribute instance-attribute

ParseResults = Multisim.expr.parse_string(string) instance-attribute

array = pp.Forward() class-attribute instance-attribute

dict = {} instance-attribute

dot = DotMap() instance-attribute

expr = pp.Forward() class-attribute instance-attribute

indent = 4 instance-attribute

output = sys.stdout instance-attribute

template_path = None instance-attribute

value = _string1 | _string2 | _number class-attribute instance-attribute

variable = pp.Word(pp.alphanums + '._/?-@') + pp.Opt('.' + pp.Word(pp.alphanums)).set_parse_action(''.join) class-attribute instance-attribute

Functions

__init__(**kwargs)

Source code in src/desmondtools/multisim.py
def __init__(self, **kwargs):
    self.template_path = None
    self.ParseResults = None
    self.dict = {}
    self.dot = DotMap()
    self.output = sys.stdout # do not attempt to close
    self.indent = 4

    if 'template' in kwargs:
        template = kwargs['template']
        template_path = pathlib.Path(template)
        if template_path.is_file():
            self.template_path = template_path
        else:
            template_path = importlib.resources.files('desmondtools')
            self.template_path = template_path / template
        if self.template_path is None:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), template)

        self.ParseResults = Multisim.expr.parse_file(self.template_path)
        self.decode()

    elif 'string' in kwargs:
        string = kwargs['string']
        try:
            self.ParseResults = Multisim.expr.parse_string(string)
            self.decode()
        except:
            raise RuntimeError("Multisim: cannot parse the input string.")
    else:
        raise RuntimeError("Multisim: template filename or string is required.")

decode()

decode the parsed results into a dictionary and its dotmap

Source code in src/desmondtools/multisim.py
def decode(self) -> None:
    """decode the parsed results into a dictionary and its dotmap"""
    # create .dict
    if isinstance(self.ParseResults.as_list()[0][0], str): # key
        self.dict = self.ParseResults.as_dict()
        Multisim.traverse_dict(self.dict)
        # handle the case where key has '.'
        Multisim.unfold_dict_key(self.dict)

    elif isinstance(self.ParseResults.as_list()[0][0], list):
        self.dict = [] # now self.dict is a list of dictionary
        for section in self.ParseResults:
            dict_ = dict(section.as_list())
            Multisim.traverse_dict(dict_)
            # handle the case where key has '.'
            Multisim.unfold_dict_key(dict_)
            self.dict.append(dict_)

    # create .dot
    if isinstance(self.dict, list):
        self.dot = {}
        for d in self.dict:
            for k,v in d.items():
                if k in self.dot:
                    if isinstance(self.dot[k], list):
                        self.dot[k].append(v)
                    else:
                        self.dot[k] = [self.dot[k], v]
                else:
                    self.dot[k] = v
        self.dot = DotMap(self.dot)
    else:
        self.dot = DotMap(self.dict)

to_dict()

Returns parsed results as a dictionary

Returns:

Name Type Description
dict dict

dictionary

Source code in src/desmondtools/multisim.py
def to_dict(self) -> dict:
    """Returns parsed results as a dictionary

    Returns:
        dict : dictionary
    """
    return self.dict

to_dot()

Returns parsed results as a DotMap object

Returns:

Name Type Description
DotMap DotMap

DotMap object

Source code in src/desmondtools/multisim.py
def to_dot(self) -> DotMap:
    """Returns parsed results as a DotMap object

    Returns:
        DotMap : DotMap object
    """
    return self.dot

to_list()

Returns parsed results as a list

Returns:

Name Type Description
list list

list

Source code in src/desmondtools/multisim.py
def to_list(self) -> list:
    """Returns parsed results as a list

    Returns:
        list : list
    """
    return self.ParseResults.as_list()

traverse_dict(d) staticmethod

Recursively traverse a nested dictionary/list

Parameters:

Name Type Description Default
d
dict

dictionary (to be modified)

required
Source code in src/desmondtools/multisim.py
@staticmethod
def traverse_dict(d) -> None:
    """Recursively traverse a nested dictionary/list

    Args:
        d (dict): dictionary (to be modified)
    """
    if isinstance(d, dict):
        for k,v in d.items():
            if isinstance(v, dict):
                Multisim.traverse_dict(v)            
            elif isinstance(v, list):
                if v == ['{','}']:
                    d[k] = {}
                elif v == ['[',']']:
                    d[k] = []
                elif v[0] == '[' and v[-1] == ']':
                    d[k] = v[1:-1]
                    Multisim.traverse_dict(d[k])
                elif v[0] == '{' and v[-1] == '}':
                    d[k] = dict(v[1:-1])
                    Multisim.traverse_dict(d[k])
    elif isinstance(d, list):
        for k,v in enumerate(d):
            if isinstance(v, dict):
                Multisim.traverse_dict(v)
            elif isinstance(v, list):
                if v == ['{','}']:
                    d[k] = {}
                elif v == ['[',']']:
                    d[k] = []
                elif v[0] == '[' and v[-1] == ']':
                    d[k] = v[1:-1]
                    Multisim.traverse_dict(d[k])
                elif v[0] == '{' and v[-1] == '}':
                    d[k] = dict(v[1:-1])
                    Multisim.traverse_dict(d[k])

unfold_dict_key(d) staticmethod

Unfold '.' in the keys of a dictionary

Parameters:

Name Type Description Default
d
dict

dictionary (to be modified)

required
Source code in src/desmondtools/multisim.py
@staticmethod
def unfold_dict_key(d) -> None:
    """Unfold '.' in the keys of a dictionary

    Args:
        d (dict): dictionary (to be modified)
    """
    ks = [ k for k in d ]
    for k in ks:
        v = d[k]
        if '.' in k:
            # d[k] = v --> d[kk[0]] = { kk[1] : v }
            _u = None
            _k = k.split('.')[0]
            for kk in k.split('.')[-1:0:-1]:
                if _u is None:
                    _u = {kk : v }
                else:
                    _u = {kk : _u}
            d[_k] = _u
            del d[k]
        else:
            pass
        if isinstance(v, dict):
            Multisim.unfold_dict_key(v)

write(output=None)

Writes DOT object

Source code in src/desmondtools/multisim.py
def write(self, output=None):
    """Writes DOT object"""
    if isinstance(output, io.IOBase):
        self.output = output
    elif isinstance(output, str):
        self.output = open(output, "w")

    if isinstance(self.dict, list):
        blocks = []
        for k, v in self.dot.items():
            if isinstance(v, list):
                for vv in v:
                    blocks.append({k:vv})
            else:
                blocks.append({k:v})
        for block in blocks:
            self._write_dict(block, block=True)
            self.output.write("\n")
    else:
        self._write_dict(self.dot)

desmondtools.Event

Source code in src/desmondtools/event.py
class Event:
    def __init__(self, filename:Path | str):
        if isinstance(filename, Path):
            result = Multisim.expr.parse_file(filename.as_posix())
        else:
            result = Multisim.expr.parse_file(filename)
        d = result.as_dict()
        Multisim.traverse_dict(d)
        self.dot = DotMap(d)

Attributes

dot = DotMap(d) instance-attribute

Functions

__init__(filename)

Source code in src/desmondtools/event.py
def __init__(self, filename:Path | str):
    if isinstance(filename, Path):
        result = Multisim.expr.parse_file(filename.as_posix())
    else:
        result = Multisim.expr.parse_file(filename)
    d = result.as_dict()
    Multisim.traverse_dict(d)
    self.dot = DotMap(d)

desmondtools.Interaction

Bases: Event

Source code in src/desmondtools/event.py
class Interaction(Event):
    def __init__(self, filename:Path | str, verbose:bool=False):
        super().__init__(filename)

        self.num_frames = 0
        self.HBond = {}
        self.Hydrophobic = {}
        self.WaterBridge = {}
        self.Polar = {}
        self.HalogenBond = {}
        self.LigWat = {}
        self.Metal = {}
        self.PiCat = {}
        self.PiPi = {}

        for section in self.dot.Keywords:
            try:
                assert section.ProtLigInter.HBondResult
                self.num_frames = len(section.ProtLigInter.HBondResult)
                for frame in section.ProtLigInter.HBondResult:
                    # [[3 "_:ARG_143:HH22" d-s "L-FRAG_0:N6" ]]
                    for (frameno, prot, hbond_type, lig) in frame:
                        prot = prot.strip('\"')
                        (_, resid, atom) = prot.split(":")
                        (resName, resSeq) = resid.split("_")
                        resSeq = int(resSeq)
                        if resSeq in self.HBond:
                            self.HBond[resSeq]['count'] += 1
                        else:
                            self.HBond[resSeq] = {'resName': resName, 'count':1 }
                for resSeq in sorted(self.HBond):
                    fraction = float(self.HBond[resSeq]['count'])/self.num_frames
                    if verbose:
                        print(f"HBond {self.HBond[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
            except:
                pass

            try:
                assert section.ProtLigInter.HydrophobicResult
                self.num_frames = len(section.ProtLigInter.HydrophobicResult)
                for frame in section.ProtLigInter.HydrophobicResult:
                    # [[0 "_:PHE_223" L-FRAG_0 ] [0 "_:ALA_241" L-FRAG_0 ]]
                    for (frameno, prot, lig) in frame:
                        prot = prot.strip('\"')
                        (_, resid) = prot.split(":")
                        (resName, resSeq) = resid.split("_")
                        resSeq = int(resSeq)
                        if resSeq in self.Hydrophobic:
                            self.Hydrophobic[resSeq]['count'] += 1
                        else:
                            self.Hydrophobic[resSeq] = {'resName': resName, 'count':1 }
                for resSeq in sorted(self.Hydrophobic):
                    fraction = float(self.Hydrophobic[resSeq]['count'])/self.num_frames
                    if verbose:
                        print(f"Hydrophobic {self.Hydrophobic[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
            except:
                pass

            try:
                assert section.ProtLigInter.PolarResult
                self.num_frames = len(section.ProtLigInter.PolarResult)
                for frame in section.ProtLigInter.PolarResult:
                    # [[1 "_:GLU_216:OE2" b "L-FRAG_1:N3" 4.45 ]]
                    for (frameno, prot, _, lig, _) in frame:
                        prot = prot.strip('\"')
                        (_, resid, atom) = prot.split(":")
                        (resName, resSeq) = resid.split("_")
                        resSeq = int(resSeq)
                        if resSeq in self.Polar:
                            self.Polar[resSeq]['count'] += 1
                        else:
                            self.Polar[resSeq] = {'resName': resName, 'count':1 }
                for resSeq in sorted(self.Polar):
                    fraction = float(self.Polar[resSeq]['count'])/self.num_frames
                    if verbose:
                        print(f"Polar {self.Polar[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
            except:
                pass

            try:
                assert section.ProtLigInter.WaterBridgeResult
                self.num_frames = len(section.ProtLigInter.WaterBridgeResult)
                for frame in section.ProtLigInter.WaterBridgeResult:
                    # [[3 "_:GLU_216:OE2" a "L-FRAG_0:N2" a 2431 ]]
                    for (frameno, prot, _, lig, _, _) in frame:
                        prot = prot.strip('\"')
                        (_, resid, atom) = prot.split(":")
                        (resName, resSeq) = resid.split("_")
                        resSeq = int(resSeq)
                        if resSeq in self.WaterBridge:
                            self.WaterBridge[resSeq]['count'] += 1
                        else:
                            self.WaterBridge[resSeq] = {'resName': resName, 'count':1 }
                for resSeq in sorted(self.WaterBridge):
                    fraction = float(self.WaterBridge[resSeq]['count'])/self.num_frames
                    if verbose:
                        print(f"WaterBridge {self.WaterBridge[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
            except:
                pass

Attributes

HBond = {} instance-attribute

HalogenBond = {} instance-attribute

Hydrophobic = {} instance-attribute

LigWat = {} instance-attribute

Metal = {} instance-attribute

PiCat = {} instance-attribute

PiPi = {} instance-attribute

Polar = {} instance-attribute

WaterBridge = {} instance-attribute

num_frames = len(section.ProtLigInter.WaterBridgeResult) instance-attribute

Functions

__init__(filename, verbose=False)

Source code in src/desmondtools/event.py
def __init__(self, filename:Path | str, verbose:bool=False):
    super().__init__(filename)

    self.num_frames = 0
    self.HBond = {}
    self.Hydrophobic = {}
    self.WaterBridge = {}
    self.Polar = {}
    self.HalogenBond = {}
    self.LigWat = {}
    self.Metal = {}
    self.PiCat = {}
    self.PiPi = {}

    for section in self.dot.Keywords:
        try:
            assert section.ProtLigInter.HBondResult
            self.num_frames = len(section.ProtLigInter.HBondResult)
            for frame in section.ProtLigInter.HBondResult:
                # [[3 "_:ARG_143:HH22" d-s "L-FRAG_0:N6" ]]
                for (frameno, prot, hbond_type, lig) in frame:
                    prot = prot.strip('\"')
                    (_, resid, atom) = prot.split(":")
                    (resName, resSeq) = resid.split("_")
                    resSeq = int(resSeq)
                    if resSeq in self.HBond:
                        self.HBond[resSeq]['count'] += 1
                    else:
                        self.HBond[resSeq] = {'resName': resName, 'count':1 }
            for resSeq in sorted(self.HBond):
                fraction = float(self.HBond[resSeq]['count'])/self.num_frames
                if verbose:
                    print(f"HBond {self.HBond[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
        except:
            pass

        try:
            assert section.ProtLigInter.HydrophobicResult
            self.num_frames = len(section.ProtLigInter.HydrophobicResult)
            for frame in section.ProtLigInter.HydrophobicResult:
                # [[0 "_:PHE_223" L-FRAG_0 ] [0 "_:ALA_241" L-FRAG_0 ]]
                for (frameno, prot, lig) in frame:
                    prot = prot.strip('\"')
                    (_, resid) = prot.split(":")
                    (resName, resSeq) = resid.split("_")
                    resSeq = int(resSeq)
                    if resSeq in self.Hydrophobic:
                        self.Hydrophobic[resSeq]['count'] += 1
                    else:
                        self.Hydrophobic[resSeq] = {'resName': resName, 'count':1 }
            for resSeq in sorted(self.Hydrophobic):
                fraction = float(self.Hydrophobic[resSeq]['count'])/self.num_frames
                if verbose:
                    print(f"Hydrophobic {self.Hydrophobic[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
        except:
            pass

        try:
            assert section.ProtLigInter.PolarResult
            self.num_frames = len(section.ProtLigInter.PolarResult)
            for frame in section.ProtLigInter.PolarResult:
                # [[1 "_:GLU_216:OE2" b "L-FRAG_1:N3" 4.45 ]]
                for (frameno, prot, _, lig, _) in frame:
                    prot = prot.strip('\"')
                    (_, resid, atom) = prot.split(":")
                    (resName, resSeq) = resid.split("_")
                    resSeq = int(resSeq)
                    if resSeq in self.Polar:
                        self.Polar[resSeq]['count'] += 1
                    else:
                        self.Polar[resSeq] = {'resName': resName, 'count':1 }
            for resSeq in sorted(self.Polar):
                fraction = float(self.Polar[resSeq]['count'])/self.num_frames
                if verbose:
                    print(f"Polar {self.Polar[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
        except:
            pass

        try:
            assert section.ProtLigInter.WaterBridgeResult
            self.num_frames = len(section.ProtLigInter.WaterBridgeResult)
            for frame in section.ProtLigInter.WaterBridgeResult:
                # [[3 "_:GLU_216:OE2" a "L-FRAG_0:N2" a 2431 ]]
                for (frameno, prot, _, lig, _, _) in frame:
                    prot = prot.strip('\"')
                    (_, resid, atom) = prot.split(":")
                    (resName, resSeq) = resid.split("_")
                    resSeq = int(resSeq)
                    if resSeq in self.WaterBridge:
                        self.WaterBridge[resSeq]['count'] += 1
                    else:
                        self.WaterBridge[resSeq] = {'resName': resName, 'count':1 }
            for resSeq in sorted(self.WaterBridge):
                fraction = float(self.WaterBridge[resSeq]['count'])/self.num_frames
                if verbose:
                    print(f"WaterBridge {self.WaterBridge[resSeq]['resName']}_{resSeq} {fraction:5.3f} {self.num_frames}")
        except:
            pass

desmondtools.Maestro

Export maestro file

m_atom block from PDB

1 atom_index
2 i_m_mmod_type
3 r_m_x_coord
4 r_m_y_coord
5 r_m_z_coord
6 i_m_residue_number
7 s_m_mmod_res
8 s_m_chain_name
9 i_m_color
10 r_m_charge1
11 r_m_charge2
12 s_m_pdb_residue_name
13 s_m_pdb_atom_name
14 i_m_atomic_number
15 i_m_formal_charge
16 s_m_color_rgb
17 s_m_atom_name
18 i_m_secondary_structure
19 r_glide_flexr_altx1
20 r_glide_flexr_altx2
21 r_glide_flexr_alty1
22 r_glide_flexr_alty2
23 r_glide_flexr_altz1
24 r_glide_flexr_altz2
25 r_m_pdb_occupancy
26 r_m_pdb_tfactor
27 i_glide_flexr_naltpos
28 i_i_internal_atom_index
29 i_pdb_PDB_serial
30 i_pdb_seqres_index
31 b_glide_flexr_movable
32 r_m_alt_pdb_occupancy
33 r_m_alt_pdb_tfactor
34 r_m_alt_x_coord
35 r_m_alt_y_coord
36 r_m_alt_z_coord
37 r_pdb_alt_occupancy_A
38 r_pdb_alt_occupancy_B
39 r_pdb_alt_tfactor_A
40 r_pdb_alt_tfactor_B
41 r_pdb_alt_x_coord_A
42 r_pdb_alt_x_coord_B
43 r_pdb_alt_y_coord_A
44 r_pdb_alt_y_coord_B
45 r_pdb_alt_z_coord_A
46 r_pdb_alt_z_coord_B
47 i_m_pdb_convert_problem
48 i_pdb_alt_PDB_serial
49 i_pdb_alt_PDB_serial_A
50 i_pdb_alt_PDB_serial_B
51 s_pdb_altloc_chars

m_atom block from ligand

i_m_mmod_type
r_m_x_coord
r_m_y_coord
r_m_z_coord
i_m_residue_number
i_m_color
r_m_charge1
r_m_charge2
i_m_atomic_number
i_m_formal_charge
s_m_color_rgb
i_sd_original_parity
b_st_SpecifiedChirality
r_epik_H2O_pKa
r_epik_H2O_pKa_uncertainty

m_bond block

# First column is bond index #
i_m_from
i_m_to
i_m_order
i_sd_original_parity
r_glide_torcontrol_penalty
s_glide_torcontrol_name
Source code in src/desmondtools/maestro.py
class Maestro:
    """
    Export maestro file

    m_atom block from PDB
    =====================
        1 atom_index
        2 i_m_mmod_type
        3 r_m_x_coord
        4 r_m_y_coord
        5 r_m_z_coord
        6 i_m_residue_number
        7 s_m_mmod_res
        8 s_m_chain_name
        9 i_m_color
        10 r_m_charge1
        11 r_m_charge2
        12 s_m_pdb_residue_name
        13 s_m_pdb_atom_name
        14 i_m_atomic_number
        15 i_m_formal_charge
        16 s_m_color_rgb
        17 s_m_atom_name
        18 i_m_secondary_structure
        19 r_glide_flexr_altx1
        20 r_glide_flexr_altx2
        21 r_glide_flexr_alty1
        22 r_glide_flexr_alty2
        23 r_glide_flexr_altz1
        24 r_glide_flexr_altz2
        25 r_m_pdb_occupancy
        26 r_m_pdb_tfactor
        27 i_glide_flexr_naltpos
        28 i_i_internal_atom_index
        29 i_pdb_PDB_serial
        30 i_pdb_seqres_index
        31 b_glide_flexr_movable
        32 r_m_alt_pdb_occupancy
        33 r_m_alt_pdb_tfactor
        34 r_m_alt_x_coord
        35 r_m_alt_y_coord
        36 r_m_alt_z_coord
        37 r_pdb_alt_occupancy_A
        38 r_pdb_alt_occupancy_B
        39 r_pdb_alt_tfactor_A
        40 r_pdb_alt_tfactor_B
        41 r_pdb_alt_x_coord_A
        42 r_pdb_alt_x_coord_B
        43 r_pdb_alt_y_coord_A
        44 r_pdb_alt_y_coord_B
        45 r_pdb_alt_z_coord_A
        46 r_pdb_alt_z_coord_B
        47 i_m_pdb_convert_problem
        48 i_pdb_alt_PDB_serial
        49 i_pdb_alt_PDB_serial_A
        50 i_pdb_alt_PDB_serial_B
        51 s_pdb_altloc_chars

    m_atom block from ligand
    ========================
        i_m_mmod_type
        r_m_x_coord
        r_m_y_coord
        r_m_z_coord
        i_m_residue_number
        i_m_color
        r_m_charge1
        r_m_charge2
        i_m_atomic_number
        i_m_formal_charge
        s_m_color_rgb
        i_sd_original_parity
        b_st_SpecifiedChirality
        r_epik_H2O_pKa
        r_epik_H2O_pKa_uncertainty

    m_bond block
    ============
        # First column is bond index #
        i_m_from
        i_m_to
        i_m_order
        i_sd_original_parity
        r_glide_torcontrol_penalty
        s_glide_torcontrol_name
    """


    def __init__(self, filename:str, max_two_entries:bool=True) -> None:
        """ initialize """
        if not Path(filename).exists:
            print(f"Error file not found: {filename}")
            sys.exit(1)

        self.filename = filename     # ex: ./dir/a.maegz  ./dir/a.mae.gz
        self.filename_prefix = None  # ex: ./dir/a        ./dir/a
        self.filename_stem = None    # ex: a              a
        self.lines = None
        self.entries = None


        self.max_two_entries = max_two_entries
        self.title = ""
        self.serial = 0
        self.usedId = {}
        self.mmcif = {"_entry": None, "_chem_comp_bond": None, "_atom_site": None}
        self.iamap = {}


    @staticmethod
    def getFromDict(dataDict, mapList):
        return reduce(operator.getitem, mapList, dataDict)


    @staticmethod
    def setInDict(dataDict, mapList, value):
        if mapList:
            Maestro.getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
        else:
            for k,v in value.items():
                dataDict[k] = v


    def parse_impl(self, f:TextIO) -> None:
        """Parse Maestro file to a list of dictionaries"""
        self.entries = []
        count = re.compile(r'(\w+)\[(\d+)\]')
        tokens = shlex.split(f.read())
        level = []
        data = {}
        previous_token = None
        header = False
        extra_column = 0
        num_repeat = 1
        skip = False
        for token in tokens :
            if token == "#" :
                skip = not skip # invert
                continue
            elif skip:
                continue
            elif token == "{" :
                header = True
                key = []
                val = []
                arr = []
                if previous_token:
                    if previous_token == "f_m_ct" and data:
                        self.entries.append(data)
                        data = {}
                    try:
                        (block, num_repeat) = count.findall(previous_token)[0]
                        num_repeat = int(num_repeat)
                        extra_column = 1
                    except:
                        block = previous_token
                        num_repeat = 1
                        extra_column = 0
                    level.append(block)

            elif token == "}":
                if level: 
                    level.pop()
            elif token == ":::":
                header = False
            elif header:
                key.append(token)
            else:
                val.append(token)
                # only store f_m_ct blocks (level != [])
                if len(val) == (len(key)+extra_column) and level :
                    arr.append(val[extra_column:])
                    val = []
                    if len(arr) == num_repeat:
                        if len(arr) == 1:
                            Maestro.setInDict(data, level, dict(zip(key, arr[0])))
                        else:
                            T = list(zip(*arr)) # transpose
                            Maestro.setInDict(data, level, dict(zip(key, T)))
            previous_token = token

        if data:
            self.entries.append(data)


    def parse(self) -> None:
        """Parse a Maestro file

            set self.lines, self.filename_prefix, self.filename_stem
        """
        if self.filename.endswith('.maegz'):
            self.filename_prefix = self.filename.replace(".maegz", "")
            with gzip.open(self.filename, 'rt') as f:
                self.parse_impl(f)

        elif self.filename.endswith('.mae.gz'):
            self.filename_prefix = self.filename.replace(".mae.gz", "")
            with gzip.open(self.filename, "rt") as f:
                self.parse_impl(f)

        elif self.filename.endswith('.mae'):
            self.filename_prefix = self.filename.replace(".mae", "")
            with open(self.filename, "rt") as f:
                self.parse_impl(f)
        else:
            print("Error : .mae, .mae.gz, or .maegz file is expected")
            sys.exit(1)
        self.filename_stem = Path(self.filename_prefix).stem


    def read_lines(self) -> None:
        """Read a Maestro file

            set self.lines, self.filename_prefix, self.filename_stem
        """
        if self.filename.endswith('.maegz'):
            self.filename_prefix = self.filename.replace(".maegz", "")
            with gzip.open(self.filename, 'rt') as f:
                self.lines = f.readlines()

        elif self.filename.endswith('.mae.gz'):
            self.filename_prefix = self.filename.replace(".mae.gz", "")
            with gzip.open(self.filename, "rt") as f:
                self.lines = f.readlines()

        elif self.filename.endswith('.mae'):
            self.filename_prefix = self.filename.replace(".mae", "")
            with open(self.filename, "rt") as f:
                self.lines = f.readlines()
        else:
            print("Error : .mae, .mae.gz, or .maegz file is expected")
            sys.exit(1)
        self.filename_stem = Path(self.filename_prefix).stem 


    def append_entry(self, title):
        """Start a new entry."""
        if self.mmcif["_entry"]:
            if isinstance(self.mmcif["_entry"]["id"], list):
                # you got here as the third and later entry
                if self.max_two_entries:
                    # write out last two entries
                    self.write_mmcif()
                    # return to the state in which only the first entry was defined
                    self.mmcif = copy.deepcopy(self.mmcif_first)
                    self.usedId = copy.deepcopy(self.mmcif_first_usedId)
                    self.serial = self.mmcif_first_serial
                    # regardless of its appearance, current entry now is regarded as the second entry
                    self.mmcif["_entry"]["id"] = [ self.mmcif["_entry"]["id"][0], title ]
                else:
                    # otherwise continue to add entry
                    self.mmcif["_entry"]["id"].append(title)
            else:
                # you got here as the second entry
                # save the first entry before adding the second entry
                self.mmcif_first = copy.deepcopy(self.mmcif)
                self.mmcif_first_usedId = copy.deepcopy(self.usedId)
                self.mmcif_first_serial = self.serial
                # change to a list type
                self.mmcif["_entry"]["id"] = [ self.mmcif["_entry"]["id"], title ]
        else:
            # regular string type
            self.mmcif["_entry"] = { "id" : title }


    def append_atom_site(self, 
                        group_PDB, id, 
                        type_symbol, 
                        label_atom_id, 
                        label_alt_id, 
                        label_comp_id,
                        label_asym_id, 
                        label_entity_id, 
                        label_seq_id, 
                        pdbx_PDB_ins_code, 
                        Cartn_x, 
                        Cartn_y, 
                        Cartn_z,
                        occupancy, 
                        B_iso_or_equiv, 
                        pdbx_formal_charge, 
                        auth_seq_id, 
                        auth_comp_id, 
                        auth_asym_id, 
                        auth_atom_id, 
                        pdbx_PDB_model_num):

        """Add mmcif _atom_site """

        if self.mmcif["_atom_site"] :
            self.mmcif["_atom_site"]["group_PDB"].append(group_PDB)
            self.mmcif["_atom_site"]["id"].append(id)
            self.mmcif["_atom_site"]["type_symbol"].append(type_symbol)
            self.mmcif["_atom_site"]["label_atom_id"].append(label_atom_id)
            self.mmcif["_atom_site"]["label_alt_id"].append(label_alt_id)
            self.mmcif["_atom_site"]["label_comp_id"].append(label_comp_id)
            self.mmcif["_atom_site"]["label_asym_id"].append(label_asym_id)
            self.mmcif["_atom_site"]["label_entity_id"].append(label_entity_id)
            self.mmcif["_atom_site"]["label_seq_id"].append(label_seq_id)
            self.mmcif["_atom_site"]["pdbx_PDB_ins_code"].append(pdbx_PDB_ins_code)
            self.mmcif["_atom_site"]["Cartn_x"].append(Cartn_x)
            self.mmcif["_atom_site"]["Cartn_y"].append(Cartn_y)
            self.mmcif["_atom_site"]["Cartn_z"].append(Cartn_z)
            self.mmcif["_atom_site"]["occupancy"].append(occupancy)
            self.mmcif["_atom_site"]["B_iso_or_equiv"].append(B_iso_or_equiv)
            self.mmcif["_atom_site"]["pdbx_formal_charge"].append(pdbx_formal_charge)
            self.mmcif["_atom_site"]["auth_seq_id"].append(auth_seq_id)
            self.mmcif["_atom_site"]["auth_comp_id"].append(auth_comp_id)
            self.mmcif["_atom_site"]["auth_asym_id"].append(auth_asym_id)
            self.mmcif["_atom_site"]["auth_atom_id"].append(auth_atom_id)
            self.mmcif["_atom_site"]["pdbx_PDB_model_num"].append(pdbx_PDB_model_num)
        else:
            self.mmcif["_atom_site"] = {
                "group_PDB" : [ group_PDB ],
                "id" : [ id ],
                "type_symbol" : [ type_symbol ],
                "label_atom_id" : [ label_atom_id ],
                "label_alt_id" : [ label_alt_id ],
                "label_comp_id" : [ label_comp_id ],
                "label_asym_id" : [ label_asym_id ],
                "label_entity_id" : [ label_entity_id ],
                "label_seq_id" : [ label_seq_id ],
                "pdbx_PDB_ins_code" : [ pdbx_PDB_ins_code ],
                "Cartn_x" : [ Cartn_x ],
                "Cartn_y" : [ Cartn_y ],
                "Cartn_z" : [ Cartn_z ],
                "occupancy" : [ occupancy ],
                "B_iso_or_equiv" : [ B_iso_or_equiv ],
                "pdbx_formal_charge" : [ pdbx_formal_charge ],
                "auth_seq_id" : [ auth_seq_id ],
                "auth_comp_id" : [ auth_comp_id ],
                "auth_asym_id" : [ auth_asym_id ],
                "auth_atom_id" : [ auth_atom_id ],
                "pdbx_PDB_model_num" : [ pdbx_PDB_model_num ],
            }


    def append_chem_comp_bond(self, chem_comp_id, i, j, k):
        """ add chem_comp_bond """
        p = self.mmcif["_atom_site"]["label_atom_id"][self.mmcif["_atom_site"]["id"].index(i)]
        q = self.mmcif["_atom_site"]["label_atom_id"][self.mmcif["_atom_site"]["id"].index(j)]
        if self.mmcif["_chem_comp_bond"]:
            self.mmcif["_chem_comp_bond"]["comp_id"].append(chem_comp_id)
            self.mmcif["_chem_comp_bond"]["atom_id_1"].append(p)
            self.mmcif["_chem_comp_bond"]["atom_id_2"].append(q)
            self.mmcif["_chem_comp_bond"]["value_order"].append(k)
        else:
            self.mmcif["_chem_comp_bond"] = {
                "comp_id" : [ chem_comp_id ],
                "atom_id_1" : [ p ],
                "atom_id_2" : [ q ],
                "value_order" : [ k ],
            }


    def new_chem_comp(self):
        """ start a new chem_comp entry """
        if not type(self.mmcif["_entry"]["id"]) == list or len(self.mmcif["_entry"]["id"]) <= 2:
            self.chem_comp_id = "LIG"
        else:
            self.chem_comp_id = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(3))
        self.chem_comp_chainId = [ c for c in string.ascii_uppercase if not (c in self.usedId)][0]
        self.chem_comp_resSeq = 100
        self.usedId[self.chem_comp_chainId] = []
        return self.chem_comp_chainId, self.chem_comp_resSeq, self.chem_comp_id


    def new_chem_comp_atom(self, obj, element):
        """ create and return unique atom name """
        if "s_m_pdb_atom_name" in obj:
            atom_name = obj["s_m_pdb_atom_name"].strip()
        else:
            atom_name = "{}{}".format(element,1)
        if not (atom_name in self.usedId[self.chem_comp_chainId]):
            self.usedId[self.chem_comp_chainId].append(atom_name)
            return atom_name
        else:
            for i in range(1,10000):
                new_name = "{}{}".format(element,i)
                if not (new_name in self.usedId[self.chem_comp_chainId]):
                    self.usedId[self.chem_comp_chainId].append(new_name)
                    return new_name


    def to_mmcif(self, **kwargs):
        token = None
        entity_id = 0
        for line in self.lines:
            line = line.strip()

            if not line or line.startswith("#"): 
                continue

            # f_m_ct block
            if line.startswith("f_m_ct {"):
                token = 'f_m_ct_key'
                natoms = 0
                nbonds = 0
                entity_id += 1
                k = []
                continue
            if token == 'f_m_ct_key':
                if line.startswith(":::"): # end of f_m_ct key block
                    n = len(k)
                    token = 'f_m_ct_val'
                    v = []
                    continue
                k.append(line)
            if token == 'f_m_ct_val':
                v.append(line)
                if len(v) == n:
                    data = dict(zip(k,v))
                    if "s_lp_Variant" in data:
                        self.title = data['s_lp_Variant'].replace('"','').replace("'","")
                    else:
                        self.title = data['s_m_title'].replace('"','').replace("'","")
                    self.append_entry(self.title)
                    token = None


            # m_atom block
            if line.startswith("m_atom") :
                natoms = int(Maestro.count.findall(line)[0])
                token = 'm_atom_key'
                k = ['i_atom_index'] # 1st column is atom_index
                continue
            if token == 'm_atom_key':
                if line.startswith(":::"):
                    token = 'm_atom_val'
                    nv = 0
                    m_atom = {}
                    m_atom_resName = {}
                    continue
                k.append(line)
            if token == 'm_atom_val':
                v = ['' if x=="<>" else x for x in shlex.split(line)]
                data = dict(zip(k,v))

                # build hierarchical structure of chain/ resSeq/ resName
                chainId = get_value_or_default(data,"s_m_chain_name", "A")
                resSeq = int(get_value_or_default(data, "i_m_residue_number", "1"))
                resName = get_value_or_default(data, "s_m_pdb_residue_name", "?")
                if resName in std_rename:
                    resName = std_rename[resName]
                if not chainId in m_atom:
                    m_atom[chainId] = {}
                if not resSeq in m_atom[chainId]:
                    m_atom[chainId][resSeq] = {}
                if not resName in m_atom[chainId][resSeq]:
                    m_atom[chainId][resSeq][resName] = []
                m_atom[chainId][resSeq][resName].append(data)
                m_atom_resName[int(data["i_atom_index"])] = resName

                nv += 1
                if nv == natoms: # end of m_atom block
                    token = None

            # m_bond block
            if line.startswith("m_bond") :
                nbonds = int(Maestro.count.findall(line)[0])
                token = 'm_bond_key'
                k = ['i_bond_index']
                continue
            if token == 'm_bond_key':
                if line.startswith(":::"): # end of m_bond key block
                    token = 'm_bond_val'
                    nv = 0
                    m_bond = {}
                    continue
                k.append(line)
            if token == 'm_bond_val':
                v = ['' if x=="<>" else x for x in shlex.split(line)]
                data = dict(zip(k,v))

                # build bond connectivity
                p = int(data["i_m_from"]) 
                q = int(data["i_m_to"]) 
                r = int(data["i_m_order"])
                if p in m_bond:
                    m_bond[p][q] = r
                else:
                    m_bond[p] = {q:r}
                if q in m_bond:
                    m_bond[q][p] = r
                else:
                    m_bond[q] = {p:r}

                nv += 1
                if nv == nbonds: # m_bond block ends
                    token = None

                    # create mmcif _atom_site
                    for chainId_ in sorted(m_atom):
                        for resSeq_ in sorted(m_atom[chainId_]):
                            for resName_ in sorted(m_atom[chainId_][resSeq_]):
                                if resName_ in std_residues:
                                    group_PDB = "ATOM"
                                    chainId, resSeq, resName = chainId_, resSeq_, resName_
                                else:
                                    group_PDB = "HETATM"
                                    # check if new_chem_comp is necessary
                                    this_residue = [ int(d["i_atom_index"]) for d in m_atom[chainId_][resSeq_][resName_] ]
                                    need_new_chem_comp = True
                                    for p_ in this_residue:
                                        for q_, bond_order in m_bond[p_].items():
                                            if  (not (q_ in this_residue)) and \
                                                (not (m_atom_resName[q_] in std_residues)) and \
                                                (q_ in self.iamap):
                                                # do not create another chem_comp
                                                need_new_chem_comp = False
                                                q, chainId, resSeq, resName = self.iamap[q_]
                                                break
                                    if need_new_chem_comp:
                                        chainId, resSeq, resName = self.new_chem_comp()

                                # go through all atoms within the same residue
                                for d in m_atom[chainId_][resSeq_][resName_]:
                                    self.serial += 1
                                    self.iamap[int(d["i_atom_index"])] = (self.serial, chainId, resSeq, resName)
                                    element = atomic_symbol[int(d["i_m_atomic_number"])]
                                    if group_PDB == "ATOM":
                                        name = get_value_or_default(d,"s_m_pdb_atom_name", "?")
                                    else:
                                        name = self.new_chem_comp_atom(d, element) 
                                    x = float(d["r_m_x_coord"])
                                    y = float(d["r_m_y_coord"])
                                    z = float(d["r_m_z_coord"])
                                    occupancy = float(get_value_or_default(d,"r_m_pdb_occupancy","1"))
                                    bfactor = float(get_value_or_default(d,"r_m_pdb_tfactor", "0"))
                                    formal_charge = int(get_value_or_default(d,"i_m_formal_charge", "0"))
                                    self.append_atom_site(
                                        group_PDB = group_PDB, 
                                        id = self.serial,
                                        type_symbol = element,
                                        label_atom_id = name,
                                        label_alt_id = ".",
                                        label_comp_id = resName,
                                        label_asym_id = chainId,
                                        label_entity_id = entity_id,
                                        label_seq_id = resSeq,
                                        pdbx_PDB_ins_code = "?",
                                        Cartn_x = x,
                                        Cartn_y = y,
                                        Cartn_z = z,
                                        occupancy = occupancy,
                                        B_iso_or_equiv = bfactor,
                                        pdbx_formal_charge = formal_charge,
                                        auth_seq_id = resSeq,
                                        auth_comp_id = resName,
                                        auth_asym_id = chainId,
                                        auth_atom_id = name,
                                        pdbx_PDB_model_num = 1,
                                    )

                    # create mmcif _chem_comp_bond
                    for chainId_ in sorted(m_atom):
                        for resSeq_ in sorted(m_atom[chainId_]):
                            for resName_ in sorted(m_atom[chainId_][resSeq_]):
                                if not (resName_ in std_residues): # HETATM
                                    for d in m_atom[chainId_][resSeq_][resName_]:
                                        p_ = int(d["i_atom_index"])
                                        p, p_chainId, p_resSeq, p_resName = self.iamap[p_]
                                        if p_ in m_bond:
                                            for q_, bond_order in m_bond[p_].items():
                                                q, q_chainId, q_resSeq, q_resName = self.iamap[q_]
                                                self.append_chem_comp_bond(p_resName, p, q, bond_order)

        cifo = CifFileWriter("{}.cif".format(self.filename_stem))

        # clean up undefined dictionary
        # force to copy a list to avoid 
        # RuntimeError: dictionary changed size during iteration
        for k in list(self.mmcif): 
            if not self.mmcif[k]:
                del self.mmcif[k]

        cifo.write({ "desmondtools" : self.mmcif })


    def export2(self, **kwargs) -> None:
        """Export to PDB/mmCIF"""
        pdb_format = kwargs.get("pdb", False)
        cif_format = kwargs.get("cif", False)
        skip_first = kwargs.get("skip_first", False)
        only_first = kwargs.get("only_first", False)
        as_complex = kwargs.get("as_complex", False)
        separately = kwargs.get("separately", False)
        names = kwargs.get("names", [])

        if names:
            # if `--names` is used, they are separately saved.
            separately = True

        if as_complex:
            # if `--as-complex` is used, `skip_first` and `only_first` is disabled.
            skip_first = False
            only_first = False

        self.parse()

        exported = []
        for i, data in enumerate(self.entries, start=1):
            if ((i == 1) and (not skip_first)) or ((i > 1) and (not only_first)):
                entry = dict_to_simplenamespace(data)
                if (not names) or (names and entry.f_m_ct.s_m_title in names):
                    exported.append(SimpleNamespace(number=i, title=entry.f_m_ct.s_m_title, PDB=to_pdb_str(entry)))
        if as_complex:
            for x in exported:
                title = safe_filename(x.title)
                with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
                    f.write(exported[0].PDB)
                    f.write(x.PDB)
                    f.write("END\n")
        elif separately:
            for x in exported:
                title = safe_filename(x.title)
                with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
                    f.write(x.PDB)
                    f.write("END\n")
        else:
            with open(f"{self.filename_stem}.pdb", "w") as f:
                for x in exported:
                    f.write(PDB_MODEL_FORMAT(x.number) + "\n")
                    f.write(x.PDB)
                    f.write("ENDMDL\n")
                f.write("END\n")



    def export(self, **kwargs) -> None:
        """Export to PDB/mmCIF"""
        pdb_format = kwargs.get("pdb", False)
        cif_format = kwargs.get("cif", False)
        skip_first = kwargs.get("skip_first", False)
        only_first = kwargs.get("only_first", False)
        as_complex = kwargs.get("as_complex", False)
        separately = kwargs.get("separately", False)
        names = kwargs.get("names", [])

        if names:
            # if `--names` is used, they are separately saved.
            separately = True

        if as_complex:
            # if `--as-complex` is used, `skip_first` and `only_first` is disabled.
            skip_first = False
            only_first = False

        entries = []
        entry_number = 0
        q = None
        count = re.compile(r'\[(\d+)\]')
        self.read_lines()

        for line in self.lines:
            line = line.strip()

            if not line or line.startswith("#"): 
                continue

            if line.startswith("f_m_ct {"): # new entry
                q = 'f_m_ct_key'
                k = []

                if entry_number > 0 and entry.PDB:
                    if (not names) or (names and entry.title in names):
                        entries.append(entry)

                entry_number += 1
                entry = SimpleNamespace(number=entry_number, title="", PDB="", natoms=0, nbonds=0, data=None)
                continue

            if q == 'f_m_ct_key' and line.startswith(":::"):
                n = len(k)
                q = 'f_m_ct_val'
                vm = []
                va = []
                vb = []
                continue

            if q == 'f_m_ct_key': 
                k.append(line)

            if q == 'f_m_ct_val':
                vm.append(line)
                if len(vm) == n:
                    data = dict(zip(k, vm))
                    q = None
                    entry.title = data['s_m_title'].replace('"','').replace("'","")
                    entry.data = data

            if line.startswith("m_atom") :
                entry.natoms = int(count.findall(line)[0])

                q = 'm_atom_key'
                k = ['atom_index']
                continue

            if q == 'm_atom_key' and line.startswith(":::"):
                q = 'm_atom_val'
                nv = 0
                continue

            if q == 'm_atom_key': 
                k.append(line)

            if q == 'm_atom_val':
                c = ['' if x=="<>" else x for x in shlex.split(line)]
                data = dict(zip(k, c))
                nv += 1
                va.append(data)
                if nv == entry.natoms:
                    q = None
                    if ((entry_number == 1) and (not skip_first)) or ((entry_number > 1) and (not only_first)):
                        entry.PDB = PDB_ATOM_FORMAT(va)

            if line.startswith("m_bond") :
                entry.nbonds = int(count.findall(line)[0])
                q = 'm_bond_key'
                k = ['bond_index']
                continue

            if q == 'm_bond_key' and line.startswith(":::"):
                q = 'm_bond_val'
                nv = 0
                continue

            if q == 'm_bond_key': 
                k.append(line)
            if q == 'm_bond_val':
                c = ['' if x=="<>" else x for x in shlex.split(line)]
                data = dict(zip(k,c))
                # conect information only for ligands
                if not 'i_glide_grid_version' in entry.data :
                    vb.append(data)
                nv += 1
                if nv == entry.nbonds:
                    q = None
                    if vb:
                        if ((entry_number == 1) and (not skip_first)) or ((entry_number > 1) and (not only_first)):
                            entry.PDB += PDB_CONECT_FORMAT(vb)

        if cif_format:
            for entry in entries:
                pdb_strings = entry.PDB
                with StringIO(pdb_strings) as pdb:
                    parser = PDBParser()
                    structure = parser.get_structure(entry.title, pdb)
                    mmcif_io = MMCIFIO()
                    mmcif_io.set_structure(structure)
                    mmcif_io.save(f"{self.filename_stem}.cif")

        elif pdb_format:

            if as_complex:
                for entry in entries:
                    title = safe_filename(entry.title)
                    with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
                        f.write(PDB_MODEL_FORMAT(entries[0].number) + "\n")
                        f.write(entries[0].PDB)
                        f.write("ENDMDL\n")
                        f.write(entry.PDB)
                        f.write("ENDMDL\n")
                        f.write("END\n")

            elif separately:
                for entry in entries:
                    title = safe_filename(entry.title)
                    with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
                        f.write(entry.PDB)
                        f.write("END\n")

            else:
                with open(f"{self.filename_stem}.pdb", "w") as f:
                    for entry in entries:
                        f.write(PDB_MODEL_FORMAT(entry.number) + "\n")
                        f.write(entry.PDB)
                        f.write("ENDMDL\n")
                    f.write("END\n")

Attributes

entries = None instance-attribute

filename = filename instance-attribute

filename_prefix = None instance-attribute

filename_stem = None instance-attribute

iamap = {} instance-attribute

lines = None instance-attribute

max_two_entries = max_two_entries instance-attribute

mmcif = {'_entry': None, '_chem_comp_bond': None, '_atom_site': None} instance-attribute

serial = 0 instance-attribute

title = '' instance-attribute

usedId = {} instance-attribute

Functions

__init__(filename, max_two_entries=True)

initialize

Source code in src/desmondtools/maestro.py
def __init__(self, filename:str, max_two_entries:bool=True) -> None:
    """ initialize """
    if not Path(filename).exists:
        print(f"Error file not found: {filename}")
        sys.exit(1)

    self.filename = filename     # ex: ./dir/a.maegz  ./dir/a.mae.gz
    self.filename_prefix = None  # ex: ./dir/a        ./dir/a
    self.filename_stem = None    # ex: a              a
    self.lines = None
    self.entries = None


    self.max_two_entries = max_two_entries
    self.title = ""
    self.serial = 0
    self.usedId = {}
    self.mmcif = {"_entry": None, "_chem_comp_bond": None, "_atom_site": None}
    self.iamap = {}

append_atom_site(group_PDB, id, type_symbol, label_atom_id, label_alt_id, label_comp_id, label_asym_id, label_entity_id, label_seq_id, pdbx_PDB_ins_code, Cartn_x, Cartn_y, Cartn_z, occupancy, B_iso_or_equiv, pdbx_formal_charge, auth_seq_id, auth_comp_id, auth_asym_id, auth_atom_id, pdbx_PDB_model_num)

Add mmcif _atom_site

Source code in src/desmondtools/maestro.py
def append_atom_site(self, 
                    group_PDB, id, 
                    type_symbol, 
                    label_atom_id, 
                    label_alt_id, 
                    label_comp_id,
                    label_asym_id, 
                    label_entity_id, 
                    label_seq_id, 
                    pdbx_PDB_ins_code, 
                    Cartn_x, 
                    Cartn_y, 
                    Cartn_z,
                    occupancy, 
                    B_iso_or_equiv, 
                    pdbx_formal_charge, 
                    auth_seq_id, 
                    auth_comp_id, 
                    auth_asym_id, 
                    auth_atom_id, 
                    pdbx_PDB_model_num):

    """Add mmcif _atom_site """

    if self.mmcif["_atom_site"] :
        self.mmcif["_atom_site"]["group_PDB"].append(group_PDB)
        self.mmcif["_atom_site"]["id"].append(id)
        self.mmcif["_atom_site"]["type_symbol"].append(type_symbol)
        self.mmcif["_atom_site"]["label_atom_id"].append(label_atom_id)
        self.mmcif["_atom_site"]["label_alt_id"].append(label_alt_id)
        self.mmcif["_atom_site"]["label_comp_id"].append(label_comp_id)
        self.mmcif["_atom_site"]["label_asym_id"].append(label_asym_id)
        self.mmcif["_atom_site"]["label_entity_id"].append(label_entity_id)
        self.mmcif["_atom_site"]["label_seq_id"].append(label_seq_id)
        self.mmcif["_atom_site"]["pdbx_PDB_ins_code"].append(pdbx_PDB_ins_code)
        self.mmcif["_atom_site"]["Cartn_x"].append(Cartn_x)
        self.mmcif["_atom_site"]["Cartn_y"].append(Cartn_y)
        self.mmcif["_atom_site"]["Cartn_z"].append(Cartn_z)
        self.mmcif["_atom_site"]["occupancy"].append(occupancy)
        self.mmcif["_atom_site"]["B_iso_or_equiv"].append(B_iso_or_equiv)
        self.mmcif["_atom_site"]["pdbx_formal_charge"].append(pdbx_formal_charge)
        self.mmcif["_atom_site"]["auth_seq_id"].append(auth_seq_id)
        self.mmcif["_atom_site"]["auth_comp_id"].append(auth_comp_id)
        self.mmcif["_atom_site"]["auth_asym_id"].append(auth_asym_id)
        self.mmcif["_atom_site"]["auth_atom_id"].append(auth_atom_id)
        self.mmcif["_atom_site"]["pdbx_PDB_model_num"].append(pdbx_PDB_model_num)
    else:
        self.mmcif["_atom_site"] = {
            "group_PDB" : [ group_PDB ],
            "id" : [ id ],
            "type_symbol" : [ type_symbol ],
            "label_atom_id" : [ label_atom_id ],
            "label_alt_id" : [ label_alt_id ],
            "label_comp_id" : [ label_comp_id ],
            "label_asym_id" : [ label_asym_id ],
            "label_entity_id" : [ label_entity_id ],
            "label_seq_id" : [ label_seq_id ],
            "pdbx_PDB_ins_code" : [ pdbx_PDB_ins_code ],
            "Cartn_x" : [ Cartn_x ],
            "Cartn_y" : [ Cartn_y ],
            "Cartn_z" : [ Cartn_z ],
            "occupancy" : [ occupancy ],
            "B_iso_or_equiv" : [ B_iso_or_equiv ],
            "pdbx_formal_charge" : [ pdbx_formal_charge ],
            "auth_seq_id" : [ auth_seq_id ],
            "auth_comp_id" : [ auth_comp_id ],
            "auth_asym_id" : [ auth_asym_id ],
            "auth_atom_id" : [ auth_atom_id ],
            "pdbx_PDB_model_num" : [ pdbx_PDB_model_num ],
        }

append_chem_comp_bond(chem_comp_id, i, j, k)

add chem_comp_bond

Source code in src/desmondtools/maestro.py
def append_chem_comp_bond(self, chem_comp_id, i, j, k):
    """ add chem_comp_bond """
    p = self.mmcif["_atom_site"]["label_atom_id"][self.mmcif["_atom_site"]["id"].index(i)]
    q = self.mmcif["_atom_site"]["label_atom_id"][self.mmcif["_atom_site"]["id"].index(j)]
    if self.mmcif["_chem_comp_bond"]:
        self.mmcif["_chem_comp_bond"]["comp_id"].append(chem_comp_id)
        self.mmcif["_chem_comp_bond"]["atom_id_1"].append(p)
        self.mmcif["_chem_comp_bond"]["atom_id_2"].append(q)
        self.mmcif["_chem_comp_bond"]["value_order"].append(k)
    else:
        self.mmcif["_chem_comp_bond"] = {
            "comp_id" : [ chem_comp_id ],
            "atom_id_1" : [ p ],
            "atom_id_2" : [ q ],
            "value_order" : [ k ],
        }

append_entry(title)

Start a new entry.

Source code in src/desmondtools/maestro.py
def append_entry(self, title):
    """Start a new entry."""
    if self.mmcif["_entry"]:
        if isinstance(self.mmcif["_entry"]["id"], list):
            # you got here as the third and later entry
            if self.max_two_entries:
                # write out last two entries
                self.write_mmcif()
                # return to the state in which only the first entry was defined
                self.mmcif = copy.deepcopy(self.mmcif_first)
                self.usedId = copy.deepcopy(self.mmcif_first_usedId)
                self.serial = self.mmcif_first_serial
                # regardless of its appearance, current entry now is regarded as the second entry
                self.mmcif["_entry"]["id"] = [ self.mmcif["_entry"]["id"][0], title ]
            else:
                # otherwise continue to add entry
                self.mmcif["_entry"]["id"].append(title)
        else:
            # you got here as the second entry
            # save the first entry before adding the second entry
            self.mmcif_first = copy.deepcopy(self.mmcif)
            self.mmcif_first_usedId = copy.deepcopy(self.usedId)
            self.mmcif_first_serial = self.serial
            # change to a list type
            self.mmcif["_entry"]["id"] = [ self.mmcif["_entry"]["id"], title ]
    else:
        # regular string type
        self.mmcif["_entry"] = { "id" : title }

export(**kwargs)

Export to PDB/mmCIF

Source code in src/desmondtools/maestro.py
def export(self, **kwargs) -> None:
    """Export to PDB/mmCIF"""
    pdb_format = kwargs.get("pdb", False)
    cif_format = kwargs.get("cif", False)
    skip_first = kwargs.get("skip_first", False)
    only_first = kwargs.get("only_first", False)
    as_complex = kwargs.get("as_complex", False)
    separately = kwargs.get("separately", False)
    names = kwargs.get("names", [])

    if names:
        # if `--names` is used, they are separately saved.
        separately = True

    if as_complex:
        # if `--as-complex` is used, `skip_first` and `only_first` is disabled.
        skip_first = False
        only_first = False

    entries = []
    entry_number = 0
    q = None
    count = re.compile(r'\[(\d+)\]')
    self.read_lines()

    for line in self.lines:
        line = line.strip()

        if not line or line.startswith("#"): 
            continue

        if line.startswith("f_m_ct {"): # new entry
            q = 'f_m_ct_key'
            k = []

            if entry_number > 0 and entry.PDB:
                if (not names) or (names and entry.title in names):
                    entries.append(entry)

            entry_number += 1
            entry = SimpleNamespace(number=entry_number, title="", PDB="", natoms=0, nbonds=0, data=None)
            continue

        if q == 'f_m_ct_key' and line.startswith(":::"):
            n = len(k)
            q = 'f_m_ct_val'
            vm = []
            va = []
            vb = []
            continue

        if q == 'f_m_ct_key': 
            k.append(line)

        if q == 'f_m_ct_val':
            vm.append(line)
            if len(vm) == n:
                data = dict(zip(k, vm))
                q = None
                entry.title = data['s_m_title'].replace('"','').replace("'","")
                entry.data = data

        if line.startswith("m_atom") :
            entry.natoms = int(count.findall(line)[0])

            q = 'm_atom_key'
            k = ['atom_index']
            continue

        if q == 'm_atom_key' and line.startswith(":::"):
            q = 'm_atom_val'
            nv = 0
            continue

        if q == 'm_atom_key': 
            k.append(line)

        if q == 'm_atom_val':
            c = ['' if x=="<>" else x for x in shlex.split(line)]
            data = dict(zip(k, c))
            nv += 1
            va.append(data)
            if nv == entry.natoms:
                q = None
                if ((entry_number == 1) and (not skip_first)) or ((entry_number > 1) and (not only_first)):
                    entry.PDB = PDB_ATOM_FORMAT(va)

        if line.startswith("m_bond") :
            entry.nbonds = int(count.findall(line)[0])
            q = 'm_bond_key'
            k = ['bond_index']
            continue

        if q == 'm_bond_key' and line.startswith(":::"):
            q = 'm_bond_val'
            nv = 0
            continue

        if q == 'm_bond_key': 
            k.append(line)
        if q == 'm_bond_val':
            c = ['' if x=="<>" else x for x in shlex.split(line)]
            data = dict(zip(k,c))
            # conect information only for ligands
            if not 'i_glide_grid_version' in entry.data :
                vb.append(data)
            nv += 1
            if nv == entry.nbonds:
                q = None
                if vb:
                    if ((entry_number == 1) and (not skip_first)) or ((entry_number > 1) and (not only_first)):
                        entry.PDB += PDB_CONECT_FORMAT(vb)

    if cif_format:
        for entry in entries:
            pdb_strings = entry.PDB
            with StringIO(pdb_strings) as pdb:
                parser = PDBParser()
                structure = parser.get_structure(entry.title, pdb)
                mmcif_io = MMCIFIO()
                mmcif_io.set_structure(structure)
                mmcif_io.save(f"{self.filename_stem}.cif")

    elif pdb_format:

        if as_complex:
            for entry in entries:
                title = safe_filename(entry.title)
                with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
                    f.write(PDB_MODEL_FORMAT(entries[0].number) + "\n")
                    f.write(entries[0].PDB)
                    f.write("ENDMDL\n")
                    f.write(entry.PDB)
                    f.write("ENDMDL\n")
                    f.write("END\n")

        elif separately:
            for entry in entries:
                title = safe_filename(entry.title)
                with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
                    f.write(entry.PDB)
                    f.write("END\n")

        else:
            with open(f"{self.filename_stem}.pdb", "w") as f:
                for entry in entries:
                    f.write(PDB_MODEL_FORMAT(entry.number) + "\n")
                    f.write(entry.PDB)
                    f.write("ENDMDL\n")
                f.write("END\n")

export2(**kwargs)

Export to PDB/mmCIF

Source code in src/desmondtools/maestro.py
def export2(self, **kwargs) -> None:
    """Export to PDB/mmCIF"""
    pdb_format = kwargs.get("pdb", False)
    cif_format = kwargs.get("cif", False)
    skip_first = kwargs.get("skip_first", False)
    only_first = kwargs.get("only_first", False)
    as_complex = kwargs.get("as_complex", False)
    separately = kwargs.get("separately", False)
    names = kwargs.get("names", [])

    if names:
        # if `--names` is used, they are separately saved.
        separately = True

    if as_complex:
        # if `--as-complex` is used, `skip_first` and `only_first` is disabled.
        skip_first = False
        only_first = False

    self.parse()

    exported = []
    for i, data in enumerate(self.entries, start=1):
        if ((i == 1) and (not skip_first)) or ((i > 1) and (not only_first)):
            entry = dict_to_simplenamespace(data)
            if (not names) or (names and entry.f_m_ct.s_m_title in names):
                exported.append(SimpleNamespace(number=i, title=entry.f_m_ct.s_m_title, PDB=to_pdb_str(entry)))
    if as_complex:
        for x in exported:
            title = safe_filename(x.title)
            with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
                f.write(exported[0].PDB)
                f.write(x.PDB)
                f.write("END\n")
    elif separately:
        for x in exported:
            title = safe_filename(x.title)
            with open(f"{self.filename_stem}_{title}.pdb", "w") as f:
                f.write(x.PDB)
                f.write("END\n")
    else:
        with open(f"{self.filename_stem}.pdb", "w") as f:
            for x in exported:
                f.write(PDB_MODEL_FORMAT(x.number) + "\n")
                f.write(x.PDB)
                f.write("ENDMDL\n")
            f.write("END\n")

getFromDict(dataDict, mapList) staticmethod

Source code in src/desmondtools/maestro.py
@staticmethod
def getFromDict(dataDict, mapList):
    return reduce(operator.getitem, mapList, dataDict)

new_chem_comp()

start a new chem_comp entry

Source code in src/desmondtools/maestro.py
def new_chem_comp(self):
    """ start a new chem_comp entry """
    if not type(self.mmcif["_entry"]["id"]) == list or len(self.mmcif["_entry"]["id"]) <= 2:
        self.chem_comp_id = "LIG"
    else:
        self.chem_comp_id = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(3))
    self.chem_comp_chainId = [ c for c in string.ascii_uppercase if not (c in self.usedId)][0]
    self.chem_comp_resSeq = 100
    self.usedId[self.chem_comp_chainId] = []
    return self.chem_comp_chainId, self.chem_comp_resSeq, self.chem_comp_id

new_chem_comp_atom(obj, element)

create and return unique atom name

Source code in src/desmondtools/maestro.py
def new_chem_comp_atom(self, obj, element):
    """ create and return unique atom name """
    if "s_m_pdb_atom_name" in obj:
        atom_name = obj["s_m_pdb_atom_name"].strip()
    else:
        atom_name = "{}{}".format(element,1)
    if not (atom_name in self.usedId[self.chem_comp_chainId]):
        self.usedId[self.chem_comp_chainId].append(atom_name)
        return atom_name
    else:
        for i in range(1,10000):
            new_name = "{}{}".format(element,i)
            if not (new_name in self.usedId[self.chem_comp_chainId]):
                self.usedId[self.chem_comp_chainId].append(new_name)
                return new_name

parse()

Parse a Maestro file

set self.lines, self.filename_prefix, self.filename_stem

Source code in src/desmondtools/maestro.py
def parse(self) -> None:
    """Parse a Maestro file

        set self.lines, self.filename_prefix, self.filename_stem
    """
    if self.filename.endswith('.maegz'):
        self.filename_prefix = self.filename.replace(".maegz", "")
        with gzip.open(self.filename, 'rt') as f:
            self.parse_impl(f)

    elif self.filename.endswith('.mae.gz'):
        self.filename_prefix = self.filename.replace(".mae.gz", "")
        with gzip.open(self.filename, "rt") as f:
            self.parse_impl(f)

    elif self.filename.endswith('.mae'):
        self.filename_prefix = self.filename.replace(".mae", "")
        with open(self.filename, "rt") as f:
            self.parse_impl(f)
    else:
        print("Error : .mae, .mae.gz, or .maegz file is expected")
        sys.exit(1)
    self.filename_stem = Path(self.filename_prefix).stem

parse_impl(f)

Parse Maestro file to a list of dictionaries

Source code in src/desmondtools/maestro.py
def parse_impl(self, f:TextIO) -> None:
    """Parse Maestro file to a list of dictionaries"""
    self.entries = []
    count = re.compile(r'(\w+)\[(\d+)\]')
    tokens = shlex.split(f.read())
    level = []
    data = {}
    previous_token = None
    header = False
    extra_column = 0
    num_repeat = 1
    skip = False
    for token in tokens :
        if token == "#" :
            skip = not skip # invert
            continue
        elif skip:
            continue
        elif token == "{" :
            header = True
            key = []
            val = []
            arr = []
            if previous_token:
                if previous_token == "f_m_ct" and data:
                    self.entries.append(data)
                    data = {}
                try:
                    (block, num_repeat) = count.findall(previous_token)[0]
                    num_repeat = int(num_repeat)
                    extra_column = 1
                except:
                    block = previous_token
                    num_repeat = 1
                    extra_column = 0
                level.append(block)

        elif token == "}":
            if level: 
                level.pop()
        elif token == ":::":
            header = False
        elif header:
            key.append(token)
        else:
            val.append(token)
            # only store f_m_ct blocks (level != [])
            if len(val) == (len(key)+extra_column) and level :
                arr.append(val[extra_column:])
                val = []
                if len(arr) == num_repeat:
                    if len(arr) == 1:
                        Maestro.setInDict(data, level, dict(zip(key, arr[0])))
                    else:
                        T = list(zip(*arr)) # transpose
                        Maestro.setInDict(data, level, dict(zip(key, T)))
        previous_token = token

    if data:
        self.entries.append(data)

read_lines()

Read a Maestro file

set self.lines, self.filename_prefix, self.filename_stem

Source code in src/desmondtools/maestro.py
def read_lines(self) -> None:
    """Read a Maestro file

        set self.lines, self.filename_prefix, self.filename_stem
    """
    if self.filename.endswith('.maegz'):
        self.filename_prefix = self.filename.replace(".maegz", "")
        with gzip.open(self.filename, 'rt') as f:
            self.lines = f.readlines()

    elif self.filename.endswith('.mae.gz'):
        self.filename_prefix = self.filename.replace(".mae.gz", "")
        with gzip.open(self.filename, "rt") as f:
            self.lines = f.readlines()

    elif self.filename.endswith('.mae'):
        self.filename_prefix = self.filename.replace(".mae", "")
        with open(self.filename, "rt") as f:
            self.lines = f.readlines()
    else:
        print("Error : .mae, .mae.gz, or .maegz file is expected")
        sys.exit(1)
    self.filename_stem = Path(self.filename_prefix).stem 

setInDict(dataDict, mapList, value) staticmethod

Source code in src/desmondtools/maestro.py
@staticmethod
def setInDict(dataDict, mapList, value):
    if mapList:
        Maestro.getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
    else:
        for k,v in value.items():
            dataDict[k] = v

to_mmcif(**kwargs)

Source code in src/desmondtools/maestro.py
def to_mmcif(self, **kwargs):
    token = None
    entity_id = 0
    for line in self.lines:
        line = line.strip()

        if not line or line.startswith("#"): 
            continue

        # f_m_ct block
        if line.startswith("f_m_ct {"):
            token = 'f_m_ct_key'
            natoms = 0
            nbonds = 0
            entity_id += 1
            k = []
            continue
        if token == 'f_m_ct_key':
            if line.startswith(":::"): # end of f_m_ct key block
                n = len(k)
                token = 'f_m_ct_val'
                v = []
                continue
            k.append(line)
        if token == 'f_m_ct_val':
            v.append(line)
            if len(v) == n:
                data = dict(zip(k,v))
                if "s_lp_Variant" in data:
                    self.title = data['s_lp_Variant'].replace('"','').replace("'","")
                else:
                    self.title = data['s_m_title'].replace('"','').replace("'","")
                self.append_entry(self.title)
                token = None


        # m_atom block
        if line.startswith("m_atom") :
            natoms = int(Maestro.count.findall(line)[0])
            token = 'm_atom_key'
            k = ['i_atom_index'] # 1st column is atom_index
            continue
        if token == 'm_atom_key':
            if line.startswith(":::"):
                token = 'm_atom_val'
                nv = 0
                m_atom = {}
                m_atom_resName = {}
                continue
            k.append(line)
        if token == 'm_atom_val':
            v = ['' if x=="<>" else x for x in shlex.split(line)]
            data = dict(zip(k,v))

            # build hierarchical structure of chain/ resSeq/ resName
            chainId = get_value_or_default(data,"s_m_chain_name", "A")
            resSeq = int(get_value_or_default(data, "i_m_residue_number", "1"))
            resName = get_value_or_default(data, "s_m_pdb_residue_name", "?")
            if resName in std_rename:
                resName = std_rename[resName]
            if not chainId in m_atom:
                m_atom[chainId] = {}
            if not resSeq in m_atom[chainId]:
                m_atom[chainId][resSeq] = {}
            if not resName in m_atom[chainId][resSeq]:
                m_atom[chainId][resSeq][resName] = []
            m_atom[chainId][resSeq][resName].append(data)
            m_atom_resName[int(data["i_atom_index"])] = resName

            nv += 1
            if nv == natoms: # end of m_atom block
                token = None

        # m_bond block
        if line.startswith("m_bond") :
            nbonds = int(Maestro.count.findall(line)[0])
            token = 'm_bond_key'
            k = ['i_bond_index']
            continue
        if token == 'm_bond_key':
            if line.startswith(":::"): # end of m_bond key block
                token = 'm_bond_val'
                nv = 0
                m_bond = {}
                continue
            k.append(line)
        if token == 'm_bond_val':
            v = ['' if x=="<>" else x for x in shlex.split(line)]
            data = dict(zip(k,v))

            # build bond connectivity
            p = int(data["i_m_from"]) 
            q = int(data["i_m_to"]) 
            r = int(data["i_m_order"])
            if p in m_bond:
                m_bond[p][q] = r
            else:
                m_bond[p] = {q:r}
            if q in m_bond:
                m_bond[q][p] = r
            else:
                m_bond[q] = {p:r}

            nv += 1
            if nv == nbonds: # m_bond block ends
                token = None

                # create mmcif _atom_site
                for chainId_ in sorted(m_atom):
                    for resSeq_ in sorted(m_atom[chainId_]):
                        for resName_ in sorted(m_atom[chainId_][resSeq_]):
                            if resName_ in std_residues:
                                group_PDB = "ATOM"
                                chainId, resSeq, resName = chainId_, resSeq_, resName_
                            else:
                                group_PDB = "HETATM"
                                # check if new_chem_comp is necessary
                                this_residue = [ int(d["i_atom_index"]) for d in m_atom[chainId_][resSeq_][resName_] ]
                                need_new_chem_comp = True
                                for p_ in this_residue:
                                    for q_, bond_order in m_bond[p_].items():
                                        if  (not (q_ in this_residue)) and \
                                            (not (m_atom_resName[q_] in std_residues)) and \
                                            (q_ in self.iamap):
                                            # do not create another chem_comp
                                            need_new_chem_comp = False
                                            q, chainId, resSeq, resName = self.iamap[q_]
                                            break
                                if need_new_chem_comp:
                                    chainId, resSeq, resName = self.new_chem_comp()

                            # go through all atoms within the same residue
                            for d in m_atom[chainId_][resSeq_][resName_]:
                                self.serial += 1
                                self.iamap[int(d["i_atom_index"])] = (self.serial, chainId, resSeq, resName)
                                element = atomic_symbol[int(d["i_m_atomic_number"])]
                                if group_PDB == "ATOM":
                                    name = get_value_or_default(d,"s_m_pdb_atom_name", "?")
                                else:
                                    name = self.new_chem_comp_atom(d, element) 
                                x = float(d["r_m_x_coord"])
                                y = float(d["r_m_y_coord"])
                                z = float(d["r_m_z_coord"])
                                occupancy = float(get_value_or_default(d,"r_m_pdb_occupancy","1"))
                                bfactor = float(get_value_or_default(d,"r_m_pdb_tfactor", "0"))
                                formal_charge = int(get_value_or_default(d,"i_m_formal_charge", "0"))
                                self.append_atom_site(
                                    group_PDB = group_PDB, 
                                    id = self.serial,
                                    type_symbol = element,
                                    label_atom_id = name,
                                    label_alt_id = ".",
                                    label_comp_id = resName,
                                    label_asym_id = chainId,
                                    label_entity_id = entity_id,
                                    label_seq_id = resSeq,
                                    pdbx_PDB_ins_code = "?",
                                    Cartn_x = x,
                                    Cartn_y = y,
                                    Cartn_z = z,
                                    occupancy = occupancy,
                                    B_iso_or_equiv = bfactor,
                                    pdbx_formal_charge = formal_charge,
                                    auth_seq_id = resSeq,
                                    auth_comp_id = resName,
                                    auth_asym_id = chainId,
                                    auth_atom_id = name,
                                    pdbx_PDB_model_num = 1,
                                )

                # create mmcif _chem_comp_bond
                for chainId_ in sorted(m_atom):
                    for resSeq_ in sorted(m_atom[chainId_]):
                        for resName_ in sorted(m_atom[chainId_][resSeq_]):
                            if not (resName_ in std_residues): # HETATM
                                for d in m_atom[chainId_][resSeq_][resName_]:
                                    p_ = int(d["i_atom_index"])
                                    p, p_chainId, p_resSeq, p_resName = self.iamap[p_]
                                    if p_ in m_bond:
                                        for q_, bond_order in m_bond[p_].items():
                                            q, q_chainId, q_resSeq, q_resName = self.iamap[q_]
                                            self.append_chem_comp_bond(p_resName, p, q, bond_order)

    cifo = CifFileWriter("{}.cif".format(self.filename_stem))

    # clean up undefined dictionary
    # force to copy a list to avoid 
    # RuntimeError: dictionary changed size during iteration
    for k in list(self.mmcif): 
        if not self.mmcif[k]:
            del self.mmcif[k]

    cifo.write({ "desmondtools" : self.mmcif })