Skip to content

Utils

Graph.add_node(node_for_adding, **attr)[source]

Add a single node node_for_adding and update node attributes.

Parameters:

    node_for_adding: node
        A node can be any hashable Python object except None.
    attr : keyword arguments, optional
        Set or change node attributes using key=value.

Examples:

    G = nx.Graph()  # or DiGraph, MultiGraph, MultiDiGraph, etc
    G.add_node(1)
    G.add_node("Hello")
    K3 = nx.Graph([(0, 1), (1, 2), (2, 0)])
    G.add_node(K3)
    G.number_of_nodes()
    >>> 3

    G.add_node(1, size=10)
    G.add_node(3, weight=0.4, UTM=("13S", 382871, 3972649))

Graph.add_edge(u_of_edge, v_of_edge, **attr)[source]

Add an edge between u and v.
The nodes u and v will be automatically added if they are not already in the graph.
Edge attributes can be specified with keywords or by directly accessing the edge's attribute dictionary.

Parameters:

    u_of_edge, v_of_edge: nodes
    Nodes can be, for example, strings or numbers. Nodes must be hashable (and not None) Python objects.

    attr: keyword arguments, optional
    Edge data (or labels or objects) can be assigned using keyword arguments.

Examples:

    G = nx.Graph()  # or DiGraph, MultiGraph, MultiDiGraph, etc
    e = (1, 2)
    G.add_edge(1, 2)  # explicit two-node form
    G.add_edge(*e)  # single edge as tuple of two nodes
    G.add_edges_from([(1, 2)])  # add edges from iterable container

    G.add_edge(1, 2, weight=3)
    G.add_edge(1, 3, weight=7, capacity=15, length=342.7)

    G.add_edge(1, 2)
    G[1][2].update({0: 5})
    G.edges[1, 2].update({0: 5})

Isomorphism

In NetworkX, the node_label parameter in the vf2pp_all_isomorphisms function is the name of a node attribute 
that is used to compare nodes when checking for isomorphism between two graphs. 
The default value for node_label is None, which means that node attributes are not considered in the comparison. 
If a node doesn't have the node_label attribute, the default_label value is used instead.

API

rdworks.utils

Functions

QT(rmsd_matrix, threshold)

Perform QT clustering.

Parameters:

  • rmsd_matrix (ndarray) –

    pairwise rmsd matrix.

  • threshold (float) –

    quality threshold (A)

Returns:

  • tuple ( tuple ) –

    (cluster assignment, centroid indices)

Source code in src/rdworks/utils.py
def QT(rmsd_matrix: np.ndarray, threshold: float) -> tuple:
    """Run QT clustering on a pairwise RMSD matrix.

    Args:
        rmsd_matrix (np.ndarray): pairwise rmsd matrix.
        threshold (float): quality threshold (A)

    Returns:
        tuple: (cluster assignment, centroid indices)
    """
    n_items = rmsd_matrix.shape[0]
    found = []
    # `found` is handed to the helper and read back afterwards, one set of
    # member indices per cluster,
    # ex. found = [{6, 7, 11}, {4, 5, 8}, {0}, {1}, {10}, {9}, {2}, {3}]
    _QT_clustering(rmsd_matrix, set(range(n_items)), threshold, found)

    # Flatten the per-cluster sets into one label per item.
    labels = [None for _ in range(n_items)]
    for label, members in enumerate(found):
        for member in members:
            labels[member] = label

    medoids = centroid_medoid(labels, rmsd_matrix)
    return labels, medoids

compress_string(data)

compress string to base64-encoded string.

Parameters:

  • data (str) –

    original string.

Returns:

  • str ( str ) –

    base64-encoded compressed string.

Source code in src/rdworks/utils.py
def compress_string(data: str) -> str:
    """Compress a string with zlib and return the result as base64 text.

    Args:
        data (str): original string.

    Returns:
        str: base64-encoded compressed string.
    """
    raw = data.encode("utf-8")
    return base64.b64encode(zlib.compress(raw)).decode("utf-8")

compute(fn, largs, **kwargs)

Source code in src/rdworks/utils.py
def _apply_unpacked(fn, args):
    """Call ``fn(*args)``; kept at module level so a process pool can pickle it."""
    return fn(*args)


def compute(fn: Callable, largs: list, **kwargs) -> list:
    """Apply `fn` to every argument tuple in `largs`, optionally in parallel.

    Each element of `largs` is unpacked into positional arguments, i.e. the
    call is ``fn(*larg)``, in both the serial and the parallel code path.

    Args:
        fn (Callable): function to call. Must be picklable when
            `max_workers` > 1.
        largs (list): list of argument tuples, one per call.
        **kwargs: optional settings read by this function:
            max_workers (int): number of worker processes. Defaults to 1
                (run serially in the current process).
            chunksize (int): chunk size handed to `executor.map`. Defaults to 10.
            progress (bool): whether to show a tqdm progress bar. Defaults to False.
            desc (str): progress bar label. Defaults to "Progress".

    Returns:
        list: results in the same order as `largs`.
    """
    from functools import partial

    max_workers = kwargs.get("max_workers", 1)
    chunksize = kwargs.get("chunksize", 10)
    progress = kwargs.get("progress", False)
    desc = kwargs.get("desc", "Progress")
    n = len(largs)
    if max_workers > 1:
        # executor.map passes each element of `largs` as ONE argument, so wrap
        # `fn` to unpack the tuple and match the serial `fn(*larg)` call.
        task = partial(_apply_unpacked, fn)
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            pending = executor.map(task, largs, chunksize=chunksize)
            if progress:
                pending = tqdm(pending, desc=desc, total=n)
            results = list(pending)
    else:
        items = tqdm(largs, desc=desc, total=n) if progress else largs
        results = [fn(*larg) for larg in items]
    return results

convert_tril_to_symm(lower_triangle_values)

Converts lower triangle values to a symmetric full matrix.

Parameters:

  • lower_triangle_values (list) –

    list of lower triangle matrix values.

Returns:

  • ndarray

    np.ndarray: numpy array of a symmetric full matrix.

Source code in src/rdworks/utils.py
def convert_tril_to_symm(lower_triangle_values: list) -> np.ndarray:
    """Converts lower triangle values to a symmetric full matrix.

    The values fill the strict lower triangle (diagonal excluded) in the
    order produced by `np.tril_indices(n, k=-1)`; the diagonal is zero.

    Args:
        lower_triangle_values (list): list of lower triangle matrix values.

    Raises:
        ValueError: propagated from NumPy when the number of values does not
            fill the strict lower triangle of an n x n matrix.

    Returns:
        np.ndarray: numpy array of a symmetric full matrix.
    """
    # n*(n-1)/2 values fill the strict triangle, so sqrt(2*len) lies in (n-1, n).
    n = math.ceil(math.sqrt(len(lower_triangle_values) * 2))
    full = np.zeros((n, n))
    full[np.tril_indices(n, k=-1)] = lower_triangle_values
    # Mirror by adding the transpose. The other triangle and the diagonal are
    # zero, so every entry is copied as-is; an element-wise maximum against
    # those zeros would clamp negative values to 0.
    return full + full.T

convert_triu_to_symm(upper_triangle_values)

Converts upper triangle values to a symmetric full matrix.

Parameters:

  • upper_triangle_values (list) –

    list of upper triangle matrix values.

Returns:

  • ndarray

    np.ndarray: numpy array of a symmetric full matrix.

Source code in src/rdworks/utils.py
def convert_triu_to_symm(upper_triangle_values: list) -> np.ndarray:
    """Converts upper triangle values to a symmetric full matrix.

    The values fill the strict upper triangle (diagonal excluded) in the
    order produced by `np.triu_indices(n, k=1)`; the diagonal is zero.

    Args:
        upper_triangle_values (list): list of upper triangle matrix values.

    Raises:
        ValueError: propagated from NumPy when the number of values does not
            fill the strict upper triangle of an n x n matrix.

    Returns:
        np.ndarray: numpy array of a symmetric full matrix.
    """
    # n*(n-1)/2 values fill the strict triangle, so sqrt(2*len) lies in (n-1, n).
    n = math.ceil(math.sqrt(len(upper_triangle_values) * 2))
    full = np.zeros((n, n))
    full[np.triu_indices(n, k=1)] = upper_triangle_values
    # Mirror by adding the transpose. The other triangle and the diagonal are
    # zero, so every entry is copied as-is; an element-wise maximum against
    # those zeros would clamp negative values to 0.
    return full + full.T

decompress_string(encoded_str)

decompress base64-encoded string to original string.

Parameters:

  • encoded_str (str) –

    base64-encoded compressed string.

Returns:

  • str ( str ) –

    original string.

Source code in src/rdworks/utils.py
def decompress_string(encoded_str: str) -> str:
    """Decode base64 text and zlib-decompress it back to the original string.

    Args:
        encoded_str (str): base64-encoded compressed string.

    Returns:
        str: original string.
    """
    # base64 text is a multiple of 4 characters long; restore any trailing
    # "=" padding that is missing before decoding.
    pad = -len(encoded_str) % 4
    padded = encoded_str + "=" * pad
    return zlib.decompress(base64.b64decode(padded)).decode("utf-8")

deserialize(encoded_str)

Decode, decompress, and deserialize a base64 string back to Python object.

Parameters:

  • encoded_str (str) –

    Base64-encoded compressed JSON string

Returns:

  • Any

    Deserialized Python object

Source code in src/rdworks/utils.py
def deserialize(encoded_str: str) -> Any:
    """
    Decode, decompress, and deserialize a base64 string back to Python object.

    Args:
        encoded_str: Base64-encoded compressed JSON string

    Returns:
        Deserialized Python object

    Raises:
        ValueError: if the input cannot be base64-decoded, decompressed,
            decoded as UTF-8, or parsed as JSON. The original error is
            attached as ``__cause__``.
    """
    try:
        # Base64 text only contains: A-Z, a-z, 0-9, +, /, =
        compressed = base64.b64decode(encoded_str.encode("utf-8"))
        json_bytes = zlib.decompress(compressed)
        return json.loads(json_bytes.decode("utf-8"))
    except (zlib.error, binascii.Error, json.JSONDecodeError, UnicodeDecodeError) as e:
        # Chain explicitly so the traceback shows which stage failed.
        raise ValueError(f"Failed to deserialize data: {e}") from e

dict_to_simplenamespace(data)

Source code in src/rdworks/utils.py
def dict_to_simplenamespace(data):
    """Recursively turn dicts into SimpleNamespace objects.

    Dicts become namespaces whose attributes are the converted values, lists
    are converted element by element, and anything else is returned as is.
    """
    if isinstance(data, list):
        return [dict_to_simplenamespace(element) for element in data]
    if not isinstance(data, dict):
        return data
    fields = {key: dict_to_simplenamespace(value) for key, value in data.items()}
    return SimpleNamespace(**fields)

fix_decimals_in_dict(in_dict, decimals=2)

Fixes the decimal places of all float values in a dictionary.

Parameters:

  • in_dict (dict) –

    The dictionary to fix.

  • decimals (int, default: 2 ) –

    The number of decimal places to fix the float values to.

Returns:

  • dict ( dict ) –

    a dictionary with the float values fixed to the specified number of decimal places.

Source code in src/rdworks/utils.py
def fix_decimals_in_dict(in_dict: dict, decimals: int = 2) -> dict:
    """Round every float value in a dictionary, descending into containers.

    Args:
        in_dict (dict): The dictionary to fix. It is not modified.
        decimals (int): The number of decimal places to fix the float values to.

    Returns:
        dict: a new dictionary with the float values fixed to the specified
            number of decimal places. List and tuple values come back as lists.
    """

    def _fix(value):
        if isinstance(value, float):
            return round(value, decimals)
        if isinstance(value, (list, tuple)):
            return fix_decimals_in_list(value, decimals)
        if isinstance(value, dict):
            return fix_decimals_in_dict(value, decimals)
        return value

    return {key: _fix(value) for key, value in in_dict.items()}

fix_decimals_in_list(in_list, decimals=2)

Fixes the decimal places of all float values in a list.

Parameters:

  • in_list (list) –

    The list to fix.

  • decimals (int, default: 2 ) –

    The number of decimal places to fix the float values to.

Returns:

  • list ( list ) –

    a list with the float values fixed to the specified number of decimal places.

Source code in src/rdworks/utils.py
def fix_decimals_in_list(in_list: list, decimals: int = 2) -> list:
    """Round every float value in a list, descending into containers.

    Args:
        in_list (list): The list (or tuple) to fix. It is not modified.
        decimals (int): The number of decimal places to fix the float values to.

    Returns:
        list: a new list with the float values fixed to the specified number
            of decimal places. Nested tuples come back as lists.
    """

    def _fix(element):
        if isinstance(element, float):
            return round(element, decimals)
        if isinstance(element, dict):
            return fix_decimals_in_dict(element, decimals)
        if isinstance(element, (list, tuple)):
            return fix_decimals_in_list(element, decimals)
        return element

    return [_fix(element) for element in in_list]

recursive_round(data, decimals=2)

Recursively round float values to a given decimal places.

Parameters: data – the input data, which can be a list, a dictionary, or any other data type; it can contain nested lists and dictionaries. decimals – the number of decimal places.

Source code in src/rdworks/utils.py
def recursive_round(data: Any, decimals: int = 2) -> Any:
    """Recursively round float values to a given decimal places.

    Python floats and NumPy floating scalars are rounded; NumPy scalars are
    additionally converted to built-in `float`. Lists and dictionaries are
    rebuilt with their contents processed recursively. Any other value
    (including tuples) is returned unchanged.

    Args:
        data: The input data, which can be a list, dictionary, or any
            other data type. It can contain nested lists and dictionaries.
        decimals: number of decimal places.

    Returns:
        A structure of the same shape as `data` with floats rounded.

    Raises:
        ValueError: if `decimals` is not a non-negative integer.
    """
    if not isinstance(decimals, int) or decimals < 0:
        raise ValueError("decimals must be a non-negative integer.")

    def _recursive_round(current_item):
        # NumPy scalars are tested first: np.float64 subclasses float, so a
        # plain-float check placed before this one would shadow the conversion.
        if isinstance(current_item, np.floating):
            return round(float(current_item), decimals)
        if isinstance(current_item, float):
            return round(current_item, decimals)
        if isinstance(current_item, list):
            return [_recursive_round(item) for item in current_item]
        if isinstance(current_item, dict):
            return {key: _recursive_round(value) for key, value in current_item.items()}
        return current_item

    return _recursive_round(data)

serialize(data)

Serialize, compress, and encode data to a base64 string.

Notes

The JSON specification only supports string keys in objects. For example, after JSON-serialization/deserialization, keys of integer type are changed to string. {1: 'a', 2: 'b', 3: 'c'} --> {'1': 'a', '2': 'b', '3': 'c'} Unfortunately, this is a fundamental limitation of JSON itself. Integer keys are not valid JSON.

Parameters:

  • data (Any) –

    Any JSON-serializable Python object

Returns:

  • str

    Base64-encoded string

Source code in src/rdworks/utils.py
def serialize(data: Any) -> str:
    """
    Turn a JSON-serializable object into compressed, base64-encoded text.

    The object is dumped as compact JSON, encoded as UTF-8, compressed with
    zlib, and finally base64-encoded.

    Notes:
        JSON objects only allow string keys, so non-string keys do not
        survive a round trip unchanged. For example, integer keys come back
        as strings:
        {1: 'a', 2: 'b', 3: 'c'} --> {'1': 'a', '2': 'b', '3': 'c'}
        This is a limitation of JSON itself.

    Args:
        data: Any JSON-serializable Python object

    Returns:
        Base64-encoded string
    """
    # Compact separators drop the whitespace json.dumps adds by default.
    compact_json = json.dumps(data, separators=(",", ":"))
    packed = zlib.compress(compact_json.encode("utf-8"))
    # Base64 output only contains: A-Z, a-z, 0-9, +, /, =
    return base64.b64encode(packed).decode("utf-8")

rdworks.units

Attributes

ev2kcalpermol = 23.060547830619026 module-attribute

hartree2ev = 27.211386245988 module-attribute

hartree2kcalpermol = 627.50947337481 module-attribute

pm2angstrom = 0.01 module-attribute

rdworks.io

Attributes

conf_name_convention = re.compile('[a-zA-Z0-9-_.!@#$%^&*()+=]+.[0-9]+/[0-9]+') module-attribute

Classes

Functions

guess_molecular_id(rdmols)

Guesses molecular ID from SDF properties.

Molecular ID is guessed by the coverage(=count of unique values divided by total count).

Priority

_Name > ID property with shortest values > any ID property

Parameters:

  • rdmols (List[Mol]) –

    a list of molecules.

Returns:

  • str ( str | None ) –

    property suitable for id

Source code in src/rdworks/io.py
def guess_molecular_id(rdmols: list[Chem.Mol]) -> str | None:
    """Guesses molecular ID from SDF properties.

    A property qualifies as an ID when it has a distinct string or integer
    value for every molecule (float values are ignored).

    Priority:
        `_Name` > ID property with shortest values > any ID property

    Args:
        rdmols (List[Chem.Mol]): a list of molecules.

    Returns:
        str | None: property suitable for id, or None if no property
            qualifies (including when `rdmols` is empty).
    """
    values_by_property = defaultdict(list)
    for rdmol in rdmols:
        properties_dict = rdmol.GetPropsAsDict()
        # `_Name` may be absent from the property dict, so read it directly;
        # HasProp guards against molecules that carry no `_Name` at all.
        if "_Name" not in properties_dict and rdmol.HasProp("_Name"):
            values_by_property["_Name"].append(rdmol.GetProp("_Name"))
        for k, v in properties_dict.items():
            # float is not suitable for molecular id
            if isinstance(v, float):
                continue
            # str(int) is acceptable for molecular id
            if isinstance(v, int):
                values_by_property[k].append(str(v))
            elif isinstance(v, str):
                values_by_property[k].append(v)

    # An ID must be present on, and unique across, every molecule.
    id_properties = [
        k for k, v in values_by_property.items() if len(set(v)) == len(rdmols)
    ]

    if not id_properties:
        return None
    if "_Name" in id_properties:
        return "_Name"
    # Prefer the property whose longest value is shortest; ties are broken
    # by property name so the choice is deterministic.
    return min(
        id_properties,
        key=lambda k: (max(len(v) for v in values_by_property[k]), k),
    )

merge_csv(libr, path, on='name')

Returns a copy of MolLibr merged with properties from on column of a .csv file.

Parameters:

  • libr (MolLibr) –

    library to be merged.

  • path (Union[str, Path]) –

    filename or path to a .csv file.

  • on (str, default: 'name' ) –

    column for name. Defaults to 'name'.

Raises:

  • ValueError

    if on column is not found in the csv file.

Returns:

  • MolLibr ( MolLibr ) –

    a copy of library of molecules.

Source code in src/rdworks/io.py
def merge_csv(libr: MolLibr, path: str | Path, on: str = "name") -> MolLibr:
    """Returns a copy of MolLibr merged with properties from `on` column of a .csv file.

    Rows are matched to molecules by comparing the `on` column with
    `mol.name`. All other columns of a matching row are merged into
    `mol.props`. Molecules without a matching row are left unchanged; if
    several rows share the same `on` value, the last one wins.

    Args:
        libr (MolLibr): library to be merged.
        path (Union[str, Path]): filename or path to a .csv file.
        on (str, optional): column for name. Defaults to 'name'.

    Raises:
        ValueError: if `on` column is not found in the csv file.

    Returns:
        MolLibr: a copy of library of molecules.
    """
    path = validate_path(path)
    df = pd.read_csv(path)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if on not in df.columns:
        raise ValueError(f"Cannot find ON column (`on=`) {on}")

    # df.to_dict("records") yields one {column -> value} dict per row;
    # index those rows by the `on` value, keeping only the other columns.
    data = {
        row_dict[on]: {k: v for k, v in row_dict.items() if k != on}
        for row_dict in df.to_dict("records")
    }

    merged_libr = libr.copy()

    for mol in merged_libr:
        if mol.name in data:  # mol.props can be partly updated from csv
            mol.props.update(data[mol.name])

    return merged_libr

read_csv(path, smiles, name, std=False, **kwargs)

Returns a library of molecules reading from a .csv file.

Other columns will be read as properties.

Parameters:

  • path (Union[str, Path]) –

    filename or path to a .csv file.

  • smiles (str) –

    column for SMILES.

  • name (str) –

    column for name.

  • std (bool, default: False ) –

    whether to standardize the input. Defaults to False.

Raises:

  • ValueError

    if smiles or name column is not found in the csv file.

Returns:

  • MolLibr ( MolLibr ) –

    a library of molecules.

Source code in src/rdworks/io.py
def read_csv(
    path: str | Path, smiles: str, name: str, std: bool = False, **kwargs
) -> MolLibr:
    """Returns a library of molecules reading from a .csv file.

    Other columns will be read as properties.

    Args:
        path (Union[str, Path]): filename or path to a .csv file.
        smiles (str): column for SMILES.
        name (str): column for name.
        std (bool, optional): whether to standardize the input. Defaults to False.
        **kwargs: forwarded to `compute` (e.g. `max_workers`, `chunksize`,
            `progress`).

    Raises:
        ValueError: if `smiles` or `name` column is not found in the csv file.

    Returns:
        MolLibr: a library of molecules.
    """
    path = validate_path(path)
    df = pd.read_csv(path)
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if smiles not in df.columns:
        raise ValueError(f"Cannot find SMILES column (`smiles=`) {smiles}")
    if name not in df.columns:
        raise ValueError(f"Cannot find NAME column (`name=`) {name}")

    # One (SMILES, name, std) argument tuple per row.
    largs = [
        (row_smiles, row_name, std)
        for row_smiles, row_name in zip(df[smiles], df[name])
    ]
    libr = MolLibr(compute(Mol, largs, desc="Reading CSV", **kwargs))

    # Read the remaining columns as properties. df.to_dict("records") yields
    # one {column -> value} dict per row, in the same order as `largs`.
    csv_records = df.to_dict("records")
    for mol, row_dict in zip(libr, csv_records):
        mol.props.update(
            {k: v for k, v in row_dict.items() if k not in (smiles, name)}
        )

    return libr

read_dataframe(df, smiles, name, std=False)

Returns rdworks.MolLibr object from a pandas DataFrame.

Parameters:

  • df (DataFrame) –

    pandas.DataFrame.

  • smiles (str) –

    column for SMILES.

  • name (str) –

    column for name.

  • std (bool, default: False ) –

    whether to standardize the input. Defaults to False.

Raises:

  • TypeError

    if df is not pandas DataFrame.

  • ValueError

    if smiles or name column is not found.

Returns:

  • MolLibr ( MolLibr ) –

    a library of molecules.

Source code in src/rdworks/io.py
def read_dataframe(
    df: pd.DataFrame, smiles: str, name: str, std: bool = False
) -> MolLibr:
    """Returns rdworks.MolLibr object from a pandas DataFrame.

    Args:
        df (pd.DataFrame): pandas.DataFrame.
        smiles (str): column for SMILES.
        name (str): column for name.
        std (bool, optional): whether to standardize the input. Defaults to False.

    Raises:
        TypeError: if `df` is not pandas DataFrame.
        ValueError: if `smiles` or `name` column is not found.

    Returns:
        MolLibr: a library of molecules.
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Expects a pandas.DataFrame object")
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if smiles not in df.columns:
        raise ValueError(f"Cannot find SMILES column (`smiles=`) {smiles}")
    if name not in df.columns:
        raise ValueError(f"Cannot find NAME column (`name=`) {name}")

    return MolLibr(list(df[smiles]), list(df[name]), std=std)

read_mae(path, std=False, confs=True, **kwargs)

Returns a library of molecules reading from a Schrodinger Maestro file.

Parameters:

  • path (Union[str, Path]) –

    filename or path to the .mae or .maegz file.

  • std (bool, default: False ) –

    whether to standardize the input. Defaults to False.

  • confs (bool, default: True ) –

    whether to read 3D conformers. Defaults to True.

Returns:

  • MolLibr ( MolLibr ) –

    a library of molecules.

Source code in src/rdworks/io.py
def read_mae(
    path: str | Path, std: bool = False, confs: bool = True, **kwargs
) -> MolLibr:
    """Returns a library of molecules reading from a Schrodinger Maestro file.

    With `confs=True`, consecutive records that share the same canonical
    SMILES are collected as conformers of one molecule. With `confs=False`,
    every record becomes its own molecule. Records the supplier returns as
    None are skipped.

    Args:
        path (Union[str, Path]): filename or path to the .mae or .maegz file.
        std (bool, optional): whether to standardize the input. Only used
            when `confs` is False. Defaults to False.
        confs (bool, optional): whether to read 3D conformers. Defaults to True.
        **kwargs: forwarded to `compute` when `confs` is False.

    Returns:
        MolLibr: a library of molecules.
    """
    path = validate_path(path)

    if path.suffix == ".maegz":
        with gzip.open(path, "rb") as gz:
            # `confs ^ True` is the negation of the boolean `confs`:
            # hydrogens are removed only when conformers are not requested.
            with rdmolfiles.MaeMolSupplier(
                gz, sanitize=True, removeHs=(confs ^ True)
            ) as maegz:
                rdmols = [m for m in maegz if m is not None]
    else:
        # `confs ^ True` is the negation of the boolean `confs`:
        # hydrogens are removed only when conformers are not requested.
        with rdmolfiles.MaeMolSupplier(
            path, sanitize=True, removeHs=(confs ^ True)
        ) as mae:
            rdmols = [m for m in mae if m is not None]

    lnames = [m.GetProp("_Name") for m in rdmols]
    # Argument tuples for the non-conformer branch below.
    largs = [(rdmol, name, std) for rdmol, name in zip(rdmols, lnames)]

    obj = MolLibr()

    if confs:  # reading 3D structures (conformers)
        last_smiles = None
        new_mol = None
        for rdmol, name in zip(rdmols, lnames):
            # rdworks name convention (e.g. xxxx.yy/zzz):
            # keep the part before "/" as the isomer name
            if conf_name_convention.match(name):
                (isomer_name, _) = name.split("/")
            else:
                isomer_name = name
            smiles = Chem.MolToSmiles(rdmol)  # canonicalized SMILES
            # A change in SMILES between consecutive records starts a new molecule.
            if last_smiles is None or last_smiles != smiles:
                if new_mol:
                    # NOTE(review): rename() is defined elsewhere; presumably it
                    # (re)names the collected conformers -- confirm.
                    obj.libr.append(new_mol.rename())
                # start a new molecule
                # !!!! rdmol and new_mol do not have consistent atom indices !!!
                # idxmap: original atom index -> canonicalized rdmol atom index
                # smiles = Chem.MolToSmiles(rdmol) # canonicalization creates `_smilesAtomOutputOrder` property
                # idxord_o = ast.literal_eval(rdmol.GetProp("_smilesAtomOutputOrder"))
                # idxmap_o = {o.GetIdx():idxord_o.index(o.GetIdx()) for o in rdmol.GetAtoms()}
                rdmol_2d = Chem.RemoveHs(rdmol)
                AllChem.Compute2DCoords(rdmol_2d)
                new_mol = Mol(
                    rdmol_2d, isomer_name, std=False
                )  # atom indices remain unchanged.

            new_mol.confs.append(Conf(rdmol))

            last_smiles = smiles
        if new_mol:  # handle the last molecule
            obj.libr.append(new_mol.rename())

    else:  # one molecule per record, no conformers
        obj = MolLibr(compute(Mol, largs, desc="Reading Mae", **kwargs))

    return obj

read_sdf(path, name=None, prefix=None, std=False, confs=False, props=True, **kwargs)

Returns a library of molecules reading from a SDF file.

Parameters:

  • path (Union[str, PosixPath]) –

    filename or path to the .sdf file.

  • name (str, default: None ) –

    property name to be used for name. Defaults to None.

  • prefix (str, default: None ) –

    prefix to be used for name if necessary. Defaults to None.

  • std (bool, default: False ) –

    whether to standardize the input. Defaults to False.

  • confs (bool, default: False ) –

    whether to read 3D conformers and keep hydrogens. Defaults to False.

  • props (bool, default: True ) –

    whether to read SDF properties. Defaults to True.

Returns:

  • MolLibr ( MolLibr ) –

    a library of molecules.

Source code in src/rdworks/io.py
def read_sdf(
    path: str | Path,
    name: str | None = None,
    prefix: str | None = None,
    std: bool = False,
    confs: bool = False,
    props: bool = True,
    **kwargs,
) -> MolLibr:
    """Returns a library of molecules reading from a SDF file.

    With `confs=True`, consecutive records that share the same canonical
    SMILES are collected as conformers of one molecule. With `confs=False`,
    every record becomes its own molecule. Records the supplier returns as
    None are skipped.

    Args:
        path (Union[str, PosixPath]): filename or path to the .sdf file.
            A `.gz` suffix is read through gzip.
        name (str, optional): property name to be used for name. When not
            given, the property is guessed with `guess_molecular_id`.
            Defaults to None.
        prefix (str, optional): prefix to be used for name if necessary, i.e.
            when no naming property is available; names are then
            `{prefix}_{index}` counting from 1. Defaults to None.
        std (bool, optional): whether to standardize the input. Only used
            when `confs` is False. Defaults to False.
        confs (bool, optional): whether to read 3D conformers and keep hydrogens. Defaults to False.
        props (bool, optional): whether to read SDF properties. Defaults to True.
        **kwargs: forwarded to `compute` when `confs` is False.

    Returns:
        MolLibr: a library of molecules.
    """
    path = validate_path(path)
    # Hydrogens are kept only when 3D conformers are requested.
    remove_hs = not confs
    if path.suffix == ".gz":
        with gzip.open(path, "rb") as gz:
            with Chem.ForwardSDMolSupplier(
                gz, sanitize=True, removeHs=remove_hs
            ) as gzsdf:
                rdmols = [m for m in gzsdf if m is not None]
    else:
        with Chem.SDMolSupplier(path, sanitize=True, removeHs=remove_hs) as sdf:
            rdmols = [m for m in sdf if m is not None]

    if isinstance(name, str):
        id_property = name
    else:
        id_property = guess_molecular_id(rdmols)
    if isinstance(id_property, str):
        names = [m.GetProp(id_property) for m in rdmols]
    else:
        names = [f"{prefix}_{i + 1}" for i in range(len(rdmols))]

    # One property dict per record, or None placeholders when not requested.
    if props:
        _props = [m.GetPropsAsDict() for m in rdmols]
    else:
        _props = [None] * len(rdmols)

    if not confs:
        # One molecule per record, no conformers.
        _args = [
            (rdmol, mol_name, std, mol_props)
            for rdmol, mol_name, mol_props in zip(rdmols, names, _props)
        ]
        return MolLibr(compute(_map_sdf, _args, desc="Reading SDF", **kwargs))

    # Reading 3D SDF (conformers).
    obj = MolLibr()
    last_smiles = None
    new_mol = None
    for rdmol, mol_name, mol_props in zip(rdmols, names, _props):
        # rdworks name convention (e.g. xxxx.yy/zzz):
        # keep the part before "/" as the isomer name
        if conf_name_convention.match(mol_name):
            (isomer_name, _) = mol_name.split("/")
        else:
            isomer_name = mol_name
        smiles = Chem.MolToSmiles(rdmol)  # canonicalized SMILES
        # A change in SMILES between consecutive records starts a new molecule.
        if last_smiles is None or last_smiles != smiles:
            if new_mol:
                obj.libr.append(new_mol.rename())
            rdmol_2d = Chem.RemoveHs(rdmol)
            AllChem.Compute2DCoords(rdmol_2d)
            # initialize a new molecule with the H-removed 2D
            new_mol = Mol(
                rdmol_2d, isomer_name, std=False
            )  # atom indices remain unchanged.
        new_conf = Conf(rdmol)
        # With props=False the placeholder is None, which dict.update rejects.
        if mol_props is not None:
            new_conf.props.update(mol_props)
        new_mol.confs.append(new_conf)
        last_smiles = smiles
    if new_mol:  # handle the last molecule
        obj.libr.append(new_mol.rename())

    return obj

read_smi(path, std=False, **kwargs)

Read a SMILES file and create a molecular library.

Parameters:

  • path (str | Path) –

    path to the SMILES file.

  • std (bool, default: False ) –

    whether to standardize. Defaults to False.

Raises:

  • FileNotFoundError

    when path does not exist.

Returns:

  • MolLibr ( MolLibr ) –

    a library of molecules.

Source code in src/rdworks/io.py
def _smi_line_to_args(line: str, std: bool) -> tuple | None:
    """Parse one SMILES-file line into a (smiles, name, std) tuple, or None if blank."""
    fields = line.split()
    if not fields:
        return None
    smiles = fields[0]
    # NOTE(review): a line without a name column gets an empty name so that
    # `std` stays in the third position -- confirm Mol accepts "" as a name.
    mol_name = fields[1] if len(fields) > 1 else ""
    return (smiles, mol_name, std)


def read_smi(path: str | Path, std: bool = False, **kwargs) -> MolLibr:
    """Read a SMILES file and create a molecular library.

    Each non-blank line is split on whitespace; the first field is the
    SMILES and the second, if present, is the name. Further fields are
    ignored. A `.gz` suffix is read through gzip. Files are decoded as UTF-8.

    Args:
        path (str | Path): path to the SMILES file.
        std (bool, optional): whether to standardize. Defaults to False.
        **kwargs: forwarded to `compute` (e.g. `max_workers`, `progress`).

    Raises:
        FileNotFoundError: when path does not exist.

    Returns:
        MolLibr: a library of molecules.
    """
    path = validate_path(path)
    opener = gzip.open if path.suffix == ".gz" else open
    with opener(path, "rt", encoding="utf-8") as smi:
        parsed = (_smi_line_to_args(line, std) for line in smi)
        # Blank lines carry no molecule and are skipped.
        largs = [args for args in parsed if args is not None]
    return MolLibr(compute(Mol, largs, desc="Reading SMILES", **kwargs))

validate_path(path)

Checks that a filename or path points to an existing file and returns it as a pathlib.Path.

Parameters:

  • path (Union[str, PosixPath]) –

    filename or path.

Raises:

  • FileNotFoundError

    if the path is not found.

Returns:

  • Path –

    the validated path as a pathlib.Path.

Source code in src/rdworks/io.py
def validate_path(path: str | Path) -> Path:
    """Prechecks filename or path and returns a string for the pathlib.PosixPath.

    Args:
        path (Union[str, PosixPath]): filename or path.

    Raises:
        FileNotFoundError: if the path is not found.

    Returns:
        str: a string for the path.
    """
    if isinstance(path, Path):
        pass
    elif isinstance(path, str):
        path = Path(path)

    if path.exists() and path.is_file():
        return path
    else:
        raise FileNotFoundError(f"File {path.as_posix()} does not exist.")

rdworks.view

Classes

DescriptiveDraw

Descriptive 2D Drawing

Source code in src/rdworks/view.py
class DescriptiveDraw:
    """Descriptive 2D drawing: a molecule depiction with optional circles.

    ``draw`` renders the molecule as SVG and can circle aromatic atoms,
    conjugated bonds, H-bond acceptors/donors, or caller-chosen atoms.
    Circle appearance comes from the class-level ``style`` registry.
    """

    # Unit circle sampled at 60 angles; scaled by a style's radius and shifted
    # to the target position when a circle is drawn.
    _angles = np.linspace(0, np.pi * 2, 60)
    _circle_x, _circle_y = np.sin(_angles), np.cos(_angles)
    circle = np.vstack([_circle_x, _circle_y]).T
    # Named circle styles. This dict lives on the class, so it is shared by all
    # instances. "rgba" is RGB (0-255) followed by opacity (0-1).
    style = {
        "aromatic": {"r": 0.3, "rgba": (136, 180, 168, 0.6), "linewidth": 1, "fill": True},
        "conjugated": {"r": 0.1, "rgba": (51, 51, 51, 0.7), "linewidth": 1, "fill": True},
        "HBA": {"r": 0.4, "rgba": (11, 57, 235, 0.7), "linewidth": 3, "fill": False},
        "HBD": {"r": 0.5, "rgba": (254, 97, 0, 0.7), "linewidth": 3, "fill": False},
        "ionizable": {"r": 0.5, "rgba": (254, 97, 0, 0.7), "linewidth": 3, "fill": False},
        # Bootstrap colors
        "primary": {"r": 0.5, "rgba": (13, 110, 253, 0.7), "linewidth": 3, "fill": False},
        "secondary": {"r": 0.5, "rgba": (108, 117, 125, 0.7), "linewidth": 3, "fill": False},
        "success": {"r": 0.5, "rgba": (25, 135, 84, 0.7), "linewidth": 3, "fill": False},
        "info": {"r": 0.5, "rgba": (13, 202, 240, 0.7), "linewidth": 3, "fill": False},
        "warning": {"r": 0.5, "rgba": (255, 193, 7, 0.7), "linewidth": 3, "fill": False},
        "danger": {"r": 0.5, "rgba": (220, 53, 69, 0.7), "linewidth": 3, "fill": False},
        "light": {"r": 0.5, "rgba": (248, 249, 250, 0.7), "linewidth": 3, "fill": False},
        "dark": {"r": 0.5, "rgba": (33, 37, 41, 0.7), "linewidth": 3, "fill": False},
        "blue": {"r": 0.5, "rgba": (13, 110, 253, 0.7), "linewidth": 3, "fill": False},
        "indigo": {"r": 0.5, "rgba": (102, 16, 242, 0.7), "linewidth": 3, "fill": False},
        "purple": {"r": 0.5, "rgba": (111, 66, 193, 0.7), "linewidth": 3, "fill": False},
        "pink": {"r": 0.5, "rgba": (214, 51, 132, 0.7), "linewidth": 3, "fill": False},
        "red": {"r": 0.5, "rgba": (220, 53, 69, 0.7), "linewidth": 3, "fill": False},
        "orange": {"r": 0.5, "rgba": (253, 126, 20, 0.7), "linewidth": 3, "fill": False},
        "yellow": {"r": 0.5, "rgba": (255, 193, 7, 0.7), "linewidth": 3, "fill": False},
        "green": {"r": 0.5, "rgba": (25, 135, 84, 0.7), "linewidth": 3, "fill": False},
        "teal": {"r": 0.5, "rgba": (32, 201, 151, 0.7), "linewidth": 3, "fill": False},
        "cyan": {"r": 0.5, "rgba": (13, 202, 240, 0.7), "linewidth": 3, "fill": False},
        "white": {"r": 0.5, "rgba": (255, 255, 255, 0.7), "linewidth": 3, "fill": False},
        "black": {"r": 0.5, "rgba": (0, 0, 0, 0.7), "linewidth": 3, "fill": False},
    }

    def __init__(self, rdmol: Chem.Mol, legend: str = "") -> None:
        """Prepare a molecule for descriptive drawing.

        Args:
            rdmol (Chem.Mol): input molecule; it is copied, not modified.
            legend (str, optional): legend drawn with the molecule. Defaults to "".
        """
        self.rdmol = Chem.Mol(rdmol)  # copy of input molecule
        self.rdmolH = Chem.AddHs(rdmol)  # does not modify the input molecule object
        self._set_basic_nitrogens()
        self._set_acidic_oxygens()
        self.rdmol = Draw.PrepareMolForDrawing(self.rdmol)
        self.legend = legend
        self.conf = self.rdmol.GetConformer(0)
        self.canvas = None  # created by draw()

    def _set_basic_nitrogens(self) -> None:
        """Give +1 charge and ``4 - degree`` explicit Hs to selected nitrogens.

        A nitrogen is changed only if it is non-aromatic, has no conjugated
        bond, and its explicit valence equals its degree.
        """
        for atom in self.rdmol.GetAtoms():
            if atom.GetSymbol() != "N" or atom.GetIsAromatic():
                continue
            if any(bond.GetIsConjugated() for bond in atom.GetBonds()):
                continue
            deg = atom.GetDegree()
            # Same valence accessor as _get_lone_pairs uses.
            if atom.GetValence(Chem.ValenceType.EXPLICIT) == deg:
                atom.SetNumExplicitHs(4 - deg)
                atom.SetFormalCharge(+1)

    def _set_acidic_oxygens(self) -> None:
        """Set a -1 formal charge on matched carboxyl oxygens, then sanitize."""
        # carboxylates
        pattern = Chem.MolFromSmarts("[$([OD1][CX3](=[OD1]))]")
        for match in self.rdmol.GetSubstructMatches(pattern):
            atom = self.rdmol.GetAtomWithIdx(match[0])
            atom.SetFormalCharge(-1)
            atom.UpdatePropertyCache()
        Chem.SanitizeMol(self.rdmol)

    def _get_lone_pairs(self, atom_idx: int) -> int:
        """Get number of lone pairs.

        Credit: AstraZeneca/Jazzy

        Args:
            atom_idx (int): atom index (looked up in the H-added molecule).

        Returns:
            int, number of lone pairs.
        """
        pt = Chem.GetPeriodicTable()
        atom = self.rdmolH.GetAtomWithIdx(atom_idx)
        symbol = atom.GetSymbol()
        valence_electrons = PeriodicTable.GetNOuterElecs(pt, symbol)
        unavailable_electrons = atom.GetValence(Chem.ValenceType.EXPLICIT)
        charge = atom.GetFormalCharge()
        free_electrons = valence_electrons - unavailable_electrons - charge
        return int(free_electrons / 2)

    def _get_coords(self, atom_idx: int) -> np.ndarray:
        """Get atomic coordinates

        Args:
            atom_idx (int): atom index

        Returns:
            np.ndarray: 2D coordinates
        """
        atom_pos = self.conf.GetAtomPosition(atom_idx)
        return np.array([atom_pos.x, atom_pos.y])

    def _draw_circle(self, pos: np.ndarray, style: str | dict) -> None:
        """Draw a circle at the given position and style.

        Args:
            pos (np.ndarray): position
            style (str | dict): name of a registered style, or a style dict
                with the keys "r", "rgba", "linewidth" and "fill".

        Raises:
            KeyError: if a style name is not registered.
        """
        spec = self.style[style] if isinstance(style, str) else style
        outline = [Point2D(*xy) for xy in DescriptiveDraw.circle * spec["r"] + pos]
        rgba = spec["rgba"]
        # NOTE(review): channels are divided by 256 (not 255), so 255 maps to
        # ~0.996; kept unchanged so existing drawings keep their colours.
        color = tuple([v / 256 for v in rgba[:3]] + [rgba[-1]])
        self.canvas.SetFillPolys(spec["fill"])
        self.canvas.SetColour(color)
        self.canvas.SetLineWidth(spec["linewidth"])
        self.canvas.DrawPolygon(outline)

    def _resolve_circle_style(
        self,
        style: str,
        r: float | None = None,
        rgba: Iterable | None = None,
        linewidth: int | None = None,
        fill: bool | None = None,
    ) -> dict:
        """Return a copy of a registered style with per-call overrides applied.

        The registered style is never modified; overrides of the wrong type
        are ignored.
        """
        resolved = dict(self.style[style])
        if isinstance(r, (int, float)) and not isinstance(r, bool):
            resolved["r"] = r
        if isinstance(linewidth, int):
            resolved["linewidth"] = linewidth
        if isinstance(fill, bool):
            resolved["fill"] = fill
        if isinstance(rgba, (list, tuple)):
            resolved["rgba"] = rgba
        return resolved

    def set_style(
        self,
        name: str,
        rgba: tuple[float, float, float, float],
        r: float = 0.52,
        linewidth: int = 1,
        fill: bool = False,
    ) -> None:
        """Set style.

        The style is stored in the class-level registry and is therefore
        visible to every instance.

        Args:
            name (str): name of style
            rgba (tuple[float,float,float,float]): RGB(0-255) and opacity (0-1)
            r (float, optional): radius of circle. Defaults to 0.52.
            linewidth (int, optional): linewidth. Defaults to 1.
            fill (bool, optional): whether to fill the circle. Defaults to False.
        """
        self.style[name] = {"r": r, "rgba": rgba, "linewidth": linewidth, "fill": fill}

    def show_styles(self) -> None:
        """Print a table of the registered styles, sorted by name."""
        print(f"{'Name':<16} {'r':<8} {'rgba':<24} {'linewidth':<10} {'fill':<10}")
        for k, v in sorted(self.style.items()):
            # str() first: a bool formatted with a width spec prints as 1/0.
            print(
                f"{k:<16} {v['r']:<8.2f} {str(v['rgba']):<24} {v['linewidth']:<10} {str(v['fill']):<10}"
            )
        print()

    def draw(
        self,
        width: int = 400,
        height: int = 400,
        aromatic: bool = False,
        conjugated: bool = False,
        HBA: bool = False,
        HBD: bool = False,
        circles: Iterable | None = None,
        style: str = "primary",
        r: float | None = None,
        rgba: Iterable | None = None,
        linewidth: int | None = None,
        fill: bool | None = None,
    ) -> str:
        """Drawing SVG

        Args:
            width (int, optional): width. Defaults to 400.
            height (int, optional): height. Defaults to 400.
            aromatic (bool): whether to highlight aromatic atoms. Defaults to False.
            conjugated (bool): whether to highlight conjugated bonds. Defaults to False.
            HBA (bool): whether to circle H-bond acceptor atoms. Defaults to False.
            HBD (bool): whether to circle H-bond donor atoms. Defaults to False.
            circles (Iterable, optional): list/tuple of atom indices to circle.
                Other types, and an empty sequence, draw nothing. Defaults to None.
            style: name of the registered style for circles if circles is given.
            r (float, optional): for circles, radius overriding the style.
                Defaults to None (keep the style's radius).
            rgba (tuple[float,float,float,float], optional): for circles,
                RGB(0-255) and opacity (0-1) overriding the style. Defaults to None.
            linewidth (int, optional): for circles, linewidth overriding the
                style. Defaults to None.
            fill (bool, optional): for circles, fill flag overriding the style.
                Defaults to None.

        Returns:
            str: SVG drawing text.
        """
        self.canvas = rdMolDraw2D.MolDraw2DSVG(width, height)
        self.canvas.drawOptions().addAtomIndices = True
        self.canvas.DrawMolecule(self.rdmol, legend=self.legend)

        if aromatic:
            for atom in self.rdmol.GetAtoms():
                if atom.GetIsAromatic():
                    self._draw_circle(self._get_coords(atom.GetIdx()), "aromatic")

        if conjugated:
            for bond in self.rdmol.GetBonds():
                if bond.GetIsConjugated():
                    begin_pos = self._get_coords(bond.GetBeginAtomIdx())
                    end_pos = self._get_coords(bond.GetEndAtomIdx())
                    # circle sits on the bond midpoint
                    self._draw_circle(begin_pos / 2 + end_pos / 2, "conjugated")

        if HBA:
            _HBA = Chem.MolFromSmarts(
                "[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$("
                + "[N;v3;!$(N-*=!@[O,N,P,S])]),$([nH0,o,s;+0])]"
            )
            for match in self.rdmol.GetSubstructMatches(_HBA):
                self._draw_circle(self._get_coords(match[0]), "HBA")
        if HBD:
            _HBD = Chem.MolFromSmarts("[N&!H0&v3,N&!H0&+1&v4,O&H1&+0,S&H1&+0,n&H1&+0]")
            for match in self.rdmol.GetSubstructMatches(_HBD):
                self._draw_circle(self._get_coords(match[0]), "HBD")

        if isinstance(circles, (list, tuple)) and circles and isinstance(circles[0], int):
            # Overrides go onto a private copy, so the shared registry entry
            # named by `style` is left untouched for later calls.
            adhoc = self._resolve_circle_style(style, r, rgba, linewidth, fill)
            for idx in circles:
                self._draw_circle(self._get_coords(idx), adhoc)

        self.canvas.FinishDrawing()

        return self.canvas.GetDrawingText()
Attributes
canvas = None instance-attribute
circle = np.vstack([_circle_x, _circle_y]).T class-attribute instance-attribute
conf = self.rdmol.GetConformer(0) instance-attribute
legend = legend instance-attribute
rdmol = Draw.PrepareMolForDrawing(self.rdmol) instance-attribute
rdmolH = Chem.AddHs(rdmol) instance-attribute
style = {'aromatic': {'r': 0.3, 'rgba': (136, 180, 168, 0.6), 'linewidth': 1, 'fill': True}, 'conjugated': {'r': 0.1, 'rgba': (51, 51, 51, 0.7), 'linewidth': 1, 'fill': True}, 'HBA': {'r': 0.4, 'rgba': (11, 57, 235, 0.7), 'linewidth': 3, 'fill': False}, 'HBD': {'r': 0.5, 'rgba': (254, 97, 0, 0.7), 'linewidth': 3, 'fill': False}, 'ionizable': {'r': 0.5, 'rgba': (254, 97, 0, 0.7), 'linewidth': 3, 'fill': False}, 'primary': {'r': 0.5, 'rgba': (13, 110, 253, 0.7), 'linewidth': 3, 'fill': False}, 'secondary': {'r': 0.5, 'rgba': (108, 117, 125, 0.7), 'linewidth': 3, 'fill': False}, 'success': {'r': 0.5, 'rgba': (25, 135, 84, 0.7), 'linewidth': 3, 'fill': False}, 'info': {'r': 0.5, 'rgba': (13, 202, 240, 0.7), 'linewidth': 3, 'fill': False}, 'warning': {'r': 0.5, 'rgba': (255, 193, 7, 0.7), 'linewidth': 3, 'fill': False}, 'danger': {'r': 0.5, 'rgba': (220, 53, 69, 0.7), 'linewidth': 3, 'fill': False}, 'light': {'r': 0.5, 'rgba': (248, 249, 250, 0.7), 'linewidth': 3, 'fill': False}, 'dark': {'r': 0.5, 'rgba': (33, 37, 41, 0.7), 'linewidth': 3, 'fill': False}, 'blue': {'r': 0.5, 'rgba': (13, 110, 253, 0.7), 'linewidth': 3, 'fill': False}, 'indigo': {'r': 0.5, 'rgba': (102, 16, 242, 0.7), 'linewidth': 3, 'fill': False}, 'purple': {'r': 0.5, 'rgba': (111, 66, 193, 0.7), 'linewidth': 3, 'fill': False}, 'pink': {'r': 0.5, 'rgba': (214, 51, 132, 0.7), 'linewidth': 3, 'fill': False}, 'red': {'r': 0.5, 'rgba': (220, 53, 69, 0.7), 'linewidth': 3, 'fill': False}, 'orange': {'r': 0.5, 'rgba': (253, 126, 20, 0.7), 'linewidth': 3, 'fill': False}, 'yellow': {'r': 0.5, 'rgba': (255, 193, 7, 0.7), 'linewidth': 3, 'fill': False}, 'green': {'r': 0.5, 'rgba': (25, 135, 84, 0.7), 'linewidth': 3, 'fill': False}, 'teal': {'r': 0.5, 'rgba': (32, 201, 151, 0.7), 'linewidth': 3, 'fill': False}, 'cyan': {'r': 0.5, 'rgba': (13, 202, 240, 0.7), 'linewidth': 3, 'fill': False}, 'white': {'r': 0.5, 'rgba': (255, 255, 255, 0.7), 'linewidth': 3, 'fill': False}, 'black': {'r': 0.5, 'rgba': (0, 0, 0, 0.7), 
'linewidth': 3, 'fill': False}} class-attribute instance-attribute
Functions
draw(width=400, height=400, aromatic=False, conjugated=False, HBA=False, HBD=False, circles=None, style='primary', r=None, rgba=None, linewidth=None, fill=None)

Drawing SVG

Parameters:

  • width (int, default: 400 ) –

    width. Defaults to 400.

  • height (int, default: 400 ) –

    height. Defaults to 400.

  • aromatic (bool, default: False ) –

    whether to highlight aromatic atoms. Defaults to False.

  • conjugated (bool, default: False ) –

    whether to highlight conjugated bonds. Defaults to False.

  • HBA (bool, default: False ) –

    whether to circle H-bond acceptor atoms. Defaults to False.

  • HBD (bool, default: False ) –

    whether to circle H-bond donor atoms. Defaults to False.

  • circles (Iterable, default: None ) –

    list/tuple of atom indices to circle. Defaults to None.

  • style (str, default: 'primary' ) –

    style for circles if circles is not None.

  • r (float, default: None ) –

    for circles, overriding style. radius of circle. Defaults to None (keep the style's radius).

  • rgba (tuple[float, float, float, float], default: None ) –

    for circles, overriding style. RGB(0-255) and opacity (0-1)

  • linewidth (int, default: None ) –

    for circles, overriding style. linewidth. Defaults to 1.

  • fill (bool, default: None ) –

    for circles, overriding style. whether to fill the circle. Defaults to False.

Returns:

  • str ( str ) –

    SVG drawing text.

Source code in src/rdworks/view.py
def draw(
    self,
    width: int = 400,
    height: int = 400,
    aromatic: bool = False,
    conjugated: bool = False,
    HBA: bool = False,
    HBD: bool = False,
    circles: Iterable | None = None,
    style: str = "primary",
    r: float | None = None,
    rgba: Iterable | None = None,
    linewidth: int | None = None,
    fill: bool | None = None,
) -> str:
    """Drawing SVG

    Args:
        width (int, optional): width. Defaults to 400.
        height (int, optional): height. Defaults to 400.
        aromatic (bool): whether to highlight aromatic atoms. Defaults to False.
        conjugated (bool): whether to highlight conjugated bonds. Defaults to False.
        HBA (bool): whether to circle H-bond acceptor atoms. Defaults to False.
        HBD (bool): whether to circle H-bond donor atoms. Defaults to False.
        circles (Iterable, optional): list/tuple of atom indices to circle.
            Other types, and an empty sequence, draw nothing. Defaults to None.
        style: name of the registered style for circles if circles is given.
        r (float, optional): for circles, radius overriding the style.
            Defaults to None (keep the style's radius).
        rgba (tuple[float,float,float,float], optional): for circles,
            RGB(0-255) and opacity (0-1) overriding the style. Defaults to None.
        linewidth (int, optional): for circles, linewidth overriding the style.
            Defaults to None.
        fill (bool, optional): for circles, fill flag overriding the style.
            Defaults to None.

    Returns:
        str: SVG drawing text.
    """
    self.canvas = rdMolDraw2D.MolDraw2DSVG(width, height)
    self.canvas.drawOptions().addAtomIndices = True
    self.canvas.DrawMolecule(self.rdmol, legend=self.legend)

    if aromatic:
        for atom in self.rdmol.GetAtoms():
            if atom.GetIsAromatic():
                self._draw_circle(self._get_coords(atom.GetIdx()), "aromatic")

    if conjugated:
        for bond in self.rdmol.GetBonds():
            if bond.GetIsConjugated():
                begin_pos = self._get_coords(bond.GetBeginAtomIdx())
                end_pos = self._get_coords(bond.GetEndAtomIdx())
                # circle sits on the bond midpoint
                self._draw_circle(begin_pos / 2 + end_pos / 2, "conjugated")

    if HBA:
        _HBA = Chem.MolFromSmarts(
            "[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$("
            + "[N;v3;!$(N-*=!@[O,N,P,S])]),$([nH0,o,s;+0])]"
        )
        for match in self.rdmol.GetSubstructMatches(_HBA):
            self._draw_circle(self._get_coords(match[0]), "HBA")
    if HBD:
        _HBD = Chem.MolFromSmarts("[N&!H0&v3,N&!H0&+1&v4,O&H1&+0,S&H1&+0,n&H1&+0]")
        for match in self.rdmol.GetSubstructMatches(_HBD):
            self._draw_circle(self._get_coords(match[0]), "HBD")

    if isinstance(circles, (list, tuple)) and circles and isinstance(circles[0], int):
        # Copy before applying overrides: updating self.style[style] in place
        # would permanently change the shared, named style.
        adhoc = dict(self.style[style])
        if isinstance(r, (int, float)) and not isinstance(r, bool):
            adhoc["r"] = r
        if isinstance(linewidth, int):
            adhoc["linewidth"] = linewidth
        if isinstance(fill, bool):
            adhoc["fill"] = fill
        if isinstance(rgba, (list, tuple)):
            adhoc["rgba"] = rgba
        self.style["__adhoc__"] = adhoc
        try:
            for idx in circles:
                self._draw_circle(self._get_coords(idx), "__adhoc__")
        finally:
            # remove the temporary style even if drawing fails
            self.style.pop("__adhoc__", None)

    self.canvas.FinishDrawing()

    return self.canvas.GetDrawingText()
set_style(name, rgba, r=0.52, linewidth=1, fill=False)

Set style.

Parameters:

  • name (str) –

    name of style

  • rgba (tuple[float, float, float, float]) –

    RGB(0-255) and opacity (0-1)

  • r (float, default: 0.52 ) –

    radius of circle. Defaults to 0.52.

  • linewidth (int, default: 1 ) –

    linewidth. Defaults to 1.

  • fill (bool, default: False ) –

    whether to fill the circle. Defaults to False.

Source code in src/rdworks/view.py
def set_style(
    self,
    name: str,
    rgba: tuple[float, float, float, float],
    r: float = 0.52,
    linewidth: int = 1,
    fill: bool = False,
) -> None:
    """Register a circle style under ``name``, replacing any existing one.

    Args:
        name (str): name of style
        rgba (tuple[float,float,float,float]): RGB(0-255) and opacity (0-1)
        r (float, optional): radius of circle. Defaults to 0.52.
        linewidth (int, optional): linewidth. Defaults to 1.
        fill (bool, optional): whether to fill the circle. Defaults to False.
    """
    spec = dict(r=r, rgba=rgba, linewidth=linewidth, fill=fill)
    self.style.update({name: spec})
show_styles()
Source code in src/rdworks/view.py
def show_styles(self) -> None:
    """Print a table of the registered styles, sorted by name."""
    print(f"{'Name':<16} {'r':<8} {'rgba':<24} {'linewidth':<10} {'fill':<10}")
    for k, v in sorted(self.style.items()):
        # str() first: a bool formatted with a width spec prints as 1/0.
        print(
            f"{k:<16} {v['r']:<8.2f} {str(v['rgba']):<24} {v['linewidth']:<10} {str(v['fill']):<10}"
        )
    print()

Functions

get_highlight_bonds(rdmol, atom_indices)

Get bond indices for bonds between atom indices.

Parameters:

  • rdmol (Mol) –

    rdkit Chem.Mol object.

  • atom_indices (list[int]) –

    atom indices.

Returns:

  • list[int] | None

    list[int]: bond indices, or None when no bond joins two of the given atoms.

Source code in src/rdworks/view.py
def get_highlight_bonds(rdmol: Chem.Mol, atom_indices: list[int]) -> list[int] | None:
    """Get bond indices for bonds between atom indices.

    A bond is selected when both its begin and end atoms are in
    ``atom_indices``.

    Args:
        rdmol (Chem.Mol): rdkit Chem.Mol object.
        atom_indices (list[int]): atom indices.

    Returns:
        list[int] | None: bond indices in molecule bond order, or None when no
        bond qualifies.
    """
    # A set makes each membership test O(1) instead of scanning the list
    # twice per bond.
    selected = set(atom_indices)
    bond_indices = [
        bond.GetIdx()
        for bond in rdmol.GetBonds()
        if bond.GetBeginAtomIdx() in selected and bond.GetEndAtomIdx() in selected
    ]
    # Callers receive None (not an empty list) when nothing matched.
    return bond_indices or None

render_2D_mol(rdmol, moldrawer, redraw=False, coordgen=False, legend='', atom_index=False, highlight_atoms=None, highlight_bonds=None)

Source code in src/rdworks/view.py
def render_2D_mol(
    rdmol: Chem.Mol,
    moldrawer: rdMolDraw2D,
    redraw: bool = False,
    coordgen: bool = False,
    legend: str = "",
    atom_index: bool = False,
    highlight_atoms: list[int] | None = None,
    highlight_bonds: list[int] | None = None,
) -> str:
    """Draw a molecule in 2D on the given drawer and return the drawing text.

    The input molecule is copied first. Coordinates are recomputed (after
    removing hydrogens) when ``redraw`` is set or the copy has no conformer;
    the depiction is then straightened and drawn.

    Args:
        rdmol (Chem.Mol): rdkit Chem.Mol object.
        moldrawer: drawer providing drawOptions(), DrawMolecule(),
            FinishDrawing() and GetDrawingText().
        redraw (bool, optional): whether to recompute 2D coordinates. Defaults to False.
        coordgen (bool, optional): value passed to rdDepictor.SetPreferCoordGen
            when coordinates are recomputed. Defaults to False.
        legend (str, optional): legend. Defaults to "".
        atom_index (bool, optional): whether to show atom indices. Defaults to False.
        highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
        highlight_bonds (list[int] | None, optional): bond(s) to highlight. When None
            and highlight_atoms is given, the bonds between those atoms are used.
            Defaults to None.

    Returns:
        str: whatever the drawer's GetDrawingText() returns.
    """
    mol = Chem.Mol(rdmol)  # never touch the caller's molecule

    needs_layout = redraw or mol.GetNumConformers() == 0
    if needs_layout:
        rdDepictor.SetPreferCoordGen(coordgen)
        mol = Chem.RemoveHs(mol)
        rdDepictor.Compute2DCoords(mol)

    rdDepictor.StraightenDepiction(mol)

    if highlight_atoms is not None and highlight_bonds is None:
        # highlight bonds between the highlighted atoms
        highlight_bonds = get_highlight_bonds(mol, highlight_atoms)

    # Only these four options are set here; all others keep the drawer's
    # current values.
    options = moldrawer.drawOptions()
    options.addAtomIndices = atom_index
    options.atomLabelDeuteriumTritium = True  # D, T
    options.singleColourWedgeBonds = True
    options.addStereoAnnotation = True

    moldrawer.DrawMolecule(
        mol,
        legend=legend,
        highlightAtoms=highlight_atoms,
        highlightBonds=highlight_bonds,
    )
    moldrawer.FinishDrawing()

    return moldrawer.GetDrawingText()

render_matrix_grid(rdmol, legend, highlight_atoms=None, highlight_bonds=None, mols_per_row=5, width=200, height=200, atom_index=False, redraw=False, coordgen=False, svg=True)

Rendering a grid image from a list of molecules.

Parameters:

  • rdmol (list[Mol]) –

    list of rdkit Chem.Mol objects.

  • legend (list[str]) –

    list of legends

  • highlight_atoms (list[list[int]] | None, default: None ) –

    list of atom(s) to highlight. Defaults to None.

  • highlight_bonds (list[list[int]] | None, default: None ) –

    list of bond(s) to highlight. Defaults to None.

  • mols_per_row (int, default: 5 ) –

    molecules per row. Defaults to 5.

  • width (int, default: 200 ) –

    width. Defaults to 200.

  • height (int, default: 200 ) –

    height. Defaults to 200.

  • atom_index (bool, default: False ) –

    whether to show atom index. Defaults to False.

  • redraw (bool, default: False ) –

    whether to redraw 2D. Defaults to False.

  • coordgen (bool, default: False ) –

    whether to use coordgen to depict. Defaults to False.

Returns:

  • str | Image

    str | Image.Image: SVG string or PIL Image object.

Reference

https://greglandrum.github.io/rdkit-blog/posts/2023-10-25-molsmatrixtogridimage.html

Source code in src/rdworks/view.py
def render_matrix_grid(
    rdmol: list[Chem.Mol],
    legend: list[str] | None,
    highlight_atoms: list[list[int]] | None = None,
    highlight_bonds: list[list[int]] | None = None,
    mols_per_row: int = 5,
    width: int = 200,
    height: int = 200,
    atom_index: bool = False,
    redraw: bool = False,
    coordgen: bool = False,
    svg: bool = True,
) -> str | Image.Image:
    """Rendering a grid image from a list of molecules.

    Args:
        rdmol (list[Chem.Mol]): list of rdkit Chem.Mol objects.
        legend (list[str] | None): list of legends, one per molecule; None for
            empty legends.
        highlight_atoms (list[list[int]] | None, optional): list of atom(s) to highlight. Defaults to None.
        highlight_bonds (list[list[int]] | None, optional): list of bond(s) to highlight. Defaults to None.
        mols_per_row (int, optional): molecules per row. Defaults to 5.
        width (int, optional): width of each cell. Defaults to 200.
        height (int, optional): height of each cell. Defaults to 200.
        atom_index (bool, optional): whether to show atom index. Defaults to False.
        redraw (bool, optional): whether to redraw 2D. Defaults to False.
        coordgen (bool, optional): whether to use coordgen to depict; only
            consulted when redraw is True. Defaults to False.
        svg (bool, optional): whether to return SVG text instead of a PIL
            image. Defaults to True.

    Returns:
        str | Image.Image: SVG string or PIL Image object.

    Reference:
        https://greglandrum.github.io/rdkit-blog/posts/2023-10-25-molsmatrixtogridimage.html
    """

    n = len(rdmol)

    if isinstance(legend, list):
        assert len(legend) == n, "number of legends and molecules must be the same"
    elif legend is None:
        legend = [""] * n

    if isinstance(highlight_atoms, list):
        assert len(highlight_atoms) == n, (
            "number of highlights and molecules must be the same"
        )
    elif highlight_atoms is None:
        highlight_atoms = [()] * n

    if isinstance(highlight_bonds, list):
        assert len(highlight_bonds) == n, (
            "number of highlights and molecules must be the same"
        )
    elif highlight_bonds is None:
        highlight_bonds = [()] * n

    if redraw:
        # Previously accepted but ignored. Same steps render_2D_mol uses:
        # copy, strip hydrogens, recompute 2D coordinates.
        rdDepictor.SetPreferCoordGen(coordgen)
        redrawn = []
        for mol in rdmol:
            mol = Chem.RemoveHs(Chem.Mol(mol))
            rdDepictor.Compute2DCoords(mol)
            redrawn.append(mol)
        rdmol = redrawn

    extra = {}
    if atom_index:
        # Previously accepted but ignored. Passed only on request so the
        # default call is unchanged.
        draw_options = rdMolDraw2D.MolDrawOptions()
        draw_options.addAtomIndices = True
        extra["drawOptions"] = draw_options

    # Cut the flat lists into rows of at most mols_per_row entries.
    starts = range(0, n, mols_per_row)

    return MolsMatrixToGridImage(
        molsMatrix=[rdmol[i : (i + mols_per_row)] for i in starts],
        subImgSize=(width, height),
        legendsMatrix=[legend[i : (i + mols_per_row)] for i in starts],
        highlightAtomListsMatrix=[
            highlight_atoms[i : (i + mols_per_row)] for i in starts
        ],
        highlightBondListsMatrix=[
            highlight_bonds[i : (i + mols_per_row)] for i in starts
        ],
        useSVG=svg,
        returnPNG=False,  # whether to return PNG data (True) or a PIL object (False)
        **extra,
    )

render_png(rdmol, width=300, height=300, legend='', atom_index=False, highlight_atoms=None, highlight_bonds=None, redraw=False, coordgen=False, trim=True)

Draw 2D molecule in PNG format.

Parameters:

  • rdmol (Mol) –

    rdkit Chem.Mol object.

  • width (int, default: 300 ) –

    width. Defaults to 300.

  • height (int, default: 300 ) –

    height. Defaults to 300.

  • legend (str, default: '' ) –

    legend. Defaults to ''.

  • atom_index (bool, default: False ) –

    whether to show atom index. Defaults to False.

  • highlight_atoms (list[int] | None, default: None ) –

    atom(s) to highlight. Defaults to None.

  • highlight_bonds (list[int] | None, default: None ) –

    bond(s) to highlight. Defaults to None.

  • redraw (bool, default: False ) –

    whether to redraw. Defaults to False.

  • coordgen (bool, default: False ) –

    whether to use coordgen. Defaults to False.

Returns:

  • Image

    Image.Image: output PIL Image object.

Source code in src/rdworks/view.py
def render_png(
    rdmol: Chem.Mol,
    width: int = 300,
    height: int = 300,
    legend: str = "",
    atom_index: bool = False,
    highlight_atoms: list[int] | None = None,
    highlight_bonds: list[int] | None = None,
    redraw: bool = False,
    coordgen: bool = False,
    trim: bool = True,
) -> Image.Image:
    """Render a molecule as a 2D PNG and return it as a PIL image.

    Args:
        rdmol (Chem.Mol): rdkit Chem.Mol object.
        width (int, optional): width. Defaults to 300.
        height (int, optional): height. Defaults to 300.
        legend (str, optional): legend. Defaults to ''.
        atom_index (bool, optional): whether to show atom index. Defaults to False.
        highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
        highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
        redraw (bool, optional): whether to redraw. Defaults to False.
        coordgen (bool, optional): whether to use coordgen. Defaults to False.
        trim (bool, optional): whether to pass the image through trim_png
            before returning it. Defaults to True.

    Returns:
        Image.Image: output PIL Image object.
    """
    drawer = rdMolDraw2D.MolDraw2DCairo(width, height)
    png_data = render_2D_mol(
        rdmol,
        moldrawer=drawer,
        redraw=redraw,
        coordgen=coordgen,
        legend=legend,
        atom_index=atom_index,
        highlight_atoms=highlight_atoms,
        highlight_bonds=highlight_bonds,
    )

    # Wrap the drawing data in a file-like buffer so PIL can open it.
    image = Image.open(BytesIO(png_data))
    return trim_png(image) if trim else image

render_svg(rdmol, width=300, height=300, legend='', atom_index=False, highlight_atoms=None, highlight_bonds=None, redraw=False, coordgen=False, optimize=True)

Draw 2D molecule in SVG format.

Examples:

For Jupyter Notebook, wrap the output with SVG:

>>> from IPython.display import SVG
>>> SVG(libr[0].to_svg())

Parameters:

  • rdmol (Mol) –

    rdkit Chem.Mol object.

  • width (int, default: 300 ) –

    width. Defaults to 300.

  • height (int, default: 300 ) –

    height. Defaults to 300.

  • legend (str, default: '' ) –

    legend. Defaults to ''.

  • atom_index (bool, default: False ) –

    whether to show atom index. Defaults to False.

  • highlight_atoms (list[int] | None, default: None ) –

    atom(s) to highlight. Defaults to None.

  • highlight_bonds (list[int] | None, default: None ) –

    bond(s) to highlight. Defaults to None.

  • redraw (bool, default: False ) –

    whether to redraw. Defaults to False.

  • coordgen (bool, default: False ) –

    whether to use coordgen. Defaults to False.

  • optimize (bool, default: True ) –

    whether to optimize SVG string. Defaults to True.

Returns:

  • str ( str ) –

    SVG string

Source code in src/rdworks/view.py
def render_svg(
    rdmol: Chem.Mol,
    width: int = 300,
    height: int = 300,
    legend: str = "",
    atom_index: bool = False,
    highlight_atoms: list[int] | None = None,
    highlight_bonds: list[int] | None = None,
    redraw: bool = False,
    coordgen: bool = False,
    optimize: bool = True,
) -> str:
    """Render a 2D depiction of a molecule as an SVG string.

    The drawing itself is delegated to `render_2D_mol` using an
    `rdMolDraw2D.MolDraw2DSVG` canvas of the requested size. When `optimize`
    is true, the resulting SVG text is additionally passed through
    `scourString` before being returned.

    Examples:
        For a Jupyter notebook, wrap the output with SVG:

        >>> from IPython.display import SVG
        >>> SVG(libr[0].to_svg())

    Args:
        rdmol (Chem.Mol): rdkit Chem.Mol object.
        width (int, optional): canvas width. Defaults to 300.
        height (int, optional): canvas height. Defaults to 300.
        legend (str, optional): legend text. Defaults to ''.
        atom_index (bool, optional): whether to show atom index. Defaults to False.
        highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
        highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
        redraw (bool, optional): whether to redraw. Defaults to False.
        coordgen (bool, optional): whether to use coordgen. Defaults to False.
        optimize (bool, optional): whether to post-process the SVG string
            with `scourString`. Defaults to True.

    Returns:
        str: SVG string
    """
    drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
    raw_svg = render_2D_mol(
        rdmol,
        moldrawer=drawer,
        redraw=redraw,
        coordgen=coordgen,
        legend=legend,
        atom_index=atom_index,
        highlight_atoms=highlight_atoms,
        highlight_bonds=highlight_bonds,
    )

    # Guard clause: hand back the drawer output untouched when no
    # optimization was requested.
    if not optimize:
        return raw_svg

    # NOTE(review): the options are handed over as a plain dict; confirm
    # that scourString honours dict keys rather than expecting an
    # attribute-style options object.
    scour_options = dict(
        strip_comments=True,
        strip_ids=True,
        shorten_ids=True,
        compact_paths=True,
        indent_type="none",
    )
    return scourString(raw_svg, options=scour_options)

rescale(rdmol, factor=1.5)

Returns a copy of rdmol rescaled by a factor.

Parameters:

  • rdmol (Mol) –

    input molecule.

  • factor (float, default: 1.5 ) –

    scaling factor.

Returns:

  • Mol

    Chem.Mol: a copy of rescaled rdkit.Chem.Mol object.

Source code in src/rdworks/view.py
def rescale(rdmol: Chem.Mol, factor: float = 1.5) -> Chem.Mol:
    """Returns a copy of `rdmol` rescaled by a `factor`.

    The input molecule is copied first, so `rdmol` itself is not transformed.
    A 4x4 affine matrix is built with `factor` on the x, y and z diagonal and
    with the x and y translation terms set to minus the centroid of the
    copy's conformer; the matrix is then applied with `AllChem.TransformMol`.

    Args:
        rdmol (Chem.Mol): input molecule. `GetConformer()` is called on the
            copy, so the molecule needs a conformer.
        factor (float): scaling factor. Defaults to 1.5.

    Returns:
        Chem.Mol: a copy of rescaled rdkit.Chem.Mol object.
    """
    transformed_rdmol = Chem.Mol(rdmol)
    center = AllChem.ComputeCentroid(transformed_rdmol.GetConformer())
    # The `np.float` alias was removed from NumPy (1.24); the builtin `float`
    # selects the same float64 dtype and works on every NumPy version.
    tf = np.identity(4, dtype=float)
    # Translation column: only x and y are offset, z is left at zero.
    tf[0, 3] -= center[0]
    tf[1, 3] -= center[1]
    # Uniform scaling on all three spatial axes.
    tf[0, 0] = tf[1, 1] = tf[2, 2] = factor
    AllChem.TransformMol(transformed_rdmol, tf)
    return transformed_rdmol

rotate(rdmol, axis, degree)

Rotate rdmol around given axis and degree.

Input rdmol will be modified.

Parameters:

  • rdmol (Mol) –

    input molecule.

  • axis (str) –

    axis of rotation, 'x' or 'y' or 'z'.

  • degree (float) –

    degree of rotation.

Source code in src/rdworks/view.py
def rotate(rdmol: Chem.Mol, axis: str, degree: float) -> None:
    """Rotate `rdmol` around given axis and degree.

    Input `rdmol` will be modified: its conformer is transformed in place,
    and 2D coordinates are generated first if the molecule has no conformer.

    Args:
        rdmol (Chem.Mol): input molecule.
        axis (str): axis of rotation, 'x' or 'y' or 'z'.
        degree (float): degree of rotation.
    """
    # Check for a missing conformer explicitly instead of wrapping
    # GetConformer() in a bare `except:`, which would also swallow
    # KeyboardInterrupt/SystemExit and hide unrelated errors.
    if rdmol.GetNumConformers() == 0:
        AllChem.Compute2DCoords(rdmol)
    conf = rdmol.GetConformer()
    R = rotation_matrix(axis, degree)
    rdMolTransforms.TransformConformer(conf, R)

rotation_matrix(axis, degree)

Returns a numpy rotation matrix of shape (4,4).

Parameters:

  • axis (str) –

    'x' or 'y' or 'z'.

  • degree (float) –

    degree of rotation.

Returns:

  • ndarray

    np.ndarray: a numpy array of shape (4,4).

Source code in src/rdworks/view.py
def rotation_matrix(axis: str, degree: float) -> np.ndarray:
    """Returns a numpy rotation matrix of shape (4,4).

    The matrix is a homogeneous transform: the upper-left 3x3 block holds the
    rotation about the requested axis and the last row/column are those of
    the identity (no translation).

    Args:
        axis (str): 'x' or 'y' or 'z' (case-insensitive).
        degree (float): degree of rotation.

    Returns:
        np.ndarray: a numpy array of shape (4,4).

    Raises:
        ValueError: if `axis` is not one of 'x', 'y' or 'z'.
    """
    rad = (np.pi / 180.0) * degree
    c = np.cos(rad)
    s = np.sin(rad)
    # Normalize once instead of lowercasing in every branch.
    ax = axis.lower()
    if ax == "x":
        return np.array(
            [
                [1.0, 0.0, 0.0, 0.0],
                [0.0, c, -s, 0.0],
                [0.0, s, c, 0.0],
                [0.0, 0.0, 0.0, 1.0],
            ]
        )
    if ax == "y":
        return np.array(
            [
                [c, 0.0, s, 0.0],
                [0.0, 1.0, 0.0, 0.0],
                [-s, 0.0, c, 0.0],
                [0.0, 0.0, 0.0, 1.0],
            ]
        )
    if ax == "z":
        return np.array(
            [
                [c, -s, 0.0, 0.0],
                [s, c, 0.0, 0.0],
                [0.0, 0.0, 1.0, 0.0],
                [0.0, 0.0, 0.0, 1.0],
            ]
        )
    # Previously an unknown axis fell through and returned None, which only
    # failed later and far from the cause; fail here with a clear message.
    raise ValueError(f"axis must be 'x', 'y' or 'z', got {axis!r}")

trim_png(img)

Removes white margin around molecular drawing.

Parameters:

  • img (Image) –

    input PIL Image object.

Returns:

  • Image

    Image.Image: output PIL Image object.

Source code in src/rdworks/view.py
def trim_png(img: Image.Image) -> Image.Image:
    """Crop away the uniform margin surrounding a molecular drawing.

    The colour of the top-left pixel is taken as the background colour. The
    image is compared against a solid canvas of that colour and cropped to
    the bounding box of the region that differs.

    Args:
        img (Image.Image): input PIL Image object.

    Returns:
        Image.Image: the cropped image, or the input image itself when no
            bounding box is found.
    """
    corner_color = img.getpixel((0, 0))
    background = Image.new(img.mode, img.size, corner_color)
    delta = ImageChops.difference(img, background)
    # Combine the difference with itself using scale 2.0 and offset -100;
    # presumably this suppresses faint near-background differences before
    # the bounding box is computed (verify against ImageChops.add docs).
    delta = ImageChops.add(delta, delta, 2.0, -100)
    bounds = delta.getbbox()
    return img.crop(bounds) if bounds else img

rdworks.descriptor

Attributes

rd_descriptor = {'QED': 'Quantitative estimate of drug-likeness.', 'MolWt': 'Molecular weight', 'LogP': 'Predicted octanol/water partition coefficient', 'TPSA': 'Topological polar surface area', 'HBD': 'Number of hydrogen bonding donors', 'HBA': 'Number of hydrogen bonding acceptors', 'RotBonds': 'Number of rotatable bonds', 'RingCount': 'Number of rings', 'FCsp3': 'Fraction of SP3 carbons', 'HAC': 'Number of heavy atoms', 'Hetero': 'Number of hetero atoms (not H or C) [B,N,O,P,S,F,Cl,Br,I]', 'LipinskiHBA': 'Number of hydrogen bonding acceptors according to the Lipinski definition', 'LipinskiHBD': 'Number of hydrogen bonding donors according to the Lipinski definition'} module-attribute

rd_descriptor_f = {'QED': QED.qed, 'MolWt': Descriptors.MolWt, 'HAC': Descriptors.HeavyAtomCount, 'LogP': Descriptors.MolLogP, 'TPSA': Descriptors.TPSA, 'HBA': rdMolDescriptors.CalcNumHBA, 'HBD': rdMolDescriptors.CalcNumHBD, 'RotBonds': rdMolDescriptors.CalcNumRotatableBonds, 'RingCount': rdMolDescriptors.CalcNumRings, 'FCsp3': rdMolDescriptors.CalcFractionCSP3, 'Hetero': rdMolDescriptors.CalcNumHeteroatoms, 'LipinskiHBA': rdMolDescriptors.CalcNumLipinskiHBA, 'LipinskiHBD': rdMolDescriptors.CalcNumLipinskiHBD} module-attribute

rdworks.xml

This module contains XML definitions for substructure and descriptor matching.

Available descriptors:

Name          | Description                               | RDKit function
------------- | ----------------------------------------- | --------------------------------------
HAC           | Num. of Non-H atoms                       | Descriptors.HeavyAtomCount
HBA           | Num. of H-bond acceptors                  | Descriptors.NumHAcceptors
HBD           | Num. of H-bond donors                     | Descriptors.NumHDonors
LipinskiHBA   | Num. of Lipinski H-bond acceptors         | rdMolDescriptors.CalcNumLipinskiHBA
LipinskiHBD   | Num. of Lipinski H-bond donors            | rdMolDescriptors.CalcNumLipinskiHBD
MolWt         | Molecular weight                          | Descriptors.MolWt
TPSA          | Topological polar surface area            | Descriptors.TPSA
LogP          | log(octanol/water partition coefficient)  | Descriptors.MolLogP
RotBonds      | Num. of rotatable bonds                   | Descriptors.NumRotatableBonds
RingCount     | Num. of rings                             | Descriptors.RingCount
FCsp3         | fraction of C atoms that are Sp3          | Descriptors.FractionCSP3
rdHBD         | Num. of H-bond donors                     | rdMolDescriptors.CalcNumHBD
rdHBA         | Num. of H-bond acceptors                  | rdMolDescriptors.CalcNumHBA
rdRingCount   | Num. of rings                             | rdMolDescriptors.CalcNumRings
rdRotBondst   | Num. of rotatable bonds                   | rdMolDescriptors.CalcNumRotatableBonds
rdFCsp3       | fraction of C atoms that are Sp3          | rdMolDescriptors.CalcFractionCSP3
Hetero        | Num. of non-H and non-C atoms             | rdMolDescriptors.CalcNumHeteroatoms
ALogP         | Wildman-Crippen LogP value                | Crippen.MolLogP
QED           | Quantitative estimation of drug-likeness  | QED.qed
PSA           | MOE-like molecular surface area           | MolSurf.TPSA
StereoCenters | Num. of atom stereo centers               | rdMolDescriptors.CalcNumAtomStereoCenters

References:

1. `alert_collection.csv` is copied from Patrick Walters' blog and github:
    - http://practicalcheminformatics.blogspot.com/2018/08/filtering-chemical-libraries.html
    - https://github.com/PatWalters/rd_filters
1. Jeroen Kazius, Ross McGuire, and Roberta Bursi.
    Derivation and Validation of Toxicophores for Mutagenicity Prediction.
    J. Med. Chem. 2005, 48, 312-320.
1. J. F. Blake.
    Identification and Evaluation of Molecular Properties Related to Preclinical Optimization and Clinical Fate.
    Med Chem. 2005, 1, 649-55.
1. Mike Hann, Brian Hudson, Xiao Lewell, Rob Lifely, Luke Miller, and Nigel Ramsden.
    Strategic Pooling of Compounds for High-Throughput Screening.
    J. Chem. Inf. Comput. Sci. 1999, 39, 897-902.
1. Jonathan B. Baell and Georgina A. Holloway. New Substructure Filters for Removal of Pan Assay Interference Compounds (PAINS)
    from Screening Libraries and for Their Exclusion in Bioassays.
    J. Med. Chem. 2010, 53, 2719-2740.
1. Bradley C. Pearce, Michael J. Sofia, Andrew C. Good, Dieter M. Drexler, and David A. Stock.
    An Empirical Process for the Design of High-Throughput Screening Deck Filters.
    J. Chem. Inf. Model. 2006, 46, 1060-1068.
1. Ruth Brenk, Alessandro Schipani, Daniel James, Agata Krasowski, Ian Hugh Gilbert, Julie Frearson and Paul Graham Wyatt.
    Lessons learnt from assembling screening libraries for drug discovery for neglected diseases.
    ChemMedChem. 2008, 3, 435-44.
1. Sivaraman Dandapani, Gerard Rosse, Noel Southall, Joseph M. Salvino, Craig J. Thomas.
    Selecting, Acquiring, and Using Small Molecule Libraries for High-Throughput Screening.
    Curr Protoc Chem Biol. 2012, 4, 177-191.
1. Huth JR, Mendoza R, Olejniczak ET, Johnson RW, Cothron DA, Liu Y, Lerner CG, Chen J, Hajduk PJ.
    ALARM NMR: a rapid and robust experimental method to detect reactive false positives in biochemical screens.
    J Am Chem Soc. 2005, 127, 217-24.
    - identification of thiol reactive compounds by monitoring DTT-dependent 13-C chemical shift changes
    of the human La protein in the presence of a test compound

Attributes:

Attributes

predefined_xml = {'Zinc_fragment': {'Path': 'ZINC_fragment.xml', 'Description': "ZINC's fragment-like criteria", 'Reference': 'ZINC'}, 'Zinc_leadlike': {'Path': 'ZINC_leadlike.xml', 'Description': "ZINC's lead-like criteria", 'Reference': 'ZINC'}, 'Zinc_druglike': {'Path': 'ZINC_druglike.xml', 'Description': "ZINC's drug-like criteria", 'Reference': 'ZINC'}, 'fragment': {'Path': 'fragment.xml', 'Description': 'fragment', 'Reference': ''}, 'MLSMR': {'Path': 'ChEMBL_Walters/MLSMR.xml', 'Description': 'NIH Mol. Lib. Small Molecule Repository filters', 'Reference': 'Dandapani et al. (2012)'}, 'CNS': {'Path': 'CNS.xml', 'Description': 'CNS MPO descriptors', 'Reference': 'Wager et al. (2010)'}, 'PAINS': {'Path': 'Baell2010_PAINS/Baell2010A.xml', 'Description': 'Pan Assay Interference (>150 hits)', 'Reference': 'Baell et al. (2010)'}, 'Dundee': {'Path': 'ChEMBL_Walters/Dundee.xml', 'Description': 'Dundee NTD library filters', 'Reference': 'Brenk et al. (2008)'}, 'BMS': {'Path': 'ChEMBL_Walters/BMS.xml', 'Description': 'BMS HTS deck filters', 'Reference': 'Pearce et al. (2006)'}, 'LINT': {'Path': 'ChEMBL_Walters/LINT.xml', 'Description': 'Pfizer LINT filters', 'Reference': 'Blake (2005)'}, 'Toxicophore': {'Path': 'Kazius2005/Kazius2005.xml', 'Description': 'Toxicophores for mutagenicity', 'Reference': 'Kazius et al. (2005)'}, 'Glaxo': {'Path': 'ChEMBL_Walters/Glaxo.xml', 'Description': 'Glaxo hard filters', 'Reference': 'Hann et al. (1999)'}, 'Acid': {'Path': 'Hann1999_Glaxo/Hann1999Acid.xml', 'Description': 'acid', 'Reference': 'Hann et al. (1999)'}, 'Base': {'Path': 'Hann1999_Glaxo/Hann1999Base.xml', 'Description': 'base', 'Reference': 'Hann et al. (1999)'}, 'Nucleophile': {'Path': 'Hann1999_Glaxo/Hann1999NuPh.xml', 'Description': 'nucleophile', 'Reference': 'Hann et al. (1999)'}, 'Electrophile': {'Path': 'Hann1999_Glaxo/Hann1999ElPh.xml', 'Description': 'electrophile', 'Reference': 'Hann et al. 
(1999)'}, 'Inpharmatica': {'Path': 'ChEMBL_Walters/Inpharmatica.xml', 'Description': 'Inpharmatica unwanted fragments', 'Reference': 'ChEMBL'}, 'SureChEMBL': {'Path': 'ChEMBL_Walters/SureChEMBL.xml', 'Description': 'SureChEMBL filter', 'Reference': 'ChEMBL'}, 'Reactive': {'Path': 'misc/reactive.xml', 'Description': 'reactive functional groups', 'Reference': ''}, 'Astex_RO3': {'Path': 'Astex_RO3.xml', 'Description': 'Astex rule of 3', 'Reference': 'Astex'}, 'Asinex_fragment': {'Path': 'Asinex_fragment.xml', 'Description': "Asinex's fragment", 'Reference': 'Asinex'}} module-attribute

Functions

get_predefined_xml(name)

Returns matched predefined xml file.

Parameters:

  • name (str) –

    name of predefined entry.

Returns:

  • PathLike

    os.PathLike: path to the xml file.

Source code in src/rdworks/xml.py
def get_predefined_xml(name: str) -> os.PathLike:
    """Resolve a predefined entry name to the path of its xml file.

    Matching is case-insensitive and prefix-based: the first key of
    `predefined_xml` (in iteration order) whose upper-cased form starts with
    the upper-cased `name` is selected.

    Args:
        name (str): name (or leading part of the name) of a predefined entry.

    Returns:
        os.PathLike: path to the xml file inside the `rdworks.data` package.

    Raises:
        ValueError: if no predefined entry matches `name`.
    """
    wanted = name.upper()
    matched_key = next(
        (key for key in predefined_xml if key.upper().startswith(wanted)),
        None,
    )
    if matched_key is None:
        raise ValueError(f"is_matching() cannot find the xml file for {name}")
    datadir = importlib.resources.files("rdworks.data")
    return pathlib.Path(datadir / predefined_xml[matched_key]["Path"])

list_predefined_xml()

Returns text output of list of predefined xml.

Returns:

  • str ( str ) –

    text output of list of predefined xml

Source code in src/rdworks/xml.py
def list_predefined_xml() -> str:
    """Format the predefined xml entries as a text table.

    The table has one header row, one separator row, and one row per entry
    of `predefined_xml`, with left-aligned Name, Description and Reference
    columns of widths 18, 48 and 23.

    Returns:
        str: text output of list of predefined xml
    """
    header = f"\n| {'Name':<18} | {'Description':<48} | {'Reference':<23} |\n"
    separator = f"| {'-' * 18} | {'-' * 48} | {'-' * 23} |\n"
    rows = [
        f"| {key:<18} | {entry['Description']:<48} | {entry['Reference']:<23} |\n"
        for key, entry in predefined_xml.items()
    ]
    return "".join([header, separator, *rows])

parse_xml(path)

Parse an XML file.

Parameters:

  • path (PathLike) –

    filename of the xml.

Returns:

  • Tuple ( Tuple ) –

    parsed results.

Source code in src/rdworks/xml.py
def parse_xml(path: os.PathLike) -> Tuple:
    """Parse an XML file of substructure and descriptor definitions.

    Each direct child of the root element must carry a `name` attribute.
    `substructure` children contribute `(name, smarts, 0.0, 0.0)` where
    `smarts` is the text of their `SMARTS` sub-element. `descriptor`
    children contribute `(name, None, lb, ub)` where `lb`/`ub` are the float
    values of the optional `min`/`max` sub-elements, or None when absent.
    Children with any other tag are ignored.

    Args:
        path (os.PathLike): filename of the xml.

    Returns:
        Tuple: `(terms, combine)` where `terms` is the list of tuples
            described above and `combine` is the upper-cased `combine`
            attribute of the root element ("OR" when the attribute is
            missing).

    Raises:
        KeyError: if a child element has no `name` attribute.
    """
    root = ET.parse(path).getroot()
    # A missing attribute falls back to "OR"; using .get() avoids the bare
    # `except:` that would have hidden unrelated errors as well.
    combine = root.attrib.get("combine", "OR").upper()
    terms = []
    for child in root:
        name = child.attrib["name"]
        if child.tag == "substructure":
            smarts = child.find("SMARTS").text
            terms.append((name, smarts, 0.0, 0.0))
        elif child.tag == "descriptor":
            L = child.find("min")
            U = child.find("max")
            # Either bound may be omitted, in which case it stays None.
            lb = float(L.text) if L is not None else None
            ub = float(U.text) if U is not None else None
            terms.append((name, None, lb, ub))

    return (terms, combine)