B
    ž3Rcó=  ã               @   sš  d Z ddlZddlZddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlT ddlT ddlT ddlT ddlT ddlT ddlT ddlT ddlT ddlT ddlT ddlT ddlT ddlT dd	lmZ dd
lmZ ddlmZmZ yddl T W n e!k
r   Y nX e"e#_"e$e#_$eƒ Zd'dd„Z%d(dd„Z&d)dd„Z'd*dd„Z(d+dd„Z)d,dd„Z*ddde+fd d!„Z,d"d#„ Z-d-d$d%„Z.e/d&kr–e.ƒ  dS ).z% Import all RDKit chemistry modules

é    N)Ú
namedtuple)ÚDataStructs)Ú
ForceField)ÚRDConfig)ÚrdBase)Ú*)Ú
rdGeometry)Úlogger)ÚStereoEnumerationOptionsÚEnumerateStereoisomerséÿÿÿÿFc             C   sj   |   |¡}t||ƒ |sf|dkr$d}dd„ |  ¡ D ƒ}x|D ]}||ks<|  |¡ q<W |   |¡ d¡ dS )z  Applies the transformation (usually a 4x4 double matrix) to a molecule
    if keepConfs is False then all but that conformer are removed
    r   r   c             S   s   g | ]}|  ¡ ‘qS © )ÚGetId)Ú.0Úcr   r   ú1lib/python3.7/site-packages/rdkit/Chem/AllChem.pyú
<listcomp><   s    z TransformMol.<locals>.<listcomp>N)ÚGetConformerZTransformConformerÚGetConformersZRemoveConformerZSetId)ÚmolZtformÚconfIdZ	keepConfsZrefConfZ
allConfIdsZcidr   r   r   ÚTransformMol3   s    


r   ©é   r   r   ç      à?c             K   s2   t j|d |d |d |d}t| ||f|Ž |S )z; returns a grid representation of the molecule's shape
    r   é   é   )Úspacing)r   ÚUniformGrid3DÚEncodeShape)r   r   ZboxDimr   ÚkwargsZresr   r   r   ÚComputeMolShapeD   s    r!   çš™™™™™É?ç       @c             C   sØ   t  | ¡} |  |¡}t|dd t|ƒ}|d j|d j d|  |d j|d j d|  |d j|d j d|  f}tj	|d |d |d |d}t
| ||ddd |d	 }| ¡ }	d
d„ |	D ƒ}
|t|
ƒ }|S )aü   Calculates the volume of a particular conformer of a molecule
    based on a grid-encoding of the molecular shape.

    A bit of demo as well as a test of github #1883:

    >>> from rdkit import Chem
    >>> from rdkit.Chem import AllChem
    >>> mol = Chem.AddHs(Chem.MolFromSmiles('C'))
    >>> AllChem.EmbedMolecule(mol)
    0
    >>> ComputeMolVolume(mol)
    28...
    >>> mol = Chem.AddHs(Chem.MolFromSmiles('O'))
    >>> AllChem.EmbedMolecule(mol)
    0
    >>> ComputeMolVolume(mol)
    20...

    F)ÚignoreHsr   r   r   )r   g      ð?)r$   ZvdwScaleé   c             S   s   g | ]}|d krd‘qS )r%   r   r   )r   Úxr   r   r   r   j   s    z$ComputeMolVolume.<locals>.<listcomp>)ÚrdchemÚMolr   ZCanonicalizeConformerZComputeConfBoxr&   ÚyÚzr   r   r   ZGetOccupancyVectÚlen)r   r   ZgridSpacingZ	boxMarginÚconfZboxZsideLenÚshapeZvoxelVolZoccVectZvoxelsZvolr   r   r   ÚComputeMolVolumeL   s    

4r.   c       
      C   s–   |s,|rt | ||g|d nt | ||gd | j|d}| j|d}d}x6t|  ¡ ƒD ]&}| |¡ | |¡¡}	||	|	 7 }qVW ||  ¡  }t |¡S )aš   Returns the RMS between two conformations.
    By default, the conformers will be aligned to the first conformer
    before the RMS calculation and, as a side-effect, the second will be left
    in the aligned state.

    Arguments:
      - mol:        the molecule
      - confId1:    the id of the first conformer
      - confId2:    the id of the second conformer
      - atomIds:    (optional) list of atom ids to use a points for
                    alingment - defaults to all atoms
      - prealigned: (optional) by default the conformers are assumed
                    be unaligned and the second conformer be aligned
                    to the first

    )ÚconfIdsÚatomIds)r/   )Úidr   )ÚAlignMolConformersr   ÚrangeÚGetNumAtomsÚGetAtomPositionÚDistanceÚnumpyZsqrt)
r   ZconfId1ZconfId2r0   Ú
prealignedZconf1Zconf2ZssrÚiÚdr   r   r   ÚGetConformerRMSo   s    r;   c             C   sÚ   g }dd„ |   ¡ D ƒ}|s<|r.t| ||d qtt| |d n8x6tdt|ƒƒD ]$}| t| |d || ||d¡ qLW g }x\tdt|ƒƒD ]J}| ||d  ¡ x2td|ƒD ]$}| t| || || |dd¡ qªW qˆW |S )	aÀ   Returns the RMS matrix of the conformers of a molecule.
    As a side-effect, the conformers will be aligned to the first
    conformer (i.e. the reference) and will left in the aligned state.

    Arguments:
      - mol:     the molecule
      - atomIds: (optional) list of atom ids to use a points for
                 alingment - defaults to all atoms
      - prealigned: (optional) by default the conformers are assumed
                    be unaligned and will therefore be aligned to the
                    first conformer

    Note that the returned RMS matrix is symmetrical, i.e. it is the
    lower half of the matrix, e.g. for 5 conformers::

      rmsmatrix = [ a,
                    b, c,
                    d, e, f,
                    g, h, i, j]

    where a is the RMS between conformers 0 and 1, b is the RMS between
    conformers 0 and 2, etc.
    This way it can be directly used as distance matrix in e.g. Butina
    clustering.

    c             S   s   g | ]}|  ¡ ‘qS r   )r   )r   r,   r   r   r   r   ±   s    z)GetConformerRMSMatrix.<locals>.<listcomp>)r0   ÚRMSlist)r<   r   r   )r0   r8   T)r   r2   r3   r+   Úappendr;   )r   r0   r8   Zrmsvalsr/   r9   ZcmatÚjr   r   r   ÚGetConformerRMSMatrix“   s     (r?   c             #   s†   t |ƒ|  ¡ kr(tdt |ƒ|  ¡ f ƒ‚d‡ fdd„	‰ tddƒ}x@ˆ |ƒD ]4}|  |¡}x$|D ]}|rt|||ƒV  q^|V  q^W qJW dS )	a»   Returns a generator for the virtual library defined by
    a reaction and a sequence of sidechain sets

    >>> from rdkit import Chem
    >>> from rdkit.Chem import AllChem
    >>> s1=[Chem.MolFromSmiles(x) for x in ('NC','NCC')]
    >>> s2=[Chem.MolFromSmiles(x) for x in ('OC=O','OC(=O)C')]
    >>> rxn = AllChem.ReactionFromSmarts('[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]')
    >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[s2,s1])
    >>> [Chem.MolToSmiles(x[0]) for x in list(r)]
    ['CNC=O', 'CCNC=O', 'CNC(C)=O', 'CCNC(C)=O']

    Note that this is all done in a lazy manner, so "infinitely" large libraries can
    be done without worrying about running out of memory. Your patience will run out first:

    Define a set of 10000 amines:

    >>> amines = (Chem.MolFromSmiles('N'+'C'*x) for x in range(10000))

    ... a set of 10000 acids

    >>> acids = (Chem.MolFromSmiles('OC(=O)'+'C'*x) for x in range(10000))

    ... now the virtual library (1e8 compounds in principle):

    >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[acids,amines])

    ... look at the first 4 compounds:

    >>> [Chem.MolToSmiles(next(r)[0]) for x in range(4)]
    ['NC=O', 'CNC=O', 'CCNC=O', 'CCCNC=O']


    z#%d sidechains provided, %d requiredr   c             3   sb   x\| | D ]P}|d t | ƒk rRˆ | |d ƒ}x,|D ]}|g}| |¡ |V  q2W q
|gV  q
W d S )Nr   )r+   Úextend)ÚitemsZdepthÚitemÚvÚentryÚl)Ú_combiEnumeratorr   r   rF   ë   s    

z6EnumerateLibraryFromReaction.<locals>._combiEnumeratorÚProductReactantszproducts,reactantsN)r   )r+   ZGetNumReactantTemplatesÚ
ValueErrorr   ZRunReactants)ZreactionZsidechainSetsZreturnReactantsrG   ZchainsZprodSetsZprodsr   )rF   r   ÚEnumerateLibraryFromReactionÄ   s    #


rI   Ti&	  c          	   K   s  |   |¡}|stdƒ‚i }| |¡}	x&t|ƒD ]\}
}|	 |
¡}|||< q.W t| f||dœ|—Ž}|dk rrtdƒ‚dd„ t|ƒD ƒ}|s<|| dd}x\t|ƒD ]P\}
}xFt|
d t|ƒƒD ]0}|| }||  || ¡}| 	||||d	¡ q¼W q W | 
¡  d
}| ¡ }x"|r*|r*| ¡ }|d8 }q
W t| ||d}nÈt| ||d}|| dd}| ¡ }xRt| ¡ ƒD ]B}
| |
¡}|j|j|j|jddd }| 	|||
 ddd	¡ qlW | 
¡  d
}|jddd}x(|rô|rô|jddd}|d8 }qÎW t| ||d}|  dt|ƒ¡ | S )aX   generates an embedding of a molecule where part of the molecule
    is constrained to have particular coordinates

    Arguments
      - mol: the molecule to embed
      - core: the molecule to use as a source of constraints
      - useTethers: (optional) if True, the final conformation will be
          optimized subject to a series of extra forces that pull the
          matching atoms to the positions of the core atoms. Otherwise
          simple distance constraints based on the core atoms will be
          used in the optimization.
      - coreConfId: (optional) id of the core conformation to use
      - randomSeed: (optional) seed for the random number generator


    An example, start by generating a template with a 3D structure:

    >>> from rdkit.Chem import AllChem
    >>> template = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1")
    >>> AllChem.EmbedMolecule(template)
    0
    >>> AllChem.UFFOptimizeMolecule(template)
    0

    Here's a molecule:

    >>> mol = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1-c3ccccc3")

    Now do the constrained embedding
    >>> mol = AllChem.ConstrainedEmbed(mol, template)

    Demonstrate that the positions are nearly the same with template:

    >>> import math
    >>> molp = mol.GetConformer().GetAtomPosition(0)
    >>> templatep = template.GetConformer().GetAtomPosition(0)
    >>> all(math.isclose(v, 0.0, abs_tol=0.01) for v in molp-templatep)
    True
    >>> molp = mol.GetConformer().GetAtomPosition(1)
    >>> templatep = template.GetConformer().GetAtomPosition(1)
    >>> all(math.isclose(v, 0.0, abs_tol=0.01) for v in molp-templatep)
    True

    zmolecule doesn't match the core)ÚcoordMapZ
randomSeedr   zCould not embed molecule.c             S   s   g | ]\}}||f‘qS r   r   )r   r9   r>   r   r   r   r   ;  s    z$ConstrainedEmbed.<locals>.<listcomp>)r   r   g      Y@é   )ZatomMapT)Zfixedg-Cëâ6?gü©ñÒMbP?)Z	energyTolZforceTolZEmbedRMS)ÚGetSubstructMatchrH   r   Ú	enumerater5   ZEmbedMoleculer3   r+   r6   ZAddDistanceConstraintZ
InitializeZMinimizeZAlignMolr4   ZAddExtraPointr&   r)   r*   ZSetPropÚstr)r   ZcoreZ
useTethersZ
coreConfIdZ
randomseedZgetForceFieldr    ÚmatchrJ   ZcoreConfr9   ZidxIZcorePtIZciZalgMapZffr>   ZidxJr:   ÚnZmoreZrmsr,   ÚpZpIdxr   r   r   ÚConstrainedEmbed   sT    .



rR   c             C   sÄ  t  | ¡}t  |¡}| |¡}|sÀx4| ¡ D ](}| ¡ tjkr.| tj¡ | d¡ q.W x&| ¡ D ]}| tj¡ | d¡ qdW x| 	¡ D ]}| 
d¡ qŒW x| 	¡ D ]}| 
d¡ q¨W |j|dd}|r¸t|ƒdkrät d¡ |d }xP|  ¡ D ]D}|| ¡  }|| ¡  }| ||¡}	|	 | ¡ ¡ |	 | ¡ ¡ qöW x\|  	¡ D ]P}| || ¡  ¡}
|
 | ¡ ¡ |
 | ¡ ¡ |
 | ¡ ¡ |
 
| ¡ ¡ qHW t|ƒ t|dƒrÀd|_ntdƒ‚|S )	aO   assigns bond orders to a molecule based on the
    bond orders in a template molecule

    Arguments
      - refmol: the template molecule
      - mol: the molecule to assign bond orders to

    An example, start by generating a template from a SMILES
    and read in the PDB structure of the molecule

    >>> import os
    >>> from rdkit.Chem import AllChem
    >>> template = AllChem.MolFromSmiles("CN1C(=NC(C1=O)(c2ccccc2)c3ccccc3)N")
    >>> mol = AllChem.MolFromPDBFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4DJU_lig.pdb'))
    >>> len([1 for b in template.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
    8
    >>> len([1 for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
    22

    Now assign the bond orders based on the template molecule

    >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
    >>> len([1 for b in newMol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
    8

    Note that the template molecule should have no explicit hydrogens
    else the algorithm will fail.

    It also works if there are different formal charges (this was github issue 235):

    >>> template=AllChem.MolFromSmiles('CN(C)C(=O)Cc1ccc2c(c1)NC(=O)c3ccc(cc3N2)c4ccc(c(c4)OC)[N+](=O)[O-]')
    >>> mol = AllChem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4FTR_lig.mol'))
    >>> AllChem.MolToSmiles(mol)
    'COC1CC(C2CCC3C(O)NC4CC(CC(O)N(C)C)CCC4NC3C2)CCC1N(O)O'
    >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
    >>> AllChem.MolToSmiles(newMol)
    'COc1cc(-c2ccc3c(c2)Nc2ccc(CC(=O)N(C)C)cc2NC3=O)ccc1[N+](=O)[O-]'

    Fr   )Zuniquifyr   z2More than one matching pattern found - picking oneÚ
__sssAtomsNzNo matching found)r'   r(   rL   ZGetBondsZGetBondTypeZBondTypeZSINGLEZSetBondTypeZSetIsAromaticZGetAtomsZSetFormalChargeZGetSubstructMatchesr+   r	   ZwarningZGetBeginAtomIdxZGetEndAtomIdxZGetBondBetweenAtomsZGetIsAromaticZGetAtomWithIdxZGetIdxZSetHybridizationZGetHybridizationZSetNumExplicitHsZGetNumExplicitHsZGetFormalChargeZSanitizeMolÚhasattrrS   rH   )Zrefmolr   Zrefmol2Zmol2ZmatchingÚbÚaZatom1Zatom2Zb2Za2r   r   r   ÚAssignBondOrdersFromTemplateb  sJ    (



rW   c             C   s2   dd l }dd l}|j|j| d\}}| |¡ d S )Nr   )ZoptionflagsÚverbose)ÚsysÚdoctestZtestmodÚELLIPSISÚexit)rX   rY   rZ   ZfailedÚ_r   r   r   Ú_runDoctests¾  s    r^   Ú__main__)r   F)r   r   r   )r   r"   r#   )NF)NF)F)N)0Ú__doc__rY   ÚwarningsÚcollectionsr   r7   Zrdkitr   r   r   r   Z
rdkit.ChemZrdkit.Chem.ChemicalFeaturesZrdkit.Chem.rdChemReactionsZrdkit.Chem.rdDepictorZrdkit.Chem.rdDistGeomZrdkit.Chem.rdForceFieldHelpersZrdkit.Chem.rdMolAlignZrdkit.Chem.rdMolDescriptorsZrdkit.Chem.rdMolTransformsZrdkit.Chem.rdPartialChargesZrdkit.Chem.rdReducedGraphsZrdkit.Chem.rdShapeHelpersZrdkit.Chem.rdqueriesZrdkit.Chem.rdMolEnumeratorZrdkit.Geometryr   Zrdkit.RDLoggerr	   Z!rdkit.Chem.EnumerateStereoisomersr
   r   Zrdkit.Chem.rdSLNParseÚImportErrorZCompute2DCoordsr(   ZComputeGasteigerChargesr   r!   r.   r;   r?   rI   ZUFFGetMoleculeForceFieldrR   rW   r^   Ú__name__r   r   r   r   Ú<module>   sX   


#
$
1
<a\

