B
    ž3Rc`^  ã               @   sŠ   d Z ddlZddlZddlmZ ddlmZ dZe	dd„ eD ƒƒZ
G dd	„ d	eƒZdd
d„Zedkr†ddlZG dd„ dejƒZe ¡  dS )aü   Implementation of the RECAP algorithm from Lewell et al. JCICS *38* 511-522 (1998)

The published algorithm is implemented more or less without
modification. The results are returned as a hierarchy of nodes instead
of just as a set of fragments. The hope is that this will allow a bit
more flexibility in working with the results.

For example:
>>> from rdkit import Chem
>>> from rdkit.Chem import Recap
>>> m = Chem.MolFromSmiles('C1CC1Oc1ccccc1-c1ncc(OC)cc1')
>>> res = Recap.RecapDecompose(m)
>>> res
<...Chem.Recap.RecapHierarchyNode object at ...>
>>> sorted(res.children.keys())
['*C1CC1', '*c1ccc(OC)cn1', '*c1ccccc1-c1ccc(OC)cn1', '*c1ccccc1OC1CC1']
>>> sorted(res.GetAllChildren().keys())
['*C1CC1', '*c1ccc(OC)cn1', '*c1ccccc1*', '*c1ccccc1-c1ccc(OC)cn1', '*c1ccccc1OC1CC1']

To get the standard set of RECAP results, use GetLeaves():
>>> leaves=res.GetLeaves()
>>> sorted(leaves.keys())
['*C1CC1', '*c1ccc(OC)cn1', '*c1ccccc1*']
>>> leaf = leaves['*C1CC1']
>>> leaf.mol
<...Chem.rdchem.Mol object at ...>


é    N)ÚChem)ÚrdChemReactions)z:[#7;+0;D2,D3:1]!@C(!@=O)!@[#7;+0;D2,D3:2]>>*[#7:1].[#7:2]*zD[C;!$(C([#7])[#7]):1](=!@[O:2])!@[#7;+0;!D1:3]>>*[C:1]=[O:2].*[#7:3]z.[C:1](=!@[O:2])!@[O;+0:3]>>*[C:1]=[O:2].[O:3]*zC[N;!D1;+0;!$(N-C=[#7,#8,#15,#16])](-!@[*:1])-!@[*:2]>>*[*:1].[*:2]*z&[#7;R;D3;+0:1]-!@[*:2]>>*[#7:1].[*:2]*z)[#6:1]-!@[O;+0]-!@[#6:2]>>[#6:1]*.*[#6:2]z[C:1]=!@[C:2]>>[C:1]*.*[C:2]z[n;+0:1]-!@[C:2]>>[n:1]*.[C:2]*z8[O:3]=[C:4]-@[N;+0:1]-!@[C:2]>>[O:3]=[C:4]-[N:1]*.[C:2]*z[c:1]-!@[c:2]>>[c:1]*.*[c:2]z[n;+0:1]-!@[c:2]>>[n:1]*.*[c:2]zC[#7;+0;D2,D3:1]-!@[S:2](=[O:3])=[O:4]>>[#7:1]*.*[S:2](=[O:3])=[O:4]c             C   s   g | ]}t  |¡‘qS © )Ú	ReactionsZReactionFromSmarts)Ú.0Úxr   r   ú/lib/python3.7/site-packages/rdkit/Chem/Recap.pyú
<listcomp>U   s    r	   c               @   sR   e Zd ZdZdZdZdZdZdd„ Zdd„ Z	dd„ Z
d	d
„ Zddd„Zdd„ ZdS )ÚRecapHierarchyNodez3 This class is used to hold the Recap hiearchy
    Nc             C   s   || _ i | _i | _d S )N)ÚmolÚchildrenÚparents)Úselfr   r   r   r   Ú__init__`   s    zRecapHierarchyNode.__init__c             C   s6   i }x,| j  ¡ D ]\}}|||< |j|dd qW |S )z4 returns a dictionary, keyed by SMILES, of children F)ÚterminalOnly)r   ÚitemsÚ_gacRecurse)r   ÚresÚsmiÚchildr   r   r   ÚGetAllChildrene   s
    z!RecapHierarchyNode.GetAllChildrenc             C   sB   i }x8| j  ¡ D ]*\}}t|j ƒs,|||< q|j|dd qW |S )zA returns a dictionary, keyed by SMILES, of leaf (terminal) nodes T)r   )r   r   Úlenr   )r   r   r   r   r   r   r   Ú	GetLeavesm   s    

zRecapHierarchyNode.GetLeavesc             C   sN   | j s| g}n<g }x6| j  ¡ D ](}x"| ¡ D ]}||kr,| |¡ q,W qW |S )zc returns all the nodes in the hierarchy tree that contain this
            node as a child
        )r   ÚvaluesÚgetUltimateParentsÚappend)r   r   ÚpZuPr   r   r   r   w   s    z%RecapHierarchyNode.getUltimateParentsFc             C   s@   x:| j  ¡ D ],\}}|r"t|j ƒs*|||< |j||d qW d S )N)r   )r   r   r   r   )r   r   r   r   r   r   r   r   r   …   s    zRecapHierarchyNode._gacRecursec             C   s   i | _ i | _d | _d S )N)r   r   r   )r   r   r   r   Ú__del__‹   s    zRecapHierarchyNode.__del__)F)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   Úsmilesr   r   r   r   r   r   r   r   r   r   r
   X   s   

r
   c                s  t  | d¡}|dkri }||kr(|| S t| ƒ}||_||i}|||< xÂ|r
tt|ƒƒ}| |¡}|jsnqJx–tt	ƒD ]ˆ\}	}
|r’|	|kr’qz|
 
|jf¡}|rzx\|D ]R‰ d}dd„ tˆ ƒD ƒ}| ¡  ‡ fdd„|D ƒ}|‰ x”ˆ D ]Œ\}}yt  |¡ W n tk
r   wìY nX t  |d¡}|dkrR| d¡}|| |k rrd	}P n | dd
¡ dd
¡dkrrd	}P ||_qìW |r¬x~ˆ D ]v\}}|j}||krØt|ƒ}||_t |¡|j|< ||j|< |||< |||< n"|| }t |¡|j|< ||j|< q†W q¬W qzW qJW |S )z0 returns the recap decomposition for a molecule é   NTc             S   s    g | ]\}}|j d d|f‘qS )T)ZonlyExplicit)ZGetNumAtoms)r   ÚidxÚprodr   r   r   r	   ¯   s   z"RecapDecompose.<locals>.<listcomp>c                s   g | ]\}}|ˆ | f‘qS r   r   )r   r   Úy)ÚprodSeqr   r   r	   ²   s    r   Ú*FÚ z())r)   ÚCZCCZCCC)r   ZMolToSmilesr
   r"   ÚnextÚiterÚpopr   Ú	enumerateÚ	reactionsZRunReactantsÚsortZSanitizeMolÚ	ExceptionÚcountÚreplaceÚpSmiÚweakrefÚproxyr   r   )r   ÚallNodesÚminFragmentSizeÚonlyUseReactionsZmSmir   Z
activePoolZnSmiZnodeZrxnIdxZreactionZpsZseqOkZtSeqZtsZnatsr%   r4   ZnDummiesZpNoder   )r'   r   ÚRecapDecompose‘   sn    







r:   Ú__main__c               @   sœ   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd „ Zd!d"„ Zd#d$„ Zd%S )&ÚTestCasec             C   sl   t  d¡}t|ƒ}|  |¡ |  t|j ¡ ƒdk¡ |  t| ¡  ¡ ƒdk¡ |  t| ¡  ¡ ƒdk¡ d S )NzC1CC1Oc1ccccc1-c1ncc(OC)cc1é   é   é   )	r   ÚMolFromSmilesr:   Ú
assertTruer   r   Úkeysr   r   )r   Úmr   r   r   r   Útest1à   s    

zTestCase.test1c             C   s0   t  d¡}t|ƒ}|  |¡ |  |ji k¡ d S )NÚCCCOCCC)r   r@   r:   rA   r   )r   rC   r   r   r   r   Útest2è   s    

zTestCase.test2c             C   s  i }t  d¡}t||d}|  |¡ |  t|j ¡ ƒdk¡ |  t| ¡ ƒdk¡ t  d¡}t||d}|  |¡ |  t|j ¡ ƒdk¡ |  t| ¡ ƒdk¡ |  d|k¡ |  d|k¡ t  d	¡}t||d}|  |¡ |  t|j ¡ ƒd
k¡ |  t| ¡ ƒdk¡ d S )Nzc1ccccc1-c1ncccc1)r7   é   r?   zCOc1ccccc1-c1ncccc1r>   z*c1ccccc1OCz	*c1ccccc1zC1CC1Oc1ccccc1-c1ncccc1r=   é
   )r   r@   r:   rA   r   r   rB   )r   r7   rC   r   r   r   r   Útest3î   s$    





zTestCase.test3c             C   sl   t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ |  d|k¡ d S )Nzc1ccccc1OC(Oc1ccccc1)Oc1ccccc1rG   z*C(*)*z	*c1ccccc1z*C(*)Oc1ccccc1)r   r@   r:   rA   r   r   rB   ZassertFalse)r   rC   r   Úksr   r   r   ÚtestSFNetIssue1801871  s    

zTestCase.testSFNetIssue1801871c             C   s^   t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ d S )NZC1CCCCN1CCCCrG   z	*N1CCCCC1z*CCCC)r   r@   r:   rA   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestSFNetIssue1804418  s    

zTestCase.testSFNetIssue1804418c             C   s  t  d¡}t|ƒ}|  |¡ |  |ji k¡ t|dd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ t  d¡}t|dd}|  |¡ |  |ji k¡ t  d¡}t|dd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ | ¡  ¡ }|  d	|k¡ d S )
NrE   r?   )r8   r#   z*CCCZCCCOCCZCCCOCCOCrG   z*CCOC)r   r@   r:   rA   r   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestMinFragmentSize  s*    






zTestCase.testMinFragmentSizec             C   s\  t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d	¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d
|k¡ t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ t  d¡}t|dgd}|  |¡ |  t|jƒdk¡ |  t| ¡ ƒdk¡ d S )NzC1CC1C(=O)NC1OC1r#   )r9   rG   z*C(=O)C1CC1z*NC1CO1zC1CC1C(=O)N(C)C1OC1z
*N(C)C1CO1zC1CC1C(=O)n1cccc1z*n1cccc1zC1CC1C(=O)CC1OC1r   zC1CCC(=O)NC1zCC(=O)NCzCC(=O)NzC(=O)NCCNC(=O)CCr=   r?   )r   r@   r:   rA   r   r   rB   r   )r   rC   r   rJ   r   r   r   ÚtestAmideRxn1  sV    















zTestCase.testAmideRxnc             C   sÔ   t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ d S )	NzC1CC1C(=O)OC1OC1rG   )r9   z*C(=O)C1CC1z*OC1CO1zC1CC1C(=O)CC1OC1r   zC1CCC(=O)OC1)r   r@   r:   rA   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestEsterRxne  s    





zTestCase.testEsterRxnc             C   s®  t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d	¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ t  d
¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ d S )NzC1CC1NC(=O)NC1OC1r   )r9   rG   z*NC1CC1z*NC1CO1zC1CC1NC(=O)N(C)C1OC1z
*N(C)C1CO1zC1CCNC(=O)NC1Czc1cccn1C(=O)NC1OC1z*n1cccc1zc1cccn1C(=O)n1c(C)ccc1z	*n1cccc1C)r   r@   r:   rA   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestUreaRxnx  s>    









zTestCase.testUreaRxnc             C   s0  t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ |  d|k¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ |  d|k¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d	|k¡ |  d|k¡ t  d
¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d	|k¡ |  d|k¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ d S )NzC1CC1N(C1NC1)C1OC1r?   z*C1CC1z*C1CO1z*C1CN1zc1ccccc1N(C1NC1)C1OC1z	*c1ccccc1zc1ccccc1N(c1ncccc1)C1OC1z	*c1ccccn1zc1ccccc1N(c1ncccc1)c1ccco1z*c1ccco1ZC1CCCCN1C1CC1rG   z	*N1CCCCC1ZC1CCC2N1CC2r   )r   r@   r:   rA   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestAmineRxn  sV    











zTestCase.testAmineRxnc             C   sD  t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d	¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d
|k¡ d S )NZC1CC1OC1OC1rG   z*C1CC1z*C1CO1ZC1CCCCO1r   Zc1ccccc1OC1OC1z	*c1ccccc1Zc1ccccc1Oc1ncccc1z	*c1ccccn1)r   r@   r:   rA   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestEtherRxnÏ  s2    







zTestCase.testEtherRxnc             C   s   t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ d S )NzClC=CBrrG   z*CClz*CBrzC1CC=CC1r   )r   r@   r:   rA   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestOlefinRxní  s    



zTestCase.testOlefinRxnc             C   s   t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ d S )NZc1cccn1CCCCrG   z*n1cccc1z*CCCCZc1ccc2n1CCCC2r   )r   r@   r:   rA   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestAromNAliphCRxnû  s    



zTestCase.testAromNAliphCRxnc             C   s–   t  d¡}t|dgd}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ d S )	NzC1CC(=O)N1CCCCé   )r9   rG   z	*N1CCC1=Oz*CCCCzO=C1CC2N1CCCC2r   )r   r@   r:   rA   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestLactamNAliphCRxn	  s    



zTestCase.testLactamNAliphCRxnc             C   s   t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ d S )NZc1ccccc1c1ncccc1rG   z	*c1ccccc1z	*c1ccccn1Zc1ccccc1C1CC1r   )r   r@   r:   rA   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestAromCAromCRxn  s    



zTestCase.testAromCAromCRxnc             C   s^   t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ d S )NZc1cccn1c1ccccc1rG   z*n1cccc1z	*c1ccccc1)r   r@   r:   rA   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestAromNAromCRxn%  s    

zTestCase.testAromNAromCRxnc             C   sê   t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ | ¡  ¡ }|  d|k¡ |  d|k¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ d S )	NzCCCNS(=O)(=O)CCrG   z*NCCCz*S(=O)(=O)CCzc1cccn1S(=O)(=O)CCz*n1cccc1zC1CNS(=O)(=O)CC1r   )r   r@   r:   rA   r   r   rB   )r   rC   r   rJ   r   r   r   ÚtestSulfonamideRxn.  s$    





zTestCase.testSulfonamideRxnc             C   s0  t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ t  d¡}t|ƒ}|  |¡ |  t| ¡ ƒdk¡ d S )	NZc1ccccc1n1cccc1rG   zc1ccccc1[n+]1ccccc1r   zC1CC1NC(=O)CCzC1CC1[NH+]C(=O)CCzC1CC1NC(=O)NC1CCC1zC1CC1[NH+]C(=O)[NH+]C1CCC1)r   r@   r:   rA   r   r   )r   rC   r   r   r   r   ÚtestSFNetIssue1881803D  s0    











zTestCase.testSFNetIssue1881803N)r   r   r    rD   rF   rI   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rV   rW   rX   rY   rZ   r   r   r   r   r<   Þ   s$   
	4%2	r<   )Nr   N)r!   Úsysr5   Zrdkitr   Z
rdkit.Chemr   r   ZreactionDefsÚtupler/   Úobjectr
   r:   r   Zunittestr<   Úmainr   r   r   r   Ú<module>;   s   9
J   