B
    ž3RcO‹  ã               @   sˆ  d dl Z d dlZd dlZd dlZd dl Z d dlmZ d dlmZ dddddd	d	d
ddddddddddddœZ	dddgdddddgddgdd d!d"d#gd$d%d&d'gd(gd)d*d+d,d-d.gd/d0d1d2gd3d4d5d6gd7d8d9d:gd;d<d=gd>d?d@gdAgdBgfZ
e e
¡Zx|eD ]tZxneeƒD ]b\ZZe\ZZZe	dCe  Ze	dCe  Ze dDdEe¡Ze dDdEe¡ZdFeeeeef Zeee< qW qøW xVeD ]NZxFeD ]>Zye e¡Ze ¡  W n  ek
rº   eeƒ ‚ Y nX q€W qvW i Zx$e	 ¡ D ]\ZZe  e¡ee< qÖW g Z!x|ee
ƒD ]p\Z"Z#g Z$xVe#D ]N\Z%Z&Z'e	dGe%  Z(e	dGe&  Z)dHe(e'e)f Z*e  e*¡Z*e$ +e%e&e'e*f¡ qW e! +e$¡ q W e,dIdJ„ eD ƒƒa-g Z.xxeeƒD ]l\Z"Z/x`ee/ƒD ]T\ZZe 0dK¡\Z1Z2dLe2e1f Ze e¡Z3e 4dMe2¡Z5dNdJ„ e5D ƒe3_6e. +e3¡ q¦W q”W dadQdR„Z7dbdSdT„Z8dcdVdW„Z9e :dX¡Z;dddZd[„Z<d\d]„ Z=e>d^kr„d dl?Z?G d_d`„ d`e?j@ƒZ@e=ƒ \ZAZBeAr|e CeA¡ e? D¡  dS )eé    N)ÚChem)ÚrdChemReactionsz[C;D3]([#0,#6,#7,#8])(=O)z[O;D2]-;!@[#0,#6,#1]z[C;!D1;!$(C=*)]-;!@[#6]z:[N;!D1;!$(N=*);!$(N-[!#6;!#16;!#0;!#1]);!$([N;R]@[C;R]=O)]z[C;D3;!R](=O)-;!@[#0,#6,#7,#8]z[C;D2,D3]-[#6]z[C;!R;!D1]-;!@[#6]z[C;!R;!D1;!$(C!-*)]z![n;+0;$(n(:[c,n,o,s]):[c,n,o,s])]z[N;R;$(N(@C(=O))@[C,N,O,S])]z[S;D2](-;!@[#0,#6])z[S;D4]([#6,#0])(=O)(=O)z [C;$(C(-;@[C,N,O,S])-;@[N,O,S])]z[c;$(c(:[c,n,o,s]):[n,o,s])]z[C;$(C(-;@C)-;@C)]z[c;$(c(:c):c)])ZL1ZL3ZL4ZL5ZL6ZL7aZL7bz#L8ZL8ZL9ZL10ZL11ZL12ZL13ZL14ZL14bZL15ZL16ZL16b)Ú1Ú3ú-)r   Ú5r   )r   Ú10r   )r   Ú4r   )r   Ú13r   )r   Ú14r   )r   Ú15r   )r   Ú16r   )r	   r   r   )r	   Ú11r   )r   Z12r   )r   r   r   )r   r   r   )r   r
   r   )r   r   r   )Ú6r
   r   )r   r   r   )r   r   r   )r   r   r   )Z7aZ7bú=)Ú8Ú9r   )r   r   r   )r   r
   r   )r   r   r   )r   r   r   )r   r   r   )r   r
   r   )r   r   r   )r   r   r   )r   r   r   )r   r
   r   )r   r   r   )r   r   r   )r   r   r   )r   r
   r   )r   r   r   )r   r   r   )r   r   r   )r
   r   r   )r
   r   r   )r
   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   ÚLz	[a-z,A-Z]Ú z0[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]zL%sz[$(%s)]%s;!@[$(%s)]c             C   s   g | ]}d d„ |D ƒ‘qS )c             S   s   g | ]}t  |¡‘qS © )Ú	ReactionsÚReactionFromSmarts)Ú.0Úyr   r   ú/lib/python3.7/site-packages/rdkit/Chem/BRICS.pyú
<listcomp>á   s    z<listcomp>.<listcomp>r   )r   Úxr   r   r   r   á   s    r   z>>z%s>>%sz\[([0-9]+?)\*\]c             C   s   g | ]}t  d | ¡‘qS )z[%s*])r   ÚMolFromSmiles)r   r   r   r   r   r   é   s    FTc             c   s<  t  d¡}ttttƒƒƒ}tƒ }|r4tj|tjd i }x"t	 
¡ D ]\}}|  |¡||< qBW xÚ|D ]Ò}	|rŒt|	 dd… }
tj|
tjd nt|	 }
xž|
D ]–\}}}}|d|  rš|d|  sÀqš|  |¡}| d|¡}| d|¡}xL|D ]D}||krè|d |d f|krè| |¡ |d |d f||ffV  qèW qšW qbW dS )aö   returns the bonds in a molecule that BRICS would cleave

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(FindBRICSBonds(m))
    >>> res
    [((3, 2), ('3', '4')), ((3, 4), ('3', '4'))]

    a more complicated case:

    >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
    >>> res = list(FindBRICSBonds(m))
    >>> res
    [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

    we can also randomize the order of the results:

    >>> random.seed(23)
    >>> res = list(FindBRICSBonds(m,randomizeOrder=True))
    >>> sorted(res)
    [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

    Note that this is a generator function :

    >>> res = FindBRICSBonds(m)
    >>> res
    <generator object ...>
    >>> next(res)
    ((3, 2), ('3', '4'))

    >>> m = Chem.MolFromSmiles('CC=CC')
    >>> res = list(FindBRICSBonds(m))
    >>> sorted(res)
    [((1, 2), ('7', '7'))]

    make sure we don't match ring bonds:

    >>> m = Chem.MolFromSmiles('O=C1NCCC1')
    >>> list(FindBRICSBonds(m))
    []

    another nice one, make sure environment 8 doesn't match something connected
    to a ring atom:

    >>> m = Chem.MolFromSmiles('CC1(C)CCCCC1')
    >>> list(FindBRICSBonds(m))
    []

    z	[a-z,A-Z])ÚrandomNr   r   é   r   )ÚreÚcompileÚlistÚrangeÚlenÚbondMatchersÚsetr   ÚshuffleÚenvironMatchersÚitemsÚHasSubstructMatchZGetSubstructMatchesÚsubÚadd)ÚmolZrandomizeOrderÚsilentZletterÚindicesZ	bondsDoneZ
envMatchesÚenvÚpattÚgpIdxÚcompatsÚi1Úi2ÚbTypeZmatchesÚmatchr   r   r   ÚFindBRICSBondsí   s.    2




r8   c             C   s–  |s t  | ¡}|rt  |¡ |S t  | ¡}|  ¡ }g }xð|D ]è\}}	|\}
}|  |
|¡}| ¡ }| |
|¡ |	\}}t  d¡}| 	t
|ƒ¡ | d¡ |}|d7 }| |¡ | |
||¡ t  d¡}| 	t
|ƒ¡ | d¡ |}|d7 }| |¡ | |||¡ |  ¡ r<| ||f¡ | ||
f¡ q<W | ¡ }|r@t  |¡ |  ¡ r’xF|  ¡ D ]:}| | ¡ ¡}x$|D ]\}
}| |
| |¡¡ qlW qTW |S )aî   breaks the BRICS bonds in a molecule and returns the results

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2=BreakBRICSBonds(m)
    >>> Chem.MolToSmiles(m2,True)
    '[3*]O[3*].[4*]CC.[4*]CCC'

    a more complicated case:

    >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
    >>> m2=BreakBRICSBonds(m)
    >>> Chem.MolToSmiles(m2,True)
    '[16*]c1ccccc1.[3*]O[3*].[4*]CCC.[4*]CCC([6*])=O'


    can also specify a limited set of bonds to work with:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2 = BreakBRICSBonds(m,[((3, 2), ('3', '4'))])
    >>> Chem.MolToSmiles(m2,True)
    '[3*]OCC.[4*]CCC'

    this can be used as an alternate approach for doing a BRICS decomposition by
    following BreakBRICSBonds with a call to Chem.GetMolFrags:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2=BreakBRICSBonds(m)
    >>> frags = Chem.GetMolFrags(m2,asMols=True)
    >>> [Chem.MolToSmiles(x,True) for x in frags]
    ['[4*]CCC', '[3*]O[3*]', '[4*]CC']

    r   Tr   )r   ZFragmentOnBRICSBondsÚSanitizeMolZEditableMolÚGetNumAtomsZGetBondBetweenAtomsZGetBondTypeZ
RemoveBondZAtomZ
SetIsotopeÚintZSetNoImplicitZAddAtomZAddBondÚGetNumConformersÚappendZGetMolZGetConformersÚGetConformerZGetIdZSetAtomPositionÚGetAtomPosition)r-   ZbondsZsanitizer.   ÚresZeMolZnAtsZdummyPositionsr/   Z
dummyTypesZiaZibZobondZbondTypeZdaZdbZatomaZidxaZatombZidxbZconfZresConfZpar   r   r   ÚBreakBRICSBonds:  sP    "










rA   r   c                sf  t  | d¡}|dkrtƒ }||kr(tƒ S || i}	| |¡ || i}
xÞttƒD ]Ð\}}i }x¼|	rd}tt|	ƒƒ}|	 |¡} x|t|ƒD ]n\}}|r¨||f|kr¨qŒ|sÄt	dƒ t	t
| | ƒ | | f¡}|rŒ|sêt	|dt|ƒdƒ x|D ]‰ d}dd	„ tˆ ƒD ƒ}| ¡  x~|D ]v\}}ˆ | }yt  |¡ W n tk
rT   wY nX t  |d¡}|d
krŠ| d¡}|| |k rŠd}P ||_qW ‡ fdd	„|D ƒ}|‰ |ròd}xBˆ D ]:\}}|j}||krº|sà||	|< | |¡ ||
|< qºW qòW qŒW |s|s|s`| ||< q`W |}	qNW |sN|sN|sDt|	 ¡ ƒ}n|	 ¡ }n|sZ|}n|
 ¡ }|S )a
   returns the BRICS decomposition for a molecule

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m))
    >>> sorted(res)
    ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

    >>> res = list(BRICSDecompose(m,returnMols=True))
    >>> res[0]
    <rdkit.Chem.rdchem.Mol object ...>
    >>> smis = [Chem.MolToSmiles(x,True) for x in res]
    >>> sorted(smis)
    ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

    nexavar, an example from the paper (corrected):

    >>> m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
    >>> res = list(BRICSDecompose(m))
    >>> sorted(res)
    ['[1*]C([1*])=O', '[1*]C([6*])=O', '[14*]c1cc([16*])ccn1', '[16*]c1ccc(Cl)c([16*])c1', '[16*]c1ccc([16*])cc1', '[3*]O[3*]', '[5*]NC', '[5*]N[5*]', '[8*]C(F)(F)F']

    it's also possible to keep pieces that haven't been fully decomposed:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[3*]O[3*]', '[4*]CC', '[4*]CCC']

    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
    >>> sorted(res)
    ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[16*]c1cccc([16*])c1', '[3*]OCCC', '[3*]OC[8*]', '[3*]OCc1cccc(-c2ccccn2)c1', '[3*]OCc1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]', '[4*]Cc1cccc(-c2ccccn2)c1', '[4*]Cc1cccc([16*])c1', '[8*]COCCC']

    or to only do a single pass of decomposition:

    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m,singlePass=True))
    >>> sorted(res)
    ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[3*]OCCC', '[3*]OCc1cccc(-c2ccccn2)c1', '[4*]CCC', '[4*]Cc1cccc(-c2ccccn2)c1', '[8*]COCCC']

    setting a minimum size for the fragments:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=2))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=3))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[4*]CCC']
    >>> res = list(BRICSDecompose(m,minFragmentSize=2))
    >>> sorted(res)
    ['[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']


    r   NFz--------z->ZproductsTc             S   s    g | ]\}}|j d d|f‘qS )T)ZonlyExplicit)r:   )r   ÚidxÚprodr   r   r   r   ä  s   z"BRICSDecompose.<locals>.<listcomp>r   Ú*c                s   g | ]\}}|ˆ | f‘qS r   r   )r   r   r   )ÚprodSeqr   r   r   ô  s    )r   ÚMolToSmilesr&   r,   Ú	enumerateÚ	reactionsÚnextÚiterÚpopÚprintÚ	smartsGpsÚRunReactantsr$   Úsortr9   Ú	ExceptionÚcountÚpSmiÚkeysÚvalues)r-   ÚallNodesZminFragmentSizeZonlyUseReactionsr.   ZkeepNonLeafNodesÚ
singlePassZ
returnMolsZmSmiZ
activePoolZ	foundMolsr2   Z
reactionGpZnewPoolZmatchedZnSmiZrxnIdxZreactionÚpsZseqOkZtSeqZnatsrB   rC   rR   ZnDummiesÚtsr@   r   )rE   r   ÚBRICSDecomposeŠ  s€    <








rY   z[*]é   c          	   c   sÜ  t ƒ }|st| ƒ}|r.t|ƒ}tj|tjd |rLttƒ}tj|tjd nt}x„|D ]z}d}	d}
g }x|D ]ü}| |jd ¡rŠd}	| |jd ¡ržd}
xÎ| D ]Æ}d }| |jd ¡rÎ|
rÎ| ||f¡}| |jd ¡rð|	rð| ||f¡}|r¤xt|D ]l}|r,t 	|d d¡}||kr"qún
| 
|¡ |d  t¡r\| |d ¡ |sf|d V  qú|d V  qúW q¤W qrW |rX|dkrXxTt| ||||d |dD ]8}|rÈt 	|d¡}||kr¾q˜n
| 
|¡ |V  q˜W qXW d S )N)r   Fr   Tr   )ÚonlyCompleteMolsÚseedsÚuniquifyÚmaxDepthÚscrambleReagents)r&   r"   r   r'   ÚreverseReactionsr*   Ú	_matchersrN   r   rF   r,   ÚdummyPatternr=   Ú
BRICSBuild)Z	fragmentsr[   r\   r]   r_   r^   ÚseenZtempReactionsÚseedZseedIsR1ZseedIsR2Z	nextStepsÚrxnZfragmentrW   ÚprR   r   r   r   rc     sb    






rc   c              C   s,   dd l } dd l}| j|jd | j| j dS )Nr   Ú__main__)Zoptionflags)ÚdoctestÚsysZtestmodÚmodulesÚELLIPSISZNORMALIZE_WHITESPACE)ri   rj   r   r   r   Ú_testT  s    rm   rh   c               @   s|   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ ZdS )ÚTestCasec             C   sú  t  d¡}t|ƒ}|  |¡ |  t|ƒdk¡ t  d¡}t|ƒ}|  |¡ |  t|ƒdk|¡ t  d¡}t|ƒ}|  |¡ |  t|ƒdk|¡ t  d¡}t|ƒ}|  |¡ |  t|ƒdk|¡ t  d¡}t|ƒ}|  |¡ |  t|ƒdk¡ t  d¡}t|ƒ}|  |¡ |  t|ƒdk¡ t  d¡}t|ƒ}|  |¡ |  t|ƒdk¡ t  d	¡}t|ƒ}|  |¡ |  t|ƒdk¡ t  d
¡}t|ƒ}|  |¡ |  t|ƒdk|¡ |  d|k|¡ t  d¡}t|ƒ}|  |¡ |  t|ƒdk|¡ |  d|k|¡ d S )NzCC(=O)OCé   zCC(=O)N1CCC1=Ozc1ccccc1N(C)Czc1cccnc1N(C)Czo1ccnc1N(C)CZ
c1ccccc1OCZ	o1ccnc1OCZ	O1CCNC1OCZCCCSCCrZ   z[11*]S[11*]zCCNC(=O)C1CC1é   z	[5*]N[5*])r   r   rY   Ú
assertTruer$   )ÚselfÚmr@   r   r   r   Útest1`  sT    



















zTestCase.test1c             C   s4   t  d¡}t|ƒ}|  |¡ |  t|ƒdk|¡ d S )NzBCNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1é	   )r   r   rY   rq   r$   )rr   rs   r@   r   r   r   Útest2•  s
    
zTestCase.test2c             C   sd   t  d¡}t|ƒ}|  |¡ |  t|ƒdk|¡ |  d|k|¡ |  d|k|¡ |  d|k|¡ d S )Nz+FC(F)(F)C1=C(Cl)C=CC(NC(=O)NC2=CC=CC=C2)=C1é   z	[5*]N[5*]z[16*]c1ccccc1z[8*]C(F)(F)F)r   r   rY   rq   r$   )rr   rs   r@   r   r   r   Útest3  s    

zTestCase.test3c             C   s$  t ƒ }t d¡}t||d}|  |¡ |}|  t|ƒdk|¡ |  t|ƒdk|¡ t||d}|  |¡ |  t|ƒdk|¡ t d¡}t||d}|  |¡ | |¡ |  t|ƒdk|¡ |  t|ƒdk|¡ t d¡}t||d}|  |¡ | |¡ |  t|ƒd	k|¡ |  t|ƒd
k|¡ d S )NZc1ccccc1OCCC)rU   rZ   é   Zc1ccccc1OCCCCru   rp   zc1cc(C(=O)NCC)ccc1OCCCé   é   )r&   r   r   rY   rq   r$   ÚassertFalseÚupdate)rr   rU   rs   r@   Zleavesr   r   r   Útest4¦  s,    








zTestCase.test4c             C   sx   t ƒ }dddg}dd„ |D ƒ}t|ƒ}|  |¡ t|ƒ}|  t|ƒdk¡ dd„ |D ƒ}|  d|k¡ |  d	|k¡ d S )
Nz[14*]c1ncncn1z[16*]c1ccccc1z[14*]c1ncccc1c             S   s   g | ]}t  |¡‘qS r   )r   r   )r   r   r   r   r   r   Ç  s    z"TestCase.test5.<locals>.<listcomp>ry   c             S   s   g | ]}t  |d ¡‘qS )T)r   rF   )r   r   r   r   r   r   Ì  s    zc1ccc(-c2ccccc2)cc1zc1ccc(-c2ccccn2)cc1)r&   rc   rq   r"   r$   )rr   rU   Úfragsr@   Úsmisr   r   r   Útest5À  s    
zTestCase.test5c             C   sx   t ƒ }ddg}dd„ |D ƒ}t|ƒ}|  |¡ t|ƒ}dd„ |D ƒ}|  t|ƒdk|¡ |  d|k¡ |  d|k¡ d S )	Nz	[3*]O[3*]z[16*]c1ccccc1c             S   s   g | ]}t  |¡‘qS r   )r   r   )r   r   r   r   r   r   Ö  s    z#TestCase.test5a.<locals>.<listcomp>c             S   s   g | ]}t  |d ¡‘qS )T)r   rF   )r   r   r   r   r   r   Ú  s    ro   zc1ccc(Oc2ccccc2)cc1zc1ccc(-c2ccccc2)cc1)r&   rc   rq   r"   r$   )rr   rU   r   r@   r€   r   r   r   Útest5aÐ  s    
zTestCase.test5ac             C   sˆ   t ƒ }dddg}dd„ |D ƒ}t|ƒ}|  |¡ t|ƒ}|  t|ƒdk¡ dd„ |D ƒ}|  d|k¡ |  d	|k¡ |  d
|k|¡ d S )Nz[16*]c1ccccc1z[3*]OCz[9*]n1cccc1c             S   s   g | ]}t  |¡‘qS r   )r   r   )r   r   r   r   r   r   æ  s    z"TestCase.test6.<locals>.<listcomp>rZ   c             S   s   g | ]}t  |d ¡‘qS )T)r   rF   )r   r   r   r   r   r   ë  s    zc1ccc(-c2ccccc2)cc1Ú
COc1ccccc1zc1ccc(-n2cccc2)cc1)r&   rc   rq   r"   r$   )rr   rU   r   r@   r€   r   r   r   Útest6ß  s    
zTestCase.test6c             C   s†   t ƒ }dddg}dd„ |D ƒ}t|ƒ}|  |¡ t|ƒ}dd„ |D ƒ}|  t|ƒdk¡ |  d|k¡ |  d	|k¡ |  d
|k¡ d S )Nz[16*]c1ccccc1z[3*]OCz[3*]OCC(=O)[6*]c             S   s   g | ]}t  |¡‘qS r   )r   r   )r   r   r   r   r   r   ÷  s    z"TestCase.test7.<locals>.<listcomp>c             S   s   g | ]}t  |d ¡‘qS )T)r   rF   )r   r   r   r   r   r   û  s    rZ   zc1ccc(-c2ccccc2)cc1rƒ   zO=C(COc1ccccc1)c1ccccc1)r&   rc   rq   r"   r$   )rr   rU   r   r@   r€   r   r   r   Útest7ð  s    
zTestCase.test7c             C   sR  t  d¡ t d¡}t|ƒ}|  t|ƒdk|¡ dd„ |D ƒ}tt|dddƒ}x|D ]}t 	|¡ qVW d	d„ |D ƒ}|  
t|ƒd
¡ dddddg}dd„ |D ƒ}x |D ]}|  ||k||f¡ q¨W tt|dddƒ}x|D ]}t 	|¡ qÜW dd„ |D ƒ}|  
||¡ tt|dddƒ}x|D ]}t 	|¡ q W dd„ |D ƒ}|  ||¡ d S )Né   zn1cncnc1OCC(C1CC1)OC1CNC1rw   c             S   s   g | ]}t  |¡‘qS r   )r   r   )r   r   r   r   r   r     s    z"TestCase.test8.<locals>.<listcomp>rp   F)r^   r_   c             S   s   g | ]}t  |¡‘qS r   )r   rF   )r   r   r   r   r   r   
  s    é$   zn1cnc(C2CNC2)nc1zn1cnc(-c2ncncn2)nc1zC(OC1CNC1)C(C1CC1)OC1CNC1zn1cnc(OC(COC2CNC2)C2CC2)nc1zn1cnc(OCC(OC2CNC2)C2CNC2)nc1c             S   s   g | ]}t  t  |¡d ¡‘qS )T)r   rF   r   )r   r   r   r   r   r     s    c             S   s   g | ]}t  |¡‘qS r   )r   rF   )r   r   r   r   r   r     s    Tc             S   s   g | ]}t  |¡‘qS r   )r   rF   )r   r   r   r   r   r     s    )r   re   r   r   rY   rq   r$   r"   rc   r9   ÚassertEqualZassertNotEqual)rr   ÚbaseZcatalogZmsrs   rX   ÚtZms2r   r   r   Útest8  s2    





zTestCase.test8c             C   sz   t  d¡}t|ƒ}|  t|ƒd¡ |  d|k¡ |  d|k¡ t|dd}|  t|ƒd¡ |  d|k¡ |  d|k¡ d S )	Nz$CCOc1ccccc1c1ncc(c2nc(NCCCC)ncn2)cc1é   z	[3*]O[3*]z[14*]c1ncnc(NCCCC)n1T)rV   é   z[3*]OCC)r   r   rY   rˆ   r$   rq   r|   )rr   rs   r@   r   r   r   Útest9  s    
zTestCase.test9c             C   s(   t  d¡}t|ƒ}|  t|ƒd|¡ d S )NZC1CCCCN1c1ccccc1ro   )r   r   rY   rˆ   r$   )rr   rs   r@   r   r   r   Útest10*  s    
zTestCase.test10c             C   sæ  d}t  |¡}t|ƒ}t j|dd}|  t|ƒd¡ |  |d  ¡ d¡ |  |d  ¡ d¡ |  |d  ¡ d¡ | ¡ }|d  ¡ }x8td	ƒD ],}| 	|¡}| 	|¡}	|  ||	  
¡ d
¡ qŽW | 	d	¡}| 	d	¡}	|  ||	  
¡ d
¡ |d  ¡ }x<td	ƒD ]0}| 	|d ¡}| 	|¡}	|  ||	  
¡ d
¡ qüW | 	d	¡}| 	d	¡}	|  ||	  
¡ d
¡ |d  ¡ }x>tdƒD ]2}| 	|d	 ¡}| 	|¡}	|  ||	  
¡ d
¡ qnW | 	d¡}| 	d¡}	|  ||	  
¡ d
¡ | 	d	¡}| 	d¡}	|  ||	  
¡ d
¡ d}t  |¡}
|j|
 ¡ dd |  | ¡ d¡ t|ƒ}t j|dd}|  t|ƒd¡ |  |d  ¡ d¡ |  |d  ¡ d¡ |  |d  ¡ d¡ |  |d  ¡ d¡ |  |d  ¡ d¡ |  |d  ¡ d¡ | d¡}|d  d¡}x:td	ƒD ].}| 	|¡}| 	|¡}	|  ||	  
¡ d
¡ qäW | 	d	¡}| 	d	¡}	|  ||	  
¡ d
¡ |d  d¡}x>td	ƒD ]2}| 	|d ¡}| 	|¡}	|  ||	  
¡ d
¡ qVW | 	d	¡}| 	d	¡}	|  ||	  
¡ d
¡ |d  d¡}x>tdƒD ]2}| 	|d	 ¡}| 	|¡}	|  ||	  
¡ d
¡ qÌW | 	d¡}| 	d¡}	|  ||	  
¡ d
¡ | 	d	¡}| 	d¡}	|  ||	  
¡ d
¡ | d¡}|d  d¡}x:td	ƒD ].}| 	|¡}| 	|¡}	|  ||	  
¡ d
¡ qtW | 	d	¡}| 	d	¡}	|  ||	  
¡ d
¡ |d  d¡}x>td	ƒD ]2}| 	|d ¡}| 	|¡}	|  ||	  
¡ d
¡ qæW | 	d	¡}| 	d	¡}	|  ||	  
¡ d
¡ |d  d¡}x>tdƒD ]2}| 	|d	 ¡}| 	|¡}	|  ||	  
¡ d
¡ q\W | 	d¡}| 	d¡}	|  ||	  
¡ d
¡ | 	d	¡}| 	d¡}	|  ||	  
¡ d
¡ d S )NaŒ  
     RDKit          3D

 13 14  0  0  0  0  0  0  0  0999 V2000
   -1.2004    0.5900    0.6110 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.2328    1.3173    0.0343 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.4299    0.6533   -0.1500 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.3633   -0.7217   -0.3299 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1552   -1.3791   -0.2207 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.1425   -0.7969    0.5335 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.1458   -1.4244    0.4108 O   0  0  0  0  0  0  0  0  0  0  0  0
    1.2976   -0.7398   -0.1026 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.4889   -0.7939    0.5501 N   0  0  0  0  0  0  0  0  0  0  0  0
    3.4615    0.1460    0.3535 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.0116    1.4034   -0.0296 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.9786    1.4264   -0.9435 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.1399    0.3193   -0.9885 C   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  2  0
  2  3  1  0
  3  4  2  0
  4  5  1  0
  5  6  2  0
  6  7  1  0
  7  8  1  0
  8  9  2  0
  9 10  1  0
 10 11  2  0
 11 12  1  0
 12 13  2  0
  6  1  1  0
 13  8  1  0
M  END
T)ZasMolsrZ   r   rŒ   r   ro   ry   g        rw   aŒ  
     RDKit          2D

 13 14  0  0  0  0  0  0  0  0999 V2000
   -1.2990   -0.8654    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.5981   -1.6154    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.8971   -0.8654    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.8971    0.6346    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.5981    1.3846    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.2990    0.6346    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.0000    1.3846    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    1.2990    0.6346    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.2990   -0.8654    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    2.5981   -1.6154    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.8971   -0.8654    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.8971    0.6346    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.5981    1.3846    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  2  0
  2  3  1  0
  3  4  2  0
  4  5  1  0
  5  6  2  0
  6  7  1  0
  7  8  1  0
  8  9  2  0
  9 10  1  0
 10 11  2  0
 11 12  1  0
 12 13  2  0
  6  1  1  0
 13  8  1  0
M  END
)ZassignId)r   ZMolFromMolBlockrA   ZGetMolFragsrˆ   r$   r:   r>   r#   r?   ZLengthZAddConformerr<   )rr   Zmolblockrs   Úpiecesr   Zc1Zc2ÚiZp1Zp2Zm2r   r   r   Útest11/  sÒ    "












#


























zTestCase.test11c             C   sR   t  d¡}tt|ƒƒ}|  t|ƒd|¡ dd„ |D ƒ}| ¡  |  |ddg¡ d S )NzCCS(=O)(=O)NCCro   c             S   s   g | ]}|d  ‘qS )r   r   )r   r   r   r   r   r   ì  s    z#TestCase.test12.<locals>.<listcomp>)rw   ro   )ry   rw   )r   r   r"   r8   rˆ   r$   rO   )rr   rs   r@   ZatIdsr   r   r   Útest12è  s    
zTestCase.test12c             C   sR   t  d¡}t|ƒ}|  t|ƒd¡ |  d|k¡ t|ƒ}|  t j|ddd¡ d S )Nzc1ccccc1[C@H](C)NCrZ   z[4*][C@H]([8*])CT)ZisomericSmilesz%[16*]c1ccccc1.[4*][C@H]([8*])C.[5*]NC)r   r   rY   rˆ   r$   rq   rA   rF   )rr   rs   r@   r   r   r   ÚtestGithub1734ð  s    
zTestCase.testGithub1734N)Ú__name__Ú
__module__Ú__qualname__rt   rv   rx   r~   r   r‚   r„   r…   r‹   rŽ   r   r’   r“   r”   r   r   r   r   rn   ^  s   5	 :rn   )FT)NTT)Nr   NTFFF)TNTTrZ   )Er   Úcopyrj   r    Zrdkitr   Z
rdkit.Chemr   r   ZenvironsZreactionDefsÚdeepcopyrM   ZgprG   ÚjZdefnZg1Zg2ZbndZr1Zr2r+   Zsmar   rŠ   Z
InitializerP   rL   r(   r)   r0   ZMolFromSmartsr%   r‘   r3   Ztmpr4   r5   r6   Ze1Ze2r1   r=   ÚtuplerH   r`   ZrxnSetÚsplitZrsrW   rf   ÚfindallÚlabelsra   r8   rA   rY   r   rb   rc   rm   r•   Zunittestrn   ZfailedZtriedÚexitÚmainr   r   r   r   Ú<module>    s  








M
P 
 
 
@
   

