B
    ž3Rcè-  ã            §   @   sÌ  d Z ddlmZ ddlmZ ddlmZ ddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~dd€ddd‚dƒd„d…d†d‡dˆd‰dŠd‹dŒddŽddd‘d’d“d”d•d–d—d˜d™dšd›dœddždŸd d¡d¢d£d¤d¥d¦d§d¨dd©œ¦Zdªad«d¬„ Zd­d®„ Z	ej
Zej
Zd¯d°„ Zed±krÈddªlZeƒ \ZZe e¡ dªS )²a£   SMARTS definitions for the publicly available MACCS keys
and a MACCS fingerprinter

I compared the MACCS fingerprints generated here with those from two
other packages (not MDL, unfortunately). Of course there are
disagreements between the various fingerprints still, but I think
these definitions work pretty well. Some notes:

1) most of the differences have to do with aromaticity
2) there's a discrepancy sometimes because the current RDKit
definitions do not require multiple matches to be distinct. e.g. the
SMILES C(=O)CC(=O) can match the (hypothetical) key O=CC twice in my
definition. It's not clear to me what the correct behavior is.
3) Some keys are not fully defined in the MDL documentation
4) Two keys, 125 and 166, have to be done outside of SMARTS.
5) Key 1 (ISOTOPE) isn't defined

Rev history:
2006 (gl): Original open-source release
May 2011 (gl): Update some definitions based on feedback from Andrew Dalke

é    )ÚChem)ÚrdMolDescriptors)ÚDataStructs)ú?r   )z[#104]r   )z%[#32,#33,#34,#50,#51,#52,#82,#83,#84]r   )z-[Ac,Th,Pa,U,Np,Pu,Am,Cm,Bk,Cf,Es,Fm,Md,No,Lr]r   )z[Sc,Ti,Y,Zr,Hf]r   )z.[La,Ce,Pr,Nd,Pm,Sm,Eu,Gd,Tb,Dy,Ho,Er,Tm,Yb,Lu]r   )z[V,Cr,Mn,Nb,Mo,Tc,Ta,W,Re]r   )z[!#6;!#1]1~*~*~*~1r   )z[Fe,Co,Ni,Ru,Rh,Pd,Os,Ir,Pt]r   )z[Be,Mg,Ca,Sr,Ba,Ra]r   )z
*1~*~*~*~1r   )z[Cu,Zn,Ag,Cd,Au,Hg]r   )z[#8]~[#7](~[#6])~[#6]r   )z[#16]-[#16]r   )z[#8]~[#6](~[#8])~[#8]r   )z[!#6;!#1]1~*~*~1r   )z	[#6]#[#6]r   )z[#5,#13,#31,#49,#81]r   )z*1~*~*~*~*~*~*~1r   )z[#14]r   )z[#6]=[#6](~[!#6;!#1])~[!#6;!#1]r   )z*1~*~*~1r   )z[#7]~[#6](~[#8])~[#8]r   )z	[#7]-[#8]r   )z[#7]~[#6](~[#7])~[#7]r   )z[#6]=;@[#6](@*)@*r   )z[I]r   )z[!#6;!#1]~[CH2]~[!#6;!#1]r   )z[#15]r   )z[#6]~[!#6;!#1](~[#6])(~[#6])~*r   )z[!#6;!#1]~[F,Cl,Br,I]r   )z[#6]~[#16]~[#7]r   )z
[#7]~[#16]r   )z[CH2]=*r   )z[Li,Na,K,Rb,Cs,Fr]r   )z[#16R]r   )z[#7]~[#6](~[#8])~[#7]r   )z[#7]~[#6](~[#6])~[#7]r   )z[#8]~[#16](~[#8])~[#8]r   )z
[#16]-[#8]r   )z	[#6]#[#7]r   )ÚFr   )z[!#6;!#1;!H0]~*~[!#6;!#1;!H0]r   )z3[!#1;!#6;!#7;!#8;!#9;!#14;!#15;!#16;!#17;!#35;!#53]r   )z[#6]=[#6]~[#7]r   )ZBrr   )z[#16]~*~[#7]r   )z[#8]~[!#6;!#1](~[#8])(~[#8])r   )z[!+0]r   )z[#6]=[#6](~[#6])~[#6]r   )z[#6]~[#16]~[#8]r   )z	[#7]~[#7]r   )z![!#6;!#1;!H0]~*~*~*~[!#6;!#1;!H0]r   )z[!#6;!#1;!H0]~*~*~[!#6;!#1;!H0]r   )z[#8]~[#16]~[#8]r   )z[#8]~[#7](~[#8])~[#6]r   )z[#8R]r   )z[!#6;!#1]~[#16]~[!#6;!#1]r   )z
[#16]!:*:*r   )z
[#16]=[#8]r   )z*~[#16](~*)~*r   )z*@*!@*@*r   )z	[#7]=[#8]r   )z
*@*!@[#16]r   )zc:nr   )z[#6]~[#6](~[#6])(~[#6])~*r   )z[!#6;!#1]~[#16]r   )z[!#6;!#1;!H0]~[!#6;!#1;!H0]r   )z[!#6;!#1]~[!#6;!#1;!H0]r   )z[!#6;!#1]~[#7]~[!#6;!#1]r   )z	[#7]~[#8]r   )z[#8]~*~*~[#8]r   )z[#16]=*r   )z[CH3]~*~[CH3]r   )z	*!@[#7]@*r   )z[#6]=[#6](~*)~*r   )z[#7]~*~[#7]r   )z	[#6]=[#7]r   )z[#7]~*~*~[#7]r   )z[#7]~*~*~*~[#7]r   )z[#16]~*(~*)~*r   )z*~[CH2]~[!#6;!#1;!H0]r   )z[!#6;!#1]1~*~*~*~*~1r   )z[NH2]r   )z[#6]~[#7](~[#6])~[#6]r   )z[C;H2,H3][!#6;!#1][C;H2,H3]r   )z[F,Cl,Br,I]!@*@*r   )z[#16]r   )z[#8]~*~*~*~[#8]r   )zf[$([!#6;!#1;!H0]~*~*~[CH2]~*),$([!#6;!#1;!H0;R]1@[R]@[R]@[CH2;R]1),$([!#6;!#1;!H0]~[R]1@[R]@[CH2;R]1)]r   )z•[$([!#6;!#1;!H0]~*~*~*~[CH2]~*),$([!#6;!#1;!H0;R]1@[R]@[R]@[R]@[CH2;R]1),$([!#6;!#1;!H0]~[R]1@[R]@[R]@[CH2;R]1),$([!#6;!#1;!H0]~*~[R]1@[R]@[CH2;R]1)]r   )z[#8]~[#6](~[#7])~[#6]r   )z[!#6;!#1]~[CH3]r   )z[!#6;!#1]~[#7]r   )z[#7]~*~*~[#8]r   )z*1~*~*~*~*~1r   )z[#7]~*~*~*~[#8]r   )z[!#6;!#1]1~*~*~*~*~*~1r   )z	[#6]=[#6]r   )z*~[CH2]~[#7]r   )a_  [$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1)]r   )z[!#6;!#1]~[#8]r   )ZClr   )z[!#6;!#1;!H0]~*~[CH2]~*r   )z	*@*(@*)@*r   )z![!#6;!#1]~*(~[!#6;!#1])~[!#6;!#1]r   )z[F,Cl,Br,I]~*(~*)~*r   )z[CH3]~*~*~*~[CH2]~*r   )z*~[CH2]~[#8]r   )z[#7]~[#6]~[#8]r   )z[#7]~*~[CH2]~*r   )z*~*(~*)(~*)~*r   )z	[#8]!:*:*r   )z[CH3]~[CH2]~*r   )z[CH3]~*~[CH2]~*r   )z+[$([CH3]~*~*~[CH2]~*),$([CH3]~*1~*~[CH2]1)]r   )z[#7]~*~[#8]r   )z'[$(*~[CH2]~[CH2]~*),$(*1~[CH2]~[CH2]1)]é   )z[#7]=*r   )z[!#6;R]r   )z[#7;R]r   )z*~[#7](~*)~*r   )z[#8]~[#6]~[#8]r   )z[!#6;!#1]~[!#6;!#1]r   )z
*!@[#8]!@*r   )z	*@*!@[#8]r   )z[$(*~[CH2]~*~*~*~[CH2]~*),$([R]1@[CH2;R]@[R]@[R]@[R]@[CH2;R]1),$(*~[CH2]~[R]1@[R]@[R]@[CH2;R]1),$(*~[CH2]~*~[R]1@[R]@[CH2;R]1)]r   )zT[$(*~[CH2]~*~*~[CH2]~*),$([R]1@[CH2]@[R]@[R]@[CH2;R]1),$(*~[CH2]~[R]1@[R]@[CH2;R]1)]r   )z[!#6;!#1]~[!#6;!#1]r   )z[!#6;!#1;!H0]r   )z[#8]~*~[CH2]~*r   )z	*@*!@[#7]r   )z[F,Cl,Br,I]r   )z	[#7]!:*:*r   )z[#8]=*r   )z	[!C;!c;R]r   )z[!#6;!#1]~[CH2]~*r   )z[O;!H0]r   )z[#8]é   )z[CH3]é   )z[#7]r   )z	*@*!@[#8]r   )z	*!:*:*!:*r   )z*1~*~*~*~*~*~1r   )z[#8]r	   )z-[$(*~[CH2]~[CH2]~*),$([R]1@[CH2;R]@[CH2;R]1)]r   )z*~[!#6;!#1](~*)~*r   )z	[C;H3,H4]r   )z	*!@*@*!@*r   )z[#7;!H0]r   )z[#8]~[#6](~[#6])~[#6]r   )z[!#6;!#1]~[CH2]~*r   )z	[#6]=[#8]r   )z*!@[CH2]!@*r   )z[#7]~*(~*)~*r   )z	[#6]-[#8]r   )z	[#6]-[#7]r   )z[#8]r   )z	[C;H3,H4]r   )z[#7]r   )Úar   )z*1~*~*~*~*~*~1r   )z[#8]r   )z[R]r   )¦r   r	   r   é   é   é   é   é   é	   é
   é   é   é   é   é   é   é   é   é   é   é   é   é   é   é   é   é   é   é   é   é   é    é!   é"   é#   é$   é%   é&   é'   é(   é)   é*   é+   é,   é-   é.   é/   é0   é1   é2   é3   é4   é5   é6   é7   é8   é9   é:   é;   é<   é=   é>   é?   é@   éA   éB   éC   éD   éE   éF   éG   éH   éI   éJ   éK   éL   éM   éN   éO   éP   éQ   éR   éS   éT   éU   éV   éW   éX   éY   éZ   é[   é\   é]   é^   é_   é`   éa   éb   éc   éd   ée   éf   ég   éh   éi   éj   ék   él   ém   én   éo   ép   éq   ér   és   ét   éu   év   éw   éx   éy   éz   é{   é|   é}   é~   é   é€   é   é‚   éƒ   é„   é…   é†   é‡   éˆ   é‰   éŠ   é‹   éŒ   é   éŽ   é   é   é‘   é’   é“   é”   é•   é–   é—   é˜   é™   éš   é›   éœ   é   éž   éŸ   é    é¡   é¢   é£   é¤   é¥   é¦   Nc             C   sv   t | ƒt | ¡ ƒkstdƒ‚xT| ¡ D ]H}|| \}}|dkr&t |¡}|s^td||f ƒ q&||f| |d < q&W dS )zM *Internal Use Only*

   generates SMARTS patterns for the keys, run once

  zlength mismatchr   z#SMARTS parser error for key #%d: %sr   N)ÚlenÚkeysÚAssertionErrorr   ZMolFromSmartsÚprint)ZkeyListZkeyDictÚkeyÚpattÚcountZsma© rµ   ú3lib/python3.7/site-packages/rdkit/Chem/MACCSkeys.pyÚ	_InitKeysÜ   s    
r·   c             K   sT  t dkr$dgtt ¡ ƒ a tt tƒ | dtj¡}|tt ƒd ƒ}x
tt ƒD  ]ü\}\}}|dk	r¤|dkr€|  	|¡||d < n"|  
|¡}t|ƒ|kr¢d||d < qN|d dkr|  ¡ }d}	d|d< x„| ¡ D ]H}
d}x |
D ]}|  |¡ ¡ sÞd}P qÞW |rÐ|	d7 }	|	dkrÐd|d< P qÐW qN|d d	krNd|d	< tt | ¡ƒdkrNd|d	< qNW |S )
a   generates the MACCS fingerprint for a molecules

   **Arguments**

     - mol: the molecule to be fingerprinted

     - any extra keyword arguments are ignored
     
   **Returns**

      a _DataStructs.SparseBitVect_ containing the fingerprint.

  >>> m = Chem.MolFromSmiles('CNO')
  >>> bv = GenMACCSKeys(m)
  >>> tuple(bv.GetOnBits())
  (24, 68, 69, 71, 93, 94, 102, 124, 131, 139, 151, 158, 160, 161, 164)
  >>> bv = GenMACCSKeys(Chem.MolFromSmiles('CCC'))
  >>> tuple(bv.GetOnBits())
  (74, 114, 149, 155, 160)

  N)Nr   Úctorr   r   r„   TFr­   )Ú	maccsKeysr®   ÚsmartsPattsr¯   r·   Úgetr   ZSparseBitVectÚ	enumerateZHasSubstructMatchZGetSubstructMatchesZGetRingInfoZ	BondRingsZGetBondWithIdxZGetIsAromaticr   ZGetMolFrags)ZmolÚkwargsr¸   ZresÚir³   r´   ZmatchesZriZnAromZringZisAromZbondIdxrµ   rµ   r¶   Ú_pyGenMACCSKeysí   s@    


r¿   c              C   s    dd l } dd l}|  |jd ¡S )Nr   Ú__main__)ÚdoctestÚsysZtestmodÚmodules)rÁ   rÂ   rµ   rµ   r¶   Ú_test3  s    rÄ   rÀ   )Ú__doc__Zrdkitr   Z
rdkit.Chemr   r   rº   r¹   r·   r¿   ZGetMACCSKeysFingerprintZGenMACCSKeysZFingerprintMolrÄ   Ú__name__rÂ   ZfailedZtriedÚexitrµ   rµ   rµ   r¶   Ú<module>    sf  >

