B
    ž3Rc  ã               @   sà   d dl Z d dlZd dlmZ d dlmZ dd„ Zdd„ Zdd	„ Ze	d
krÜe
e jƒdkrˆedƒ edƒ edƒ edƒ edƒ e  d¡ xRe jD ]HZe ¡ Ze de¡Zed  Zed ZeeeƒZxeD ]Zeeƒ qÈW qW dS )é    N)ÚChem)ÚrdMMPAc             C   s|   d}d}x0| D ](}|  d¡}|dkr2d||f }q|}qW | d¡}t |¡}tj|dd}t |¡}tj|dd}||fS )NÚ Ú*é   z%s.%sÚ.T)ÚisomericSmiles)ÚcountÚlstripr   ÚMolFromSmilesÚMolToSmiles)Zf_arrayÚcoreÚside_chainsÚfZattachmentsZtemp© r   ú!share/RDKit/Contrib/mmpa/rfrag.pyÚfind_correct*   s    




r   c             C   sN  t  |¡}d}i }x„|D ]|}|d7 }| |d |d ¡ | t  d¡¡}	| |d |	t jj¡ | t  d¡¡}
| |d |
t jj¡ |||	< |||
< qW | ¡ }t j	|dd}d}| 
d¡}|dkrìd}x"|D ]}t d|¡}|rÎd}P qÎW |rJ|dkrlt d	d
|¡}| 
d¡}t  |d ¡}t  |d ¡}d| |t j	|ddt j	|ddf }||krJ| |¡ nÞ|dkrJx"|D ]}| |¡ || ¡ q|W t j	|dd}t dd
|¡}t dd|¡}t dd|¡}| 
d¡}t|ƒ\}}i }| 
d¡}xBtt|ƒƒD ]2}t d|| ¡}|rt|d ƒ|| d¡< qW |d dkr|t dd|d  d |¡}t dd|d  d |¡}|d dkr¾t dd|d  d |¡}t dd|d  d |¡}|dkr
|d dkr
t dd|d  d |¡}t dd|d  d |¡}t dd|¡}t dd|¡}d| |||f }||krJ| |¡ d S )Nr   r   T)r   r   é   Fz
\*.*\*.*\*z\[\*\]z[*:1]z%s,%s,,%s.%sé   z\[1\*\]z\[2\*\]z[*:2]z\[3\*\]z[*:3]z\[\*\:([123])\]Ú1z	\[\*\:1\]z[*:XXzXX]Ú2z	\[\*\:2\]Ú3z	\[\*\:3\]ZXXr   z%s,%s,%s,%s)r   ZEditableMolZ
RemoveBondZAddAtomZAtomZAddBondZBondTypeZSINGLEZGetMolr   ÚsplitÚreÚsearchÚsubr   ÚaddZGetAtomWithIdxZ
SetIsotoper   ÚrangeÚlenÚstrÚgroup)ÚsmiÚidÚmolZbondsÚoutZemZisotopeZisotope_trackÚiZnewAtomAZnewAtomBZmodifiedMolZfragmented_smi_noIsotopesZvalidZ	fragmentsr   ZmatchObjÚs1Ús2ÚoutputÚkeyZfragmented_smir   r   Zside_chain_fragmentsÚsr   r   r   Údelete_bondsC   s~    













r+   c             C   sˆ   t  | ¡}tƒ }|d kr*tj d|  ¡ nZtj|ddd}x2|D ]*\}}d| |||f }||kr@| |¡ q@W |s„| d| |f ¡ |S )NzCan't generate mol for: %s
z[#6+0;!$(*=,#[!#6])]!@!=!#[*]F)ÚpatternZresultsAsMolsz%s,%s,%s,%sz%s,%s,,)	r   r   ÚsetÚsysÚstderrÚwriter   ZFragmentMolr   )r!   Zcidr#   ZoutlinesZfragsr   Zchainsr(   r   r   r   Úfragment_mol·   s    
r1   Ú__main__r   z2Program that fragments a user input set of smiles.z^The program enumerates every single,double and triple acyclic single bond cuts in a molecule.
z!USAGE: ./rfrag.py <file_of_smilesz2Format of smiles file: SMILES ID (space separated)z)Output: whole mol smiles,ID,core,context
r   z\s|,)r.   r   Zrdkitr   Z
rdkit.Chemr   r   r+   r1   Ú__name__r   ÚargvÚprintÚexitÚstdinÚlineÚrstripr   Zline_fieldsZsmilesZcmpd_idÚoÚlr   r   r   r   Ú<module>$   s0   t


