B
    3Rc<	              
   @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ dd Z	dd Z
ed	kre	 Ze Zeejd
LZedd6Zx.eD ]&Ze d\ZZee
ee qW W dQ R X W dQ R X ejdeddZeejdd d dZeejd
ZxleD ]dZe dZed ZxFede ed dD ].Z!ee! Zee!d  Z"edee"e q2W qW e#  edkre$d dS )a  
This script performs fast clustering of SMILES

Clustering method is repeated bi section, the method looks like -k-means.
To use this script, the user needs to install bayon at first.
Input format: Tab separated SMILES strings (SMILES 	 molID 
 ...)
Please see more details in README.

    N)Chem)AllChemc              C   sN   t d} | jddd | jdddd | jd	d
dd | jddddd | S )z Create the argument parser z$Fast clustering for chemoinformaticsinputzfilename of input file)helpZ	nclustersNzthe number of clusters)metavarr   z--outputz(filename of output, tab separated formatzclustered.tsv)r   defaultz
--centroidZCENTROIDz6filename of centroid information. tab separated formatzcentroid.tsv)r   r   r   )argparseArgumentParseradd_argument)parser r   .share/RDKit/Contrib/Fastcluster/fastcluster.pygetArgParser   s    
r   c             C   sF   t |}t|d }| }x|D ]}|d|7 }q$W |d7 }|S )N   z
	FP_{}	1.0
)r   ZMolFromSmilesr   ZGetMorganFingerprintAsBitVectZ	GetOnBitsformat)molidsmilesZmolZonbitsrowbitr   r   r   smi2fp!   s    

r   __main__rzfp.tsvw	zCtime bayon -p -c {0.centroid} -n  {0.nclusters} fp.tsv > {0.output}T)shell.z
_parse.tsv   r   z{}	{}	CLS_ID_{}
zError running bayon)%__doc__r	   
subprocesspickleosZrdkitr   Z
rdkit.Chemr   r   r   __name__r   
parse_argsargsopenr   ZinputfZtempflinerstripsplitr   r   writeZcallr   ZresoutputZ	parsefileZ
cluster_idrangeleniZpointcloseexitr   r   r   r   <module>   s:   	


(
 
