B
    3Rc9                 @   s  d dl mZ d dl mZ d dlmZ d dlmZ d dlmZ e Zd dl	Z	dZ
dZd	Zd dlZd dlZd d
lmZmZmZ dd ZG dd deZeddedZejdddedd ejdddgddd ejddddd ed!d ejd"d#d$d%d& ejd'd(d)d* ed+kre \ZZeed, d-Zed. eed  Z g Z!xbe"e D ]V\Z#Z$e$s`qNe%e$d	Z&e$'e
Z(e)e$'eZ*ee$ej+Z,e!-e(e&e,e*f qNW ed/e.e!  e.ed0krreed1 Z d	Z/ed2 g Z0xte"e D ]h\Z#Z$e$sqe%e$d	Z&e$'e
Z(e$1er6e)e$'eZ*nd Z*ee$ej+Z,e0-e(e&e,e*f qW ed/e.e0  nd(Z/e!Z0dge.e0 Z2x*e3e.e0D ]Z#dge.ej4 e2e#< qW ej5reej5d-Z6e7e6? d3f d1Z8ndZ6xe"ej4D ]~\Z9Z:e/sed4e:  ned5e:  xBe3e.e0D ]0Z#e/sNd6d7 e3e.e!D Z;ne!Z;e0e# Z<eej=g d8d9 d:e: d;Z>e>?e; g Z@e>jAe<ee@d<ZBe0e# \Z(Z&Z,ZCe6rxhe@D ]`\ZDZEeEdkrĐqeE\ZFZGZHZIeJe8e(e&eFeGeJeIeJeDd: gZKe8d17 Z8e7e6? d=LeKf qW d>d7 e@D Z@e(e&eCeBe.e@fe2e# e9< e#d1 d? s*ed@e#d1   q*W edA qW ej=Z=ej+d1 Z+edB dCdDdEgZMx@ej4D ]6Z:eM-dFeN   eM-dGeN   eM-dHeN   qW e7e? d=LeMf xe3e.e0D ]Z#e2e# d  d  Z(e2e# d  d1 Z&e2e# d  d0 ZCe(e&eJeCgZOxje3e.ej4D ]XZ9e2e# e9 dI Z@e2e# e9 dJ ZBeO-eJeB eO-eJePeCeB  eO-eJe@ qHW e7e? d=LeOf qW dS )K    )Chem)DataStructs)GetMolFingerprint)KNNRegressionModel)loggerNZCompound_origZchemical_shift_1T)OptionParserOptionOptionValueErrorc             C   s\   y2t |}t|tjtjfkr"tdd |D }W n$ tk
rV   td||f Y nX |S )Nc             S   s   g | ]}t |qS  )float).0xr
   r
   *share/RDKit/Contrib/LEF/DistancePredict.py
<listcomp>;   s    z#check_floatlist.<locals>.<listcomp>z(option %s : invalid float list value: %r)evaltypetypesZListTypeZ	TupleType
ValueErrorr	   )ZoptionZoptvaluevr
   r
   r   check_floatlist6   s    r   c               @   s*   e Zd Zejd ZeejZeed< dS )MyOption)	floatlistr   N)__name__
__module____qualname__r   ZTYPEScopyZTYPE_CHECKERr   r
   r
   r
   r   r   A   s   
r   zdistance predictz%prog)versionZoption_classz--maxPathLengthz--max   z'maximum length path for the fingerprint)defaultr   helpz--similarityThresholdz--simg?r   zthreshold for similarityz--numNeighborsz--numz-nz-k2   znumber of neighbors to considerz--neighborsFilez--nbrs z1name of an output file to hold the neighbor lists)r   r    z--scanF
store_true)r   action__main__zw+z6reading training molecules and generating fingerprintsz  got %d molecules      z5reading testing molecules and generating fingerprintszSID|CompoundName|CompoundSmiles|NeighborName|NeighborSmiles|NeighborShift|Similarityz*Doing cross validation with threshold %.2fz$Doing prediction with threshold %.2fc             C   s   g | ]}|t krt| qS r
   )itrain)r   r   r
   r
   r   r      s    r   c             G   s   dt | d |d  S )Nr(   )r   ZDiceSimilarity)r   yargsr
   r
   r   <lambda>       r.   g      ?)Zradius)weightedAverageZneighborList|c             C   s   g | ]}|d  dk	r|qS )r(   Nr
   )r   r   r
   r
   r   r      s    d   zDone %d moleculesz  donezcreating output filenameZsmilesshiftz9predShift_%(maxPathLength)d_%(numNeighbors)d_%(thresh).2fz5dPred_%(maxPathLength)d_%(numNeighbors)d_%(thresh).2fz4nbrs_%(maxPathLength)d_%(numNeighbors)d_%(thresh).2f      )QZrdkitr   r   Z	CreateFpsr   Zrdkit.ML.KNN.KNNRegressionModelr   Zrdkit.RDLoggerr   sysZ	nameFieldZ	propFieldr0   r   r   Zoptparser   r   r	   r   r   parserZ
add_optionintr   
parse_argsZoptionsr-   fileZoutFinfoZSDMolSupplierZsupplr*   	enumerater)   ZmolZMolToSmilesZsmiZGetPropZnmr   propertyZmaxPathLengthfpappendlenZhaveTestZtestZHasPropZresultsrangeZsimilarityThresholdZneighborsFileZnbrFileprintidjZthreshZ
localTrainZ	localTestZnumNeighborsZmdlZSetTrainingExamplesZnbrsZPredictExampleZpredZpropZdistdataZnnmZnsmiZnfpZ	npropertystrZoutRowjoinZheaderslocalsrowabsr
   r
   r
   r   <module>    s   







"


