B
    3Rc$                 @   sZ  d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZ ddlZdZd$d	d
Zd%ddZd&ddZdd ZedkrVddlZy eejdd dd\ZZW n ek
r   e  Y nX dZdZdZdZdZdZxxeD ]p\ZZedkre ed ZqedkreZqedkr&eZqedkr6dZqedkrFdZqedkre eZqW g Z!edkrxeD ] Ze"e#edZ$e!%e$ qnW nned Z&eee&Z'erde Z(ndZ(ese'j)de(dZ*g Z!x.e*D ]&Z+e,e+d Z+e-e+Z.e!%e. qW e/e!ree!eed  nes4e0d! e1d" erVee'e(ed#Z2e2rVee2 dS )'a   command line utility to report on the contributions of descriptors to
tree-based composite models

Usage:  AnalyzeComposite [optional args] <models>

      <models>: file name(s) of pickled composite model(s)
        (this is the name of the db table if using a database)

    Optional Arguments:

      -n number: the number of levels of each model to consider

      -d dbname: the database from which to read the models

      -N Note: the note string to search for to pull models from the database

      -v: be verbose whilst screening
    N)	DbConnect)ScreenComposite)Stats)	TreeUtilsTreez2.2.0   c          
   C   sT  | d }t | }| }t |dkrLi }d}i }xj| D ]`}|dkrZtd td| t |}	|d7 }i }
xtt |D ]}||}t|tjr|t	|i d|}t
||d| xF| D ]:}|
|t|tj}|||   d|	 7  < ||
|< qW q|W x<|
D ]4}||t|tj}||
| | 7 }|||< qW |dkr:xL|
 D ]@}|| }ddd	 |
| D }td
||t|
| f  qRW t  q:W |dkrtd g }x|D ]z}|| }|dkrddd	 || D }td
||t|| f  |g}|||  |t||  || qW |dkrPt  ng }|S )Nr         z%#------------------------------------zDoing: g      ?z, c             S   s   g | ]}d | qS )z%4.2f ).0xr
   r
   8lib/python3.7/site-packages/rdkit/ML/AnalyzeComposite.py
<listcomp>N   s    zProcessIt.<locals>.<listcomp>z%s,%s,%5.4fz# Average Descriptor Positionsc             S   s   g | ]}d | qS )z%4.2fr
   )r   r   r
   r
   r   r   Y   s    )lenZGetDescriptorNamesprintrangeZGetModel
isinstancer   ZTreeNoder   ZCollectLabelLevelsZCollectDescriptorNameskeysgetnumpyzerosfloatjoinsumextendappend)
compositesZnToConsiderverbose	compositeZnCompositesnsZ	globalResZnDoneZ	descNamesZnModelsresimodelZlevelsZdescIdvknameZstrResZretValtmpr
   r
   r   	ProcessIt-   sb    


 




r'   r	   c              C   s  d}y| j ||d}W n$ tk
r:   dd l}|  d S X t|}|sXtjd d S t	|tj
}t	|tj
}d}	d}
t	|tj
}t	|tj
}d}d}d }d }x\t|D ]N}|| d d k	r|| d ||< |	|| d 7 }	|
|| d 7 }
|| d d k	r"|| d ||< d}nd}d	t|| d
  }|dkrVtj||d||< |rd	t|| d  }|dkrtj||d||< |d kr|| d
 d k	r|}|r|| d d k	r|}n||7 }|r||7 }|r||| d 7 }||| d 7 }qW t|| }|	| }	|
| }
|| }t|}||d  }||8 }tt|d
 |d  }i }d| |d< d| |d< d|	 |d< d|
 |d< ||d< |d |d< d| |d< |dkrt|\}}||d< ||d< |rt|| }|| }|| }|| }t|}||d  }||8 }tt|d
 |d  }d| |d< d| |d< d| |d< d| |d< ||d< |d |d< d| |d< |dkrt|\}}||d < ||d!< |S )"Nzoverall_error,holdout_error,overall_result_matrix,holdout_result_matrix,overall_correct_conf,overall_incorrect_conf,holdout_correct_conf,holdout_incorrect_conf)fieldswherer   zno runs found
g              r	   g      ?r   )Ztgtr         d   ZoAvgZoDevZoCorrectConfZoIncorrectConf
oResultMatoBestIdxZoBestErr
oAvgEnrich
oDevEnrichhAvgZhDevZhCorrectConfZhIncorrectConf
hResultMathBestIdxZhBestErr
hAvgEnrich
hDevEnrich)GetData	Exception	traceback	print_excr   sysstderrwriter   r   r   r   evalr   ZCalcEnrichmentr   ZargsortZsqrtr   Z
MeanAndDev) connr)   enrichr(   datar:   ZnPtsZoverallZoverallEnrichZoCorConfZ
oInCorConfZholdoutZholdoutEnrichZhCorConfZ
hInCorConfZoverallMatrixZholdoutMatrixr!   ZhaveHoldoutZ
tmpOverallZ
tmpHoldoutZ
avgOverallZoSortZoMinZ
devOverallr    ZmeanZdevZ
avgHoldoutZhSortZhMinZ
devHoldoutr
   r
   r   
ErrorStatsf   s    






rC   c       	      C   s^  |   } | d d | d< d|  }d| krD| d d | d< |d|  7 }t| t  td td t| d	 }t|}t|d}xtt|D ]v}|| d
krd||< || }tddd x(tt|D ]}td||  dd qW tdd|||f  ||    qW tddd x"tt|D ]}tddd q$W t  tddd xPtt|D ]@}|| d
krtd||< tdd|||f  ||   dd qZW t  |dkrd| krtd|| d | d f  d| krZtd t| d }t|}t|d}xtt|D ]|}|| d
kr,d||< || }tddd x*tt|D ]}td||  dd qNW tdd|||f  ||    qW tddd x"tt|D ]}tddd qW t  tddd xPtt|D ]@}|| d
krd||< tdd|||f  ||   dd qW t  |dkrZd| krZtd|| d | d f  d S )Nr0   r	   z
# Error Statistics:
	Overall: %(oAvg)6.3f%% (%(oDev)6.3f)  %(oCorrectConf)4.1f/%(oIncorrectConf)4.1f
		Best: %(oBestIdx)d %(oBestErr)6.3f%%r3   r5   z{
	Holdout: %(hAvg)6.3f%% (%(hDev)6.3f)  %(hCorrectConf)4.1f/%(hIncorrectConf)4.1f
		Best: %(hBestIdx)d %(hBestErr)6.3f%%
  z# Results matrices:z		Overall:r/   r   z		 )endz% 6.2fz		| % 4.2fg      Y@z------r1   z		Enrich(%d): %.3f (%.3f)r2   r4   z		Holdout:r6   r7   )copyr   r   Z	transposer   r   r   )	ZstatDrA   Ztxtr&   Z	colCountsZ	rowCountsr!   rowjr
   r
   r   	ShowStats   sx    

$*

&*rJ   c               C   s   t t td d S )NrF   )r   __doc__r<   exitr
   r
   r
   r   Usage  s    rM   __main__zn:d:N:vX)skipzenrich=rD   z-nz-dz-Nz-vz--skipz--enrichrbzwhere note='%s'r"   )r(   r)   )r   z ERROR: no composite models foundrF   )rA   )r   r   )r	   )r	   )3rK   r<   r   Zrdkit.Dbase.DbConnectionr   Zrdkit.MLr   Zrdkit.ML.Datar   Zrdkit.ML.DecTreer   r   pickleZ__VERSION_STRINGr'   rC   rJ   rM   __name__ZgetoptargvargsZextrasr9   countZdbZnoter   rO   rA   argvalintr   loadopenr   r   Ztblr@   r)   r8   ZpklsZpklstrloadscompr   r   rL   r    r
   r
   r
   r   <module>   s~   
9
c
F
 












