B
    ž3RcŒ  ã               @   sü  d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZmZ ddlmZmZ dd	lmZ ddlZe	 	¡ Zd
ZdZdd„ Zg fdd„Zdd„ Zd(dd„Zd)dd„Zd*dd„Zdd„ Zd+dd„Zdd„ Ze dkrøe!ej"ƒdk reƒ  d  #ej"¡e_$eeƒ eeƒ edd! ej%dkrrx¾e&ej%ƒD ](Z'ej( )d"e'd ej%f ¡ eeƒ qDW n†ej*rðyTddl+Z+ddl,Z+e+ -d#¡Z.e. /ee¡ e+j0 1d#¡Z0e0 2¡  e0 3d$d%¡ e0 4d&¡ W n e5k
rì   e6d'ƒ Y nX neeƒ dS ),a   command line utility for building composite models

#DOC

**Usage**

  BuildComposite [optional args] filename

Unless indicated otherwise (via command line arguments), _filename_ is
a QDAT file.

**Command Line Arguments**

  - -o *filename*: name of the output file for the pickled composite

  - -n *num*: number of separate models to add to the composite

  - -p *tablename*: store persistence data in the database
     in table *tablename*

  - -N *note*: attach some arbitrary text to the persistence data

  - -b *filename*: name of the text file to hold examples from the
     holdout set which are misclassified

  - -s: split the data into training and hold-out sets before building
     the composite

  - -f *frac*: the fraction of data to use in the training set when the
     data is split

  - -r: randomize the activities (for testing purposes).  This ignores
     the initial distribution of activity values and produces each
     possible activity value with equal likliehood.

  - -S: shuffle the activities (for testing purposes) This produces
     a permutation of the input activity values.

  - -l: locks the random number generator to give consistent sets
     of training and hold-out data.  This is primarily intended
     for testing purposes.

  - -B: use a so-called Bayesian composite model.

  - -d *database name*: instead of reading the data from a QDAT file,
     pull it from a database.  In this case, the _filename_ argument
     provides the name of the database table containing the data set.

  - -D: show a detailed breakdown of the composite model performance
     across the training and, when appropriate, hold-out sets.

  - -P *pickle file name*: write out the pickled data set to the file

  - -F *filter frac*: filters the data before training to change the
     distribution of activity values in the training set.  *filter
     frac* is the fraction of the training set that should have the
     target value.  **See note below on data filtering.**

  - -v *filter value*: filters the data before training to change the
     distribution of activity values in the training set. *filter
     value* is the target value to use in filtering.  **See note below
     on data filtering.**

  - --modelFiltFrac *model filter frac*: Similar to filter frac above,
     in this case the data is filtered for each model in the composite
     rather than a single overall filter for a composite. *model
     filter frac* is the fraction of the training set for each model
     that should have the target value (*model filter value*).

  - --modelFiltVal *model filter value*: target value to use for
     filtering data before training each model in the composite.

  - -t *threshold value*: use high-confidence predictions for the
     final analysis of the hold-out data.

  - -Q *list string*: the values of quantization bounds for the
     activity value.  See the _-q_ argument for the format of *list
     string*.

  - --nRuns *count*: build *count* composite models

  - --prune: prune any models built

  - -h: print a usage message and exit.

  - -V: print the version number and exit

  *-*-*-*-*-*-*-*- Tree-Related Options -*-*-*-*-*-*-*-*

  - -g: be less greedy when training the models.

  - -G *number*: force trees to be rooted at descriptor *number*.

  - -L *limit*: provide an (integer) limit on individual model
     complexity

  - -q *list string*: Add QuantTrees to the composite and use the list
     specified in *list string* as the number of target quantization
     bounds for each descriptor.  Don't forget to include 0's at the
     beginning and end of *list string* for the name and value fields.
     For example, if there are 4 descriptors and you want 2 quant
     bounds apiece, you would use _-q "[0,2,2,2,2,0]"_.
     Two special cases:
       1) If you would like to ignore a descriptor in the model
          building, use '-1' for its number of quant bounds.
       2) If you have integer valued data that should not be quantized
          further, enter 0 for that descriptor.

  - --recycle: allow descriptors to be used more than once in a tree

  - --randomDescriptors=val: toggles growing random forests with val
      randomly-selected descriptors available at each node.


  *-*-*-*-*-*-*-*- KNN-Related Options -*-*-*-*-*-*-*-*

  - --doKnn: use K-Nearest Neighbors models

  - --knnK=*value*: the value of K to use in the KNN models

  - --knnTanimoto: use the Tanimoto metric in KNN models

  - --knnEuclid: use a Euclidean metric in KNN models

  *-*-*-*-*-*-*- Naive Bayes Classifier Options -*-*-*-*-*-*-*-*
  - --doNaiveBayes : use Naive Bayes classifiers

  - --mEstimateVal : the value to be used in the m-estimate formula
      If this is greater than 0.0, we use it to compute the conditional
      probabilities by the m-estimate

  *-*-*-*-*-*-*-*- SVM-Related Options -*-*-*-*-*-*-*-*

  **** NOTE: THESE ARE DISABLED ****

# #   - --doSVM: use Support-vector machines

# #   - --svmKernel=*kernel*: choose the type of kernel to be used for
# #     the SVMs.  Options are:
# #     The default is:

# #   - --svmType=*type*: choose the type of support-vector machine
# #     to be used.  Options are:
# #     The default is:

# #   - --svmGamma=*gamma*: provide the gamma value for the SVMs.  If this
# #     is not provided, a grid search will be carried out to determine an
# #     optimal *gamma* value for each SVM.

# #   - --svmCost=*cost*: provide the cost value for the SVMs.  If this is
# #     not provided, a grid search will be carried out to determine an
# #     optimal *cost* value for each SVM.

# #   - --svmWeights=*weights*: provide the weight values for the
# #     activities.  If provided this should be a sequence of (label,
# #     weight) 2-tuples *nActs* long.  If not provided, a weight of 1
# #     will be used for each activity.

# #   - --svmEps=*epsilon*: provide the epsilon value used to determine
# #     when the SVM has converged.  Defaults to 0.001

# #   - --svmDegree=*degree*: provide the degree of the kernel (when
# #     sensible) Defaults to 3

# #   - --svmCoeff=*coeff*: provide the coefficient for the kernel (when
# #     sensible) Defaults to 0

# #   - --svmNu=*nu*: provide the nu value for the kernel (when sensible)
# #     Defaults to 0.5

# #   - --svmDataType=*float*: if the data is contains only 1 and 0 s, specify by
# #     using binary. Defaults to float

# #   - --svmCache=*cache*: provide the size of the memory cache (in MB)
# #     to be used while building the SVM.  Defaults to 40

**Notes**

  - *Data filtering*: When there is a large disparity between the
    numbers of points with various activity levels present in the
    training set it is sometimes desirable to train on a more
    homogeneous data set.  This can be accomplished using filtering.
    The filtering process works by selecting a particular target
    fraction and target value.  For example, in a case where 95% of
    the original training set has activity 0 and ony 5% activity 1, we
    could filter (by randomly removing points with activity 0) so that
    30% of the data set used to build the composite has activity 1.


é    N)ÚDataStructs)ÚDbModule)ÚCompositeRun)ÚScreenComposite)Ú	CompositeÚBayesComposite)Ú	DataUtilsÚ	SplitData)Ú	listutilsz3.2.3é   c             C   s   t rtj d|  ¡ dS )z¥ emits messages to _sys.stdout_
    override this in modules which import this one to redirect output

    **Arguments**

      - msg: the string to be displayed

  z%s
N)Ú_verboseÚsysÚstdoutÚwrite)Úmsg© r   ú6lib/python3.7/site-packages/rdkit/ML/BuildComposite.pyÚmessageá   s    	r   c             C   sd   g }xZ|D ]R}|   ¡ r&|  |¡d }n|d }|  |¡\}}||kr
| ||f¡ | |¡ q
W |S )a¬   screens a number of examples past a composite

    **Arguments**

      - composite: a composite model

      - examples: a list of examples (with results) to be screened

      - badExamples: a list to which misclassified examples are appended

    **Returns**

      a list of 2-tuples containing:

        1) a vote

        2) a confidence

      these are the votes and confidence levels for **misclassified** examples

  éÿÿÿÿ)ZGetActivityQuantBoundsZQuantizeActivityZClassifyExampleÚappend)Ú	compositeZexamplesÚbadExamplesÚwrongZexampleZanswerZresZconfr   r   r   Útestallî   s    
r   c             C   sà  dg}|  d| j ¡ | jdkr6|  d| j| jf ¡ | jdkrV|  d| j| jf ¡ | jrl|  d| j ¡ | jr||  d¡ | j	rŒ|  d¡ | j
dkr¦|  d	| j
 ¡ | jr¼|  d
| j ¡ | jrÒ|  d| j ¡ | jrâ|  d¡ t| dƒrü| jrü|  d¡ | jr| jr|  d| j ¡ | jr4|  d| j ¡ | jr| jdkrX|  d| j ¡ | jrj|  d¡ | jr–t | j¡}| jr®|  d| ¡ n| jr®|  d| j ¡ | jrÀ|  d¡ | jrØ|  d| j ¡ | jrê|  d¡ | jr|  d| j ¡ | jrH|  d¡ | jdkr0|  d| j ¡ | jrH|  d| j ¡ | jr‚|  d| j  ¡ | j!dkrx|  d¡ n
|  d¡ | j"r°|  d¡ | j#dkr°|  d| j# ¡ | j$rÂ|  d ¡ | j%rÖ|  | j%¡ d! &|¡S )"z	 #DOC

  ZBuildCompositez-n %dg        z-F %.3f -v %dz&--modelFiltFrac=%.3f --modelFiltVal=%dz
-s -f %.3fz-Sz-rz-t %.3fz-Q "%s"z-d %sz-DÚnoScreenz
--noScreenz-p %sz-N %sr   z-L %dz-gz-q "%s"z--prunez-G %dz	--recyclez--randomDescriptors=%dz--doSigTreez--doKnn --knnK %dÚTanimotoz--knnTanimotoz--knnEuclidz--doNaiveBayesz--mEstimateVal=%.3fz--replacementSelectionú )'r   ÚnModelsÚ
filterFracÚ	filterValÚmodelFilterFracÚmodelFilterValÚsplitRunÚ	splitFracÚshuffleActivitiesÚrandomActivitiesÚ	thresholdÚactivityBoundsÚactivityBoundsValsÚdbNameÚdetailedResÚhasattrr   ÚpersistTblNameÚnoteÚuseTreesÚ
limitDepthÚ
lessGreedyÚqBoundsr
   ZCompactListReprÚqBoundCountÚpruneItÚstartAtÚrecycleVarsÚrandomDescriptorsÚuseSigTreesÚuseKNNÚ	knnNeighsÚknnDistFuncÚuseNaiveBayesÚmEstimateValÚreplacementSelectionÚ	tableNameÚjoin)ÚdetailsÚargsZshortBoundsr   r   r   ÚGetCommandLine  s„    













#
rB   c       @         sÎ
  | j r| j}n dd l}| dd¡| dd¡f}t |¡ g }| jdkrXtj|d| d n| jdkrrtj|d| d | 	¡ ‰ | j
dkrÆtjtˆ ƒ| jt d\}}	‡ fdd„|D ƒ‰‡ fdd„|	D ƒ}ng }g }	tttˆ ƒƒƒ}ˆ ‰| jd	krvt| d
ƒr’| jr’g }
| j}xŒˆD ]z}|d d … }|d }d}d}x>|sp|t|ƒk rp||| k rd||d< d}n|d7 }q4W |s€||d< |
 |¡ qW nd }ˆ}
tj|
| j| jddd\}}‡fdd„|D ƒ}|‡fdd„|D ƒ7 }|‰tjˆ|d}| ¡ }| ¡  tdƒ x"|D ]}tt||| fƒƒ qW tj||d}| ¡ }| ¡  tdƒ x"|D ]}tt||| fƒƒ qXW tˆƒ}td| ƒ | ¡ }ttd|d ƒƒ}| ¡ }x4tdt|ƒƒD ]"}||d  dkr¼| |¡ q¼W | j dkrt!| j dƒ}t" #ˆ|¡ t" #||¡ | $¡  | j%r,t& &¡ }nt' '¡ }||_(| j|_)| j|_*| j|_+t| dƒrf| j|_,t| dƒrz| j|_-| .| j/| j0¡ | 1| j¡ | ¡ }| jr¸t| jƒd |d< |rÚ| 2| 3¡ ¡ | 4| j5¡ n| 4| 3¡ ¡ | 1| j¡ | j6dkrd	| _7| j8rÚddl9m:}m;} | j<g kr>ddl9m=} |j>}nddl9m?}  | j@}|jA}!|j;}"| B| j<¡ | ¡ }| jrŠt| jƒd |d< |jCˆ|dg| |!|"| j6| jD| jEd|| j<| jF| jG|| j7| jH| jI| jJt d nN| jKr´ddl9m:} ddl9mL}# |#jM}|jA}!| ¡ }| jr(t| jƒd |d< t| dƒr<| jN}$nd }$t| dƒrT| jO}%nd}%t| dƒrl| jP}&nd }&|jCˆ|dg| |!| j6d|| jG|| j7| jH| jI| jJ|$|%|&t d! nt| jQr>ddlRm:} dd"lRmS}' |jA}!d}(| jTd#krò|'jU}(n"| jTd$kr|'jV}(ndstWd%ƒ‚|jCˆ|dg| |!| j6d| jX| j7|(d&	 nê| jYsN| jZrøddl[m:} |jA}!t| d'ƒrt| jZs¨|jCˆ|dg| |!| j6d| j<| j7| jH| j\t d( nNt| dƒr¼| jO}%nd}%|jCˆ|dg| |!| j6d| j<| j\d)|%| j7| jHt d* n0ddl]m:} |jA}!|jCˆ|dg| | j6|!dd+ | ^¡  | _¡  | `¡ \})}}*ta b|¡}ta b|*¡}*| 3¡ |_cx&tt|)ƒƒD ]}|)|  d|jc¡ qrW ||* }+te|+ƒte|ƒ },|*|, }-|-| }-ta f|-|- ¡}-te|-ƒte|ƒ }.td,d-|, d-|. f ƒ | j%r |jgˆdd. | h¡  |r| i| jj¡ tk lt" m|¡¡| _ng }/| jo	st| d/ƒrL| jp	s| j
r¬td0ƒ tq|||/ƒ}0td1t|0ƒd-trt|0ƒƒ trt|ƒƒ f ƒ trt|0ƒƒt|ƒ ts_tnVtd2ƒ tq|ˆ |/ƒ}0td1t|0ƒd-trt|0ƒƒ trtˆ ƒƒ f ƒ trt|0ƒƒtˆ ƒ ts_u| jo
r.td3ƒ tv wt| x¡ ƒ|||d | jy¡}1|1\}2}3}4}5}6}7}8tˆ ƒ}9|2|3 }:tr|3ƒ|: ts_u|5ts_z|6ts_{t||8ƒts_}|:|9 };|;dk	r˜tr|;ƒ|9 ts_~| j

r.td4ƒ tv wtt|ƒƒ|||d | jy¡}1|1\}2}3}4}5}6}7}8t|ƒ}9|2|3 }:tr|3ƒ|: ts_t|5ts_|6ts_€t||8ƒts_|:|9 };|;dk
r.tr|;ƒ|9 ts_‚| jƒ
rd| j„
rdtd5| j„| jƒf ƒ | j…| j„| jƒd6 | j†dk
rÂt!| j†d7ƒ}<x<tt|/ƒƒD ],}|/| }=|0| }>d8|=|>f }?|< ‡|?¡ 
qŠW |< $¡  | h¡  |S )9Nr   g    €„.Ar   )ZshuffleÚ
runDetails)Úsilentc                s   g | ]}ˆ | ‘qS r   r   )Ú.0Úx)ÚnamedExamplesr   r   ú
<listcomp>–  s    zRunOnData.<locals>.<listcomp>c                s   g | ]}ˆ | ‘qS r   r   )rE   rF   )rG   r   r   rH   —  s    g        r'   r   )ZindicesOnlyc                s   g | ]}ˆ | ‘qS r   r   )rE   rF   )ÚtrainExamplesr   r   rH   ¶  s    c                s   g | ]}ˆ | ‘qS r   r   )rE   rF   )rI   r   r   rH   ·  s    )ÚboundszResult Counts in training set:zResult Counts in test set:zTraining with %d examplesÚ zwb+r   r   )ÚCrossValidateÚ	PruneTree)ÚBuildQuantTree)ÚID3)ÚnPossibleValsÚbuildDriverÚprunerÚnTriesr3   r0   ÚneedsQuantizationÚtreeBuilderÚnQuantBoundsr4   ÚmaxDepthÚprogressCallbackÚholdOutFracr=   r5   r6   rD   )rL   )ÚBuildSigTreeÚsigTreeBiasListÚuseCMIMÚallowCollectionsF)rP   rQ   rS   rT   rU   rW   rX   rY   r=   r5   r6   ÚbiasListr\   ZallowCollectionrD   )ÚDistFunctionsÚ	Euclideanr   zBad KNN distance metric value)rP   rQ   rS   rT   ZnumNeighrY   ZdistFuncÚuseSigBayes)	rP   rQ   rS   rT   rV   rY   r=   r<   rD   T)rP   rQ   rS   rT   rV   r<   ZuseSigsr\   rY   r=   rD   )rS   rQ   rT   z># Overall Average Error: %%% 5.2f, Average Deviation: %%% 6.2fg      Y@)Úverboser   zTesting all hold-out examplesz)%d examples (%% %5.2f) were misclassifiedzTesting all examplesz
Entire data set:z
Hold-out data:zUpdating results table %s:%s)ZdbÚtablezw+z%s	%s
)ˆÚ
lockRandomÚ
randomSeedÚrandomZrandintr   ZInitRandomNumbersr$   ZRandomizeActivitiesr%   ZGetNamedDatar"   r	   ZSplitIndicesÚlenr#   r   ÚlistÚranger   r+   r'   r   Z
FilterDatar   ZCountResultsÚkeysÚsortr   ÚstrZGetNVarsZGetNPossibleValsÚremoveÚpickleDataFileNameÚopenÚpickleÚdumpÚcloseZ
bayesModelr   r   Z_randomSeedZ
_splitFracZ_shuffleActivitiesZ_randomizeActivitiesZ_filterFracZ
_filterValZSetModelFilterDatar    r!   ZSetActivityQuantBoundsZSetInputOrderZGetVarNamesZSetDescriptorNamesZ
_descNamesr   ZinternalHoldoutFracr.   Zrdkit.ML.DecTreerL   rM   r1   rN   ZQuantTreeBootrO   ZID3BootZCrossValidationDriverZSetQuantBoundsZGrowr3   r0   r4   r/   r=   r5   r6   r7   rZ   ZSigTreeBuilderr[   r\   r]   r8   Zrdkit.ML.KNNr_   r:   ZEuclideanDistZTanimotoDistÚAssertionErrorr9   r;   ra   Zrdkit.ML.NaiveBayesr<   Zrdkit.ML.NeuralZAverageErrorsZ
SortModelsZ
GetAllDataÚnumpyZarrayZ	_varNamesZ	NameModelÚsumZsqrtZTrainZClearModelExamplesZPickleÚoutNamer   ZbinaryHolderÚdumpsZmodelr*   r   r   ÚfloatÚ_runDetailsZholdout_errorZoverall_errorr   ZShowVoteResultsZGetNPtsr&   Zoverall_correct_confZoverall_incorrect_confÚreprZoverall_result_matrixZoverall_fraction_droppedZholdout_correct_confZholdout_incorrect_confZholdout_result_matrixZholdout_fraction_droppedr,   r)   ZStoreÚbadNamer   )@r@   ÚdatarX   ÚsaveItÚsetDescNamesZseedrf   ZtestExamplesZtrainIdxZtestIdxZ	tExamplesrJ   ZptZactZplacedZboundZtempZtmpZcountsZksÚkZ	nExamplesZnVarsZattrsrP   ÚiZpickleDataFiler   rL   rM   rN   ZbuilderrO   ZdriverrR   rZ   r^   r\   r]   r_   ZdfuncZ	modelListZavgErrsZweightedErrsZ
averageErrZdevsZavgDevr   r   ZresTupZnGoodZnBadZnSkipZavgGoodZavgBadZavgSkipZvoteTabZnPtsZnClassZnRejZbadFileÚexZvoteZoutStrr   )rG   rI   r   Ú	RunOnData„  sî   



















r‚   c             C   sÆ   t  ¡ | _| j ¡ }| jdkr(|d | _| jsP| jg krDt 	|¡}q°t 
|¡}n`| js\| jrt|| _| jdtjd}n<| jg ks„| js”|| _|  ¡ }ntj| j|| j| j| jd}t| ||||d}|S )aÏ   does the actual work of building a composite model

    **Arguments**

      - details:  a _CompositeRun.CompositeRun_ object containing details
        (options, parameters, etc.) about the run

      - progressCallback: (optional) a function which is called with a single
        argument (the number of models built so far) after each model is built.

      - saveIt: (optional) if this is nonzero, the resulting model will be pickled
        and dumped to the filename specified in _details.outName_

      - setDescNames: (optional) if nonzero, the composite's _SetInputOrder()_ method
        will be called using the results of the data set's _GetVarNames()_ method;
        it is assumed that the details object has a _descNames attribute which
        is passed to the composites _SetDescriptorNames()_ method.  Otherwise
        (the default), _SetDescriptorNames()_ gets the results of _GetVarNames()_.

    **Returns**

      the composite model constructed


  rK   z.pklr   )Z	pickleColZpickleClass)Z	quantNameÚuserZpassword)rX   r}   r~   )ÚtimeZasctimeZrundater>   Ústriprv   r)   r1   r   ZTextFileToDataZBuildQuantDataSetr7   ra   Z
GetDataSetr   ZExplicitBitVectr.   ZDBToQuantDataZ
qTableNameZdbUserZ
dbPasswordr‚   )r@   rX   r}   r~   ZfNamer|   r   r   r   r   ÚRunIt¿  s,    







r†   c             C   s,   t dt ƒ | r(t dƒ t d tj¡ƒ dS )z prints the version number

  z$This is BuildComposite.py version %szcommand line was:r   N)ÚprintÚ__VERSION_STRINGr?   r   Úargv)ÚincludeArgsr   r   r   ÚShowVersionõ  s    r‹   c               C   s   t tƒ t d¡ dS )zM provides a list of arguments for when this is used from the command line

  r   N)r‡   Ú__doc__r   Úexitr   r   r   r   ÚUsageÿ  s    rŽ   c             C   s   | dkrt } t | ¡S )a    initializes a details object with default values

      **Arguments**

        - details:  (optional) a _CompositeRun.CompositeRun_ object.
          If this is not provided, the global _runDetails will be used.

      **Returns**

        the initialized _CompositeRun_ object.


  N)ry   r   ÚSetDefaults)rC   r   r   r   r     s    r   c             C   sj  ddl }|  tjdd… dddddd	d
dddddddddddddg¡\}}d| _x|D ]þ\}}|dkrvt|ƒ| _qX|dkr†|| _qX|dkr–|| _qX|dkrÔt|ƒ}t	|ƒt	g ƒt	dƒgksÆt
dƒ‚|| _|| _qX|dkrä|| _qX|dk rö|| _qX|d krd| _qX|d!krd| _qX|d"kr,|| _qX|d#kr>d| _qX|d$krPd| _qX|d%krft|ƒ| _qX|d&kr|t|ƒ| _qX|d'kr’t|ƒ| _qX|d(kr¤d| _qX|d)kr¶d| _qX|d*krÌt|ƒ| _qX|d+krÞ|| _qX|d,krðd| _qX|d-krt|ƒ| _qX|d.krd| _qX|d/kr.t|ƒ| _qX|d0krpt|ƒ}t	|ƒt	g ƒt	dƒgksbt
d1ƒ‚|| _ || _!qX|d2krŒt"ƒ  t #d¡ qX|d3kr¢t|ƒ| _$qX|d4kr¸t|ƒ| _%qX|d5krÎt|ƒ| _&qX|d6kràd| _'qX|d7kròd| _qX|d8krd| _(qX|d9krt|ƒ| _)qX|d:kr8d| _*d| _d| _+qX|d;krNt|ƒ| _,qX|d<kr`d=| _-qX|d>krrd?| _-qX|d@kr–d| _*d| _d| _+d| _.qX|dAkr¨dB| _/qX|dCkrÌd| _+d| _*d| _d| _0qX|dDkrðd| _0d| _+d| _*d| _qX|dEkrt|ƒ| _1qX|dFkrt|ƒ| _2qX|dGkr.d| _3qX|dHkr@d| _4qX|dIkrRt5ƒ  qXt5ƒ  qXW |d | _6dS )JzŽ parses command line arguments and updates _runDetails_

      **Arguments**

        - runDetails:  a _CompositeRun.CompositeRun_ object.

  r   Nr   z(P:o:n:p:b:sf:F:v:hlgd:rSTt:BQ:q:DVG:N:L:znRuns=ZpruneZprofilezseed=r   zmodelFiltFrac=zmodelFiltVal=ZrecyclezrandomDescriptors=ZdoKnnzknnK=ZknnTanimotoZ	knnEuclidZ	doSigTreer]   ZdoNaiveBayeszmEstimateVal=Z
doSigBayesr=   z-nz-Nz-oz-Qr   z4bad argument type for -Q, specify a list as a stringz-pz-Pz-rz-Sz-bz-Bz-sz-fz-Fz-vz-lz-gz-Gz-dz-Tz-tz-Dz-Lz-qz4bad argument type for -q, specify a list as a stringz-Vz--nRunsz--modelFiltFracz--modelFiltValz--prunez	--profilez	--recyclez--randomDescriptorsz--doKnnz--knnKz--knnTanimotor   z--knnEuclidr`   z--doSigTreez--allowCollectionsTz--doNaiveBayesz--doSigBayesz--mEstimateValz--seedz
--noScreenz--replacementSelectionz-h)7Úgetoptr   r‰   Ú	profileItÚintr   r-   rv   ÚevalÚtypers   r'   r(   r,   rn   r%   r$   r{   ZbayesModelsr"   rx   r#   r   r   rd   r0   r4   r)   r.   r&   r*   r/   r2   r1   r‹   r   ÚnRunsr    r!   r3   r5   r6   r8   r;   r9   r:   r7   r]   ra   r<   re   r   r=   rŽ   r>   )rC   r   rA   ZextraÚargÚvalr1   r   r   r   Ú	ParseArgs  s   


































*




r˜   Ú__main__é   r   )rŠ   zT---------------------------------
	Doing %d of %d
---------------------------------
zprof.datr„   Zcallsé   z%Profiling requires the hotshot module)Nr   r   )Nr   r   )r   )N)7rŒ   r   r„   rt   Zrdkitr   Zrdkit.Dbaser   Zrdkit.MLr   r   Zrdkit.ML.Compositer   r   Zrdkit.ML.Datar   r	   Zrdkit.utilsr
   rp   ry   rˆ   r   r   r   rB   r‚   r†   r‹   rŽ   r   r˜   Ú__name__rg   r‰   r?   Úcmdr•   ri   r€   Ústderrr   r‘   ZhotshotZhotshot.statsZProfileZprofZruncallZstatsÚloadZ
strip_dirsZ
sort_statsZprint_statsÚImportErrorr‡   r   r   r   r   Ú<module>È   sh   $r
  =
6


 M


