B
    3RcvF                 @   s   d Z ddlZddlZddlZddlZddlmZ G dd dZG dd dZG dd	 d	Z	G d
d dZ
G dd dZdd Zdd Zdd Zedkre  dS )a  
Glare Algorithm

Some nomenclature:

 GLARE: A New Approach for Filtering Large Reagent Lists in 
           Combinatorial Library Design Using Product Properties
    Jean-Francois Truchon* and Christopher I. Bayly

    http://pubs.acs.org/doi/pdf/10.1021/ci0504871

 A Libary is made of RGroups
 RGroups are a collection of sidechains (the paper uses Fragments)
  that can populate the rgroup position.

 We desire to optimize the Library so that we have a good chance
  of making the products in the desired property range.

 Example From the testing code, using Fake data:

    r1 = RGroups(makeFakeSidechains("aldehydes", num=1000))
    r2 = RGroups(makeFakeSidechains("boronic_acids", num=1500))
    
    lib = Library([r1,r2])
    props = [
        Property("mw",    propIdx=0, minValue=0, maxValue=500),
        Property("alogp", propIdx=1, minValue=-2.4, maxValue=5),
        Property("tpsa",  propIdx=2, minValue=0, maxValue=90)
    ]
    
    glare = Glare()
    # optimize the library...
    glare.optimize(lib, props)
    for reactant_idx, rgroup in enumerate(lib.rgroups):
        print(f"Reactants for reactant {reactant_idx}")
        for reactant in rgroup.sidechhains:
            print(reactant.name)
    N)reducec               @   s   e Zd ZdddZdd ZdS )Property        c             C   s"   || _ || _|| _|| _|| _dS )a  name, propIdx, minValue, maxValue, scaffoldoffset -> initial a Property
        name is the name of the property.
         propIdx:  the index of the property in the property vector
         minValue: the minimum acceptable value for the property
         maxValue: the maximum acceptable value for the property
         scaffoldoffset: any offset from the reaction scaffold (defaults to 0)
        N)namepropIdxminValuemaxValueoffset)selfr   r   r   r   Zscaffoldoffset r   "share/RDKit/Contrib/Glare/glare.py__init__,   s
    zProperty.__init__c             C   sD   | j }| j}x|D ]}||j| 7 }qW | j|  ko>| jkS   S )a  sidechains -> Evaluate a list of sidechains to see if they
        pass the property values.

        Each sidechain must have a property vector e.g. (s.props for s in sidechains)
        which is a vector of values where s.props[propIdx] is the property
        being inspected
        )r	   r   propsr   r   )r
   
sidechainsproductr   sr   r   r   evaluate;   s
    
zProperty.evaluateN)r   )__name__
__module____qualname__r   r   r   r   r   r   r   +   s   
r   c               @   s2   e Zd ZdZdddZdd Zdd Zd	d
 ZdS )	SidechainzHolds the name (identifier) and property list for the
    given sidechain/fragment.  Properties are assumed to
    be numerical valuesr   c             K   s"   || _ || _|| _d| _|| _dS )aw  name, props, goodCount=0 -> initialize a Sidechain
        initialize a sidechain.
        name: the unique name for the sidechain
        props: the property vector (see Properties class for details)
        goodCount: the number of times this reagent belongs to
            a good product, where good is a product that is in the desired
            property space.
        FN)r   r   
good_countZdropped
extra_data)r
   r   r   Z	goodCountr   r   r   r   r   M   s
    	zSidechain.__init__c             C   s   | j S )N)r   )r
   r   r   r   data\   s    zSidechain.datac             C   s   d| j | j| j| jf S )Nz$Sidechain %s(%s, goodCount=%s, **%r))r   r   r   r   )r
   r   r   r   __str___   s    zSidechain.__str__c             C   s   d| j | j| j| jf S )NzSidechain(%r, %r, %s, **%r))r   r   r   r   )r
   r   r   r   __repr__b   s    zSidechain.__repr__N)r   )r   r   r   __doc__r   r   r   r   r   r   r   r   r   I   s
   
r   c               @   sH   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dS )RGroupsz5Holds a collection of sidechains for the given RGroupc             C   s   || _ g | _t|| _dS )zSidechains -> RGroups
         sidechains: the list of Sidechains that make up the potential
                     sidechains at this rgroup positionN)r   rejectedleninitial_size)r
   r   r   r   r   r   g   s    zRGroups.__init__c             C   s
   t | jS )z)Returns the number of possible sidechains)r   r   )r
   r   r   r   countp   s    zRGroups.countc             C   s&   t | j x| jD ]
}d|_qW dS )z>Randomly shuffles the sidechains and reset the goodness countsr   N)randomZshuffler   r   )r
   r   r   r   r   	randomizet   s    zRGroups.randomizec             C   s   t | jt| j S )z-> return the current effectiveness of this collection
        effectiveness is the number of items left divided by the 
        initial amount)r   r   floatr    )r
   r   r   r   effectivenessz   s    zRGroups.effectivenessc             C   s   t ttt| j| S )zvnum_chunks -> return the number of sidechains in each chunk
        if the sidechains are split into num_chunks chunks)intmathZceilr$   r   r   )r
   
num_chunksr   r   r   
chunk_size   s    zRGroups.chunk_sizec             C   sH   |dkr||k s t d||f | |}t| j|| |d |  S )zcchunk_idx, num)chunks -> RGroups
        return the chunk_idxth chunk given num_chunks total chunksr   z%s %s   )AssertionErrorr)   r   r   )r
   	chunk_idxr(   nr   r   r   chunk   s    
zRGroups.chunkc             C   sx   d|  k rdks"n t d| | jjdd dd tt| j| d }|  j| j|d	 7  _| jd	| | _d	S )
zfractionToKeep -> Sort the sidechains from the most often 
        found if good products to the least, and keep the best 
        fractionToKeep percentager   g      ?zfractionToKeep: %sc             S   s   | j S )N)r   )xr   r   r   <lambda>       zRGroups.prune.<locals>.<lambda>T)keyreverseg      ?N)r+   r   sortr&   r   r   )r
   fractionToKeepZfragment_indexr   r   r   prune   s
    "zRGroups.pruneN)r   r   r   r   r   r!   r#   r%   r)   r.   r6   r   r   r   r   r   e   s   		r   c               @   sH   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dS )LibraryzQA library is a collection of RGroups that need to be combinitorially
    combinedc             C   s
   || _ dS )zrgroups -> Initialize the Library.
        rgroups: the list of possible RGroups that is combinitorially
                 combined to make the libraryN)rgroups)r
   r8   r   r   r   r      s    zLibrary.__init__c             C   s&   x | j D ]}t|jdkrdS qW dS )z4If we have an empty set for any rgroup, return Falser   FT)r8   r   r   )r
   rgr   r   r   isValid   s    zLibrary.isValidc             C   s   x| j D ]}|  qW dS )z%randomize the order of the sidechainsN)r8   r#   )r
   r9   r   r   r   r#      s    zLibrary.randomizec                s    fddt | jD }|jdd d d}g }x2|dd D ]"\}}|||||  f |}q>W |d \}}|||f |  d	d |D }|S )
ztotal_num_partitions -> [num_fragments/partition for rgroup1, 
                                    num_fragments/partition for rgroup2]
        return the number of sidechains in a partition
        for each rgroupc                s&   g | ]\}}|t |   d fqS )r*   )maxr!   ).0libIdxr9   )total_num_partitions_per_rgroupr   r   
<listcomp>   s   z5Library.getSidechainsPerPartition.<locals>.<listcomp>c             S   s   | d S )Nr*   r   )Zszr   r   r   r0      r1   z3Library.getSidechainsPerPartition.<locals>.<lambda>)r2   r*   Nc             S   s   g | ]\}}|qS r   r   )r<   r=   sizer   r   r   r?      s    )	enumerater8   r4   append)r
   r>   ZsizesZ	last_sizeZ	opt_sizesr=   Zcurrent_sizeresr   )r>   r   getSidechainsPerPartition   s    
	z!Library.getSidechainsPerPartitionc             C   s   |  |}t|}g }x:t|D ].}g }x|D ]}|||  q.W || q W g }xrt|D ]f\}	}g }
g }x:t|D ].\}}| j| }|| }||j||d qzW t|}| r`|| q`W |S )znum_partitions -> [Library(..), Library(...)]

        Return new libraries that are chunks of this one.
        These are the libraries that get sampled to see of
        sidechains participate in good products.
        )r,   r(   )	rE   r;   rangerC   rB   r8   r.   r7   r:   )r
   Znum_partitionsZ
partitionsZmax_subsetsZenumeration_indicesicombinationsrA   Zlibrary_setsZsubset_indexZlibsZpartitioned_rgroupsZ	lib_indexZlibpart_indexlibr(   r   r   r   r.      s*    



zLibrary.chunkc             C   s.   d}x| j D ]}|| 7 }qW |t| j  S )z8-> returns the average effectiveness of this library setg        )r8   r%   r   )r
   sumr9   r   r   r   r%      s    zLibrary.effectivenessc       	      C   s   dd | j D }d}d}x`ttj| D ]N\}}xD|D ]}||s6|d7 }P q6W |d7 }x|D ]}| jd7  _q`W q(W ||d fS )zprops -> num_good_enumerations, total_enumerations

        props: a list of Property evaluators for the fragments.

        returns the number of good enumerations and the total number of
        enumerations for this Libraryc             S   s   g | ]
}|j qS r   )r   )r<   r9   r   r   r   r?      s    z$Library.evaluate.<locals>.<listcomp>r   r*   )r8   rB   	itertoolsr   r   r   )	r
   r   ZfragsgoodZbadrG   ZfragpZ	sidechainr   r   r   r      s    


zLibrary.evaluateN)r   r   r   r   r   r:   r#   rE   r.   r%   r   r   r   r   r   r7      s   "r7   c               @   s"   e Zd ZdZdddZd	d
 ZdS )Glarea  Glare Algorithm.  Implementation of

    GLARE: A New Approach for Filtering Large Reagent Lists in 
           Combinatorial Library Design Using Product Properties
    Jean-Francois Truchon* and Christopher I. Bayly

    http://pubs.acs.org/doi/pdf/10.1021/ci0504871

    Usage:
       # somehow make sidechains1/2 with props [mw, alogp, tpsa]
       r1 = RGroups(sidechains1)
       r2 = RGroups(sidechains2)
       lib = Library([r1, r2])
       props = [
         Property("mw", 0, 0, 500),
         Property("alogp", 1, -2.4, 5),
         Property("tpsa", 2, 0, 90)
       ] 

      glare = Glare()
      glare.optimize(lib, props)
    ffffff?d         @N   c             C   s<   | | _ | _|| _|| _|d k	r,|d | _n|| _|| _d S )Ng      Y@)ZfractionGooddesiredFinalGoodnessmaxIterationsrgroupScaleinitialFractionnumPartitions)r
   rS   rT   rU   rV   rW   r   r   r   r   &  s    zGlare.__init__c             C   sT  t d t d| jd   t d| j  | jdks<| jdkrFt d nt d| jd   t d	d
dd |jD ttjdd |jD f  d}| j}xt	d| j
d D ]}x|jD ]}|  qW d }}|| j}	x2t|	D ]&\}
}||\}}||7 }||7 }qW ||7 }|| }|dk r2d}n|dkrt|}| jdk	rX| j }}ntd||  d d }}n>t|| dk rd}nd| ||  ||  | }td|}ttdd |jD }xn|jD ]d}d}| jdk	rt|j}d}dt| j || d   }|| }d| | }|d|  qW t d|  t d|d   t d|  t d|  t d|d   |st d| j  t d| j  t d | jdkrt d  nt d!| j  t d	d
d"d |jD ttjd#d |jD f  t d$| d   |r*|dk r*dS t|| j d%k sH|| jkrdS qW dS )&a  library, props
        Given a Library and the list of Propery evaluators,
        optimize the library.
        The library is modified in place by removing building blocks
        (sidechains) that are not likely to pass the property
        criteria.
        z"------- PARAMETERS: --------------zGOOODNESS THRESHOLD : %s%%rP   zMIN PARTITION SIZE : %sNg+?z$INITIAL FRACTION TO KEEP : AUTOMATICzINITIAL FRACTION TO KEEP : %s%%zActual SIZE : %s = %sz x c             S   s   g | ]}t t|jqS r   )strr   r   )r<   r9   r   r   r   r?   G  s    z"Glare.optimize.<locals>.<listcomp>c             S   s   g | ]}t |jqS r   )r   r   )r<   r9   r   r   r   r?   H  s    g        r*   g-q=gg333333?g?g-C6?g      ?c             S   s   g | ]}t |jqS r   )r   r   )r<   r9   r   r   r   r?     s    g      ?z4-------------- ITERATION : %s ----------------------zGOODNESS      : %s%%zNUMBER EVAL   : %szCUMUL EVAL    : %szKEPT IN STEP  : %s%%g      Y@zGOODNESS THRESHOLD : %szINITIAL FRACTION TO KEEP : Z	AUTOMATICz%s%%c             S   s   g | ]}t t|jqS r   )rX   r   r   )r<   r9   r   r   r   r?     s    c             S   s   g | ]}t |jqS r   )r   r   )r<   r9   r   r   r   r?     s    zEFFECTIVENESS : %s%%gMbP?)printrS   rW   rV   joinr8   r   operatormulrF   rT   r#   r.   rB   r   minabsr$   r;   rU   r   r   r'   Zexpr6   r5   r%   )r
   Zlibraryr   Zrunning_totalZGtZ	iterationr9   rL   ZtotalZchunked_libsZlibidxr.   gtZGiZfractionZG0ZK0ZKiZmax_sizeZscaleZnumSidechainsZnumerZdenomZfraction_to_rejectr   r   r   optimize6  s    	






zGlare.optimize)rO   rP   rQ   NrR   )r   r   r   r   r   ra   r   r   r   r   rN     s       
rN   c              C   s.   t dd} t dd}t dd}| ||gS )N
   i  ir      )r"   Zrandint)mwalogptpsar   r   r   makeFakeProps  s    rg   c             C   s8   g }x.t |D ]"}|t| d t| t  qW |S )N_)rF   rC   r   rX   rg   )rI   ZnumrD   rG   r   r   r   makeFakeSidechains  s    "ri   c              C   s   t tdd} t tdd}t| |g}tdddddtd	d
dddtdddddg}t }||| x>t|jD ]0\}}td|  x|j	D ]}t|j
 qW qtW d S )NZ	aldehydesi  Zboronic_acidsi  rd   r   i  giql@re   r*   g333333   gj@rf      Z   g     8@zReactants for reactant )r   ri   r7   r   rN   ra   rB   r8   rY   r   r   )abrI   r   ZglareZreactant_idxZrgroupZreactantr   r   r   	testGlare  s    ro   __main__)r   r"   r[   rK   r'   	functoolsr   r   r   r   r7   rN   rg   ri   ro   r   r   r   r   r   <module>&   s    7s 