B
    ‰°b¶  ã               @   s   d Z G dd„ dƒZdS )zDApproximate calculation of appropriate thresholds for motif finding.c               @   sV   e Zd ZdZddd„Zddd„Zd	d
„ Zdd„ Zdd„ Zdd„ Z	ddd„Z
dd„ ZdS )ÚScoreDistributiona  Class representing approximate score distribution for a given motif.

    Utilizes a dynamic programming approach to calculate the distribution of
    scores with a predefined precision. Provides a number of methods for calculating
    thresholds for motif occurrences.
    Néè  c             C   sú  |dkrFt d| ¡ ƒ| _td| ¡ ƒ| j | _||j | _| ¡ | _n:t d|j ƒ| _td|jƒ| j | _||j | _| |¡| _| j| jd  | _	dg| j | _
d| j
|  | j¡ < dg| j | _d| j|  | j¡ < |dkrx.t| ¡ | ¡ ƒD ]\}}|  |||j¡ qôW näxât|jƒD ]Ô}dg| j }dg| j }	|dd…|f }x˜| ¡ D ]Œ\}
}||
 }td||
|f ƒ| }|  |¡}xXt| jƒD ]J}||  ||¡  | j
| | 7  < |	|  ||¡  | j| | 7  < qW qTW || _
|	| _qW dS )zInitialize the class.Ng        é   g      ð?é   )ÚminÚ	min_scoreÚmaxZ	max_scoreZintervalÚlengthÚn_pointsÚicZmeanÚstepÚ
mo_densityÚ_index_diffÚ
bg_densityÚzipZlog_oddsZpwmÚmodifyÚ
backgroundÚrangeÚitemsÚpowÚ_add)ÚselfZmotifZ	precisionZpssmr   ÚloZmoZpositionÚmo_newÚbg_newZletterZscoreZbgÚdÚi© r   ú4lib/python3.7/site-packages/Bio/motifs/thresholds.pyÚ__init__   s<    

".zScoreDistribution.__init__ç        c             C   s   t || d| j  | j ƒS )Ng      à?)Úintr   )r   ÚxÚyr   r   r   r   4   s    zScoreDistribution._index_diffc             C   s   t dt| jd || ƒƒS )Né    r   )r   r   r	   )r   r   Újr   r   r   r   7   s    zScoreDistribution._addc       
      C   s¨   dg| j  }dg| j  }x~| ¡ D ]r\}}|  |¡}x^t| j ƒD ]P}	||  |	|¡  | j|	 ||  7  < ||  |	|¡  | j|	 ||  7  < q@W q"W || _|| _dS )z%Modify motifs and background density.g        N)r	   r   r   r   r   r   r   )
r   ZscoresZmo_probsZbg_probsr   r   ÚkÚvr   r   r   r   r   r   :   s    
&.zScoreDistribution.modifyc             C   s>   | j }d}x"||k r,|d8 }|| j| 7 }qW | j|| j  S )zVApproximate the log-odds threshold which makes the type I error (false positive rate).g        r   )r	   r   r   r   )r   Úfprr   Úprobr   r   r   Úthreshold_fprF   s    
zScoreDistribution.threshold_fprc             C   s<   d}d}x"||k r*|d7 }|| j | 7 }q
W | j|| j  S )zWApproximate the log-odds threshold which makes the type II error (false negative rate).éÿÿÿÿg        r   )r   r   r   )r   Úfnrr   r(   r   r   r   Úthreshold_fnrO   s    
zScoreDistribution.threshold_fnrç      ð?Fc             C   sp   | j }d}d}x4|| |k rB|d8 }|| j| 7 }|| j| 8 }qW |r\| j|| j  |fS | j|| j  S dS )zMApproximate log-odds threshold making FNR equal to FPR times rate_proportion.g        g      ð?r   N)r	   r   r   r   r   )r   Zrate_proportionZreturn_rater   r'   r+   r   r   r   Úthreshold_balancedX   s    z$ScoreDistribution.threshold_balancedc             C   s   | j d| j  dS )a"  Threshold selection mimicking the behaviour of patser (Hertz, Stormo 1999) software.

        It selects such a threshold that the log(fpr)=-ic(M)
        note: the actual patser software uses natural logarithms instead of log_2, so the numbers
        are not directly comparable.
        r   )r'   )r)   r
   )r   r   r   r   Úthreshold_patserf   s    z"ScoreDistribution.threshold_patser)Nr   NN)r   )r-   F)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r)   r,   r.   r/   r   r   r   r   r   	   s   
#
		
r   N)r3   r   r   r   r   r   Ú<module>   s   