B
    bJ                 @   sV   d Z dddddZdddd	dd
ZdddZddddddddZdZG dd dZdS )aQ  Calculate isoelectric points of polypeptides using methods of Bjellqvist.

pK values and the methos are taken from::

    * Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
    Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F.
    The focusing positions of polypeptides in immobilized pH gradients can be
    predicted from their amino acid sequences. Electrophoresis 1993, 14,
    1023-1031.

    * Bjellqvist, B., Basse, B., Olsen, E. and Celis, J.E.
    Reference points for comparisons of two-dimensional maps of proteins from
    different human cell types defined in a pH scale where isoelectric points
    correlate with polypeptide compositions. Electrophoresis 1994, 15, 529-539.

I designed the algorithm according to a note by David L. Tabb, available at:
http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
g      @g      $@g      (@gQ@)NtermKRHgffffff@g333333@g@g      "@)CtermDECYg333333@g      @)r   r   g\(\@g      @gQ@gQ @gHzG@g(\@g@)AMSPTVr   )r   r   r   r   r   r   r	   c               @   s<   e Zd ZdZdddZdd Zdd Zd	d
 ZdddZdS )IsoelectricPointa  A class for calculating the IEP or charge at given pH of a protein.

    Parameters
    ----------
    :protein_sequence: A ``Bio.Seq`` or string object containing a protein
                       sequence.
    :aa_content: A dictionary with amino acid letters as keys and its
                 occurrences as integers, e.g. ``{"A": 3, "C": 0, ...}``.
                 Default: ``None``. If ``None``, the dic will be calculated
                 from the given sequence.

    Methods
    -------
    :charge_at_pH(pH):  Calculates the charge of the protein for a given pH
    :pi():              Calculates the isoelectric point


    Examples
    --------
    The methods of this class can either be accessed from the class itself
    or from a ``ProtParam.ProteinAnalysis`` object (with partially different
    names):

    >>> from Bio.SeqUtils.IsoelectricPoint import IsoelectricPoint as IP
    >>> protein = IP("INGAR")
    >>> print(f"IEP of peptide {protein.sequence} is {protein.pi():.2f}")
    IEP of peptide INGAR is 9.75
    >>> print(f"Its charge at pH 7 is {protein.charge_at_pH(7.0):.2f}")
    Its charge at pH 7 is 0.76


    >>> from Bio.SeqUtils.ProtParam import ProteinAnalysis as PA
    >>> protein = PA("PETER")
    >>> print(f"IEP of {protein.sequence}: {protein.isoelectric_point():.2f}")
    IEP of PETER: 4.53
    >>> print(f"Charge at pH 4.53: {protein.charge_at_pH(4.53):.2f}")
    Charge at pH 4.53: 0.00

    Nc             C   sL   t | | _|s,ddlm} || j }| || _|  \| _	| _
dS )zInitialize the class.    )ProteinAnalysisN)struppersequenceZBio.SeqUtils.ProtParamr   Zcount_amino_acids_select_chargedcharged_aas_content_update_pKs_tablespos_pKsneg_pKs)selfZprotein_sequence
aa_contentZ_PA r   <lib/python3.7/site-packages/Bio/SeqUtils/IsoelectricPoint.py__init__R   s    zIsoelectricPoint.__init__c             C   s6   i }xt D ]}t|| ||< q
W d|d< d|d< |S )Ng      ?r   r   )charged_aasfloat)r   r   Zchargedaar   r   r   r   _   s    
z IsoelectricPoint._select_chargedc             C   sV   t  }t }| jd | jd  }}|tkr:t| |d< |tkrNt| |d< ||fS )z@Update pKs tables with seq specific values for N- and C-termini.r   r   r   )positive_pKscopynegative_pKsr   pKnterminalpKcterminal)r   r   r   ZntermZctermr   r   r   r   g   s    z#IsoelectricPoint._update_pKs_tablesc             C   s   d}x<| j  D ].\}}dd||  d  }|| j| | 7 }qW d}x<| j D ].\}}dd||  d  }|| j| | 7 }qRW || S )z.Calculate the charge of a protein at given pH.g        g      ?
   )r   itemsr   r   )r   pHZpositive_charger"   ZpKZpartial_chargeZnegative_charger   r   r   charge_at_pHr   s    	zIsoelectricPoint.charge_at_pH皙@333333@   c             C   sF   |  |}|| dkrB|dkr$|}n|}|| d }| |||S |S )a  Calculate and return the isoelectric point as float.

        This is a recursive function that uses bisection method.
        Wiki on bisection: https://en.wikipedia.org/wiki/Bisection_method

        Arguments:
         - pH: the pH at which the current charge of the protein is computed.
           This pH lies at the centre of the interval (mean of `min_` and `max_`).
         - min\_: the minimum of the interval. Initial value defaults to 4.05,
           which is below the theoretical minimum, when the protein is composed
           exclusively of aspartate.
         - max\_: the maximum of the the interval. Initial value defaults to 12,
           which is above the theoretical maximum, when the protein is composed
           exclusively of arginine.
        g-C6?g           )r,   pi)r   r+   Zmin_Zmax_ZchargeZnext_pHr   r   r   r1      s    
zIsoelectricPoint.pi)N)r-   r.   r/   )	__name__
__module____qualname____doc__r   r   r   r,   r1   r   r   r   r   r   )   s   '
r   N)r5   r$   r&   r(   r'   r    r   r   r   r   r   <module>   s   
