B
    b6                 @   sD  d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
 ddd	d
ddddddddddddddddgZdZeZi Zi Zi Zi ZxBeddD ]4Zee Zee Zeee< eee< eee< eee< qW dd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd4d*d+ZG d,d- d-eZG d.d/ d/ZG d0d1 d1eZ G d2d3 d3eZ!dS )5a  Polypeptide-related classes (construction and representation).

Simple example with multiple chains,

    >>> from Bio.PDB.PDBParser import PDBParser
    >>> from Bio.PDB.Polypeptide import PPBuilder
    >>> structure = PDBParser().get_structure('2BEG', 'PDB/2BEG.pdb')
    >>> ppb=PPBuilder()
    >>> for pp in ppb.build_peptides(structure):
    ...     print(pp.get_sequence())
    LVFFAEDVGSNKGAIIGLMVGGVVIA
    LVFFAEDVGSNKGAIIGLMVGGVVIA
    LVFFAEDVGSNKGAIIGLMVGGVVIA
    LVFFAEDVGSNKGAIIGLMVGGVVIA
    LVFFAEDVGSNKGAIIGLMVGGVVIA

Example with non-standard amino acids using HETATM lines in the PDB file,
in this case selenomethionine (MSE):

    >>> from Bio.PDB.PDBParser import PDBParser
    >>> from Bio.PDB.Polypeptide import PPBuilder
    >>> structure = PDBParser().get_structure('1A8O', 'PDB/1A8O.pdb')
    >>> ppb=PPBuilder()
    >>> for pp in ppb.build_peptides(structure):
    ...     print(pp.get_sequence())
    DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW
    TETLLVQNANPDCKTILKALGPGATLEE
    TACQG

If you want to, you can include non-standard amino acids in the peptides:

    >>> for pp in ppb.build_peptides(structure, aa_only=False):
    ...     print(pp.get_sequence())
    ...     print("%s %s" % (pp.get_sequence()[0], pp[0].get_resname()))
    ...     print("%s %s" % (pp.get_sequence()[-7], pp[-7].get_resname()))
    ...     print("%s %s" % (pp.get_sequence()[-6], pp[-6].get_resname()))
    MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG
    M MSE
    M MSE
    M MSE

In this case the selenomethionines (the first and also seventh and sixth from
last residues) have been shown as M (methionine) by the get_sequence method.
    N)SCOPData)Seq)PDBException)calc_dihedral
calc_angleZALAZCYSZASPZGLUZPHEZGLYZHISZILEZLYSZLEUZMETZASNZPROZGLNZARGZSERZTHRZVALZTRPZTYRZACDEFGHIKLMNPQRSTVWY   c             C   s   t |  S )zyIndex to corresponding one letter amino acid name.

    >>> index_to_one(0)
    'A'
    >>> index_to_one(19)
    'Y'
    )dindex_to_1)index r
   2lib/python3.7/site-packages/Bio/PDB/Polypeptide.pyindex_to_oneg   s    r   c             C   s   t |  S )z`One letter code to index.

    >>> one_to_index('A')
    0
    >>> one_to_index('Y')
    19
    )d1_to_index)sr
   r
   r   one_to_indexr   s    r   c             C   s   t |  S )zIndex to corresponding three letter amino acid name.

    >>> index_to_three(0)
    'ALA'
    >>> index_to_three(19)
    'TYR'
    )dindex_to_3)ir
   r
   r   index_to_three}   s    r   c             C   s   t |  S )zjThree letter code to index.

    >>> three_to_index('ALA')
    0
    >>> three_to_index('TYR')
    19
    )d3_to_index)r   r
   r
   r   three_to_index   s    r   c             C   s   t |  }t| S )a  Three letter code to one letter code.

    >>> three_to_one('ALA')
    'A'
    >>> three_to_one('TYR')
    'Y'

    For non-standard amino acids, you get a KeyError:

    >>> three_to_one('MSE')
    Traceback (most recent call last):
       ...
    KeyError: 'MSE'
    )r   r   )r   r   r
   r
   r   three_to_one   s    r   c             C   s   t |  }t| S )zsOne letter code to three letter code.

    >>> one_to_three('A')
    'ALA'
    >>> one_to_three('Y')
    'TYR'
    )r   r   )r   r   r
   r
   r   one_to_three   s    r   Fc             C   s4   t | ts|  } |  } |r&| tkS | tjkS dS )a  Return True if residue object/string is an amino acid.

    :param residue: a L{Residue} object OR a three letter amino acid code
    :type residue: L{Residue} or string

    :param standard: flag to check for the 20 AA (default false)
    :type standard: boolean

    >>> is_aa('ALA')
    True

    Known three letter codes for modified amino acids are supported,

    >>> is_aa('FME')
    True
    >>> is_aa('FME', standard=True)
    False
    N)
isinstancestrget_resnameupperr   r   protein_letters_3to1)residuestandardr
   r
   r   is_aa   s    
r   c               @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )Polypeptidez5A polypeptide is simply a list of L{Residue} objects.c             C   s(   g }x| D ]}|d }| | q
W |S )zGet list of C-alpha atoms in the polypeptide.

        :return: the list of C-alpha atoms
        :rtype: [L{Atom}, L{Atom}, ...]
        CA)append)selfca_listrescar
   r
   r   get_ca_list   s
    
zPolypeptide.get_ca_listc          	   C   s\  g }t | }xHtd|D ]8}| | }y(|d  }|d  }|d  }W n4 tk
r   |d d|jd< d|jd< wY nX |dkr| |d	  }y|d  }	t|	|||}
W q tk
r   d}
Y qX nd}
||d	 k r.| |d	  }y|d  }t||||}W n tk
r*   d}Y nX nd}||
|f |
|jd< ||jd< qW |S )
z+Return the list of phi/psi dihedral angles.r   Nr    C)NNNZPHIZPSI   )lenrange
get_vector	Exceptionr!   xtrar   )r"   ZpplZlngr   r$   nr%   cZrpZcpZphiZrnnnZpsir
   r
   r   get_phi_psi_list   sB    



zPolypeptide.get_phi_psi_listc             C   s   |   }g }xtdt|d D ]t}|| ||d  ||d  ||d  f}dd |D \}}}}t||||}	||	 ||d   }
|	|
jd< q W |S )z?List of tau torsions angles for all 4 consecutive Calpha atoms.r      r)      c             S   s   g | ]}|  qS r
   )r,   ).0ar
   r
   r   
<listcomp>  s    z,Polypeptide.get_tau_list.<locals>.<listcomp>ZTAU)r&   r+   r*   r   r!   
get_parentr.   )r"   r#   Ztau_listr   	atom_listv1v2v3Zv4Ztaur$   r
   r
   r   get_tau_list  s    (
zPolypeptide.get_tau_listc       
      C   s   g }|   }x|tdt|d D ]f}|| ||d  ||d  f}dd |D \}}}t|||}|| ||d   }	||	jd< q W |S )z8List of theta angles for all 3 consecutive Calpha atoms.r   r4   r)   c             S   s   g | ]}|  qS r
   )r,   )r5   r6   r
   r
   r   r7     s    z.Polypeptide.get_theta_list.<locals>.<listcomp>ZTHETA)r&   r+   r*   r   r!   r8   r.   )
r"   Z
theta_listr#   r   r9   r:   r;   r<   Zthetar$   r
   r
   r   get_theta_list  s    
zPolypeptide.get_theta_listc             C   s   d dd | D }t|S )znReturn the AA sequence as a Seq object.

        :return: polypeptide sequence
        :rtype: L{Seq}
         c             s   s    | ]}t j| d V  qdS )XN)r   r   getr   )r5   r$   r
   r
   r   	<genexpr>.  s    z+Polypeptide.get_sequence.<locals>.<genexpr>)joinr   )r"   r   r
   r
   r   get_sequence'  s    zPolypeptide.get_sequencec             C   s,   | d   d }| d   d }d||f S )zReturn string representation of the polypeptide.

        Return <Polypeptide start=START end=END>, where START
        and END are sequence identifiers of the outer residues.
        r   r)   z<Polypeptide start=%s end=%s>)Zget_id)r"   startendr
   r
   r   __repr__2  s    zPolypeptide.__repr__N)
__name__
__module____qualname____doc__r&   r2   r=   r>   rD   rH   r
   r
   r
   r   r      s   -r   c               @   s*   e Zd ZdZdd Zdd Zd
ddZd	S )
_PPBuilderzBase class to extract polypeptides.

    It checks if two consecutive residues in a chain are connected.
    The connectivity test is implemented by a subclass.

    This assumes you want both standard and non-standard amino acids.
    c             C   s
   || _ dS )z`Initialize the base class.

        :param radius: distance
        :type radius: float
        N)radius)r"   rN   r
   r
   r   __init__F  s    z_PPBuilder.__init__c             C   s<   t ||drdS |s4d|jkr4td|   dS dS dS )z0Check if the residue is an amino acid (PRIVATE).)r   Tr    z5Assuming residue %s is an unknown modified amino acidFN)r   Z
child_dictwarningswarnr   )r"   r   Zstandard_aa_onlyr
   r
   r   _acceptN  s    z_PPBuilder._acceptr)   c          	   C   s  | j }| j}| }|dkr.|d }| }n*|dkr@| }n|dkrP|g}ntdg }x|D ]}	t|	}
y$t|
}x|||st|
}qzW W n tk
r   wbY nX d}xf|
D ]^}|||r
|||r
|||r
|dkrt }|	| |	| |	| nd}|}qW qbW |S )a  Build and return a list of Polypeptide objects.

        :param entity: polypeptides are searched for in this object
        :type entity: L{Structure}, L{Model} or L{Chain}

        :param aa_only: if 1, the residue needs to be a standard AA
        :type aa_only: int
        Sr   Mr(   z+Entity should be Structure, Model or Chain.N)
_is_connectedrR   Z	get_levelZget_listr   iternextStopIterationr   r!   )r"   ZentityZaa_onlyZis_connectedZacceptlevelZmodelZ
chain_listZpp_listchainZchain_itprev_resppnext_resr
   r
   r   build_peptides_  sB    	





z_PPBuilder.build_peptidesN)r)   )rI   rJ   rK   rL   rO   rR   r^   r
   r
   r
   r   rM   =  s   rM   c               @   s"   e Zd ZdZdddZdd ZdS )	CaPPBuilderz)Use CA--CA distance to find polypeptides.333333@c             C   s   t | | dS )zInitialize the class.N)rM   rO   )r"   rN   r
   r
   r   rO     s    zCaPPBuilder.__init__c       
      C   s   x||gD ]}| ds
dS q
W |d }|d }| rB| }n|g}| rZ| }n|g}x,|D ]$}x|D ]}	||	 | jk rpdS qpW qfW dS )Nr    FT)has_idis_disordereddisordered_get_listrN   )
r"   r[   r]   rr/   pnlistZplistr1   r\   r
   r
   r   rU     s     




zCaPPBuilder._is_connectedN)r`   )rI   rJ   rK   rL   rO   rU   r
   r
   r
   r   r_     s   
r_   c               @   s*   e Zd ZdZd
ddZdd Zdd Zd	S )	PPBuilderz'Use C--N distance to find polypeptides.?c             C   s   t | | dS )zInitialize the class.N)rM   rO   )r"   rN   r
   r
   r   rO     s    zPPBuilder.__init__c             C   s   | dsdS | dsdS | j}|d }|d }| rD| }n|g}| r\| }n|g}xt|D ]l}xf|D ]^}	| }
|	 }|
|ks|
dks|dkrr|||	rr| r|| | r||
 dS qrW qhW dS )Nr(   Fr'    T)ra   
_test_distrb   rc   Z
get_altlocZdisordered_select)r"   r[   r]   Z	test_distr0   r/   Zclistrf   r1   ZccZn_altlocZc_altlocr
   r
   r   rU     s2    








zPPBuilder._is_connectedc             C   s   || | j k rdS dS dS )z4Return 1 if distance between atoms<radius (PRIVATE).r)   r   N)rN   )r"   r0   r/   r
   r
   r   rj     s    zPPBuilder._test_distN)rh   )rI   rJ   rK   rL   rO   rU   rj   r
   r
   r
   r   rg     s   
#rg   )F)"rL   rP   ZBio.Datar   ZBio.Seqr   ZBio.PDB.PDBExceptionsr   ZBio.PDB.vectorsr   r   Zstandard_aa_namesZaa1Zaa3r   r   r   r   r+   r   Zn1Zn3r   r   r   r   r   r   r   listr   rM   r_   rg   r
   r
   r
   r   <module>3   sb   
nV