B
    ‰°bŽ<  ã               @   sð   d Z ddlZyddlZW n( ek
r@   ddlmZ edƒ‚Y nX ddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZmZmZmZ dd
lmZmZ ddlmZ deeedœdd„Zdd„ Zdd„ Zdd„ Zeedœdd„Zddd„ZdS )zDPICIO: read and write Protein Internal Coordinate (.pic) data files.é    N)ÚMissingPythonDependencyErrorz:Install NumPy to build proteins from internal coordinates.)Ú	as_handle)ÚStructureBuilder)Ú_parse_pdb_header_list)ÚPDBException)Ú
IC_ResidueÚIC_ChainÚEdronÚAtomKey)ÚDictÚTextIO)Ú	StructureF)ÚfileÚverboseÚreturnc             C   sà  t  d¡}t  d¡}t  d¡}t  d¡}t  d¡}t  d¡}tƒ }tg ƒ}	ddddg}
|j|j|j|jg}d}t| dd	 }x| 	¡ D ]}| 
d
¡rœqŠ| 
d¡rô| |¡}|rà| d¡|	d< | d¡|	d< | d¡|	d< n|ròtd| d|ƒ qŠ| 
d¡r:| |¡}|r$| d¡ ¡ |	d< n|r”td| d|ƒ qŠ| 
d¡r–| |¡}|r|| d¡}|dkrnd}| d¡t| d¡ƒ| d¡|g}|
|krxptdƒD ]d}|
| || kr¤|| || ƒ || |
|< d|krì| |	¡ nd|kr¤d |
d< |
d< q¤W | | d ¡| d!¡t| d"¡ƒ| d#¡¡ |j}d| ¡ krpx"|j ¡ D ]}|jsX|}P qXW t|ƒ|_n|rtd| d$|ƒ dS qŠ| 
d%¡r®| |¡}|r”|dkrÔ|rÐtd| d&|ƒ dS |j| d ¡ks |jd t| d"¡ƒkr&|r"td| d'|jt|jƒd(|ƒ dS t t| d)¡ƒt| d*¡ƒt| d+¡ƒfd,¡}| | d-¡ ¡ |t| d.¡ƒt| d/¡ƒ| d0¡| d-¡t| d1¡ƒ| d2¡ ¡ ¡ qŠ| 
d3¡r,| |¡}|r”xÈ|  ¡ D ]R}|dk	rÔ| |¡}|rÔ|dk	rÔ|jrÔ|j}t| d¡ƒ|j!| d¡< qÔW qŠt"j# |¡}|rZ|dk	rZ|j $| %¡ ¡ qŠ|rrtd4| d5|ƒ dS | ¡ rŠ|rtd6| d7|d8ƒ dS qŠW W dQ R X | &¡ }x0| '¡ D ]$}t(|ƒ }|_| )¡  | *¡  q´W |S )9a¡  Load Protein Internal Coordinate (.pic) data from file.

    PIC file format:
        - comment lines start with #
        - (optional) PDB HEADER record
           - idcode and deposition date recommended but optional
           - deposition date in PDB format or as changed by Biopython
        - (optional) PDB TITLE record
        - repeat:
           - Biopython Residue Full ID - sets residue IDs of returned structure
           - (optional) PDB N, CA, C ATOM records for chain start
           - (optional) PIC Hedra records for residue
           - (optional) PIC Dihedra records for residue
           - (optional) BFAC records listing AtomKeys and b-factors

    An improvement would define relative positions for HOH (water) entries.

    N.B. dihedron (i-1)C-N-CA-CB is ignored in assembly if O exists.

    C-beta is by default placed using O-C-CA-CB, but O is missing
    in some PDB file residues, which means the sidechain cannot be
    placed.  The alternate CB path (i-1)C-N-CA-CB is provided to
    circumvent this, but if this is needed then it must be adjusted in
    conjunction with PHI ((i-1)C-N-CA-C) as they overlap.  (i-1)C-N-CA-CB is
    included by default in .pic files for consistency and informational
    (e.g. statistics gathering) purposes, as otherwise the dihedron would only
    appear in the few cases it is needed for.

    :param Bio.File file: file name or handle
    :param bool verbose: complain when lines not as expected
    :returns: Biopython Structure object, Residues with .internal_coord attributes
        but no coordinates except for chain start N, CA, C atoms if supplied,
        **OR** None on parse fail (silent unless verbose=True)

    zm^HEADER\s{4}(?P<cf>.{1,40})(?:\s+(?P<dd>\d\d\d\d-\d\d-\d\d|\d\d-\w\w\w-\d\d))?(?:\s+(?P<id>[0-9A-Z]{4}))?\s*$z^TITLE\s{5}(?P<ttl>.+)\s*$z³^\('(?P<pid>[^\s]*)',\s(?P<mdl>\d+),\s'(?P<chn>\s|\w)',\s\('(?P<het>\s|[\w\s-]+)',\s(?P<pos>-?\d+),\s'(?P<icode>\s|\w)'\)\)\s+(?P<res>[\w]{1,3})(\s\[(?P<segid>[a-zA-z\s]+)\])?\s*$a*  ^ATOM\s\s(?:\s*(?P<ser>\d+))\s(?P<atm>[\w\s]{4})(?P<alc>\w|\s)(?P<res>[\w]{3})\s(?P<chn>.)(?P<pos>[\s\-\d]{4})(?P<icode>[A-Za-z\s])\s\s\s(?P<x>[\s\-\d\.]{8})(?P<y>[\s\-\d\.]{8})(?P<z>[\s\-\d\.]{8})(?P<occ>[\s\d\.]{6})(?P<tfac>[\s\d\.]{6})\s{6}(?P<segid>[a-zA-z\s]{4})(?P<elm>.{2})(?P<chg>.{2})?\s*$z|^BFAC:\s([^\s]+\s+[\-\d\.]+)\s*([^\s]+\s+[\-\d\.]+)?\s*([^\s]+\s+[\-\d\.]+)?\s*([^\s]+\s+[\-\d\.]+)?\s*([^\s]+\s+[\-\d\.]+)?z([^\s]+)\s+([\-\d\.]+)NÚr)Úmodeú#zHEADER ZcfÚheadÚidÚidcodeÚddÚdeposition_datezReading pic filezHEADER parse fail: zTITLE ZttlÚnamezTITLE parse fail:, ú(é	   z    é   é   é   é   r   ÚresZhetÚposZicodezresidue ID parse fail: zATOM z"ATOM without residue configured:, z ATOM not in configured residue (z):ÚxÚyÚzÚfÚatmZtfacZoccZalcZserZelmzBFAC: z
PIC file: z9 error: no residue info before reading (di/h)edron data: zReading PIC filezparse fail on: .Ú.)+ÚreÚcompiler   r   Zinit_structureZ
init_modelZ
init_chainZinit_segr   Ú	readlinesÚ
startswithÚmatchÚgroupÚprintÚstripÚintÚrangeZ
set_headerZinit_residueZresidueÚis_disorderedÚ
child_dictÚvaluesÚinternal_coordr   Zresnamer   ÚstrÚnumpyZarrayÚfloatZ	init_atomÚgroupsZbfactorsr	   Zedron_reZload_PICÚ	groupdictZget_structureZ
get_chainsr   Zlink_residuesZ	init_edra)r   r   Z
pdb_hdr_reZ
pdb_ttl_reZ
biop_id_reZ
pdb_atm_reZbfac_reZbfac2_reZstruct_builderZheader_dictZ	curr_SMCSZ	SMCS_initZsb_resZhandleZalineÚmZsegidZ	this_SMCSÚir   ZcoordZ	bfac_pairZm2ZrpÚstructÚchnZchnp© r?   ú,lib/python3.7/site-packages/Bio/PDB/PICIO.pyÚread_PIC   s   $
	







"




&


"rA   c             C   s`   | j rL|r|s6| j}|s|j}|s6|jj}|j d¡}| | j  ||¡¡ n| t | ¡¡ d S )Nr   )	r5   Úparentr   ÚheaderÚgetÚwriteÚ	write_PICr   Z_residue_string)ÚentityÚfpÚpdbidÚchainidÚchainr=   r?   r?   r@   Ú_wpr  s    rL   c             C   sª   d}x|   ¡ D ]}| ¡  }P qW |r¦d}xz|  ¡ D ]n}d| ¡ kr~x\|j ¡ D ](}x"| ¡ D ]}| |¡ |d7 }q^W qPW q4x"| ¡ D ]}| |¡ |d7 }qˆW q4W d S )NFr   r   )Z	get_atomsZget_serial_numberZget_residuesr2   r3   r4   Zget_unpacked_listZset_serial_number)rG   Zneedr&   Zanumr    r   r?   r?   r@   Ú_enumerate_entity_atoms  s    


rM   c             C   sD   x|   ¡ r|   ¡ } qW d| jkr8x| D ]}t|ƒ q&W nt| ƒ dS )z2Ensure all atoms in entity have serial_number set.ÚSN)Z
get_parentÚlevelrM   )rG   Úmdlr?   r?   r@   Úenumerate_atoms)  s    


rQ   )Údatestrr   c             C   sl   | rht  d| ¡}|rhdddddddd	d
ddddgt| d¡ƒ }| d¡d | d | d¡dd…  } | S )z'Convert yyyy-mm-dd date to dd-month-yy.z(\d{4})-(\d{2})-(\d{2})ZXXXZJANZFEBZMARZAPRZMAYZJUNZJULZAUGZSEPZOCTZNOVZDECr   r   ú-r   éþÿÿÿN)r(   r,   r0   r-   )rR   r;   Zmor?   r?   r@   Úpdb_date5  s&    (rU   c          	   C   sÒ  t | ƒ t|dƒ´}y„d| jkr0tdƒ‚njd| jkr|d|  ¡ krjx0| j ¡ D ]}t||||ƒ qRW nt| |||ƒ nd| jkr®|s| j}x| D ]}t	||||ƒ q–W nìd| jkrÖxà| D ]}t	||||ƒ q¾W nÄd| jkrˆ|sô| j
 d	d
¡}| j
 dd
¡}t| j
 dd
¡ƒ}	|r>| d | ¡ |	p0d|p8d¡¡ | j
 dd
¡}
|
rh| d|
 ¡  d ¡ x0| D ]}t	||||ƒ qnW ntdt| jƒ ƒ‚W n& tk
rÂ   tdt| ƒ ƒ‚Y nX W d
Q R X d
S )a  Write Protein Internal Coordinates (PIC) to file.

    See read_PIC() for file format.  Recurses to lower entity levels (M, C, R).

    :param Entity entity: Biopython PDB Entity object: S, M, C or R
    :param Bio.File file: file name or handle
    :param str pdbid: PDB idcode, read from entity if not supplied
    :param char chainid: PDB Chain ID, set from C level entity.id if needed
    :raises PDBException: if entity level not S, M, C, or R
    :raises Exception: if entity does not have .level attribute
    ÚwÚAzNo PIC output at Atom levelÚRr   ÚCÚMrN   r   Nr   r   zHEADER    {:40}{:8}   {:4}
Ú r   z
TITLE     Ú
zCannot identify level: z2write_PIC: argument is not a Biopython PDB Entity )rQ   r   rO   r   r2   r3   r4   rL   r   rF   rC   rD   rU   rE   ÚformatÚupperr6   ÚKeyErrorÚ	Exception)rG   r   rI   rJ   rH   r   r    r>   Zhdrr   ZnamrP   r?   r?   r@   rF   M  sH    






rF   )F)NN) Ú__doc__r(   r7   ÚImportErrorZBior   ZBio.Filer   ZBio.PDB.StructureBuilderr   ZBio.PDB.parse_pdb_headerr   ZBio.PDB.PDBExceptionsr   ZBio.PDB.internal_coordsr   r   r	   r
   Útypingr   r   ZBio.PDB.Structurer   ÚboolrA   rL   rM   rQ   r6   rU   rF   r?   r?   r?   r@   Ú<module>   s*    i