B
    §dÙ÷  ã            °   @   s  d dl mZmZmZ d dlmZ d dlm  mZ	 d dl
mZ e	 d¡Zd dlT d dlZd dlZd dlmZ d dlmZ d dlZd dlZd d	lmZ d d
lmZ d dlmZmZm Z m!Z!m"Z" d dlm#Z# d dl
m$Z% d dl&Z&d dl'Z'd dl(Z(d dl)Z)e)j*Z+d‘dd„Z,dd„ Z-dd„ Z.d’dd„Z/dd„ Z0dddddd œZ1e2d!dd"dd#dd$dgƒZ3d%d&d'dd(d%d&d'dd(d)d*d+d(d%d&d'dd(ddddd)d*d+d(d,œZ4d-d.„ Z5d/d0„ Z6d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgœ6Z7dhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|gZ8e2d}d~dd€dd‚dƒd„d…d†d‡dˆd‰dŠd‹dŒddŽddd‘d’d“dfgƒZ9d”d•d–d—d˜d™dšd›dœddždŸd d¡d¢d£d¤d¥d¦d§d¨d©dªd«d¬d­d®d¯d°d±d²d³d´dµd¶d·d¸d¹dºd»d¼d½d¾d¿dÀdÁdÂdÃdÄdÅdÆdÇdÈdÉdÊdËdÌdÍdÎdÏdÐdÑdÒdÓdÔdÕdÖd×dØdÙdÚdÛdÜdÝdÞdßdàdádâdãdädådædçdèdédêdëdìdídîdïdðdñdòdódôdõdöd÷dødùdúdûdüdýdþdÿd ddddddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCg°Z:dDdE„ Z;e;ƒ Z<G dFdG„ dGe=ƒZ>G dHdI„ dIe=ƒZ?G dJdK„ dKe=ƒZ@G dLdM„ dMe=ƒZAdNdO„ ZBG dPdQ„ dQe=ƒZCG dRdS„ dSe=ƒZDdeCdddfdTdU„ZEdeCdddfdVdW„ZFdXdYgZGdZZHG d[d\„ d\e=ƒZIe	 JejFeI¡ e	 KejF¡G d]d^„ d^ƒƒZLG d_d`„ d`e=ƒZMdaZNG dbdc„ dce=ƒZOde&jPfddde„ZQde&jPdffdgdh„ZRdidjdkdldmdndodpdqdrdsdtdudvgZSdweTkr8e2eSƒZSdxdy„ ZUd“dzd{„ZVddd d d gfd|d}„ZWddd~dd d d gfdd€„ZXdd‚„ ZYG dƒd„„ d„e=ƒZZd…d†„ Z[d‡dˆ„ Z\d‰dŠ„ Z]d”d‹dŒ„Z^d•ddŽ„Z_d–dd„Z`dS (—  é    )Úabsolute_importÚdivisionÚprint_function)ÚflexN)ÚzipÚiotbx_pdb_ext)Ú*)Úmatrix)Úinterpreters)Ú
smart_open)Úshow_string)Úplural_sÚhashlib_md5Údate_and_timeÚto_bytesÚSorry)ÚAuto)Ú	cStringIOFç      à?c             C   s2   | d krd S |d k	r$|  ¡ }| ¡ }| j||dS )N)Úmin_distance_sym_equivÚu_star_tolerance)r   r   Úspecial_position_settings)Úcrystal_symmetryr   Úweak_symmetryr   r   © r   úq/mnt/filia/a/genomebrowser/www/genomebrowser/fleming/tools/molprobity/modules/cctbx_project/iotbx/pdb/__init__.pyÚ#construct_special_position_settings   s    	 r   c          	   C   s,  xdD ]}|   |¡rdS qW tj| d}| ¡  ¡ }W d Q R X xæ|D ]Þ}| d¡r ytjjj	|d}W n tjjj
k
r„   wFY nX |jd k	rž|jd k	rždS qF| d¡s´| d¡rFytjd t |g¡d	}W n* tk
rä   ‚ Y n tk
rø   wFY nX | ¡  ¡ d
krF| ¡ d }|jdkrFdS qFW dS )N)ZmtzZccp4ZmrcÚpickleZpklF)Ú	file_nameÚCRYST1)Úpdb_strTzATOM  ZHETATM)Úsource_infoÚlinesé   r   z    )Úendswithr   Úfor_readingÚreadÚ
splitlinesÚ
startswithÚiotbxÚpdbÚrecordsÚcryst1ZFormatErrorZucparamsZsgroupÚextÚinputr   Ú
std_stringÚKeyboardInterruptÚ	ExceptionÚatomsÚsizeÚname)r   Zknown_binary_extensionÚfZpdb_raw_recordsr    r,   Úpdb_inpÚatomr   r   r   Úis_pdb_file-   s6    



  

   
r8   c          
   C   sV   y.t jj| d ¡ }| ¡ d }d|kr,dS W n" tk
rP } zdS d }~X Y nX d S )N)Z	file_pathr   Z
_atom_siteTF)r)   ÚcifÚreaderÚmodelÚvaluesr1   )r   Z	cif_modelZ	cif_blockÚer   r   r   Úis_pdb_mmcif_fileE   s    r>   ÚPDB_MIRROR_PDBc             C   sb   t | ƒdkrtd|  ƒ‚|  ¡ } tj| }t || dd… d|  ¡}t |¡s^td| |f ƒ‚|S )Né   z,Invalid PDB ID: %s (must be four characters)r#   é   zpdb%s.ent.gzzNo file with PDB ID %s (%s))ÚlenÚRuntimeErrorÚlowerÚosÚenvironÚopÚjoinÚisfile)Úpdb_idZenviron_keyZ
pdb_mirrorÚresultr   r   r   Úent_path_local_mirrorN   s    

rL   c                 sx   dd l } | j| j| j  }}}t|ƒ ‰ }ˆ  |¡ ˆ  |¡ ‡ fdd„}|| | }|||ƒ |||ƒ |||ƒ |S )Nr   c                s.   x(| D ] }x|D ]}ˆ   || ¡ qW qW d S )N)Úappend)ÚfirstÚsecondr5   Ús)Ú_r   r   Úxy^   s    

z systematic_chain_ids.<locals>.xy)ÚstringZascii_uppercaseZascii_lowercaseÚdigitsÚlistÚextend)rS   ÚuÚlÚdrK   rR   Úar   )rQ   r   Úsystematic_chain_idsX   s    




r[   ÚAÚCÚGÚTÚU)ÚADEÚCYTÚGUAÚTHYÚURIÚADÚCDÚGDÚTDz?Az?Cz?GÚDTÚDAÚDCÚDG)r\   r]   r^   r`   r_   z+Az+Cz+Gz+Uz+Trk   rl   rm   rj   ra   rb   rc   re   rd   ZARZCRZGRZURrf   rg   rh   ri   c             C   st   | dkrdS t | tƒst‚|  ¡ }| d¡ d¡}| d¡ d¡}| d¡}t|ƒdkr`| ¡ S t |d¡}|S dS )zD Truncate the residue name to one letter. Return the letter or None.NÚrÚRrY   ÚDú+r#   )Ú
isinstanceÚstrÚAssertionErrorÚstriprB   ÚupperÚcns_dna_rna_residue_namesÚget)ÚresnamerK   r   r   r   Úget_one_letter_rna_dna_name   s    
rz   c             C   s   t  |  ¡  ¡ ¡S )N)Úrna_dna_reference_residue_namesrx   ru   rv   )Úcommon_namer   r   r   Úrna_dna_reference_residue_nameŸ   s    r}   zC1*ÚC2zC2*zC3*ÚC4zC4*ÚC5zC5*ÚC6ÚC5MÚC8ÚH1zH1*ÚH2ÚH21ÚH22ÚH3zH3*zH4*ÚH41ÚH42ÚH5zH5*1ÚH6ÚH61ÚH62ÚH5M1ÚH5M2ÚH5M3ÚH8ÚN1ÚN2ÚN3ÚN4ÚN6ÚN7ÚN9ÚO2zO2*zO3*ÚO4zO4*zO5*ÚO6ÚO1PÚO2PÚO3TÚPzH2*2zH5*2zHO2*zHO3*zHO5*ÚHOP3)6z C1'z C2 z C2'z C3'z C4 z C4'z C5 z C5'z C6 z C7 z C8 z H1 z H1'z H2 z H21z H22z H3 z H3'z H4'z H41z H42z H5 z H5'z H6 z H61z H62z H71z H72z H73z H8 z N1 z N2 z N3 z N4 z N6 z N7 z N9 z O2 z O2'z O3'z O4 z O4'z O5'z O6 z OP1z OP2z OP3z P  zH2''zH5''zHO2'zHO3'zHO5'r¡   z N  z CA z C  z O  z OXTz H  z D  z HXTz DXTz HA z HA2z HA3z DA z DA2z DA3z H1 z H2 z H3 z D1 z D2 z D3 z C1'z C2'z C3'z C4'z C5'z H1'z H2'z H3'z H4'z H5'z O2'z O3'z O4'z O5'z OP1z OP2z OP3z P  zH2''zH5''zHO2'zHO3'zHO5')Z1D2z H21zG DG)z1D2'z H2'ÚANY)z1D2*z H2'r¢   )Z1D4z H41zC DC)z1D5'z H5'r¢   )z1D5*z H5'r¢   )Z1D5Mz H71rj   )Z1D6z H61zA DA)Z1H2z H21zG DG)z1H2'z H2'r¢   )z1H2*z H2'r¢   )Z1H4z H41zC DC)z1H5'z H5'r¢   )z1H5*z H5'r¢   )Z1H5Mz H71rj   )Z1H6z H61zA DA)Z2D2z H22zG DG)z2D2'zH2''zDA DC DG DT)z2D2*zH2''zDA DC DG DT)Z2D4z H42zC DC)z2D5'zH5''r¢   )z2D5*zH5''r¢   )Z2D5Mz H72rj   )Z2D6z H62zA DA)z2DO'zHO2'zA C G U)z2DO*zHO2'zA C G U)Z2DOPÚHOP2r¢   )Z2H2z H22zG DG)z2H2'zH2''zDA DC DG DT)z2H2*zH2''zDA DC DG DT)Z2H4z H42zC DC)z2H5'zH5''r¢   )z2H5*zH5''r¢   )Z2H5Mz H72rj   )Z2H6z H62zA DA)z2HO'zHO2'zA C G U)z2HO*zHO2'zA C G U)Z2HOPr£   r¢   )Z3D5Mz H73rj   )Z3DOPr¡   r¢   )Z3H5Mz H73rj   )Z3HOPr¡   r¢   )zC1'z C1'r¢   )zC1*z C1'r¢   )r~   z C2 r¢   )zC2'z C2'r¢   )zC2*z C2'r¢   )zC3'z C3'r¢   )zC3*z C3'r¢   )r   z C4 r¢   )zC4'z C4'r¢   )zC4*z C4'r¢   )r€   z C5 r¢   )zC5'z C5'r¢   )zC5*z C5'r¢   )r‚   z C7 rj   )r   z C6 r¢   )ZC7z C7 rj   )rƒ   z C8 z	A G DA DG)ZD1z H1 zG DG)zD1'z H1'r¢   )zD1*z H1'r¢   )ZD2z H2 zA DA)zD2'z H2'r¢   )zD2*z H2'r¢   )zD2''zH2''zDA DC DG DT)zD2'1z H2'r¢   )zD2*1z H2'r¢   )zD2'2zH2''zDA DC DG DT)zD2*2zH2''zDA DC DG DT)ZD21z H21zG DG)ZD22z H22zG DG)ZD3z H3 zU DT)zD3'z H3'r¢   )zD3*z H3'r¢   )ZD3TzHO3'r¢   )zD4'z H4'r¢   )zD4*z H4'r¢   )ZD41z H41zC DC)ZD42z H42zC DC)ZD5z H5 zC U DC)zD5'z H5'r¢   )zD5*zHO5'r¢   )zD5''zH5''r¢   )zD5'1z H5'r¢   )zD5*1z H5'r¢   )zD5'2zH5''r¢   )zD5*2zH5''r¢   )ZD5M1z H71rj   )ZD5M2z H72rj   )ZD5M3z H73rj   )ZD5TzHO5'r¢   )ZD6z H6 z	C U DC DT)ZD61z H61zA DA)ZD62z H62zA DA)ZD71z H71rj   )ZD72z H72rj   )ZD73z H73rj   )ZD8z H8 z	A G DA DG)zDO2'zHO2'zA C G U)zDO2*zHO2'zA C G U)r„   z H1 zG DG)zH1'z H1'r¢   )zH1*z H1'r¢   )r…   z H2 zA DA)zH2'z H2'r¢   )zH2*z H2'r¢   )zH2''zH2''zDA DC DG DT)zH2'1z H2'r¢   )zH2*1z H2'r¢   )zH2'2zH2''zDA DC DG DT)zH2*2zH2''zDA DC DG DT)r†   z H21zG DG)r‡   z H22zG DG)rˆ   z H3 zU DT)zH3'z H3'r¢   )zH3*z H3'r¢   )ZH3TzHO3'r¢   )zH4'z H4'r¢   )zH4*z H4'r¢   )r‰   z H41zC DC)rŠ   z H42zC DC)r‹   z H5 zC U DC)zH5'z H5'r¢   )zH5*zHO5'r¢   )zH5''zH5''r¢   )zH5'1z H5'r¢   )zH5*1z H5'r¢   )zH5'2zH5''r¢   )zH5*2zH5''r¢   )r   z H71rj   )r   z H72rj   )r‘   z H73rj   )ZH5TzHO5'r¢   )rŒ   z H6 z	C U DC DT)r   z H61zA DA)rŽ   z H62zA DA)ZH71z H71rj   )ZH72z H72rj   )ZH73z H73rj   )r’   z H8 z	A G DA DG)zHO2'zHO2'zA C G U)zHO2*zHO2'zA C G U)zHO3'zHO3'r¢   )zHO3*zHO3'r¢   )zHO5'zHO5'r¢   )zHO5*zHO5'r¢   )r£   r£   r¢   )r¡   r¡   r¢   )r“   z N1 r¢   )r”   z N2 zG DG)r•   z N3 r¢   )r–   z N4 zC DC)r—   z N6 zA DA)r˜   z N7 z	A G DA DG)r™   z N9 z	A G DA DG)r   z OP1r¢   )rš   z O2 z	C U DC DT)zO2'z O2'zA C G U)zO2*z O2'zA C G U)rž   z OP2r¢   )zO3'z O3'r¢   )zO3*z O3'r¢   )ZO3Pz OP3r¢   )rŸ   z OP3r¢   )r›   z O4 zU DT)zO4'z O4'r¢   )zO4*z O4'r¢   )zO5'z O5'r¢   )zO5*z O5'r¢   )ZO5Tz OP3r¢   )rœ   z O6 zG DG)ÚOP1z OP1r¢   )ÚOP2z OP2r¢   )ZOP3z OP3r¢   )r    z P  r¢   c              C   s,   i } x"t D ]\}}}|tkr
|| |< q
W | S )N)Úrna_dna_atom_name_aliasesÚ)rna_dna_atom_names_backbone_reference_set)rK   rZ   rn   r5   r   r   r   Ú%__rna_dna_atom_names_backbone_aliasesÁ  s
    r¨   c               @   s$   e Zd Zdd„ Zdd„ Zdd„ ZdS )Ú!rna_dna_atom_names_interpretationc             C   s2   |dkrd}n|dkst ‚|| _|| _t| ƒ d S )Nr_   rj   )z?Az?Cz?Gr\   r]   r^   r`   rk   rl   rm   rj   r_   )rt   Úresidue_nameÚ
atom_namesZ&rna_dna_atom_names_interpretation_core)Úselfrª   r«   r   r   r   Ú__init__Ë  s    z*rna_dna_atom_names_interpretation.__init__c             C   s8   g }x.t | j| jƒD ]\}}|jd kr| |¡ qW |S )N)r   r«   ÚinfosÚreference_namerM   )r¬   rK   Z	atom_nameÚinfor   r   r   Úunexpected_atom_namesÖ  s
    
z7rna_dna_atom_names_interpretation.unexpected_atom_namesc             C   sš   g }x| j D ]†}|j}|d kr*| d ¡ qt |¡}|d k	rH| |¡ q|dkrt| j d¡rh| d¡ q’| d¡ q|dksˆtd| ƒ‚| d ¡ qW |S )Nz H2'rp   zH2*1zH2*r¡   z
%s!="HOP3")r®   r¯   rM   Ú8rna_dna_atom_names_reference_to_mon_lib_translation_dictrx   rª   r(   rt   )r¬   rK   r°   ZrnÚmnr   r   r   Úmon_lib_namesÝ  s    
z/rna_dna_atom_names_interpretation.mon_lib_namesN)Ú__name__Ú
__module__Ú__qualname__r­   r±   r´   r   r   r   r   r©   É  s   r©   c               @   s   e Zd Zddd„ZdS )Ú(residue_name_plus_atom_names_interpreterNFc          	   C   sT  |  ¡  ¡ }t|ƒdkr(d | _d | _d S ddlm} | |¡}|d krLd }n|}|}t |¡}	d }
|	d k	r†|	j	|d}
|
d k	r„||
_
n¾|d ks’t‚|d k	r¬|s¬|tkr¬d }n
t|d}|d k	rDt||d}
|
jdkrt|ƒdkrü|tkrüd | _d | _d S |d kr|tkrd }
|
d k	rD|
j}|rDddd	d
dddddœ| }|| _|
| _d S )Nr   )Ú#three_letter_l_given_three_letter_d)r«   )r|   )rª   r«   r#   r\   r]   r^   r`   rf   rg   rh   ri   )r\   r]   r^   r`   rk   rl   rm   rj   )ru   rv   rB   Úwork_residue_nameÚatom_name_interpretationZiotbx.pdb.amino_acid_codesr¹   rx   Úprotein_atom_name_interpretersZmatch_atom_namesZd_aa_residue_namert   rw   r}   r©   Zn_unexpectedÚmon_lib_dna_rna_cifrª   )r¬   rª   r«   Z#translate_cns_dna_rna_residue_namesZreturn_mon_lib_dna_namerº   r¹   Zl_aa_rnZd_aa_rnZprotein_interpreterr»   Zrna_dna_ref_residue_namer   r   r   r­   ó  sh    




z1residue_name_plus_atom_names_interpreter.__init__)NF)rµ   r¶   r·   r­   r   r   r   r   r¸   ñ  s    r¸   c               @   s   e Zd Zdd„ Zddd„ZdS )Úcombine_unique_pdb_filesc          
   C   sê   i | _ i | _g | _g | _xÌ|D ]Ä}|| j kr@| j |  d7  < qd| j |< tj|d}dd„ | ¡  ¡ D ƒ}W d Q R X tƒ }| 	t
d |¡dd¡ | ¡ }| j |¡}|d k	r¾| |¡ q|g| j|< | j |¡ | j |¡ qW d S )Nr#   )r   c             S   s   g | ]}|  ¡  ¡ ‘qS r   )Ú
expandtabsÚrstrip)Ú.0rP   r   r   r   ú
<listcomp>@  s    z5combine_unique_pdb_files.__init__.<locals>.<listcomp>Ú
Úutf8)Úcodec)Úfile_name_registryÚmd5_registryZunique_file_namesZraw_recordsr   r%   r&   r'   r   Úupdater   rH   Ú	hexdigestrx   rM   rV   )r¬   Ú
file_namesr   r5   rn   ÚmrX   r   r   r   r­   5  s&    


 z!combine_unique_pdb_files.__init__NÚ c             C   s@  |d krt j}d}xPt| j ¡ ƒD ]>}| j| }|dkr"t|d|t|ƒf  |d ||d 7 }q"W |dkr†t|dt|d  |d d}xht| j 	¡ ƒD ]V}t
|ƒdkršt|d |d x$|D ]}t|dt|ƒ  |d qÀW |t
|ƒd 7 }qšW |dkrt|d	t|d  |d |dks,|dkr<t| ¡ |d d S )
Nr   r#   z(INFO: PDB file name appears %d times: %s)Úfilez"  %d repeated file name%s ignored.)Únz'INFO: PDB files with identical content:z  %sz(%d file%s with repeated content ignored.)ÚsysÚstdoutÚsortedrÆ   ÚkeysÚprintr   r   rÇ   r<   rB   rÀ   )r¬   ÚoutÚprefixZ	n_ignoredr   rÎ   Zn_identicalrÊ   r   r   r   Úreport_non_uniqueL  s0     


z*combine_unique_pdb_files.report_non_unique)NrÌ   )rµ   r¶   r·   r­   rÖ   r   r   r   r   r¾   3  s   r¾   c               @   s   e Zd Zdd„ Zdd„ ZdS )Úheader_datec             C   s.  d| _ d| _d| _d| _t|ƒdkr(dS | d¡dkr:dS |d dksR|d dkrVdS | d¡\}}}yt|ƒ| _ W n tk
rˆ   Y nX | j dk sž| j dkr¤d| _ | 	¡ dkrº| 	¡ | _yt|ƒ| _W n tk
rÜ   Y nX | jd	k sò| jd
krød| _| jdk	r*| jdk rd| j | _nd| j | _dS )zExpected format: DD-MMM-YYNé	   ú-é   é   r#   é   )ZJANZFEBZMARZAPRZMAYZJUNZJULZAUGZSEPZOCTZNOVZDECr   éc   é<   iÐ  il  )
ÚddÚmmmÚyyÚyyyyrB   ÚcountÚsplitÚintÚ
ValueErrorrv   )r¬   Úfieldrß   rà   rá   r   r   r   r­   g  s:          
   zheader_date.__init__c             C   s(   | j d k	o&| jd k	o&| jd k	o&| jd k	S )N)rß   rà   rá   râ   )r¬   r   r   r   Úis_fully_definedƒ  s    


zheader_date.is_fully_definedN)rµ   r¶   r·   r­   rè   r   r   r   r   r×   e  s   r×   c             C   sd   |   d¡r`t| dd… d}| ¡ r*|jS |  ¡ }| ¡  x$|D ]}t|d}| ¡ r@|jS q@W d S )NZHEADERé2   é;   )rç   )r(   r×   rè   râ   rä   Úreverse)ÚrecordÚdateÚfieldsrç   r   r   r   Úheader_year‰  s    
 

 
rï   c               @   s   e Zd ZdS )ÚPlease_pass_string_or_NoneN)rµ   r¶   r·   r   r   r   r   rð   ”  s    rð   c               @   s(   e Zd Zdedddfdd„Zdd„ ZdS )Úpdb_input_from_anyNFc             C   sê  d | _ d }ddlm} d}|d k	r<| d¡ |¡r<|tf}	nt|f}	d }
x^|	D ]T}y||||||d}W n4 tk
r  } z|
d krŽt ¡ }
wPW d d }~X Y nX d }
|tkrž| 	¡  
¡ }t| ¡  ¡ ƒ}| ¡  dd¡}||ksü||| kr|dkr|dkrqP|d k	r–t|ƒdkr–t|ƒ}d}xB||k rn||  ¡  d¡sbt||  ¡ ƒdkrn|d7 }q.W ||k r–|| d d	…  ¡ d
kr–qPd| _ nd| _ P qPW |
d k	rÎt |
d |
d |
d ¡ |d kràtdƒ‚|| _d S )Nr   )Ú	cif_input)z.cifz.mmcifz.gz)r   r!   r"   rJ   Úraise_sorry_if_format_errorz      ú#r#   é   Zdata_r*   r9   rÚ   z+Could not interpret input as any file type.)Zfile_formatZiotbx.pdb.mmcifrò   ru   r$   Ú	pdb_inputr1   rÏ   Úexc_infoÚunknown_sectionr3   ÚsumZrecord_type_countsr<   rx   rB   r(   ÚsixÚreraiser   Ú_file_content)r¬   r   r!   r"   rJ   ró   Úcontentrò   Z
mmcif_extsZfile_inputsr÷   Z
file_inputr=   Zn_unknown_recordsZ	n_recordsZn_blank_recordsZ	len_linesÚir   r   r   r­   ˜  s\    
 
	

*$

zpdb_input_from_any.__init__c             C   s   | j S )N)rü   )r¬   r   r   r   Úfile_contentÜ  s    zpdb_input_from_any.file_content)rµ   r¶   r·   rð   r­   rÿ   r   r   r   r   rñ   –  s   ?rñ   c          
   C   s4  |d k	r| d kst ‚t|d} | d k	r¨y>tj| dd}| ¡ }W d Q R X tjdt| ƒ t 	|¡dS  t
k
r¦ } z$|r”tdt| ƒt|ƒf ƒ‚n‚ W d d }~X Y nX |tk	s´t ‚t|tƒrÊt 	|¡}nt|ttfƒrât |¡}ytj||dS  t
k
r. } z|rtdt|ƒ ƒ‚n‚ W d d }~X Y nX d S )N)rJ   Úrt)Ú	gzip_modezfile )r!   r"   zFormat error in %s:
%szFormat error:
%s)rt   rL   r   r%   r&   r-   r.   rs   r   Úsplit_linesræ   r   rð   rr   rU   Útupler/   )r   r!   r"   rJ   ró   r5   r=   r   r   r   rö   ß  s2    



rö   c             C   s   t | ||||d ¡ S )aá  
  Main input method for both PDB and mmCIF files; will automatically determine
  the actual format and return the appropriate data type.

  Parameters
  ----------
  file_name: path to PDB or mmCIF file
  source_info: string describing source of input (e.g. file name)
  lines: flex.std_string array of input lines
  pdb_id: PDB ID to automatically retrieve from local mirror
  raise_sorry_if_format_error: re-raise any low-level parser errors as a
    libtbx.utils.Sorry exception instance for clean user feedback

  Returns
  -------
  An object representing the result of parsing, including an array of atom
  objects; the actual class will differ depending on the input format.  Much of
  the API will be the same in either case.
  )r   r!   r"   rJ   ró   )rñ   rÿ   )r   r!   r"   rJ   ró   r   r   r   r.     s    r.   ZPEAKZSITE)rø   Útitle_sectionÚremark_sectionÚprimary_structure_sectionZheterogen_sectionÚsecondary_structure_sectionÚconnectivity_annotation_sectionZmiscellaneous_features_sectionÚcrystallographic_sectionÚconnectivity_sectionZbookkeeping_sectionc               @   s¼   e Zd Zddd„Zddd„Zddd„Zdd	d
„Zddd„Zeedddddddeefdd„Z	deeddddddf	dd„Z
ddddddddddefdd„Zdddddddddddefdd„ZdS )Úpdb_input_mixinTc             C   sÔ   d}xÊ|   ¡ D ]¾}| d¡rt|dd… d}| ¡ rt|jƒ ¡ }t|ƒdkrXd| }d|t|jƒt|j	ƒf }|rt
d	d
„ ttjƒD ƒƒ}t|t|jƒ ¡  ƒ}t|ƒdkr¸d| }dt|j	ƒ||f }qW |S )zT
    Placeholder to match mmCIF functionality. Probably could parse
    REVDAT.
    NzHEADER ré   rê   )rç   r#   Ú0z%s-%s-%sc             s   s   | ]\}}|  ¡ |fV  qd S )N)rv   )rÁ   ÚkÚvr   r   r   ú	<genexpr>A  s    z2pdb_input_mixin.deposition_date.<locals>.<genexpr>)r  r(   r×   rè   rs   rß   ru   rB   rà   râ   ÚdictÚ	enumerateÚcalendarZ
month_abbrrv   )r¬   Zus_stylerK   Úlinerí   rß   ZmonthsrË   r   r   r   Údeposition_date3  s     
  zpdb_input_mixin.deposition_dateNc             C   s|   |  ¡ }|d kr| j|d}t|jƒdks.|r2|S t|jƒdkrj|jd |jd  }}| ¡ rj| ¡ rj|S |j|j|jdS )N)Ú
sort_atomsr   r#   )Zrot_matricesZtrans_vectors)ÚvalidateÚconstruct_hierarchyrB   rn   ÚtÚis_r3_identity_matrixÚis_col_zeroZapply_rotation_translation)r¬   Úmtrix_biomt_containerÚhr  Zpresentrn   r  r   r   r   Ú_expand_hierarchy_helperJ  s      z(pdb_input_mixin._expand_hierarchy_helperc             C   s   | j |  ¡ |dS )N)r  r  )r  Úprocess_MTRIX_records)r¬   r  r   r   r   Ú"construct_hierarchy_MTRIX_expandedY  s    z2pdb_input_mixin.construct_hierarchy_MTRIX_expandedc             C   s   | j |  ¡ |dS )N)r  r  )r  Úprocess_BIOMT_records)r¬   r  r   r   r   Ú"construct_hierarchy_BIOMT_expanded^  s    z2pdb_input_mixin.construct_hierarchy_BIOMT_expandedFç      à?r   c             C   s    | j ||d}t|||||dS )N)r   r   )r   r   r   r   r   )r   r   )r¬   r   r   r   r   r   r   r   r   r   e  s    z)pdb_input_mixin.special_position_settingsc             C   s¤   |	dkrt ƒ }	|tkr(d}n|tkr(d}|tkr8|  ¡ }|tkrH|  ¡ }|dk	sX|dk	rntt|||d|	d | j|	|||||d}tjr”|	 	|¡ |rœ|	S |	 
¡ S )z]
    Generate standard PDB format.  Will use built-in crystal symmetry if
    available.
    NFT)r   Úcryst1_zÚwrite_scale_records)rÍ   )Ú	cstringioÚ
append_endÚatom_hetatmÚsigatmÚanisouÚsiguij)ÚStringIOr   r   Úextract_cryst1_z_columnsrÓ   Úformat_cryst1_and_scale_recordsÚ_as_pdb_string_cstringiorú   ÚPY3ÚwriteÚgetvalue)r¬   r   r#  r$  r&  r'  r(  r)  r*  r%  Zlink_recordsZreturn_cstringioÚpy3outr   r   r   Úas_pdb_stringt  s6    
zpdb_input_mixin.as_pdb_stringc          	   C   sŠ   |t kr|  ¡ }|t kr |  ¡ }|d k	s0|d k	rn|r:d}nd}t||ƒ}tt|||d|d W d Q R X d}| j||||||	|
d d S )NrZ   Úw)r   r#  r$  )rÍ   T)r   Úopen_appendr&  r'  r(  r)  r*  )r   r   r,  ÚopenrÓ   r-  Z_write_pdb_file)r¬   r   r5  r   r#  r$  r&  r'  r(  r)  r*  Úmoder5   r   r   r   Úwrite_pdb_file¦  s,     zpdb_input_mixin.write_pdb_filec             C   s4   |dk	rd| _ | jd|||||||||	|
|dd S )zr
    Create a single cctbx.xray.structure object from the atom records, using
    only the first model found.
    NF)Úone_structure_for_each_modelr   r   Ú cryst1_substitution_buffer_layerÚunit_cube_pseudo_crystalÚfractional_coordinatesÚuse_scale_matrix_if_availabler   Ú6non_unit_occupancy_implies_min_distance_sym_equiv_zeroÚscattering_type_exactÚenable_scattering_type_unknownÚ atom_names_scattering_type_constr   )Ú_scale_matrixÚxray_structures_simple)r¬   r   r   r:  r;  r<  r=  r   r>  r?  r@  rA  r   r   r   Úxray_structure_simpleÇ  s     z%pdb_input_mixin.xray_structure_simplec             C   sò  ddl m} ddl m} ddl m} |rL|dkr8|dks<t‚|jddd}n\| j||d	}|dkrj| ¡ }| ¡ dkr”|j|j	|  
¡  ¡ |d
d}| ¡ dkr¨| ¡ }| ¡ }d}d}|sB|	r$|  ¡ }|dk	r(tdd„ t|d | ¡ ƒD ƒƒdk r(|d dddgkr| ¡ |d< nd}nd}|dk	rB|d }|d }g }|dkrTg }t||||
||  ¡ |  ¡ tjj |¡|||ƒ}|j|d}y.x(t|ƒr¸| |j||j|d¡ q’W W n0 tk
rì } ztt |ƒƒ‚W dd}~X Y nX |S )zÔ
    Create a list of cctbx.xray.structure objects, one per model in the
    input file.  Note that for most single-model structures (i.e. nearly all
    crystal structures), this will be a single-item list.
    r   )Úxray)Úcrystal)ÚuctbxN)r#   r#   r#   éZ   rH  rH  ZP1)Ú	unit_cellZspace_group_symbol)r   r   )Ú
sites_cartZbuffer_layer)rI  )	r   r   r   r   r   r   r   r   r   )r   r   r   c             S   s   g | ]\}}t || ƒ‘qS r   )Úabs)rÁ   rP   r5   r   r   r   rÂ     s    z:pdb_input_mixin.xray_structures_simple.<locals>.<listcomp>gíµ ÷Æ°>r#   )r   )r   Ú
scatterersr>  )!ÚcctbxrE  rF  rG  rt   Úsymmetryr   rI  Zcustomized_copyZnon_crystallographic_unit_cellr2   Úextract_xyzÚspace_group_infoZcell_equivalent_p1Úscale_matrixÚmaxr   Úfractionalization_matrixZ xray_structures_simple_extensionÚatoms_with_labelsZmodel_indicesÚscitbxÚstlÚsetÚ
stl_stringr   ÚnextrM   Z	structurerL  ræ   r   rs   )r¬   r9  r   r   r:  r;  r<  r   r>  r=  r?  r@  rA  rE  rF  rG  rI  Zscale_rZscale_trQ  rK   Zloopr   r=   r   r   r   rC  è  s~    





z&pdb_input_mixin.xray_structures_simple)T)NT)T)T)NFr"  r   )rµ   r¶   r·   r  r  r  r!  r   r   r3  r8  Ú(default_atom_names_scattering_type_constrD  rC  r   r   r   r   r  1  sh   
 


   
)r  c               @   s  e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
d„ Zdd„ Z	dEdd„Z
dd„ Zdd„ Zdd„ Zdd„ ZdFdd„Zdd„ Zdd„ Zd d!„ ZdGd"d#„Zd$d%„ Zd&d'„ Zd(d)„ Zd*d+„ Zd,d-„ Zd.d/„ Zd0d1„ ZdHd3d4„Zd5d6„ Zd7d8„ Zd9d:„ Zd;d<„ Zd=d>„ Z d?d@„ Z!dAdB„ Z"dCdD„ Z#dS )IrQ   zÎ
  This class parses PDB format, including non-ATOM records.  Atom objects will
  be created as part of the parsing, but the full PDB hierarchy object requires
  calling the construct_hierarchy() method.
  c             C   s    t  ¡ }x&td d… D ]}| t| |ƒƒ ¡ qW tƒ }| j|dddddd}tjr\| 	|¡ | t  
| ¡ ¡¡ x&tdd … D ]}| t| |ƒƒ ¡ q~W d|fS )NéþÿÿÿFT)r%  r&  r'  r(  r)  r*  r   )r   r/   Úinput_sectionsrV   Úgetattrr+  r.  rú   r/  r0  r  r1  )r¬   r"   ZsectionZ
pdb_stringr2  r   r   r   Ú__getinitargs__K  s"    
z_.__getinitargs__c             C   s   dS )Nr*   r   )r¬   r   r   r   Ú	file_type_  s    z_.file_typec                sÚ   ddl m} i }|  ¡ }x`|D ]X}| ¡ }| ¡ }| d¡r|d |d |d |dd … f\}}}}	| |g ¡ |	¡ qW g }
|j‰ xJt	| 
¡ | ¡ ƒD ]4\}}|
 d| ¡ |
 d ‡ fd	d
„|D ƒ¡¡ q˜W d |
¡S )Nr   )Úamino_acid_codesZSEQRESr#   rÚ   r@   z	>chain %srÌ   c                s   g | ]}ˆ   |d ¡‘qS )ú?)rx   )rÁ   r  )Úottr   r   rÂ   p  s    z*_.sequence_from_SEQRES.<locals>.<listcomp>rÃ   )Ú	iotbx.pdbr`  r  ru   rä   r(   Ú
setdefaultrV   Zone_letter_given_three_letterr   rÒ   r<   rM   rH   )r¬   r`  rY   ZpsrX   ÚlsÚkwÚi_seqZchidZrnsrK   r  Úvsr   )rb  r   Úsequence_from_SEQRESb  s    

("z_.sequence_from_SEQRESc             C   s(   x"|   ¡ D ]}| d¡r
t|ƒS q
W d S )NzHEADER )r  r(   rï   )r¬   r  r   r   r   Úextract_header_years  s    
z_.extract_header_yearc             C   s¸  d}g }dd„ }d}d}xº|   ¡ D ]®}| |¡r"| ¡  |d¡ ¡ }||d ƒrd| |d d¡ ¡ }| d¡}|t|ƒd  }| d¡d }	| d¡dkr¨|	 d¡dks²| d	¡r¸d}
nd}
||
 | }|d7 }q"W | ¡  |d¡ ¡ }||d ƒr
| |d d¡ ¡ }| |¡s´||ƒs´| d¡}xˆ|D ]€}d d
d„ |D ƒ¡}t|ƒdkr0| d¡}| 	¡  dd„ |D ƒ}d |¡}|d  
¡ r0|t|ƒd   
¡ r0| |¡ q0W |S )NZAUTHORc             S   s&   yt | ƒ dS  tk
r    dS X d S )NTF)Úfloatræ   )rP   r   r   r   Ú	is_number|  s
     z$_.extract_authors.<locals>.is_numberrÌ   r   ú,r#   Ú.rÙ   c             S   s   g | ]}|  ¡ ‘qS r   )ru   )rÁ   Úxr   r   r   rÂ   —  s    z%_.extract_authors.<locals>.<listcomp>c             S   s   g | ]}|  ¡ ‘qS r   )rv   )rÁ   ro  r   r   r   rÂ   ›  s    )r  r(   ru   Úreplacerä   rB   rã   r$   rH   ÚsortÚisalpharM   )r¬   ZtriggerrK   rl  ÚltZcntrrX   Zl_Úl1Úl2ÚjZl__r   r   r   Úextract_authorsy  sB    
 
&  



$z_.extract_authorsc             C   s6   g }d| }x$|   ¡ D ]}| |¡r| |¡ qW |S )NzREMARK %3d )r  r(   rM   )r¬   ÚiiirK   Úpatternr  r   r   r   Úextract_remark_iii_records¡  s    
z_.extract_remark_iii_recordsNc             C   s"   ddl m} |  ¡ }|j ||¡S )Nr   )Úsecondary_structure)rc  r{  r  Ú
annotationZfrom_records)r¬   Úlogr{  r+   r   r   r   Úextract_secondary_structure©  s    z_.extract_secondary_structurec             C   s8   g }x.|   ¡ D ]"}| d¡s&| d¡r| |¡ qW |S )z,
    Collect link records from PDB file
    ZLINKÚlink)r  r(   rM   )r¬   rK   r  r   r   r   Úextract_LINK_records®  s
    z_.extract_LINK_recordsc             C   s8   ddl m} x&|  ¡ D ]}| d¡r|j|dS qW d S )Nr   )Úcryst1_interpretationr   )Zcryst1_record)rc  r  r	  r(   r   )r¬   r  r  r   r   r   Úcrystal_symmetry_from_cryst1¸  s
    
z_.crystal_symmetry_from_cryst1c             C   sP   xJ|   ¡ D ]>}| d¡r
|dd … }t|ƒdk rD|ddt|ƒ  7 }|S q
W d S )Nr   éB   r@   ú )r	  r(   rB   )r¬   r  rK   r   r   r   r,  ¿  s    
 z_.extract_cryst1_z_columnsc             C   sD   ddl m} x2|  ¡ D ]&}| d¡r|j|d}|d k	r|S qW d S )Nr   )Úpdb_remarksz
REMARK sg=)Z
pdb_record)Z	iotbx.cnsr…  r  r(   Zextract_symmetry)r¬   r…  r  r   r   r   r   Ú$_crystal_symmetry_from_cns_remark_sgÇ  s    
z&_._crystal_symmetry_from_cns_remark_sgFc             C   s@   |   ¡ }|d kr|  ¡ }|d kr$|S |d kr0|S |j|| dS )N)Zother_symmetryÚforce)r‚  r†  Újoin_symmetry)r¬   r   r   Zself_symmetryr   r   r   r   Ð  s    z_.crystal_symmetryc             C   s0  t | dƒs*|  ¡ }t|ƒdkr(d| }d gd d gd g| _tƒ }g }x¢|  ¡ D ]–}| d¡rR|dd… d	krRt||d
}|j|krÜx2t	|j
ƒD ]$\}}|| jd |jd d | < q’W |j| jd |jd < | |j¡ | |j¡ qRW t|ƒdkrd | _n(t|d d… ƒdddgkr*td| ƒ‚| jS )NrB  r   z (%s)rØ   rA   ZSCALErõ   rÛ   )Ú1Ú2Ú3)r  r!   r#   rÚ   z#Improper set of PDB SCALE records%s)Úhasattrr!   rB   rB  rW  r	  r(   Úread_scale_recordrÎ   r  rn   r  ÚaddrM   rÑ   ræ   )r¬   r!   Zdone_setZ	done_listr  rn   Zi_colr  r   r   r   rQ  Þ  s,     
 
z_.scale_matrixc             C   s   dd l }|jj|  d¡dS )Nr   i^  )r"   )Úiotbx.mtrix_biomtÚmtrix_biomtZprocess_BIOMT_records_pdbrz  )r¬   r)   r   r   r   r   õ  s    z_.process_BIOMT_recordsc             C   s   dd l }|jj|  ¡ dS )Nr   )r"   )r  r  Zprocess_MTRIX_records_pdbr	  )r¬   r)   r   r   r   r  ú  s    z_.process_MTRIX_recordsc             C   s2   ddl m} |  d¡}| |  d¡¡ | ||¡S )Nr   )Ú"extract_rfactors_resolutions_sigmarÚ   rA   )rc  r‘  rz  rV   Úget_r_rfree_sigma)r¬   r   r‘  Zremark_2_and_3_recordsr   r   r   r’  ÿ  s
    
z_.get_r_rfree_sigmac             C   s
   |   ¡ jS )N)r’  Ú
resolution)r¬   r   r   r   r“    s    z_.resolutionc             C   s   |   ¡  ¡  ¡ }|dkS )NzELECTRON MICROSCOPY)Úget_experiment_typeru   rv   )r¬   Úetr   r   r   Ú#experiment_type_electron_microscopy	  s    z%_.experiment_type_electron_microscopyc             C   sZ   |   d¡}d }x0|D ](}| ¡ }tjjj|d}|d k	r|S qW |d k	rVd | ¡ ¡}|S )NrA   )ÚstrQ   )rz  ru   r)   r*   Úremark_3_interpretationZget_programrH   rä   )r¬   Zremark_3_linesrK   r  r   r   r   Úget_program_name  s    

 z_.get_program_namec             C   sø   |   d¡}g }xœ|D ]”}| ¡ }| d¡dkr| d¡dkry| | ¡ d ¡ W q tk
r¦   y | || d¡d d … ¡ W n tk
r    | |¡ Y nX Y qX qW d }t|ƒdkrôyt|d ƒ}W n& t	k
rà   Y n t
k
rò   Y nX |S )Ni  ZSOLVENTr#   ZCONTENTrÛ   ú:r   )rz  rv   rã   rM   rä   r1   ÚindexrB   rk  Ú
IndexErrorræ   )r¬   Úremark_280_linesÚmcÚremarkrK   r   r   r   Úget_solvent_content  s,    

    z_.get_solvent_contentc             C   sø   |   d¡}g }xœ|D ]”}| ¡ }| d¡dkr| d¡dkry| | ¡ d ¡ W q tk
r¦   y | || d¡d d … ¡ W n tk
r    | |¡ Y nX Y qX qW d }t|ƒdkrôyt|d ƒ}W n& t	k
rà   Y n t
k
rò   Y nX |S )Ni  ZMATTHEWSr#   ZCOEFFICIENTrÛ   rš  r   )rz  rv   rã   rM   rä   r1   r›  rB   rk  rœ  ræ   )r¬   r  rž  rŸ  rK   r   r   r   Úget_matthews_coeff-  s,    

    z_.get_matthews_coeffc             C   sZ   dd l }|  d¡}g }x.| ¡ D ]"}x| ¡ D ]}| |j¡ q.W q W |jjj|||dS )Nr   rA   )Úremark_3_recordsÚpdb_hierarchyÚ	chain_ids)	Ú!iotbx.pdb.remark_3_interpretationrz  ÚmodelsÚchainsrM   Úidr*   r˜  Zextract_tls_parameters)r¬   Ú	hierarchyr)   r¢  r¤  r;   Úchainr   r   r   Úextract_tls_paramsB  s    
z_.extract_tls_paramsc             C   s   dd l }|  d¡}t |¡S )Nr   rA   )r¥  rz  r˜  Úextract_f_model_core_constants)r¬   r)   r¢  r   r   r   r¬  N  s    
z _.extract_f_model_core_constantsTc          
   C   sä   xÞ|   ¡ D ]Ò}| d¡r
| d¡}t|ƒdks2t‚|d  dd¡ ¡  d¡}g }xj|D ]b}| ¡ dkrjqVqV| ¡ dkr‚| d ¡ qVy| t| ¡ ƒ¡ W qV t	k
r¶   | d ¡ Y qVX qVW |rØt|ƒd	krÔ|d	 S d S |S q
W d S )
NzREMARK 200  WAVELENGTH OR RANGErš  rÚ   r#   ú;rm  rÌ   ZNULLr   )
r  r(   rä   rB   rt   rp  ru   rM   rk  ræ   )r¬   Z
first_onlyr  rî   Z	subfieldsZwavelengthsrç   r   r   r   Úextract_wavelengthS  s*    


z_.extract_wavelengthc             C   s0   x*|   ¡ D ]}| d¡r
t |¡j ¡ S q
W d S )NZEXPDTA)r  r(   r+   ZexpdtaZ	techniqueru   )r¬   r  r   r   r   r”  l  s    
z_.get_experiment_typec             C   sæ   |   ¡ }t|ƒdkrdS ddlm} | t|  ¡ ƒg ¡}i }x*t|  ¡ ƒD ]\}}|j ¡ }|||< qJW xx|D ]p}| 	d¡s€t
‚tjj |¡}	||	j ¡  }|	j d¡dks°t
‚x,|	jD ]"}
|
dkr¸||  ||
 ¡  ¡ q¸W qnW |S )aÀ  
    Parse CONECT records and extract the indices of bonded atoms.  Returns
    a scitbx.array_family.shared.stl_set_unsigned object corresponding to the
    atoms array, with each element being the list of indices of bonded atoms
    (if any).  If no CONECT records are found, returns None.

    Note that the ordering of atoms may be altered by construct_hierarchy(), so
    this method should probably be called after the hierarchy is made.
    r   N)ÚsharedZCONECTrÌ   r@   )r
  rB   Úscitbx.array_familyr¯  Ústl_set_unsignedr2   r  Úserialru   r(   rt   r)   r*   r+   ZconectZserial_numbers_bonded_atomsrã   rM   )r¬   r"   r¯  ZbondsZserial_ref_hashrg  r7   r²  r  rì   Z	j_seq_strr   r   r   Úextract_connectivityr  s$    


z_.extract_connectivityc             c   sX   xR|   ¡ D ]F}|jsq
x6|   ¡ D ]*}|js,q |j|jkr:q ||ddfV  P q W P q
W d S )NZ1555)rT  Úheterory   )r¬   Úatom1Úatom2r   r   r   Úget_link_records  s       z_.get_link_recordsc             C   s   |   ¡ |  ¡ |  ¡ dœS )N)ÚCDLÚomegaÚAmber)Úused_cdl_restraintsZused_omega_restraintsÚused_amber_restraints)r¬   r   r   r   Úget_restraints_used™  s    z_.get_restraints_usedc             C   s2   d}x(|   ¡ D ]}| d¡r||krd}P qW |S )NFz
REMARK   3T)r  r(   )r¬   ÚwhatÚrcr  r   r   r   Ú_used_what_restraintsŸ  s    z_._used_what_restraintsc             C   s
   |   d¡S )Nr¸  )rÀ  )r¬   r   r   r   r»  §  s    z_.used_cdl_restraintsc             C   s
   |   d¡S )Nz	omega-cdl)rÀ  )r¬   r   r   r   Úused_omega_cdl_restraintsª  s    z_.used_omega_cdl_restraintsc             C   s
   |   d¡S )Nrº  )rÀ  )r¬   r   r   r   r¼  ­  s    z_.used_amber_restraints)N)NF)N)T)$rµ   r¶   r·   Ú__doc__r^  r_  ri  rj  rw  rz  r~  r€  r‚  r,  r†  r   rQ  r   r  r’  r“  r–  r™  r   r¡  r«  r¬  r®  r”  r³  r·  r½  rÀ  r»  rÁ  r¼  r   r   r   r   rQ   B  sD   (


 



rQ   c               @   s   e Zd Zddd„ZdS )Úrewrite_normalizedFc          	   C   s‚   t |d| _ |rDt|dƒ}td | j  ¡ ¡|d W d Q R X d }n
| j  ¡ }| j  ¡ | _|rdd }nd}| jj|||d|d d S )N)r   r4  rÃ   )rÍ   r#   T)r   r5  r   r&  Úatoms_reset_serial_first_value)	r.   r6  rÓ   rH   r	  r   r  r©  r8  )r¬   Zinput_file_nameZoutput_file_nameZ&keep_original_crystallographic_sectionZkeep_original_atom_serialr5   r   rÄ  r   r   r   r­   ²  s     "
zrewrite_normalized.__init__N)FF)rµ   r¶   r·   r­   r   r   r   r   rÃ  °  s    rÃ  aÚ  1crp 1crr
1f49 1gho
1gix 1giy
1j4z 1kpo
1jgo 1jgp 1jgq
1jyy 1jyz
1jz0 1jz1
1otz 1p0t
1pns 1pnu
1pnx 1pny
1s1h 1s1i
1ti2 1vld
1ti4 1vle
1ti6 1vlf
1utf 1utv
1voq 1vor 1vos 1vou 1vov 1vow 1vox 1voy 1voz 1vp0
1vs5 1vs6 1vs7 1vs8
1vsa 2ow8
1vsp 2qnh
1we3 1wf4
1yl3 1yl4
2avy 2aw4 2aw7 2awb
2b64 2b66
2b9m 2b9n
2b9o 2b9p
2bld 2bvi
2gy9 2gya
2gyb 2gyc
2hgi 2hgj
2hgp 2hgq
2hgr 2hgu
2i2p 2i2t 2i2u 2i2v
2j00 2j01 2j02 2j03
2jl5 2jl6 2jl7 2jl8
2qal 2qam 2qan 2qao
2qb9 2qba 2qbb 2qbc
2qbd 2qbe 2qbf 2qbg
2qbh 2qbi 2qbj 2qbk
2qou 2qov 2qow 2qox
2qoy 2qoz 2qp0 2qp1
2uv9 2uva
2uvb 2uvc
2v46 2v47 2v48 2v49
2vhm 2vhn 2vho 2vhp
2z4k 2z4l 2z4m 2z4n
2zkq 2zkr
2zuo 2zv4 2zv5
3bz1 3bz2
3d5a 3d5b 3d5c 3d5d
3df1 3df2 3df3 3df4
3f1e 3f1f 3f1g 3f1h
c               @   s   e Zd Zdd„ ZdS )Újoin_fragment_filesc             C   sr  t  ¡ }| d¡ | dtƒ  ¡ g }d }ddlm} | ¡ | _d}xÖ|D ]Î}tj	j
|d}| ¡ }	|	d k	rŒ|	 ¡ }	|	dkr†t|	ƒ}	qd }	nd }	|d krž|	}n
t||	ƒ}| ¡ }
| dt|ƒ ¡ |
d k	r|
 ¡ d k	r| dtj	j|
|	d	 ¡ | jj|
d
d| _| |jdd¡ qJW | j ¡ d k	rD| tj	j| j|d¡ tj	jj|d}|j |¡ | ¡  || _d S )Nz(REMARK JOINED FRAGMENT FILES (iotbx.pdb)zREMARK r   )rF  zJREMARK Warning: CRYST1 Z field (columns 67-70) is not an integer: "%-4.4s")r   rÌ   z	REMARK %s)ÚzT)r‡  F)r  )r   rÆ  )Úroots)r   r/   rM   r   rM  rF  rN  r   r)   r*   r.   r,  ru   rå   rR  r   rI  Úformat_cryst1_recordrˆ  r  r©  Z
join_rootsr°   rV   Zreset_i_seq_if_necessaryÚjoined)r¬   rÊ   r°   rÇ  rÆ  rF  Z	z_warningr   r6   Zz_ÚcsrK   r   r   r   r­     sD    


 


zjoin_fragment_files.__init__N)rµ   r¶   r·   r­   r   r   r   r   rÅ    s   rÅ  c       	      C   sf   t | ƒdkst‚t| ƒ}tƒ }|j|d t| ƒj}t|dƒ}| | 	¡ ¡ | 
¡  t|||d}|S )Nr   )rÔ   r4  )Úsite_clash_cutoffrÔ   )rB   rt   r¾   r+  rÖ   rÅ  rÉ  r6  r0  r3  ÚcloseÚquick_clash_check)	rÊ   Zoutput_filerË  r}  Zmerged_recordsÚwarningsZmerged_hierarchyr5   Z	n_clashesr   r   r   Ú!merge_files_and_check_for_overlap,  s    

rÏ  rõ   c             C   sT   t | d}| ¡ }|jddd}| ¡ }| ¡ }|j|d}	|	 ¡ }
|
 ¡ }t|ƒS )N)r   é
   T)r:  r@  )Zdistance_cutoff)	r.   rT  rD  Ú
sites_fracrI  Úpair_asu_tableZextract_pair_sym_tableZsimple_edge_listrB   )r   rË  rÔ   Zshow_outliersr6   Z	pdb_atomsÚxray_structurerÑ  rI  rÒ  Zpair_sym_tableZ
atom_pairsr   r   r   rÍ  ;  s    
rÍ  zR 3 :HzR 3 :RzR -3 :HzR -3 :RzR 3 2 :HzR 3 2 :RzR 3 m :HzR 3 m :RzR 3 c :HzR 3 c :Rz	R -3 m :Hz	R -3 m :Rz	R -3 c :Hz	R -3 c :RrW  c             C   sŽ   |   ¡  ¡ }|tkr(|d |dd…  }dd„ }||ƒ}t|ƒdkrŠ|  ¡  ¡ sŠddlm} || d	d
d}|d k	rŠ|| dd¡krŠ||ƒ}|S )Néÿÿÿÿr#   éýÿÿÿc             S   s   t | ƒdkr|  dd¡S | S )Né   r„  rÌ   )rB   rp  )rP   r   r   r   Úcompressa  s     z&format_cryst1_sgroup.<locals>.compressrÖ  r   )Úccp4_symbolzsyminfo.libF)rP  Zlib_nameZrequire_at_least_one_libr„  rÌ   )	ÚtypeZlookup_symbolÚ)standard_rhombohedral_space_group_symbolsrB   ÚgroupZ
is_centricZ#iotbx.mtz.extract_from_symmetry_librØ  rp  )rP  rK   r×  rØ  Zaltr   r   r   Úformat_cryst1_sgroup]  s    rÜ  c             C   s<   |d krd}nt |ƒ}d|  ¡  ¡ t|  ¡ d|f   ¡ S )NrÌ   z2CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11.11s%4.4s)rP  )rs   rI  Ú
parametersrÜ  rP  rÀ   )r   rÆ  r   r   r   rÈ  o  s    
 

rÈ  c             C   s¤   | |g  d ¡dkst‚| d k	r(|  ¡ }nt|ƒdks8t‚|}t|ƒdksLt‚d|d |d |d |d |d |d |d |d |d	 |d
 |d |d f  dd¡S )Nr#   rØ   rA   zwSCALE1    %10.6f%10.6f%10.6f     %10.5f
SCALE2    %10.6f%10.6f%10.6f     %10.5f
SCALE3    %10.6f%10.6f%10.6f     %10.5fr   rÚ   r@   rõ   rÛ   é   é   z
 -0.000000z
  0.000000)rã   rt   rS  rB   rp  )rI  rS  rW   r5   r   r   r   Úformat_scale_records  s    
rà  Tc             C   s.  ddl m} ddl m} ddl m} | d kr6d }d }	nlt| |jƒrT|  ¡ }|  ¡ }	nNt| |jjƒrl| }d }	n6t| t	t
fƒršt| ƒdksŠt‚| | ¡}d }	ntdƒ‚|d krÔ|d kr¾| d¡}n|jt |¡ ¡ d}|	d krè|jd	d
}	t|j||	d|d}
|r*|d kr| ¡ }|
dt||d 7 }
|
S )Nr   )rF  )Úsgtbx)rG  rÛ   zinvalid crystal_symmetry object)r#   r#   r#   rH  rH  rH  )Zorthogonalization_matrixzP 1)Úsymbol)rI  rP  )r   rÆ  rÃ   )rS  rW   )rM  rF  rá  rG  rr   rN  rI  rP  r-   rU   r  rB   rt   ræ   r	   ÚsqrÚinverserÈ  rS  rà  )r   r#  r$  Zscale_fractionalization_matrixZscale_urF  rá  rG  rI  rP  rK   r   r   r   r-  —  sH    


r-  c             C   sv   d}dd„ }d}x`| D ]X\}}}}|d7 }|||ƒ7 }|d7 }|||ƒ7 }|d7 }|d7 }|d7 }|d	d
 7 }qW |S )a™  
COLUMNS         DATA TYPE      FIELD           DEFINITION
-----------------------------------------------------------------------------
 1 -  6         Record name    "LINK  "
13 - 16         Atom           name1           Atom name.
17              Character      altLoc1         Alternate location indicator.
18 - 20         Residue name   resName1        Residue  name.
22              Character      chainID1        Chain identifier.
23 - 26         Integer        resSeq1         Residue sequence number.
27              AChar          iCode1          Insertion code.
43 - 46         Atom           name2           Atom name.
47              Character      altLoc2         Alternate location indicator.
48 - 50         Residue name   resName2        Residue name.
52              Character      chainID2        Chain identifier.
53 - 56         Integer        resSeq2         Residue sequence number.
57              AChar          iCode2          Insertion code.
60 - 65         SymOP          sym1            Symmetry operator atom 1.
67 - 72         SymOP          sym2            Symmetry operator atom 2.
74 - 78         Real(5.2)      Length          Link distance
zò
         1         2         3         4         5         6         7         8
12345678901234567890123456789012345678901234567890123456789012345678901234567890
LINK         O   GLY A  49                NA    NA A6001     1555   1555  2.98
c             S   s$   d| j | j| j| j| j| jf }|S )Nz%4s%s%-3s %s%4s%s)r4   Úaltlocry   Úchain_idÚresseqÚicode)r7   rK   r   r   r   Ú_format_link_atomÞ  s    
z.format_link_records.<locals>._format_link_atomrÌ   zLINK        z               r„  z   1555z %5.2fg333333@r   )Z	link_listÚtestré  rK   rµ  r¶  Zsym_op1Zsym_op2r   r   r   Úformat_link_recordsÄ  s    	rë  c               @   s    e Zd ZdddgZddd„ZdS )	r  rÎ   rn   r  rÌ   c          
   C   s  yt |dd… ƒ| _W n tk
r0   d | _Y nX | jdkrXtdt|d d… ƒ|f ƒ‚g }x–dD ]Ž}|||d … }t| ¡ ƒdkrŒd}nZyt|ƒ}W nL tk
rä   tdt|d d… ƒ|f d	 | d
 dd| df  ƒ‚Y nX | |¡ qbW |d d… |d  | _	| _
d S )Nrõ   rÛ   )r#   rÚ   rA   zUnknown PDB record %s%s)rÐ  é   é   é-   rÐ  r   z-Not a floating-point value, PDB record %s%s:
z  rÃ   z  %s%sr„  z
^^^^^^^^^^rA   )rå   rÎ   ræ   rC   r   rB   ru   rk  rM   rn   r  )ÚOr  r!   r<   rþ   ÚfldÚvaluer   r   r   r­   ÷  s&      

 <zread_scale_record.__init__N)rÌ   )rµ   r¶   r·   Ú	__slots__r­   r   r   r   r   r  ó  s   
r  c             C   s6   yt dd|  dS  tk
r0   td|  ƒ‚Y nX d S )Nr@   z%4s)ÚwidthrP   z&invalid residue sequence number: "%4s")Z
hy36decoderæ   )rP   r   r   r   Úresseq_decode  s     rô  c             C   s   t d| dS )Nr@   )ró  rñ  )Ú
hy36encode)rñ  r   r   r   Úresseq_encode  s    rö  c             C   s@   t |tƒrt|ƒ| kst‚|S t |tƒr4t| |dS tdƒ‚d S )N)ró  rñ  z'serial number value must be str or int.)rr   rs   rB   rt   rå   rõ  rC   )ró  rñ  r   r   r   Úencode_serial_number  s    

r÷  c             C   sJ  | d krt  ¡ } nt| ƒjdks$t‚|d k	r2|| _|d k	r@|| _|d k	rN|| _|d k	r\|| _|d k	rj|| _	|d k	rx|| _
|d k	r†|| _|d k	r”|| _|	d k	r¢|	| _|
d k	r°|
| _|d k	r¾|| _|d k	rÌ|| _|d k	rÚ|| _|d k	rè|| _|d k	rö|| _|d k	r|| _|d k	r|| _|d k	r&|| _|d k	r6|| _|d k	rF|| _| S )NÚatom_with_labels)r©  rø  rÙ  rµ   rt   ÚxyzÚsigxyzÚoccÚsigoccÚbÚsigbÚuijr*  r´  r²  r4   ÚsegidÚelementÚchargeÚmodel_idræ  rç  rè  rå  ry   )rK   rù  rú  rû  rü  rý  rþ  rÿ  r*  r´  r²  r4   r   r  r  r  ræ  rç  rè  rå  ry   r   r   r   Úmake_atom_with_labels  sX    
               
 
 
 
 
 r  c             C   sú  |d kr|   ¡ }| ¡ }g }x4t|j ¡ ƒD ]"}|dkrD| d¡ q,| |¡ q,W d|jfd|jfdd |¡fd|j	fg}|j
dkr–| d	d
|j
f¡ |j}d|kr¶| d|d f¡ d|ksÆd|krì| dd	¡| dd	¡ }| d|f¡ d|kr| d|d f¡ d|krhg }x,|jD ]"}	tj |	¡dkr| |	¡ qW d|d d |¡f }
| d|
f¡ d|ks|d|krêg }x0|jD ]&}	tj |	¡}|dkrˆ| |	¡ qˆW | dd	¡| dd	¡ }d|d |¡f }
| d|
f¡ | ¡ }| ¡ }t |¡}t |¡}t |¡}| dd|||f f¡ |d	krZ|d	k d¡}| dd| f¡ | ¡ }t |¡d	kr’|d	k d¡}| dd| f¡ |  ¡ }|d k	rö| ¡ }| dt|ƒf¡ | ¡ }|d k	röd d d!„ | ¡ D ƒ¡}| d"|f¡ |S )#Nr„  z' 'zNumber of atomszNumber of chainsz	Chain IDsz, zAlternate conformationsr#   r   zNumber of modelsÚcommon_amino_acidzAmino acid residuesZcommon_nucleic_acidZccp4_mon_lib_rna_dnazNucleic acid residuesZcommon_waterzWater moleculesZcommon_elementz%d (%s)zElemental ionsÚcommon_small_moleculeÚother)r  r  zOther moleculeszMean isotropic B-factorz%.2f (range: %.2f - %.2f)TzAtoms with iso. B <= 0z%d ***zAtoms with zero ocupancyzSpace groupc             S   s   g | ]}d | ‘qS )z%gr   )rÁ   ro  r   r   r   rÂ   Œ  s    z$get_file_summary.<locals>.<listcomp>z	Unit cell)r  Zoverall_countsrÑ   r¤  rÒ   rM   Ún_atomsZn_chainsrH   Z
n_alt_confZn_modelsÚinsertZresname_classesrx   Zresnamesr)   r*   Úcommon_residue_names_get_classr2   Z	extract_br   ÚmeanÚminrR  rã   Zextract_occr   rP  rs   rI  rÝ  )Úpdb_inr©  Úcountsr¤  r¨  Z	info_listZclr  Únamesr4   rñ  Z	res_classr2   Z	b_factorsZmean_bZmin_bZmax_bZ	n_bad_adprû  Z
n_zero_occZsymmZspace_grouprI  Zuc_strr   r   r   Úget_file_summaryM  s|    










r  c             C   sh   |d krt j}t| |ƒ}tdd„ |D ƒƒd }d| }x,|D ]$\}}t||d t|ƒf |d q<W |S )Nc             S   s   g | ]\}}t |ƒ‘qS r   )rB   )rÁ   rX   r  r   r   r   rÂ   ”  s    z%show_file_summary.<locals>.<listcomp>rÚ   z
%%-%ds %%srš  )rÍ   )rÏ   rÐ   r  rR  rÓ   rs   )r  r©  rÔ   r°   Zlabel_widthÚformatÚlabelrñ  r   r   r   Úshow_file_summary  s    
 r  )NFr   r   )r?   )N)NNNNNNNNNNNNNNNNNNNNN)N)NN)aÚ
__future__r   r   r   Úcctbx.array_familyr   Úboost_adaptbx.boost.pythonÚboostÚpythonÚbpÚ	six.movesr   Ú
import_extr-   r   Ziotbx.pdb.recordsr)   Ziotbx.pdb.hierarchyrU  r	   Z"iotbx.pdb.atom_name_interpretationr
   r¼   Úscitbx.array_family.sharedÚscitbx.stl.setÚlibtbxr   Úlibtbx.str_utilsr   Úlibtbx.utilsr   r   r   r   r   r   r   r+  rÏ   r  rú   rE   ÚpathrG   r   r8   r>   rL   r[   rw   rW  r½   r{   rz   r}   r²   Zprotein_atom_names_backboner§   r¦   r¨   Z#rna_dna_atom_names_backbone_aliasesÚobjectr©   r¸   r¾   r×   rï   rð   rñ   rö   r.   rZ  r\  r  ÚinjectÚinject_intorQ   rÃ  Zpdb_codes_fragment_filesrÅ  rÐ   rÏ  rÍ  rÚ  Ú__builtins__rÜ  rÈ  rà  r-  rë  r  rô  rö  r÷  r  r  r  r   r   r   r   Ú<module>   sZ  
   	
(B2$J     pT'(/	                    C