B
    bR                 @   sp   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddl	mZ G dd	 d	ZG d
d dZdS )zmmCIF parsers.    N)	as_handle)
MMCIF2Dict)StructureBuilder)PDBConstructionException)PDBConstructionWarningc               @   sB   e Zd ZdZdddZdd Zdd	 Zd
d Zdd Zdd Z	dS )MMCIFParserz1Parse a mmCIF file and return a Structure object.NFc             C   s8   |dk	r|| _ nt | _ d| _d| _d| _t|| _dS )a  Create a PDBParser object.

        The mmCIF parser calls a number of standard methods in an aggregated
        StructureBuilder object. Normally this object is instanciated by the
        MMCIParser object itself, but if the user provides his/her own
        StructureBuilder object, the latter is used instead.

        Arguments:
         - structure_builder - an optional user implemented StructureBuilder class.
         - QUIET - Evaluated as a Boolean. If true, warnings issued in constructing
           the SMCRA data will be suppressed. If false (DEFAULT), they will be shown.
           These warnings might be indicative of problems in the mmCIF file!

        Nr   )_structure_builderr   headerline_counterbuild_structureboolQUIET)selfstructure_builderr    r   2lib/python3.7/site-packages/Bio/PDB/MMCIFParser.py__init__   s    zMMCIFParser.__init__c          	   C   sV   t  > | jrt jdtd t|| _| | | j	| 
  W dQ R X | j S )zReturn the structure.

        Arguments:
         - structure_id - string, the id that will be used for the structure
         - filename - name of mmCIF file, OR an open text mode file handle

        ignore)categoryN)warningscatch_warningsr   filterwarningsr   r   _mmcif_dict_build_structurer   Z
set_header_get_headerget_structure)r   structure_idfilenamer   r   r   r   2   s    


zMMCIFParser.get_structurec             C   s$   ||kr || d }d|kr |S |S )Nr   ?r   )r   keydictZdefltZrsltr   r   r   
_mmcif_getE   s
    zMMCIFParser._mmcif_getc          
   C   s^   | j }xR|D ]J}||}y|d }W n ttfk
r@   wY nX |dkr|| j|< P qW d S )Nr   r   )r   get	TypeError
IndexErrorr	   )r   Z
target_keykeysZmdr   valitemr   r   r   _update_header_entryL   s    


z MMCIFParser._update_header_entryc             C   s   dddddd d| _ | ddddg | ddg | d	d
dg | ddg | ddg | ddddg | j d d k	ryt| j d | j d< W n tk
r   d | j d< Y nX | j S )N )nameheadidcodedeposition_datestructure_method
resolutionr,   Z	_entry_idz_exptl.entry_idz_struct.entry_idr*   z_struct.titler+   z_struct_keywords.pdbx_keywordsz_struct_keywords.textr-   z3_pdbx_database_status.recvd_initial_deposition_dater.   z_exptl.methodr/   z_refine.ls_d_res_highz_refine_hist.d_res_highz _em_3d_reconstruction.resolution)r	   r(   float
ValueError)r   r   r   r   r   X   s2    

zMMCIFParser._get_headerc       >      C   s  ddh}| j }|d }|d }|d }y|d }W n tk
rJ   d }Y nX |d }dd	 |d
 D }	dd	 |d D }
dd	 |d D }|d }|d }|d }|d }|d }ydd	 |d D }W n4 tk
r   d }Y n tk
r   tdd Y nX y8|d }|d }|d }|d }|d }|d }d}W n tk
rP   d}Y nX d|krf|d }n|d  }d }d }d }| j}|| |d! d"}d"}xtdt|D ]} |	|  yt
||  }!W n* tk
r   ||  }!td#t Y nX |	|  }"|
|  }#||  }$||  }%||  }&||  }'|'|kr8d!}'t
||  }(||  })|)|krZd!})||  }*yt||  }+W n  tk
r   td$d Y nX yt||  },W n  tk
r   td%d Y nX ||  }-|-d&kr|%d'ks|%d(krd)}.nd*}.nd!}.|.|(|)f}/|d k	rH||  }0||0krR|0}|d7 }||| d }d }d }n
|| ||&krr|&}|| d }d }||/ks||%kr|/}|%}||%|.|(|) t|"|#|$fd+}1|r||   nd }2|j|*|1|+|,|'|*|!|2d, |dkr| t|k r||  ||  ||  ||  ||  ||  f}3d-d	 |3D }4t|4d+}5||5 qW yt|d. d }6t|d/ d }7t|d0 d }8t|d1 d }9t|d2 d }:t|d3 d };t|6|7|8|9|:|;fd+}<|d4 d }=|=dd" }=|=d krt||=|< W n tk
r   Y nX d S )5N.r   z_atom_site.idz_atom_site.label_atom_idz_atom_site.label_comp_idz_atom_site.type_symbolz_atom_site.auth_asym_idc             S   s   g | ]}t |qS r   )r0   ).0xr   r   r   
<listcomp>   s    z0MMCIFParser._build_structure.<locals>.<listcomp>z_atom_site.Cartn_xc             S   s   g | ]}t |qS r   )r0   )r3   r4   r   r   r   r5      s    z_atom_site.Cartn_yc             S   s   g | ]}t |qS r   )r0   )r3   r4   r   r   r   r5      s    z_atom_site.Cartn_zz_atom_site.label_alt_idz_atom_site.pdbx_PDB_ins_codez_atom_site.B_iso_or_equivz_atom_site.occupancyz_atom_site.group_PDBc             S   s   g | ]}t |qS r   )int)r3   nr   r   r   r5      s    z_atom_site.pdbx_PDB_model_numzInvalid model numberz_atom_site_anisotrop.U[1][1]z_atom_site_anisotrop.U[1][2]z_atom_site_anisotrop.U[1][3]z_atom_site_anisotrop.U[2][2]z_atom_site_anisotrop.U[2][3]z_atom_site_anisotrop.U[3][3]   r   z_atom_site.auth_seq_idz_atom_site.label_seq_id zBPDBConstructionWarning: Some atom serial numbers are not numericalzInvalid or missing B factorzInvalid or missing occupancyHETATMZHOHZWATWHf)serial_numberelementc             S   s   g | ]}t |qS r   )r0   )r3   _r   r   r   r5     s    z_cell.length_az_cell.length_bz_cell.length_cz_cell.angle_alphaz_cell.angle_betaz_cell.angle_gammaz_symmetry.space_group_name_H-M)r   KeyErrorr1   r   r   init_structureinit_segrangelenset_line_counterr6   r   warnr   r0   
init_model
init_chaininit_residuenumpyarrayupper	init_atom
set_anisou	ExceptionZset_symmetry)>r   r   _unassigned
mmcif_dictatom_serial_listatom_id_listresidue_id_listelement_listchain_id_listx_listy_listz_listalt_list
icode_listb_factor_listoccupancy_listfieldname_listserial_list	aniso_u11	aniso_u12	aniso_u13	aniso_u22	aniso_u23	aniso_u33
aniso_flagseq_id_listcurrent_chain_idcurrent_residue_idcurrent_resnamer   current_model_idcurrent_serial_idiserialr4   yzresnamechainidaltloc
int_resseqicoder*   
tempfactor	occupancy	fieldnamehetatm_flagresseq	serial_idcoordr@   umapped_anisouanisou_arrayabcZalphaZbetaZgammaZcellZ
spacegroupr   r   r   r   }   s   

















zMMCIFParser._build_structure)NF)
__name__
__module____qualname____doc__r   r   r!   r(   r   r   r   r   r   r   r      s   
%r   c               @   s*   e Zd ZdZd
ddZdd Zdd	 ZdS )FastMMCIFParserz2Parse an MMCIF file and return a Structure object.NFc             C   s2   |dk	r|| _ nt | _ d| _d| _t|| _dS )ar  Create a FastMMCIFParser object.

        The mmCIF parser calls a number of standard methods in an aggregated
        StructureBuilder object. Normally this object is instanciated by the
        parser object itself, but if the user provides his/her own
        StructureBuilder object, the latter is used instead.

        The main difference between this class and the regular MMCIFParser is
        that only 'ATOM' and 'HETATM' lines are parsed here. Use if you are
        interested only in coordinate information.

        Arguments:
         - structure_builder - an optional user implemented StructureBuilder class.
         - QUIET - Evaluated as a Boolean. If true, warnings issued in constructing
           the SMCRA data will be suppressed. If false (DEFAULT), they will be shown.
           These warnings might be indicative of problems in the mmCIF file!

        Nr   )r   r   r
   r   r   r   )r   r   r   r   r   r   r   4  s    zFastMMCIFParser.__init__c          
   C   sR   t  : | jrt jdtd t|}| || W dQ R X W dQ R X | j S )zReturn the structure.

        Arguments:
         - structure_id - string, the id that will be used for the structure
         - filename - name of the mmCIF file OR an open filehandle

        r   )r   N)	r   r   r   r   r   r   r   r   r   )r   r   r   Zhandler   r   r   r   R  s    

 zFastMMCIFParser.get_structurec       @      C   s  ddh}d\}}g g  }}g g  }}	x|D ]}
|
 drLd}||
  q*|
 drjd}||
  q*|r~|
 dr~d}q*|r|
 drd}q*|r||
  q*|r*|	|
  q*W tttj| }tttj|	 }tt||}|tt|| |d	 }|d
 }|d }y|d }W n t	k
r<   d }Y nX |d }dd |d D }dd |d D }dd |d D }|d }|d }|d }|d }|d }ydd |d D }W n8 t	k
r   d }Y n  t
k
r   tdd Y nX y8|d }|d }|d }|d  }|d! } |d" }!d#}"W n t	k
rF   d$}"Y nX d%|kr\|d% }#n|d& }#d }$d }%d }&| j}'|'| |'d' d(}(d(})x^td$t|D ]J}*|'|* ||* }+||* },||* }-||* }.||* }/||* }0||* }1|1|krd'}1t|#|* }2||* }3|3|krd'}3||* d)}4yt||* }5W n  t
k
rZ   td*d Y nX yt||* }6W n  t
k
r   td+d Y nX ||* }7|7d,krd-}8nd'}8|8|2|3f}9|d k	r||* }:|)|:kr |:})|(d#7 }(|'|(|) d }$d }%d }&n
|'|( |$|0kr |0}$|'|$ d }%d }&|%|9ks4|&|/krL|9}%|/}&|'|/|8|2|3 t|,|-|.fd.};|rl||* nd }<|'j|4|;|5|6|1|4|+|<d/ |"d#kr|*t|k r||* ||* ||* ||* | |* |!|* f}=d0d |=D }>t|>d.}?|'|? qW d S )1Nr2   r   )FFz_atom_site.Tz_atom_site_anisotrop.#Fz_atom_site.idz_atom_site.label_atom_idz_atom_site.label_comp_idz_atom_site.type_symbolz_atom_site.auth_asym_idc             S   s   g | ]}t |qS r   )r0   )r3   r4   r   r   r   r5     s    z4FastMMCIFParser._build_structure.<locals>.<listcomp>z_atom_site.Cartn_xc             S   s   g | ]}t |qS r   )r0   )r3   r4   r   r   r   r5     s    z_atom_site.Cartn_yc             S   s   g | ]}t |qS r   )r0   )r3   r4   r   r   r   r5     s    z_atom_site.Cartn_zz_atom_site.label_alt_idz_atom_site.pdbx_PDB_ins_codez_atom_site.B_iso_or_equivz_atom_site.occupancyz_atom_site.group_PDBc             S   s   g | ]}t |qS r   )r6   )r3   r7   r   r   r   r5     s    z_atom_site.pdbx_PDB_model_numzInvalid model numberz_atom_site_anisotrop.U[1][1]z_atom_site_anisotrop.U[1][2]z_atom_site_anisotrop.U[1][3]z_atom_site_anisotrop.U[2][2]z_atom_site_anisotrop.U[2][3]z_atom_site_anisotrop.U[3][3]r8   r   z_atom_site.auth_seq_idz_atom_site.label_seq_idr9   r:   "zInvalid or missing B factorzInvalid or missing occupancyr;   r=   r>   )r?   r@   c             S   s   g | ]}t |qS r   )r0   )r3   rA   r   r   r   r5     s    )
startswithappendstripzipmapstrsplitr    updaterB   r1   r   r   rC   rD   rE   rF   rG   r6   r0   rI   rJ   rK   rL   rM   rO   rP   )@r   r   Z
filehandlerR   Z	read_atomZ
read_aniso_fieldsZ_recordsZ_anisofZ_anisorslineZ_record_tblZ_anisob_tblrS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   r   rm   rn   ro   rp   r4   rq   rr   rs   rt   ru   rv   rw   r*   rx   ry   rz   r{   r|   r}   r~   r@   r   r   r   r   r   r   r   d  s   





















z FastMMCIFParser._build_structure)NF)r   r   r   r   r   r   r   r   r   r   r   r   1  s   
r   )r   rL   r   ZBio.Filer   ZBio.PDB.MMCIF2Dictr   ZBio.PDB.StructureBuilderr   ZBio.PDB.PDBExceptionsr   r   r   r   r   r   r   r   <module>   s     