B
    d                @   s.  d dl mZmZmZ d dlm  mZ edZ	d dl
T d dlmZ d dlmZmZmZ d dlmZmZmZ d dlmZmZmZ d d	lmZ d d
lmZ d d
lmZ d dl m!Z!m"Z" d dl#Z$d dl%m&Z& d dl'm(Z( d dl)Z)d dl*m+Z, d dl*m-Z-m.Z. d dl/Z/d dl0Z0d dl1Z1d dl2Z2d dl3Z3G dd de4Z5dddddgZ6dd Z7G dd de4Z8G dd de4Z9e:e	j;e9 e<e	j;G dd dZ=e:e	j>e9 e<e	j>G dd dZ=e:e	j?e9 e<e	j?G d d dZ=e:e	j@e9 e<e	j@G d!d dZ=e:e	jAe9 e<e	jAG d"d dZ=e:e	jBe9 e<e	jBG d#d dZ=e<e	jCG d$d dZ=e<e	jDG d%d dZ=e<e	jEG d&d dZ=G d'd( d(e4ZFG d)d* d*eFZGefd+d,ZHefd-d.ZIefd/d0ZJd1d2 ZKe3jLfd3d4ZMd5d6 ZNd@d7d8ZOdAd:d;ZPd<d= ZQd>d? ZRdS )B    )absolute_importdivisionprint_functionNiotbx_pdb_hierarchy_ext)*)show_sorted_by_counts)Sorryplural_snull_out)Autodict_with_default_0
group_args)
hy36encode
hy36decodecommon_residue_names_get_class)one_letter_given_three_letter)lookup)all_chain_idsall_label_asym_ids)crystal)flex)	cStringIO)rangezipc               @   s   e Zd ZdS )pickle_import_triggerN)__name__
__module____qualname__ r   r   r/mnt/filia/a/genomebrowser/www/genomebrowser/fleming/tools/molprobity/modules/cctbx_project/iotbx/pdb/hierarchy.pyr      s    r   modelchainresidue_group
atom_groupatomc                s   |   }| dkrR|  }|d kr*d}n
d|j }td||  f   d nj fdd}| dkrxN|D ]}|| qrW n8||d  td	t| d
    d ||d  d S )Nr   z  z%szempty: "%s%s")filec                s    t d| jdd   d d S )Nz"%s"z.*.)replace_floats_with)r%   )printformat_atom_record)r$   )outprefixr   r   	show_atom%   s    
z&_show_residue_group.<locals>.show_atom   z... %d atom%s not shown   )atomssizeparentidr'   residr	   )rgr)   r*   r/   Zchr+   r$   r   )r)   r*   r   _show_residue_group   s     

 r5   c               @   s   e Zd Zdd Zd%ddZd&d	d
Zdd Zdd Zdd Zdd Z	d'ddZ
dd Zd(ddZdd Zd)ddZd*ddZd+dd Zd,d!d"Zd-d#d$ZdS ).overall_countsc             C   s   d | _ d | _d S )N)_errors	_warnings)selfr   r   r   __init__2   s    zoverall_counts.__init__N T
   c                s  d krt jg _g _fdd}fdd}dttj }	td d jdkrr|d	 j	dkr|d
 td |	j
 dd g }
jdkr|
dtj  j	dkr|
dj	  t|
dkrtdd|
 dd td jdkr|d jdkr0|d td |	j dd g }
jdkrn|
dtj  jdkr|
dj  jdkr|
dtj  t|
dkrtdd|
 dd td td |	j d td |	jj j  dd jdkr8tdj dd td jdkrV|d td |	j dd jdkrtdj dd td td |	j d jdkr|d td |	j d jdkr
|d  td! |	j d j}td" t|d t|dkrZtd# d t| d$ d% j}td& dd t|dkrtd'dd td t| d$ d% j}td(t|  d t|dkr td) d t| d$ d% j}td*t|  d t|dkrtd+ d t| d$ d% dtttj j!j"j# }	td, d td- |	j  d td. |	j! d td/ |	j" d j#dkr|d0 td1 |	j# d j$d% j%}td2t|  d t|dkrtd3 d d d4d d5d6d7d8d9d:d;d<
 t| d$  fd=d>|& D d? tj'dkr|d@ j(|dA tj)dkr|dB j*|dA j+|dA d S )CNc                s(    rt |  d j|   d S )N)r%   )r'   r7   appendstrip)msg)flag_errorsr)   r*   r9   r   r   add_err@   s     z$overall_counts.show.<locals>.add_errc                s(    rt |  d j|   d S )N)r%   )r'   r8   r=   r>   )r?   )flag_warningsr)   r*   r9   r   r   add_warnC   s     z%overall_counts.show.<locals>.add_warnz%%%ddztotal number of:)r%   r   z$  ### ERROR: duplicate model ids ###z  ### WARNING: empty model ###z  models:    r;   )endr%   z%d with duplicate model id%sz%d emptyz (%s)z; z&  ### WARNING: duplicate chain ids ###z  ### WARNING: empty chain ###z  chains:    z%d with duplicate chain id%sz%d explicit chain break%sz  alt. conf.:z  residues:  z (%d with mixed residue names)z&  ### ERROR: duplicate atom labels ###z  atoms:     z (%d with duplicate labels)z  anisou:    z&  ### WARNING: empty residue_group ###z  empty residue_groups:z#  ### WARNING: empty atom_group ###z  empty atom_groups:z$number of atom element+charge types:z+histogram of atom element+charge frequency:z  )r)   r*   zresidue name classes:z Noneznumber of chain ids: %dz histogram of chain id frequency:znumber of alt. conf. ids: %dz%histogram of alt. conf. id frequency:zresidue alt. conf. situations:z  pure main conf.:    z  pure alt. conf.:    z  proper alt. conf.:  z$  ### ERROR: improper alt. conf. ###z  improper alt. conf.:znumber of residue names: %dz$histogram of residue name frequency:z   modified amino acidz   modified rna/dnaz   common waterz   common small moleculez   common elementz   otherz   D-amino acidz  common saccharide)
common_amino_acidmodified_amino_acidcommon_rna_dnamodified_rna_dnacommon_watercommon_small_moleculecommon_elementotherd_amino_acidZcommon_saccharidec                s   g | ]} t |d  qS ))name)r   ).0rN   )annotation_appearancer   r   
<listcomp>   s   z'overall_counts.show.<locals>.<listcomp>)r)   r*   annotationsz;### WARNING: consecutive residue_groups with same resid ###)r)   r*   max_showzE### ERROR: residue group with multiple resnames using same altloc ###),sysstdoutr7   r8   lenstrn_atomsr'   Zn_duplicate_model_idsZn_empty_modelsn_modelsr=   r	   joinZn_duplicate_chain_idsZn_empty_chainsn_chainsZn_explicit_chain_breaks
n_alt_conf
n_residuesZn_residue_groupsZn_empty_residue_groupsn_duplicate_atom_labelsZn_anisouZn_empty_atom_groupsZelement_charge_typesr   itemsresname_classes	chain_idsZalt_conf_idsmaxZn_alt_conf_noneZn_alt_conf_pureZn_alt_conf_propern_alt_conf_improper4show_chains_with_mix_of_proper_and_improper_alt_confresnameskeys*consecutive_residue_groups_with_same_resid/show_consecutive_residue_groups_with_same_resid7residue_groups_with_multiple_resnames_using_same_altloc<show_residue_groups_with_multiple_resnames_using_same_altlocshow_duplicate_atom_labels)r9   r)   r*   r@   rB   residue_groups_max_showduplicate_atom_labels_max_showrA   rC   fmtinfoscr   )rP   r@   rB   r)   r*   r9   r   show6   s     



 
 



 


zoverall_counts.showc             C   s    t  }| j||||d | S )N)r)   r*   rl   rm   )StringIOrq   getvalue)r9   r*   rl   rm   r)   r   r   r   as_str   s    zoverall_counts.as_strc             C   s   | j d kr| jt d | j S )N)r)   )r7   rq   r
   )r9   r   r   r   errors   s    
 zoverall_counts.errorsc             C   s4   d}x*| j  D ]\}}t||kr||7 }qW |S )Nr   )re   r_   r   )r9   classesresultresnamecountr   r   r   get_n_residues_of_classes   s
    z(overall_counts.get_n_residues_of_classesc             C   s   | j d kr| jt d | j S )N)r)   )r8   rq   r
   )r9   r   r   r   warnings   s    
 zoverall_counts.warningsc             C   s   |   |   S )N)ru   r{   )r9   r   r   r   errors_and_warnings   s    z"overall_counts.errors_and_warningsc          	   C   s   | j dkrd S |d krtj}x~| jdf| jdfgD ]f\}}|d krDq2t|d|  |d x>| D ]2}x,| D ] }t|d|jdd  |d qpW qbW q2W d S )	Nr   ZproperZimproperzresidue with %s altloc)r%   z  "%s"z.*.)r&   )	rc   rT   rU   Zalt_conf_properZalt_conf_improperr'   atom_groupsr/   r(   )r9   r)   r*   r"   labelagr$   r   r   r   show_improper_alt_conf   s    
  
 
z%overall_counts.show_improper_alt_confc             C   s6   t  }| j|d | }t|dkr2t| d S )N)r)   r   )rr   r   rs   rV   r   rstrip)r9   sior?   r   r   r   $raise_improper_alt_conf_if_necessary   s
     z3overall_counts.raise_improper_alt_conf_if_necessaryc             C   sH   |d krt j}| j}t|d ||d |dkr6|d7 }| j||d d S )Nz2chains with mix of proper and improper alt. conf.:)r%   r   z  )r)   r*   )rT   rU   1n_chains_with_mix_of_proper_and_improper_alt_confr'   r   )r9   r)   r*   nr   r   r   rd      s      zCoverall_counts.show_chains_with_mix_of_proper_and_improper_alt_confc             C   s4   | j dkrd S t }| j|d t|  d S )Nr   )r)   )r   rr   rd   r   rs   r   )r9   r   r   r   r   Braise_chains_with_mix_of_proper_and_improper_alt_conf_if_necessary   s
    
zQoverall_counts.raise_chains_with_mix_of_proper_and_improper_alt_conf_if_necessaryc       
      C   s<  | j }t|dkrd S |d kr$tj}t|dt|  |d |d krNt|}n|dkrZd S |d d }d }x|d | D ]}x|tddg|D ]j\}}	|d k	r| |	 krqn|dkr|d k	rt||d |	}t|d|  |d t|	||d	 d
 qW qxW t||kr8t||d t|dtt||   |d d S )Nr   z8number of consecutive residue groups with same resid: %d)r%   z  z*------------------------------------------r;   znext z  %sresidue group:z    )r4   r)   r*   z'  ... %d remaining instance%s not shown)	rg   rV   rT   rU   r'   r   	memory_idr5   r	   )
r9   r)   r*   rS   consdelimZprev_rgrgsnextr4   r   r   r   rh     s6       
  z>overall_counts.show_consecutive_residue_groups_with_same_residc             C   s   | j }t|dkrd S t|d t||d |d kr>tt}n|dkrJd S x6|d | D ]&}t|d |d t|||d d qXW t||krt|dtt||   |d d S )Nr   z8residue groups with multiple resnames using same altloc:)r%   z  residue group:z    )r4   r)   r*   z'  ... %d remaining instance%s not shown)ri   rV   r'   r   r5   r	   )r9   r)   r*   rS   r   r4   r   r   r   rj   !  s      
 zKoverall_counts.show_residue_groups_with_multiple_resnames_using_same_altlocc             C   s8   t  }| j||d | }t|dkr4t| d S )N)r)   rS   r   )rr   rj   rs   rV   r   r   )r9   rS   r   r?   r   r   r   Jraise_residue_groups_with_multiple_resnames_using_same_altloc_if_necessary2  s    
 zYoverall_counts.raise_residue_groups_with_multiple_resnames_using_same_altloc_if_necessaryc             C   s$  | j }t|dkrd S |d kr$tj}dtt| j }t|d |t| |d t|d || j |d |d krzt|}n|dkrd S xj|d | D ]Z}d}xP|D ]H}|jdd}	|	d d	 d
 |	dd   }
t|| d|
  |d d}qW qW t||kr t|dtt||   |d d S )Nr   z%%%ddz*number of groups of duplicate atom labels:)r%   z*  total number of affected atoms:         z  group z.*.)r&      z    .*.   z"%s"z        z$  ... %d remaining group%s not shown)	Zduplicate_atom_labelsrV   rT   rU   rW   r^   r'   r(   r	   )r9   r)   r*   rS   duprn   r/   Zprfxr$   Zatom_strZa_sr   r   r   rk   ;  s0       
 
z)overall_counts.show_duplicate_atom_labelsc             C   s8   t  }| j||d | }t|dkr4t| d S )N)r)   rS   r   )rr   rk   rs   rV   r   r   )r9   rS   r   r?   r   r   r   (raise_duplicate_atom_labels_if_necessaryR  s
     z7overall_counts.raise_duplicate_atom_labels_if_necessary)Nr;   TTr<   r<   )r;   r<   r<   )Nr;   )Nr;   )Nr;   r<   )Nr;   r<   )r<   )Nr;   r<   )r<   )r   r   r   r:   rq   rt   ru   rz   r{   r|   r   r   rd   r   rh   rj   r   rk   r   r   r   r   r   r6   0   s:        
   
	
 
  
  


r6   c               @   s$   e Zd Zdd Zdd Zdd ZdS )__hash_eq_mixinc             C   s   t |  S )N)hashr   )r9   r   r   r   __hash__Z  s    z__hash_eq_mixin.__hash__c             C   s    t || jr|  | kS dS )NF)
isinstance	__class__r   )r9   rL   r   r   r   __eq__]  s    z__hash_eq_mixin.__eq__c             C   s
   | |k S )Nr   )r9   rL   r   r   r   __ne__b  s    z__hash_eq_mixin.__ne__N)r   r   r   r   r   r   r   r   r   r   r   X  s   r   c               @   s  e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd ZdddZdd Zdd  Zdd!defd"d#Zd!defd$d%Zddd&d'd(dd&d&d&d&d&defd)d*Zdd+d,Zd-d. Zdd/d0Zd1d2 Zdd5d6Zdd7d8Zdd9d:Zdd;d<Zd=d> Z d?d@ Z!dAdB Z"ddCdDZ#ddEdFZ$dGdH Z%dIdJ Z&dKdL Z'dMdN Z(dOdP Z)dQdR Z*dSdT Z+dUdV Z,dWdX Z-dYdZ Z.dd]d^Z/dd_d`Z0dadb Z1dcdd Z2ded4dfdggfdhdiZ3djdk Z4dldm Z5dndo Z6dpdq Z7e8 fdrdsZ9dtdu Z:dvdw Z;dxdy Z<dzd{ Z=d|d} Z>dd~dZ?dd Z@dddZAdd ZBdd ZCdd ZDdddZEdd ZFdddZGdd ZHdd ZIdddZJdddZKdddZLdddZMdd ZNdd ZOdddZPdddZQdddZRdd ZSdddZTdd ZUdd ZVdd ZWdS )_a6  
  Root node of the PDB hierarchy object.  This is returned by the method
  construct_hierarchy() of the PDB/mmCIF input objects, but it may also be
  created programatically.  Note that it does not contain any reference to
  crystal symmetry or source scattering information, meaning that in practice
  it must often be tracked alongside an equivalent cctbx.xray.structure object.
  Pickling is supported, simply by writing out and reading back the PDB-format
  representation of the hierarchy.

  Examples
  --------
  >>> hierarchy = iotbx.pdb.hierarchy.root()
  c          
   C   sH   d}t  }| j|ddd ddddd}tjr4|| |t | j| fS )Nr-   Tr   )	cstringio
append_endinterleaved_confatoms_reset_serial_first_valueatom_hetatmsigatmanisousiguij)rr   _as_pdb_string_cstringiosixPY3writer   infors   )r9   version
pdb_stringpy3outr   r   r   __getstate__w  s    
z_.__getstate__c             C   s   t |dkst|d }|dkr2t |dksTtn"|dkrLt |dksTtntd|d | _dd l}|jjdt|d	 d
j	dd
 }| jt |d x|D ]}| j|d qW d S )Nr,   r      r-   r   z!Unknown version of pickled state.pickler.   )source_infolinesF)
sort_atoms)Znumber_of_additional_models)r    )rV   AssertionErrorRuntimeErrorr   	iotbx.pdbpdbinputr   split_linesconstruct_hierarchymodelsZpre_allocate_modelsappend_model)r9   stater   iotbxr   r    r   r   r   __setstate__  s      

z_.__setstate__c             c   s.   x(|   D ]}x| D ]
}|V  qW q
W dS )z0
    Iterate over all chains in all models.
    N)r   chains)r9   r    r!   r   r   r   r     s    z_.chainsc             c   s@   x:|   D ].}x(| D ]}x| D ]
}|V  q&W qW q
W dS )z9Iterate over all residue groups (by model and then chain)N)r   r   residue_groups)r9   r    r!   r4   r   r   r   r     s    z_.residue_groupsc             c   sR   xL|   D ]@}x:| D ].}x(| D ]}x| D ]
}|V  q4W q&W qW q
W dS )zQ
    Iterate over all atom groups (by model, then chain, then residue group)
    N)r   r   r   r}   )r9   r    r!   r4   r   r   r   r   r}     s
    z_.atom_groupsc             C   s   |   dkst|  d S )Nr   r   )models_sizer   r   )r9   r   r   r   
only_model  s    z_.only_modelc             C   s   |    S )N)r   
only_chain)r9   r   r   r   r     s    z_.only_chainc             C   s   |    S )N)r   only_residue_group)r9   r   r   r   r     s    z_.only_residue_groupc             C   s   |    S )N)r   only_conformer)r9   r   r   r   r     s    z_.only_conformerc             C   s   |    S )N)r   only_atom_group)r9   r   r   r   r     s    z_.only_atom_groupc             C   s   |    S )N)r   only_residue)r9   r   r   r   r     s    z_.only_residuec             C   s   |    S )N)r   	only_atom)r9   r   r   r   r     s    z_.only_atomNc             C   sV   |rBt t|  dkrBtjj }||  d   |	 S t	 }| 
| |S )z
    Calculate basic statistics for contents of the PDB hierarchy, including
    number of residues of each type.

    :parameter only_one_model:  return results for first model only
    :returns: iotbx.pdb.hierarchy.overall_counts object
    r   r   )rV   listr   r   r   	hierarchyrootr   detached_copyr6   Zget_overall_counts)r9   only_one_modelZone_model_phrw   r   r   r   r6     s    	
z_.overall_countsc             C   sL  d}|    }t|}|dk d}t||k d}|d |  }|d| k|d| k @ d}|d |  }|d| k|d| k @ d}	|	d |  }
|d| kd}|d |  }tt| 	 }d}t
 }x<| 	 D ]0}t| }|dkr|d7 }||  d7  < qW t|||||||	|
|||d | |d	S )
Ngư>r   Tg      Y@g      ?d   g        r   )meannegative
zero_countzero_fractionequal_to_1_countequal_to_1_fractionbetween_0_and_1_countbetween_0_and_1_fractiongreater_than_1_countgreater_than_1_fractionZalt_conf_fracalt_loc_dist)r/   extract_occr   r   ry   absr0   rV   r   r   collectionsCounter
conformersr   )r9   epsoccr   r   r   r   r   r   r   r   r   r   Znumber_of_residuesZnumber_of_alt_confsr   r4   Zn_confsr   r   r   occupancy_counts  sB    

z_.occupancy_countsc                s      d fdd	}d}t }x> | D ]&}x | D ]}||  d7  < qHW q:W t 	 t
t |d|d|d|d	d
d||||dd
d|dd
d|dd
d||d
ddS )NFc                s:     | }|r |  S tt| S d S )N)	selectionselectr/   r0   rV   r   r   )sel_stras_atomssel)ascr9   r   r   rc  s    
z_.composition.<locals>.rcz$not (water or nucleotide or protein)r   ZproteinZ
nucleotidewaterzelement H or element DT)r   r   z(protein and not (element H or element D))r   z+nucleotide and not (element H or element D))rX   r[   Z	n_proteinZn_nucleotideZn_waterZn_hdZn_other
other_cntsZn_protein_atomsZn_nucleotide_atomsZn_water_atomsZn_other_atoms)F)atom_selection_cacher   r   r   r   r   unique_resnamesr   r/   r0   rV   r   r   )r9   r   Zsel_str_otherr   r4   rx   r   )r   r9   r   composition  s(    




z_.compositionr;   c          
   C   sn  |dkrd}yt |}W n  tk
r:   |d| Y nX |dkrJtj}|  dkrft|d |d t }x"|  D ]}||j	  d7  < qvW x|  D ]}|
 }||j	 dkrd}	nd	}	t|d
|j	  dt||	f |d |dkrq| dkrt|d |d t }
x |D ]}|
|j	  d7  < qW x,|D ]"}| }|
|j	 dkrbd}	nd	}	t|d|j	  dt||	f |d |dkrq>| dkrt|d |d d}d	}x|D ]}|js|st|d |d d}| }t }x| D ]}||j qW g }t|dkr8|d | }||krT|d |}t|dkrvdd| }	nd	}	t|d|  dt||	f |d |dkrq| dkrt|d |d x|D ]}| }t|d|j  d|j dt| |d |dkrq| dkr.t|d  |d x$|D ]}t|d!|j  |d q4W qW qW q>W qW dS )"z2
    Display a summary of hierarchy contents.
    Nr$   zUnknown level_id="%s"r   z ### WARNING: empty hierarchy ###)r%   r   z#  ### ERROR: duplicate model id ###r;   zmodel id="%s"z#chains=%d%sz  ### WARNING: empty model ###z%  ### WARNING: duplicate chain id ###z  chain id="%s"z#residue_groups=%d%sz     ### WARNING: empty chain ###Tz    ### chain break ###Fzwith mixed residue nameszsame as previous residz  ### Info: %s ###z; z    resid="%s"z#atom_groups=%d%sr-   z*      ### WARNING: empty residue_group ###z      altloc="%s"zresname="%s"z	#atoms=%dr,   z)        ### WARNING: empty atom_group ###z        "%s")	level_idsindex
ValueErrorrT   rU   r   r'   r   r   r2   r   rV   chains_sizer   residue_groups_sizelink_to_previousr}   setaddrx   r=   r3   rZ   atom_groups_sizer/   altloc
atoms_sizerN   )r9   r)   r*   level_idlevel_id_exceptionZlevel_no	model_idsr    r   sZmodel_chain_idsr!   r   Zsuppress_chain_breakZ
prev_residr4   agsre   r   ro   r3   r/   r$   r   r   r   rq     s        

  

 
 
 

 
z_.showc             C   s    t  }| j||||d | S )z
    Alias for show().
    )r)   r*   r   r   )rr   rq   rs   )r9   r*   r   r   r)   r   r   r   rt   `  s    z_.as_strTFr   c             C   s   |dkrt  }|tkr(d}n|tkr(d}|dk	s8|dk	rZddlm} t||||d|d | j|||||||	|
|d	}tjr|| |r|S |	 S )	aL  
    Generate complete PDB-format string representation.  External crystal
    symmetry is strongly recommended if this is being output to a file.

    :param crystal_symmetry: cctbx.crystal.symmetry object or equivalent (such
      as an xray.structure object or Miller array)
    :param write_scale_records: write fractional scaling records (SCALE) if
      crystal symmetry is provided
    :param anisou: write ANISOU records for anisotropic atoms
    :param sigatm: write SIGATM records if applicable
    :param siguij: write SIGUIJ records if applicable
    :returns: Python str
    NFTr   )format_cryst1_and_scale_records)crystal_symmetrycryst1_zwrite_scale_records)r%   )	r   r   r   r   r   r   r   r   output_break_records)
rr   r   r   r   r'   r   r   r   r   rs   )r9   r   r   r   r   r   r   r   r   r   r   r   r   return_cstringior   r   r   r   r   as_pdb_stringo  s6    
z_.as_pdb_stringc             C   s.   ddl }| j|d}|jjdt|d}|S )z2
    Generate corresponding pdb.input object.
    r   N)r   pdb_hierarchy)r   r   )r   r  r   r   r   r   )r9   r   r   pdb_strpdb_inpr   r   r   as_pdb_input  s    z_.as_pdb_inputc             C   s@   g }x6|   D ]*}x$| D ]}| }|r||7 }qW qW |S )N)r   r   as_list_of_residue_names)r9   sequencer    r!   seqr   r   r   r    s    z_.as_list_of_residue_namesc             C   s2   ddl }|jj|   |d}|j||d |S )z Returns simple version of model object based on this hierarchy
     Requires crystal_symmetry.  Optional unit_cell_crystal_symmetry and
     shift_cart
     r   N)Zmodel_inputr   )unit_cell_crystal_symmetry
shift_cart)Zmmtbx.modelr    Zmanager	deep_copyr  Z-set_unit_cell_crystal_symmetry_and_shift_cart)r9   r   r	  r
  mmtbxmmr   r   r   as_model_manager  s    
z_.as_model_managerc             C   sJ   d}i }x<|   d | D ](}x"| D ]}| }|||j< q(W qW |S )Nr   )r   r   &as_dict_of_resseq_as_int_residue_namesr2   )r9   
max_modelsddmrp   Znew_ddr   r   r   r    s    z(_.as_dict_of_resseq_as_int_residue_namesXNc             C   sz   |rdnt t|  }g }xF|  d| D ]2}x,| D ] }	|	j|||d}
|
r<||
7 }q<W q.W |rrd|S |S dS )a   Uses chain.as_sequence() for all chains and returns the catenation
    :param substitute_unknown: character to use for unrecognized 3-letter codes
    :param substitute_unknown_na: character to use for unrecognized na codes
    :param ignore_all_unknown: set substitute_unknown and substitute_unknown_na to ''
    :param as_string: return string (default is to return list)
    :param only_one_model: Only use the first model if more than one
    r   N)substitute_unknownsubstitute_unknown_naignore_all_unknownr;   )rV   r   r   r   as_sequencerZ   )r9   r  r  r  	as_stringr   r  r  r  rp   Znew_seqr   r   r   r    s    
z_.as_sequencec       	      C   sZ   g }x>|   D ]2}x,| D ] }|j|||d}|r||7 }qW qW |rRd|S |S dS )ay   uses format_fasta for all chains and returns catenation
    :param substitute_unknown: character to use for unrecognized 3-letter codes
    :param substitute_unknown_na: character to use for unrecognized na codes
    :param ignore_all_unknown: set substitute_unknown and substitute_unknown_na to ''
    :param as_string: return string (default is to return list of lines)
    )r  r  r  
N)r   r   format_fastarZ   )	r9   r  r  r  r  Zseq_fasta_linesr  rp   Z	new_linesr   r   r   r    s    
z_.format_fastac             C   s,   |dk	r|  |j|dS |  | S dS )a  
    Generate the equivalent cctbx.xray.structure object.  If the crystal
    symmetry is not provided, this will be placed in a P1 box.  In practice it
    is usually best to keep the original xray structure object around, but this
    method is helpful in corner cases.
    N)min_distance_sym_equiv)r  xray_structure_simple)r9   r   r  r   r   r   extract_xray_structure  s    
z_.extract_xray_structurec                s   ddl m  |  |  kr(td|  }| }| j fdd}dd }xt	||D ]r\}}|
 }	|	dd	 }
t|
}t|j}|r||j}||
 }||krtd
|j|
 f |||d qjW dS )z
    Apply the current (refined) atomic parameters from the cctbx.xray.structure
    object to the atoms in the PDB hierarchy.  This will fail if the labels of
    the scatterers do not match the atom labels.
    r   )adptbxz4Incompatible size of hierarchy and scatterers array.c                s   |j | jd |j| jd |j | d | j r`| j	dkr`|j
| d n|  |j| jd |j| jd |  \}}|| || d S )N)new_xyz)new_occ)Znew_b)g      g      g      g      g      g      )Znew_uij)Znew_fp)Znew_fdp)set_xyzsiteset_occ	occupancyset_bZu_as_bZu_iso_or_equivflagsZuse_u_anisoZu_starZset_uijZu_cart_plus_u_isoZ	uij_eraseZset_fpfpZset_fdpfdpZelement_and_charge_symbolsZset_element
set_charge)scaelementcharge)r  orthucr   r   set_attr2  s    
z(_.adopt_xray_structure.<locals>.set_attrc             S   s`   dd t | D }t|dk r"d S |dd  \}}d| ||d  dddd }|S )Nc             S   s   g | ]\}}|d kr|qS )"r   )rO   poscharr   r   r   rQ   A  s    z:_.adopt_xray_structure.<locals>.get_id.<locals>.<listcomp>r-   r   r;   r   r2  )	enumeraterV   rZ   replacesplit)lrijr   r   r   get_id@  s     *z&_.adopt_xray_structure.<locals>.get_idr<      zMismatch: 
 %s 
 %s 
)r+  r,  N)cctbxr  r   
scatterersr0   r   atoms_with_labels	unit_cellorthogonalizer   id_strr   rx   r~   )r9   xray_structureZassert_identical_id_strZawlr?  r1  r<  r+  r,  rC  Zresname_from_scZcl1Zcl2l1l2r   )r  r/  r0  r   adopt_xray_structure%  s(    

z_.adopt_xray_structurec             C   s   g }xt ||D ]\}}x|  D ]|}tjj }tjj }xJ| D ]>}	|	 }	|		 
 }
|j|
 | }|		 | ||	 qHW || || q"W qW tjjj|d}|  |S )zN
    LIMITATION: ANISOU records in resulting hierarchy will be invalid!!!
    )roots)r   r   r   r   r   r   r    r   r   r/   extract_xyzelemsr"  append_chainr   r=   
join_rootsreset_i_seq_if_necessary)r9   rot_matricestrans_vectorsrH  r9  tr    r   r  rp   xyzr   rw   r   r   r   apply_rotation_translationR  s     
z_.apply_rotation_translationc          
   C   s   |    tj|d}|j|   d}| }g }x|  D ]}x| D ]}d}x"| 	 D ]}	|	|krbd}P qbW |sLx0|
 D ]$}
t|
dkst|
dkrtdqW x,|
 D ] }
|d|j| |
g qW |j|d	 qLW q>W |S )
N)r   )
sites_cartTFrE   rG   z0Amino-acid residue or NA is on special position.,)r"   )rM  r   special_position_settingssite_symmetry_tabler/   rI  Zspecial_position_indicesr   r   extract_i_seqr   r   r   r=   rZ   r2   r3   remove_residue_group)r9   r   rU  rV  Zspiremovedrp   r4   keepr:  rx   r   r   r   ?remove_residue_groups_with_atoms_on_special_positions_selectiveg  s0    
 zA_.remove_residue_groups_with_atoms_on_special_positions_selectivec             C   s,  |  }||   }tt| }tt| }t||d }tt| t|}d }x|D ]}	x|D ]}
x|D ]|}||	|
|g }|	|}||
 }|d dkrz|d dk rz|d dkrz|d dk rz|d dkrz|d dk rz|	|
|g}qzW qpW qfW |d k	st|  |	||  d S )Nr-   r   r   )rA  fractionalizer/   rI  r   minrb   r   intrB  r   r   r"  )r9   r   r0  
sites_fracr8  r9  ZrlZrrZ
shift_bestxyzsfr+  Zcmfr   r   r   shift_to_origin  s&    



z_.shift_to_originc             C   s  dd l }dd l}t }t }dd |jD }dd |jD }dd |  D }	g }
xn|  D ]`}xX| 	 D ]F}|
  }|j|}|r| rqt|  }|j|d |d |d f}x| D ]}t| }||
kr|
| | }|  }| |}| |j| | }| | |
  rZ|  sd}xH|D ]@}x0|D ](}|| }||	krn|	| |}P qnW |rdP qdW ||_|| qW qtW q^W || |S )	Nr   c             S   s   g | ]}|qS r   r   )rO   r:  r   r   r   rQ     s    z"_.expand_to_p1.<locals>.<listcomp>c             S   s   g | ]}|qS r   r   )rO   r:  r   r   r   rQ     s    c             S   s   g | ]
}|j qS r   )r2   )rO   rp   r   r   r   rQ     s    r   r-   F) stringscitbx.matrixr   r    ascii_lowercaseascii_uppercaser   r   space_groupZall_opsr9  	as_doublematrixsqris_r3_identity_matrixrP  colrV   r/   r=   r   rI  rA  r\  rB  rJ  r"  Z
is_unit_mxis_zeror2   rK  r   )r9   r   Zexclude_selfre  scitbxr9  r  ZidlZiduZtakenrX   Zm_Zsmxm3rP  c_Zn_atrQ  r   foundZidu_Zidl_Zid_r   r   r   expand_to_p1  sP      




 
z_.expand_to_p1c             C   s   |r8|rd}nd}t ||}t||d W d Q R X d}|d k	sH|d k	r|rRd}nd}ddlm} t ||}t||||d|d W d Q R X d}| j||||||	|
||d	 d S )	Nr,  w)r%   Tr   )r   )r   r   r   )		file_nameopen_appendr   r   r   r   r   r   r   )openr'   r   r   _write_pdb_file)r9   rv  rw  r   r   r   r   r   r   r   r   r   r   link_recordsmodefr   r   r   r   write_pdb_file  s8      z_.write_pdb_filec             C   s"   |   |kst| |  | S )N)r   r   get_label_alt_id_atomr/   )r9   iseqr   r   r   get_label_alt_id_iseq  s    z_.get_label_alt_id_iseqc             C   s   |  j}|dkrd}|S )Nr;   .)r1   r   )r9   r$   Zalt_idr   r   r   r~    s    
 z_.get_label_alt_id_atomc             C   s>   |   |ks td|   |f | |  |    S )Nz%d, %d)r   r   get_auth_asym_idr/   r1   )r9   r  r   r   r   get_auth_asym_id_iseq  s     z_.get_auth_asym_id_iseqc             C   sX   |j }t| d j t|kr6| d j }| dkrT| | d }|S )Nr   r;   )r2   rV   r/   segidr>   get_label_asym_idr   )r9   r!   auth_asym_idr   r   r   r    s    z_.get_auth_asym_idc             C   s*   |   |kst| |  |   S )N)r   r   r  r/   r1   )r9   r  r   r   r   get_label_asym_id_iseq  s    z_.get_label_asym_id_iseqc             C   sF  t | dsi | _d}t }x|  D ]}x| D ]}d }x| D ]}| d j }t	|}	|
 }
|	dkr|dkr|d k	r|d7 }|| | j|
< d}qF|	dkr|dkr|d k	r|d7 }d}|| | j|
< qF|d k	r|d7 }d}|| | j|
< qFW |d7 }d }q4W |d7 }q&W |
 }
| j|
d }|d krBt|  |S )	N_lai_lookupr   )rE   rF   rG   rH   polyr   )rI   r   Zligand)hasattrr  r   r   r   r   r}   rx   r>   r   r   getr'   rC  )r9   r"   Znumber_label_asym_idlabel_asym_idsr    r!   Zpreviousr4   rx   residue_classZrg_midrw   r   r   r   r    sB    
z_.get_label_asym_idc             C   s*   |   |kst| |  |   S )N)r   r   get_auth_seq_idr/   r1   )r9   r  r   r   r   get_auth_seq_id_iseq4  s    z_.get_auth_seq_id_iseqc             C   s
   |j  S )N)resseqr>   )r9   r4   r   r   r   r  8  s    z_.get_auth_seq_idc             C   s6   |   |ks td|   |f | |  |  S )Nz%d, %d)r   r   get_label_seq_idr/   r1   )r9   r  r   r   r   get_label_seq_id_iseq;  s     z_.get_label_seq_id_iseqc             C   s   t | dsd}i | _x|  D ]}x| D ]}d}x|| D ]p}xj| D ]^}|j|j |j }||krv|d7 }|}d}	|j	
 }
t|
}|dkrt|}	|	| j| < qLW q>W q,W qW | j|  S )N_label_seq_id_dictr;   r   r   r  )rE   rF   )r  r  r   r   r   r}   r2   r  icoderx   r>   r   rW   r   )r9   r#   Zprev_ac_keyr    r!   Zlabel_seq_idr4   r   Z
cur_ac_keyZlabel_seq_id_strcomp_idr  r   r   r   r  ?  s&    

z_.get_label_seq_id   r,   c       R         s  |d krt  }|jdd}tjj }d|  d| }d| }	d| tjjjdd}
tjjjdd}|
d }|
d }|
d	 }|
d
 }|
d }|
d }|
d }|
d }|
d }|
d }|
d }|
d }|
d }|
d }|
d }|
d }|
d }|
d }|
d }|
d }|
d } |d }!|d }"|d }#|d }$|d  }%|d! }&|d" }'|d# }(|d$ })|d% }*|d& }+|d' },|d( }-t }.t	
 }/t	
 }0tjjjd)d}1tjjjd*d}2g }3g }4g }5t }6x|  D ]z}7|7j}8|8d+krd,}8x\|7 D ]N}9| |9}:x:|9 D ],};| |;}<| |;}=|;j}>|>d-ksB|>d+krFd.}>x|; D ]}?|?j }@d.}Ax|? D ]}Bd/}C|Bjrd0}C fd1d2|BjD \}D}E}F|B }G|Gd krd.}Gn|G }G|Gd+krd.}G|Bj|Bj }H}I|Hd3kr|Id3krd4}Hd4}Ind5|H }Hd5|I }I||C |ttd6|Bj d7 ||Bj!  |Bj! |4kr^|4|Bj!  || "|B ||@ |@|3kr|3|@ ||: ||= ||> ||D ||E ||F |||Bj#  ||	|Bj$  ||Bj%  ||G ||H ||I ||<  |< |5kr@|5|<  ||A || &|? | |8  |B' rrfd8d2|Bj(D \}J}K}L}M}N}O|!ttd6|Bj d7 |"|Bj!  |#| "|B |$|@ |%|: |&|= |'|> |(|J |)|K |*|L |+|M |,|N |-|O qrW qRW qW qW qW x$d9D ]}P|
|P )d4rR|
|P= qRW |*|
 |+ d3kr|*| |,| |3-  x|3D ]}Q|1.|Qg qW |*|1 |4-  x|5D ]}Q|2.|Qg qW |*|2 |S ):NZmmcif)formatz%%.%if)z_atom_site.group_PDBz_atom_site.idz_atom_site.label_atom_idz_atom_site.label_alt_idz_atom_site.label_comp_idz_atom_site.auth_asym_idz_atom_site.auth_seq_idz_atom_site.pdbx_PDB_ins_codez_atom_site.Cartn_xz_atom_site.Cartn_yz_atom_site.Cartn_zz_atom_site.occupancyz_atom_site.B_iso_or_equivz_atom_site.type_symbolz_atom_site.pdbx_formal_chargez&_atom_site.phenix_scat_dispersion_realz&_atom_site.phenix_scat_dispersion_imagz_atom_site.label_asym_idz_atom_site.label_entity_idz_atom_site.label_seq_idz_atom_site.pdbx_PDB_model_num)header)z_atom_site_anisotrop.idz&_atom_site_anisotrop.pdbx_auth_atom_idz&_atom_site_anisotrop.pdbx_label_alt_idz&_atom_site_anisotrop.pdbx_auth_comp_idz&_atom_site_anisotrop.pdbx_auth_asym_idz%_atom_site_anisotrop.pdbx_auth_seq_idz&_atom_site_anisotrop.pdbx_PDB_ins_codez_atom_site_anisotrop.U[1][1]z_atom_site_anisotrop.U[2][2]z_atom_site_anisotrop.U[3][3]z_atom_site_anisotrop.U[1][2]z_atom_site_anisotrop.U[1][3]z_atom_site_anisotrop.U[2][3]z_atom_site.group_PDBz_atom_site.idz_atom_site.label_atom_idz_atom_site.label_alt_idz_atom_site.label_comp_idz_atom_site.auth_asym_idz_atom_site.auth_seq_idz_atom_site.pdbx_PDB_ins_codez_atom_site.Cartn_xz_atom_site.Cartn_yz_atom_site.Cartn_zz_atom_site.occupancyz_atom_site.B_iso_or_equivz_atom_site.type_symbolz_atom_site.pdbx_formal_chargez&_atom_site.phenix_scat_dispersion_realz&_atom_site.phenix_scat_dispersion_imagz_atom_site.label_asym_idz_atom_site.label_entity_idz_atom_site.label_seq_idz_atom_site.pdbx_PDB_model_numz_atom_site_anisotrop.idz&_atom_site_anisotrop.pdbx_auth_atom_idz&_atom_site_anisotrop.pdbx_label_alt_idz&_atom_site_anisotrop.pdbx_auth_comp_idz&_atom_site_anisotrop.pdbx_auth_asym_idz%_atom_site_anisotrop.pdbx_auth_seq_idz&_atom_site_anisotrop.pdbx_PDB_ins_codez_atom_site_anisotrop.U[1][1]z_atom_site_anisotrop.U[2][2]z_atom_site_anisotrop.U[3][3]z_atom_site_anisotrop.U[1][2]z_atom_site_anisotrop.U[1][3]z_atom_site_anisotrop.U[2][3])z_chem_comp.id)z_struct_asym.idr;   1 ?ZATOMHETATMc                s   g | ]} | qS r   r   )rO   r:  )coord_fmt_strr   r   rQ     s    z"_.as_cif_block.<locals>.<listcomp>r   r  z%.4fr  )widthr   c                s   g | ]} | qS r   r   )rO   r:  )u_aniso_fmt_strr   r   rQ   	  s    )z&_atom_site.phenix_scat_dispersion_realz&_atom_site.phenix_scat_dispersion_imag)/r   symmetryas_cif_blockr   cifr    blockloopr   r   
std_stringr   r   r2   r   r  r   r  r  r  r}   rx   r>   r/   heterorQ  charge_tidyr(  r)  r=   rW   r   serialrN   r~  r   br-  r  Zuij_is_defineduijall_eqZadd_loopr0   updatesortZadd_row)Rr9   r   Zcoordinate_precisionZoccupancy_precisionZb_iso_precisionZu_aniso_precisionZcs_cif_blockZh_cif_blockZocc_fmt_strZb_iso_fmt_strZatom_site_loopZ
aniso_loopZatom_site_group_PDBZatom_site_idZatom_site_label_atom_idZatom_site_label_alt_idZatom_site_label_comp_idZatom_site_auth_asym_idZatom_site_auth_seq_idZatom_site_pdbx_PDB_ins_codeZatom_site_Cartn_xZatom_site_Cartn_yZatom_site_Cartn_zZatom_site_occupancyZatom_site_B_iso_or_equivZatom_site_type_symbolZatom_site_pdbx_formal_chargeZ%atom_site_phenix_scat_dispersion_realZ%atom_site_phenix_scat_dispersion_imagZatom_site_label_asym_idZatom_site_label_entity_idZatom_site_label_seq_idZatom_site_pdbx_PDB_model_numZatom_site_anisotrop_idZ%atom_site_anisotrop_pdbx_auth_atom_idZ%atom_site_anisotrop_pdbx_label_alt_idZ%atom_site_anisotrop_pdbx_auth_comp_idZ%atom_site_anisotrop_pdbx_auth_asym_idZ$atom_site_anisotrop_pdbx_auth_seq_idZ%atom_site_anisotrop_pdbx_PDB_ins_codeZatom_site_anisotrop_U11Zatom_site_anisotrop_U22Zatom_site_anisotrop_U33Zatom_site_anisotrop_U12Zatom_site_anisotrop_U13Zatom_site_anisotrop_U23Zunique_chain_idsZauth_asym_idsr  Zchem_comp_loopZstruct_asym_loopZchem_comp_idsZchem_comp_atom_idsZstruct_asym_idsra   r    model_idr!   r  r"   Zlabel_asym_idZseq_idr  r#   r  Z	entity_idr$   Z	group_pdbr`  ra  rb  Zatom_charger(  r)  Zu11Zu22Zu33Zu12Zu13Zu23keyrowr   )r  r  r   r  U  s(   
 


 
 

 


 











 








(




 

 
z_.as_cif_blockc          	   C   sN   t jj }|d krd}| j|d||< t|d}t||d W d Q R X d S )Nphenix)r   ru  )r%   )r   r  r    r  rx  r'   )r9   rv  r   Zdata_block_nameZ
cif_objectr|  r   r   r   write_mmcif_file+  s    z_.write_mmcif_filec             c   s   x|   D ]}x| D ]z}d}xp| D ]d}|p6|j }xR| D ]F}x@| D ]4}t||j|j|j|j	|j
|j||d	V  d}d}qRW qDW q*W qW q
W dS )z~
    Generator for atom_with_labels objects, presented in the same order as
    the array returned by the atoms() method.
    T)	r$   r  chain_idr  r  r   rx   is_first_in_chainis_first_after_breakFN)r   r   r   r   r}   r/   atom_with_labelsr2   r  r  r   rx   )r9   r    r!   r  r4   r  r   r$   r   r   r   r@  7  s&    
z_.atoms_with_labelsc             C   sr   |   }t|d}|  }d|kr*d}nd}t| }x2t|D ]&\}}|dkrVqD||| ||  qDW |S )Nr   r;   r   )r   r   size_taltloc_indicessortedrf   r5  set_selected)r9   Zn_seqconformer_indicesr  pZaltlocsr:  r   r   r   r   get_conformer_indicesP  s      z_.get_conformer_indicesCACOc             C   s   | }x|  D ]}x| D ]}x|| D ]p}g }| }x:|D ]2}x,| D ] }	|	j }
|
|krN||
 qNW q@W x"|D ]}||kr||j|d P q|W q*W t	| dkr|j
|d qW qW d S )N)r"   r   )r!   )r   r   r   r}   r/   rN   r>   r=   rX  rV   remove_chain)r9   Zrequired_atom_namesr   r    r!   r"   Zall_atom_names_foundr}   r#   r$   	atom_namer9  r   r   r   $remove_incomplete_main_chain_protein\  s"    



z&_.remove_incomplete_main_chain_proteinc          	   C   s  | }x|  D ]}x| D ]}x| D ]z}| }t|dksRtd}|rt|dkrx|d jdkrxq4g }x8|D ]0}	|	jdkrqt|		 
 }
||	|
f qW |jtddd x$|dd  D ]\}	}|j|	d qW |d \}}d|_nRx.|D ]&}	|	jdkr(|j|	d nd|	_q
W t| dkrV|j|d d	}|r4| dkr4| }x@tt|d dd
D ](}||d ||  |||  qW q4W t| dkr"|j|d q"W qW |	 }t| d}|| d S )Nr   Tr   r;   )r  reverse)r#   )r;   A)r"   Fr.   )r!   g      ?)r   r   r   r}   rV   r   r   r   r   r/   r   r=   r  operator
itemgetterremove_atom_grouprX  r   r   Zmerge_atom_groupsr  doubler0   r$  )r9   Zalways_keep_one_conformerr   r    r!   r"   r}   Zcleanup_neededZatom_groups_and_occupanciesr#   Zmean_occr   Zsingle_confr   r:  r/   r!  r   r   r   remove_alt_confsq  sL    


z_.remove_alt_confsc             C   s8   x2|   D ]&}x | D ]}|j|kr||_qW q
W d S )N)r   r   r2   )r9   Zold_idZnew_idr    r!   r   r   r   rename_chain_id  s    
z_.rename_chain_idc             C   s2   |dkr|dk st t|  d| }| |S )Nr   g      ?r   )r   r   random_boolr   r   )r9   ZfractionZsel_keepr   r   r   remove_atoms  s    z_.remove_atomsc             C   s   t |tst|d krtdt|dkr2td| dkrFtd|dkrTd}n|dk rjdt| }nd| }|  }x|D ]}|| }|| qW d S )	Nz/Specify an atom selection to apply a charge to.r<   z-The charge must be in the range from -9 to 9.r   z'Empty selection for charge modificationz  z%1d-z%1d+)r   r^  r   r   r   r0   r/   r*  )r9   
iselectionr.  r/   i_seqr$   r   r   r   set_atomic_charge  s     
z_.set_atomic_chargec                s   |   }|  t x|  D ]v}xp| D ]d}x^| D ]R fdd}| r:x8 D ],}x&|  D ]}|j|krj|j|d qjW q\W q:W q,W qW d S )Nc                 s$   x  D ]} | j kr
dS q
W dS )NTF)r}   rx   )r   )aa_resnamesr4   r   r   have_amino_acid  s    
z+_.truncate_to_poly.<locals>.have_amino_acid)r$   )	r/   reset_i_seqr   r   r   r   r}   rN   remove_atom)r9   atom_names_set	pdb_atomsr    r!   r  r   r$   r   )r  r4   r   truncate_to_poly  s    
z_.truncate_to_polyc             C   s   | j tddddgd d S )Nz N  z CA z C  z O  )r  )r  r   )r9   r   r   r   truncate_to_poly_gly  s    z_.truncate_to_poly_glyc             C   s   | j tdddddgd d S )Nz N  z CA z C  z O  z CB )r  )r  r   )r9   r   r   r   truncate_to_poly_ala  s    z_.truncate_to_poly_alac             C   s   x|   D ]}x| D ]|}xv| D ]j}xd| D ]X}|jdkr4d|_xB| D ]6}d|_|j dkrR|j	 
 dkrRd|_d|_	qRW q4W q&W qW q
W d S )NMSEMETFSEz SD z S)r   r   r   r}   rx   r/   r  rN   r>   r-  upper)r9   r    r!   r"   r#   r$   r   r   r   convert_semet_to_met  s    
z_.convert_semet_to_metc             C   s   x|   D ]}x| D ]|}xv| D ]j}xd| D ]X}|jdkr4d|_xB| D ]6}d|_|j dkrR|j	 
 dkrRd|_d|_	qRW q4W q&W qW q
W d S )Nr  r  TZSDSz SE r  )r   r   r   r}   rx   r/   r  rN   r>   r-  r  )r9   r    r!   r"   r#   r$   r   r   r   convert_met_to_semet  s    
z_.convert_met_to_semetc             C   s   d}|  }x<t|   |D ]*\}}|d7 }td|d|_|j|d qW |  |  }}||krxF||d  D ]6}|d7 }ttd|dd}|j|d | j|d qrW d S )Nr   r   r   )r  value)rL   )r2   )r    )r   r   r   r2   transfer_chains_from_otherr   r    r   )r9   rL   Zi_modelZother_modelsmdZother_mdZmszZomszr   r   r   r    s    z_.transfer_chains_from_otherc             C   s   ddl m} || |dS )Nr   )cache)r   rU  )Ziotbx.pdb.atom_selectionr  )r9   rU  r  r   r   r   r     s    z_.atom_selection_cachec             C   s"   |   }|j|d}|  |S )zE Apply atom selection string and return deep copy with selected atoms)re  )r   r   r  r   )r9   atom_selectionr   r   r   r   r   apply_atom_selection  s    z_.apply_atom_selectionc             C   st   |r|    }n|   jddd}g }xD|  D ]8}|d krV| rJd}| rVd}||j||d q2W ~|S )Nr   r   )first_value	incrementrE   rG   )common_residue_name_class_onlyalways_group_adjacent)r/   Z%reset_tmp_for_occupancy_groups_simple	reset_tmpr   
is_proteinis_naextendoccupancy_groups_simple)r9   r  r  ignore_hydrogenssentinelrw   r!   r   r   r   r  	  s    z_.occupancy_groups_simplec          	   C   s   |   }|  }x|D ]}d}g }g }x&|D ]}||t|  q,W x0|D ](}	tt|	dkrx||	d  qTd}qTW t	|dkrd}|rt
||}
x>t|D ]2\}}|t|t|
| gt|  qW qW dS )a  Round occupancies of those alternative conformations that cannot be
    rounded properly to the sum of 1 by standard round procedure in the output.
    The rest occupancies left intact.

    Args:
        ndigits (int): number of significant digits after the dot
    Tr   r   Fg      ?N)r/   r  r=   r   r   r  r   rV   r   sumgroup_roundingr5  r$  r  )r9   ndigitsh_atomsZogsZ	occ_groupZcan_be_roundedZoccsZoccs_valuesgoZ
round_occsr:  r   r   r   round_occupancies_in_place  s$    



z_.round_occupancies_in_placec       
      C   s   g }|dk r|S x|   D ]}x| D ]}t }d}xd| D ]X}|  }d}	||krn|| d}	n$|| t }|| d}d}	|d7 }qBW t	|dks|	s(|
 dkst|| q(W qW |S )Nr   r   TF)r   r   r   r  r   r/   rW  r  r=   rV   r0   r   )
r9   Zresidues_per_chunkrw   r    r!   Zresidue_range_selcntrr4   i_seqsZ
last_addedr   r   r   chunk_selections<  s.     


z_.chunk_selectionsc       
      C   s   xt |  D ]\}}i }x| D ]|}d|| f }||d}|rx(| D ]}||  || qRW |	 }|
| |	 }	|	| q$|||< q$W qW dS )zyTransfered from qrefine for merging single H/D atoms from the end of the
    PDB input to the correct residue object
    z%s %sN)r5  r   r}   rC  
setdefaultr/   append_atomr   r  r1   r  rX  )
r9   r  r    residuesr   r  Zprevious_instancer$   r4   r!   r   r   r   merge_atoms_at_end_to_residuesU  s    
z _.merge_atoms_at_end_to_residuesc             C   sx   xr|   D ]f}| r
t|j dk r
|j|j dkr
|jd dkr
d|j  |_|r
td|   q
W d S )Nr   r   )r  23z %-3szcorrected PDB format of %s)r/   element_is_hydrogenrV   rN   r>   findr'   quote)r9   verboser$   r   r   r   format_correction_for_Hk  s     z_.format_correction_for_Hc             C   s  dd l }ddlm} dd }ddddgdd	gdd
gddgddggdddddgdd	gddggdddddgdd	gddggdddddgdd	gddgddgddgddggddddd gd!d"d	gdd gd#d$gd%d&gd'd(ggd)ddddgd!d"d	gddgd*d+gd,d-gd.d/ggd)d0}|d1 |d2< |   }|  }d3}x|  D ]}x| D ]r}	||	jd }
|
d krtqRd4|
krd5|
krt	d"}d4|
krg }x4|
d4 D ](}|	
|}|d krP ||j qW t|d6krqR||d7d8}t|d9|
d: d	  d6 krd7}nd5|
krg }x4|
d5 D ](}|	
|}|d krHP ||j q.W t|d6krlqR|d;d< |D |
d: d |
d: d	 d=}t|d>krd7}|rR|d?| j|	j|jf 7 }g }|	 }x~|
d@ D ]r}|	
|d }|	
|d	 }|d kr|d krqtttd ||gd	kr@g }|dA7 }P |||g qW xf|D ]^\}}x6dBD ].}t||}t||t|| t||| qhW |dC|j |j f 7 }qZW |dD7 }qRW q@W |sdE}|dF|  |  7 }|S )GNr   )dihedral_anglec       
      S   sl   | d | d  }| d | d  }| d | d  }| |}||}d}|rX|dk rXd}|||  }	|	d S )Nr   r   r-   r,   r.   )crossdot)
sitesvolume_ideal
both_signsd_01Zd_02Zd_03Zd_02_cross_d_03Zvolume_modelZ
delta_signdeltar   r   r   chirality_deltaw  s    

z5_.flip_symmetric_amino_acids.<locals>.chirality_deltaCDZNEZCZZNH1r   ZNH2ZHH11ZHH21ZHH12ZHH22)dihedralr  pairsr  ZCBZCGZOD1ZOD2ZOE1ZOE2ZCD1ZCD2ZCE1ZCE2ZHD1ZHD2ZHE1ZHE2CG1CG2g      FZHG11ZHG21ZHG12ZHG22ZHG13ZHG23)chiralr  r  ZHD11ZHD21ZHD12ZHD22ZHD13ZHD23)ARGASPGLUPHEVALLEUr  TYRr;   r  r  r   T)r
  degg     v@r  c             S   s   g | ]}t |gqS r   )r   vec3_double)rO   rQ  r   r   r   rQ     s    z0_.flip_symmetric_amino_acids.<locals>.<listcomp>)r
  r  r  g       @z    Residue "%s %s %s":r  z not complete - not flipped)rQ  r  z "%s" <-> "%s"r  z	    None
z   Time to flip residues: %0.2fs
)timescitbx.mathr  r/   rI  r   r}   r  rx   r   Zget_atomr=   rQ  rV   r   r1   r2   r  r   filtergetattrsetattrrN   r>   )r9   r  r  r  datarS  t0r   r4   r   Z	flip_dataZflip_itr
  dr$   r  r  Zflips_storedr/   pairatom1atom2attrtmpr   r   r   flip_symmetric_amino_acidst  s    






 


  


  
 

 z_.flip_symmetric_amino_acidsffffff?      @c             C   sL   ddl m} |   }|  | }| }|  }|j|||||dS )Nr   )distance_based_connectivity)rS  elementsr  fallback_expected_bond_lengthfallback_search_max_distance)	Zcctbx.crystalr/  r/   r  Z(set_chemical_element_simple_if_necessaryrI  extract_elementr  Zbuild_simple_two_way_bond_sets)r9   r1  r2  r/  r/   rS  r0  r  r   r   r   'distance_based_simple_two_way_bond_sets  s    z)_.distance_based_simple_two_way_bond_setsc             C   s&   |   }| }|dr"|  d S )Nr   )r/   rW  r  r  )r9   r/   r  r   r   r   rM    s    
z_.reset_i_seq_if_necessaryc             C   s   t  }|   }| dkr6|dd ds6tx|  D ]t}xn| D ]b}x\|	 D ]P}xJ|
 D ]>}t|jdkrjx*| D ]}|j dkr||j qW qjW q\W qNW q@W |S )zI
    Extract atom selection (flex.size_t) for protein C-alpha atoms.
    r   Nr   rE   r  )r   r  r/   rW  r0   all_ner   r   r   r   r}   r   rx   rN   r>   r=   r  )r9   rw   r  r    r!   r4   r   r$   r   r   r   get_peptide_c_alpha_selection  s      z_.get_peptide_c_alpha_selectionc             C   sR   |s|   }|jddgd}|jdgd}|j| dkrJ||j|  |kS ||kS )zV
    Inspect residue names and counts to determine if enough of them are protein.
    rE   rF   )rv   rI   r   )r6   rz   r]   )r9   min_contentocZn_prot_residuesn_water_residuesr   r   r   contains_protein  s    
z_.contains_proteinc             C   sR   |s|   }|jddgd}|jdgd}|j| dkrJ||j|  |kS ||kS )z]
    Inspect residue names and counts to determine if enough of
    them are RNA or DNA.
    rG   rH   )rv   rI   r   )r6   rz   r]   )r9   r7  r8  Zn_na_residuesr9  r   r   r   contains_nucleic_acid   s    
z_.contains_nucleic_acidc             C   sD   |s|   }x2|j D ]$\}}t|dkrd| krdS qW dS )zS
    Inspect residue names and counts to determine if any of
    them are RNA.
    rG   DTF)r6   re   r_   r   r  )r9   r8  rx   ry   r   r   r   contains_rna/  s    z_.contains_rnac             C   sD   |s|   }x2|j D ]$\}}t|dkrd| krdS qW dS )zS
    Inspect residue names and counts to determine if any of
    them are DNA.
    rG   r<  TF)r6   re   r_   r   r  )r9   r8  rx   ry   r   r   r   contains_dna<  s    z_.contains_dnac             C   sR   |   }g }| j|dr"|d | j|dr8|d | j|drN|d |S )zT
    Inspect residue names and counts to determine what chain types are present
    )r8  ZPROTEINZDNAZRNA)r6   r:  r=   r>  r=  )r9   r8  chain_typesr   r   r   r?  I  s    


z_.chain_typesc             C   s(   |   }|r t|dkr |d S dS dS )z
    Inspect residue names and counts to determine what chain types are present
    If only one chain type, return it. Otherwise return None
    r   r   N)r?  rV   )r9   r?  r   r   r   
chain_typeW  s    z_.chain_typec             C   sT   xN|   D ]B}x<| D ]0}|dk	r0|j|kr0qx| D ]
}| S W qW q
W dS )z Return residue number of first residue in specified chain, as integer.
        If chain not specified, first residue in hierarchy.
    N)r   r   r2   r   resseq_as_int)r9   r  r    r!   r4   r   r   r   first_resseq_as_intb  s     z_.first_resseq_as_intc             C   sZ   d}xP|   D ]D}x>| D ]2}|dk	r4|j|kr4qx| D ]}| }q>W qW qW |S )z Return residue number of last residue in specified chain, as integer.
        If chain not specified, last residue in hierarchy.
    N)r   r   r2   r   rA  )r9   r  Z
last_resnor    r!   r4   r   r   r   last_resseq_as_intl  s     z_.last_resseq_as_intc             C   sF   g }x<|   D ]0}x*| D ]}|r.|j|kr||j qW qW |S )z= Get list of chain IDS, return unique set if unique_only=True)r   r   r2   r=   )r9   Zunique_onlyra   r    r!   r   r   r   ra   y  s    z_.chain_idsc             C   s   |   }|r|d S dS dS )z Get first chain ID r   N)ra   )r9   ra   r   r   r   first_chain_id  s    z_.first_chain_idc             C   s   d}x|   D ]}x| D ]}x| D ]x}x\| D ]P}x4| D ](}|j  dkrF|| |d7 }qFW |	 dkr8|
| q8W | dkr*|| q*W | dkr|| qW | dkr| | qW |r|    |S )za
    Remove all hydrogen/deuterium atoms in-place.  Returns the number of atoms
    deleted.
    r   )Hr<  r   )r   r   r   r}   r/   r-  r>   r  r  r   r  r   rX  r   r  r   Zremove_modelr  )r9   r  Z	n_removedZ	pdb_modelZ	pdb_chainZpdb_residue_groupZpdb_atom_groupZpdb_atomr   r   r   	remove_hd  s(    
z_.remove_hdc             C   s  g }x|   D ]t}xl| D ]^}xV| D ]H}x@t| D ].\}}|  }tt|tddg } xt| D ]\}	}
|
  }tt|tddg }|r|rq|j	|
j	kr|	|krx| D ]}|j
 }|j dd  }xt|
 D ]h}|j
 }|j dd  }|dkr|dkr||kr||kr|t|jgt|jgg qW qW qW qJW q4W q"W qW |S )NrE  r<  r   )rE  r<  )r   r   r   r5  r}   r/   r3  r   r   r   r-  r>   rN   r=   r^  r  )r9   rw   r    r!   r"   Zi_gr1Zatom_group_1Zelements_group1Znon_H_atoms_group1Zi_gr2Zatom_group_2Zelements_group2Znon_H_atoms_group2r(  e1n1r)  e2n2r   r   r   exchangeable_hd_selections  s.     


<z_.exchangeable_hd_selectionsc          
   C   s  |   }ddlm} |  }g g  }}xx|D ]p}|d d |d d  }}xP||gD ]D}	||	 j  dkrx||	 ||	 j  dkrT||	 qTW q,W tj	j
}
xB|  D ]4}x,| D ]}x| D ]}x | D ]}x|  D ]}|j}|j  dkr,||kr,|| q| j }|
|ddkr~|j  dkrhd	| _| r~|| q|j  dkr||krd
|_d| _|j  dkrd|_|jddd}||_qW qW qW qW qW dS )zy
    Remove all D atoms and replace with H. Keep only H at hydrogen/deuterium
    sites. Changes hierarchy in place.
    r   )utilsr   r<  rE  )rN   rI   r  HOHg      ?r;   N)r/   r  rL  rK  r-  r>   r  r=   r   r   r   r   r   r   r}   r  r  r1   rx   r  r   r   rN   r6  )r9   r/   rL  Zhd_group_selectionsZhd_site_d_iseqsZhd_site_h_iseqsZgselr:  r;  _i	get_classr  rp   r4   r   r,  rx   new_namer   r   r   de_deuterate  sF    







z_.de_deuteratec             C   s&   d}x|   D ]}|o| }qW |S )a-  
    Determine if hierarchy consists only from CA atoms.
    Upgrade options:
      - implement threshold for cases where several residues are present in
        full;
      - figure out how to deal with HETATM records of the same chain.
      - Ignore possible incorrect alignment of atom names.
    T)r   
is_ca_only)r9   rw   r    r   r   r   rR    s    	z_.is_ca_only)N)N)NN)r  r  NFF)r  r  NF)NN)T)F)FNNTFr   NTTTTN)Nr  r,   r  r  )NN)N)NTT)F)r-  r.  )r   N)r   N)N)N)N)N)F)F)Xr   r   r   __doc__r   r   r   r   r}   r   r   r   r   r   r   r   r6   r   r   r   rq   rt   r   r  r  r  r  r  r  r  r  rG  rR  r[  rd  rt  r}  r  r~  r  r  r  r  r  r  r  r  r  r  r@  r  r  r  r  r  r  r   r  r  r  r  r  r  r   r  r  r  r  r  r  r,  r4  rM  r6  r:  r;  r=  r>  r?  r@  rB  rC  ra   rD  rF  rK  rQ  rR  r   r   r   r   r   f  s   

#G2

 

    
   
 

-
,           
(    
 T 
	*
  

	x 








	
/r   c               @   sX   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd ZdS )r   a  
  Class representing MODEL blocks in a PDB file (or equivalent mmCIF).  There
  will always be at least one of these in a hierarchy root extracted from a
  PDB file even if no MODEL records are present.

  Example
  -------
  >>> hierarchy = iotbx.pdb.hierarchy.root()
  >>> model = iotbx.pdb.hierarchy.model(id="1")
  >>> hierarchy.append_model(model)
  >>> model = hierarchy.only_model()
  c             c   s.   x(|   D ]}x| D ]
}|V  qW q
W d S )N)r   r   )r9   r!   r4   r   r   r   r   
  s    z_.residue_groupsc             c   s@   x:|   D ].}x(| D ]}x| D ]
}|V  q&W qW q
W d S )N)r   r   r}   )r9   r!   r4   r   r   r   r   r}     s    z_.atom_groupsc             C   s   |   dkst|  d S )Nr   r   )r   r   r   )r9   r   r   r   r     s    z_.only_chainc             C   s   |    S )N)r   r   )r9   r   r   r   r     s    z_.only_residue_groupc             C   s   |    S )N)r   r   )r9   r   r   r   r     s    z_.only_conformerc             C   s   |    S )N)r   r   )r9   r   r   r   r     s    z_.only_atom_groupc             C   s   |    S )N)r   r   )r9   r   r   r   r   "  s    z_.only_residuec             C   s   |    S )N)r   r   )r9   r   r   r   r   %  s    z_.only_atomc             C   s&   d}x|   D ]}|o| }qW |S )a)  
    Determine if model consists only from CA atoms.
    Upgrade options:
      - implement threshold for cases where several residues are present in
        full;
      - figure out how to deal with HETATM records of the same chain.
      - Ignore possible incorrect alignment of atom names.
    T)r   rR  )r9   rw   r!   r   r   r   rR  (  s    	z_.is_ca_onlyN)r   r   r   rS  r   r}   r   r   r   r   r   r   rR  r   r   r   r   r     s   c               @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
d3ddZdd Zdd Zdd Zdd Zd4dd Zd5d"d#Zd6d$d%Zd7d&d'Zd8d(d)Zd9d*d+Zd:d-d.Zd;d/d0Zd1d2 ZdS )<r   a  
  Class representing a continuous chain of atoms, as defined by the combination
  of chain ID field and TER records (or the chain index in mmCIF format).  Note
  that this does not necessarily correspond to a covalently linked entity, as
  it may be used to group various heteroatoms (including water), but
  chemically distinct protein or nucleic acid chains will typically be
  grouped into exactly one chain object apiece.
  c             c   s.   x(|   D ]}x| D ]
}|V  qW q
W d S )N)r   r}   )r9   r4   r   r   r   r   r}   C  s    z_.atom_groupsc             C   s   |   dkst|  d S )Nr   r   )r   r   r   )r9   r   r   r   r   H  s    z_.only_residue_groupc             C   s    |   }t|dkst|d S )Nr   r   )r   rV   r   )r9   r   r   r   r   r   L  s    z_.only_conformerc             C   s   |    S )N)r   r   )r9   r   r   r   r   Q  s    z_.only_atom_groupc             C   s   |    S )N)r   r   )r9   r   r   r   r   T  s    z_.only_residuec             C   s   |    S )N)r   r   )r9   r   r   r   r   W  s    z_.only_atomc             C   s   |    S )N)r   r  )r9   r   r   r   r  Z  s    z
_.residuesNTc                s   g |   t}dg|   fdd}x| j|dD ]t\}}d}d }xDt||D ]6}	|	 }
t|	  }|d kr|}qX||krXd}qXW |r||| q<|r<||| q<W x(t|D ]}	 |	 rq||	|	d  qW jdd d	 S )
NFc                sJ  g }i }xt | |D ]}d |< | }x|  D ]}|j}|dkrx| D ]0}|jdk rbqR|jdkrR|jdk rR||j qRW q6g }	x(| D ]}|jdk rq|	|j qW t|	dkr6||g 	|	 q6W qW t
| }t|dkr(x|D ]}	|	  qW |jtdd | x|D ]}
|
gg q.W d S )NTr;   r   r   )r  )r   r}   r   r/   r+  r   r=   rV   r  r  r   valuesr  r  r  )i_begini_endZisolated_var_occgroupsi_rgr4   r   r   r$   groupr:  )doner   rw   r   r   process_rangec  s8    
 
 
 

z0_.occupancy_groups_simple.<locals>.process_range)r  Tr   c             S   s   | d d S )Nr   r   )r-  r   r   r   <lambda>      z+_.occupancy_groups_simple.<locals>.<lambda>)r  )r   rV   Zfind_pure_altloc_rangesr   r   r  )r9   r  r  Zn_rgr[  rU  rV  Zdo_this_stepncrX  r4   Zn_confr   )rZ  r   rw   r   r  ]  s4    
  z_.occupancy_groups_simplec       	      C   s   ddl m} g }t }xp|  D ]d}| d }||jdd | D d}|j}|| |dkrjd}n
t	|d}||  d7  < q W ||fS )	z
    Extract the residue names and counts of each residue type (protein,
    nucleic acid, etc) within the chain.

    :returns: a tuple containing a list of residue names, and a dictionary of
      residue type frequencies.
    r   )(residue_name_plus_atom_names_interpreterc             S   s   g | ]
}|j qS r   )rN   )rO   r$   r   r   r   rQ     s    z3_.get_residue_names_and_classes.<locals>.<listcomp>)residue_name
atom_namesN)rN   r   )
r   r_  r   r   r}   rx   r/   work_residue_namer=   r   )	r9   r_  rn_seqresidue_classesr"   r#   rnpanirnrp   r   r   r   get_residue_names_and_classes  s    

z_.get_residue_names_and_classesc             C   s4   t jj }t jj }|| ||   |S )N)r   r   r   r   r    r   rK  r   )r9   Znew_hr  r   r   r   as_new_hierarchy  s
    
z_.as_new_hierarchyc             C   s:   g }x0|   D ]$}x| D ]}||j P qW qW |S )N)r   r}   r=   rx   )r9   r  r4   r#   r   r   r   r    s    
z_.as_list_of_residue_namesc             C   s<   i }x2|   D ]&}x | D ]}|j|| < P qW qW |S )N)r   r}   rx   rA  )r9   r  r4   r#   r   r   r   r    s    
z(_.as_dict_of_resseq_as_int_residue_namesr  r  Fc          	   C   s2  |rd}d}t |tr"t|dks&tt |tr<t|dks@tddddddddd}|  \}}|d	 |d
  }|d |d  }	g }
||	krt}x|D ]2}|tkr|
t|| q|
||| qW nJ|	dkrx>|D ]6}||kr|t	krt	|d}|
||d qW |r*d
|
S |
S dS )a  
    Naively extract single-character protein or nucleic acid sequence, without
    accounting for residue numbering.

    :param substitute_unknown: character to use for unrecognized 3-letter codes
    :param substitute_unknown_na: character to use for unrecognized na codes
    :param ignore_all_unknown: set substitute_unknown and substitute_unknown_na to ''
    :param as_string: return string (default is to return list)
    r;   r   r  r  GUT)r  r  ri  rj  DADCDGDTrE   rF   rG   rH   r   r  N)r   rW   rV   r   rg  r   aa_3_as_1_modr=   r  na_3_as_1_modrZ   )r9   r  r  r  r  common_rna_dna_codesrc  rd  n_aan_nar  	aa_3_as_1rf  r   r   r   r    sB    





z_.as_sequenceO   c             C   s   | j |||d}t|}|dkr$dS dg}|d| j  d|g}	d}
x8t||
| }||
krdP |	d||
|  |}
qLW |rd|	S |	S dS )	a   Format this chain as Fasta
    :param max_line_length: length of lines in formatted output
    :param substitute_unknown: character to use for unrecognized 3-letter codes
    :param substitute_unknown_na: character to use for unrecognized na codes
    :param ignore_all_unknown: set substitute_unknown and substitute_unknown_na to ''
    :param as_string: return string (default is to return list of lines)
    )r  r  r  r   N>zchain "%2s"r  r;   r  )r  rV   r=   r2   rZ   r]  )r9   max_line_lengthr  r  r  r  r  r   commentZ	seq_linesr:  r;  r   r   r   r    s(      
z_.format_fastac             C   s*   t |}ddg}|r"|dddg7 }||kS )ak  
    Helper function for checking if a residue is an amino acid or
    nucleic acid

    Parameters
    ----------
      residue_name: str
        The residue name
      include_modified: bool
        If set, include modified amino and nucleic acids

    Returns
    -------
      bool
        True if the residue is an amino or nucleic acid, false otherwise
    rE   rG   rM   rF   rH   )r   )r9   r`  Zinclude_modifiedr  Zacceptable_classesr   r   r   _residue_is_aa_or_na	  s
    z_._residue_is_aa_or_nac             C   s   |   }g }d}	d}
d}xt|  D ]\}}|r>|jdkr>q&|rV| | d sVq&| }|r||	d krx0t||	 d D ]}|	dkr|sP || qW |}	|||  q&W d	|S )z
    Extract protein or nucleic acid sequence, taking residue numbering into
    account so that apparent gaps will be filled with substitute characters.
    r   r  r   r;   )
r  r5  r   r  rz  r   rA  r   r=   rZ   )r9   missing_charskip_insertionspadr  pad_at_startignore_hetatmr  
padded_seqlast_resseq
last_icoder:  r"   r  r`  r   r   r   as_padded_sequence5	  s&     z_.as_padded_sequencec             C   s   g }d}d}xt |  D ]\}}	|r2|	jdkr2q|rJ| |	 d sJq|	 }
|r|
|d krx0t|
| d D ]}|dkr|sP |d  qtW |
}||	  qW |S )Nr   r  r   )	r5  r   r  rz  r   rA  r   r=   r3   )r9   r|  r}  r~  r  residsr  r  r:  r"   r  r`  r   r   r   get_residue_idsO	  s"     z_.get_residue_idsc             C   s   g }d}d}xt |  D ]\}}	|r2|	jdkr2q|rJ| |	 d sJq|	 }
|r|
|d krx0t|
| d D ]}|dkr|sP |d  qtW |
}||	 d  qW |S )Nr   r  r   )r5  r   r  rz  r   rA  r   r=   )r9   r|  r}  r~  r  re   r  r  r:  r"   r  r`  r   r   r   get_residue_names_paddedb	  s"     z_.get_residue_names_padded皙?c             C   s   |   \}}|d |d  }|d |d  }|rNx|ddkrL|d q2W t|dkr^dS ||krz|t| |krzdS |d	gt| krdS dS )
a  
    Determine whether the chain represents an amino acid polymer, based on the
    frequency of residue names.
    Very slow due to usage of residue_name_plus_atom_names_interpreter in
    get_residue_names_and_classes (majority of the processing is unnecessary)
    rE   rF   rG   rH   rM  r   FTZUNK)rg  ry   removerV   )r9   r7  ignore_waterrc  rd  rs  rt  r   r   r   r  w	  s    z_.is_proteinc             C   s~   |   \}}|d |d  }|d |d  }|rNx|ddkrL|d q2W t|dkr^dS ||krz|t| |krzdS dS )	a
  
    Determine whether the chain represents a nucleic acid polymer, based on the
    frequency of base names.
    Very slow due to usage of residue_name_plus_atom_names_interpreter in
    get_residue_names_and_classes (majority of the processing is unnecessary)
    rE   rF   rG   rH   rM  r   FT)rg  ry   r  rV   )r9   r7  r  rc  rd  rs  rt  r   r   r   r  	  s    z_.is_nac             C   s   |    }|dS )a)  
    Determine if chain consists only from CA atoms.
    Upgrade options:
      - implement threshold for cases where several residues are present in
        full;
      - figure out how to deal with HETATM records of the same chain.
      - Ignore possible incorrect alignment of atom names.
    z CA )r/   extract_namer  )r9   ra  r   r   r   rR  	  s    	z_.is_ca_only)NT)r  r  NF)rv  r  r  NF)T)r  FTr  TF)FTTF)FTTF)r  T)r  T)r   r   r   rS  r}   r   r   r   r   r   r  r  rg  rh  r  r  r  r  rz  r  r  r  r  r  rR  r   r   r   r   r   7  sD   
 
8   
1    

  
 
 


c               @   s$   e Zd Zdd Zdd Zdd ZdS )r   c             C   s   |   dkst|  d S )Nr   r   )r   r   r}   )r9   r   r   r   r   	  s    z_.only_atom_groupc             C   s   |    S )N)r   r   )r9   r   r   r   r   	  s    z_.only_atomc             C   s,   d}|   }|d k	r|j}d|| j| jf S )Nr;   z	%2s%4s%1s)r1   r2   r  r  )r9   r  r!   r   r   r   rC  	  s
    z_.id_strN)r   r   r   r   r   rC  r   r   r   r   r   	  s   c               @   s(   e Zd Zdd Zd	ddZd
ddZdS )r   c             C   s   |   dkst|  d S )Nr   r   )r   r   r/   )r9   r   r   r   r   	  s    z_.only_atomNc             C   sJ   d}d}|   }|d k	r6| }|  }|d k	r6|j}d| j| j||f S )Nr;   z%1s%3s%2s%5s)r1   r3   r2   r   rx   )r9   Zsuppress_segidr  r3   r4   r!   r   r   r   rC  	  s    z_.id_strFc             C   sT   |    }t|dkst| }|j|jkrN|rNtd|  |j|jf |j	S )zu
    Calculate the mean occupancy for atoms in this group, with option of
    raising ValueError if they differ.
    r   z?Non-uniform occupancies for atom group %s (range: %.2f - %.2f).)
r/   r   rV   r   min_max_meanr]  rb   r   rC  r   )r9   Zraise_error_if_non_uniformZatom_occupanciesr  r   r   r   r%  	  s    
z_.occupancy)N)F)r   r   r   r   rC  r%  r   r   r   r   r   	  s   
c               @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )r   a  
  The basic unit of the PDB hierarchy (or the PDB input object in general),
  representing a single point scatterer corresponding to an ATOM or HETATM
  record in PDB format (plus associated ANISOU or related records if present).
  Note that this does not directly store attributes of higher-level entities
  whose identity is also recorded in ATOM records, such as the chain ID or
  residue name.  These may be retrieved either by walking up the hierarchy
  starting with atom.parent(), or by calling atom.fetch_labels().
  c             C   s,   |   }|dk	r(|  }|dk	r(|  S dS )zr
    Convenience method for fetching the chain object associated with this
    atom (or None of not defined).
    N)r1   )r9   r   r4   r   r   r   r!   	  s    z_.chainc       	      C   sp   | j dd}|j dd}|j}|j}t|dkrHt|dkrH||krHdS dd }||}||}| | kS )z
    Indicate whether two atoms are part of the same conformer and thus are
    capable of interacting directly, as defined by the parent atom_group and
    model object(s).
    F)optionalr   c             S   s   | j ddj ddj ddS )NF)r  )r1   )r   r   r   r   p3
  s    z%_.is_in_same_conformer_as.<locals>.p3)r1   r   rV   r   )	r9   rL   Zag_iZag_jZaltloc_iZaltloc_jr  Zmodel_iZmodel_jr   r   r   is_in_same_conformer_as	  s    z_.is_in_same_conformer_asc             C   s   ddl m} ||dd\}}| j | j  }t|dkrt|dkrP|dkrPdS ||dd\}}||krt||krtdS d|  | _d| | _dS )	Nr   )get_element_and_charge_symbolsF)scattering_typeexactr;   z%2sz%-2sT)Zcctbx.eltbx.xray_scatteringr  r-  r>   r.  rV   r  )r9   r  ZgecZsct_eZsct_cZpdb_ecZpdb_eZpdb_cr   r   r   8set_element_and_charge_from_scattering_type_if_necessary
  s    
z:_.set_element_and_charge_from_scattering_type_if_necessaryc             C   sN   |   }|dkrdS |dr$d}nd}|d}|dkrF|t| S dS dS )zq
    Extract the atomic charge from the (string) charge field.

    :returns: Python int, defaulting to zero
    Nr   -r.   r   z -+r;   )r  endswithr>   r^  )r9   r.  signr   r   r   charge_as_int 
  s    

z_.charge_as_intN)r   r   r   rS  r!   r  r  r  r   r   r   r   r   	  s
   
c               @   sx   e Zd ZdZdd Zdd Zdd Zdd	d
ZdddZd ddZ	d!ddZ
d"ddZd#ddZd$ddZd%ddZdS )&r   z
  Alternate view into a chain object, grouping sequential residues with
  equivalent altlocs.  As a general rule it is preferrable to iterate over
  chain.residue_groups() instead.
  c             C   s    |   }t|dkst|d S )Nr   r   )r  rV   r   )r9   r  r   r   r   r   ;
  s    z_.only_residuec             C   s   |    S )N)r   r   )r9   r   r   r   r   @
  s    z_.only_atomc             C   sd   g }t  }xP|  D ]D}| }|j}|| |d kr>d }n
t|d}||  d7  < qW ||fS )N)rN   r   )r   r  r_  rb  r=   r   )r9   rc  rd  residuere  rf  rp   r   r   r   rg  C
  s    

z_.get_residue_names_and_classes皙?c             C   s\   |   \}}|d |d  }|d |d  }t||dd }||krX|| |krXdS dS )	NrE   rF   rG   rH   rI   r   TF)rg  rV   r  )r9   r7  rc  rd  rs  rt  	non_waterr   r   r   r  T
  s    z_.is_proteinc             C   s\   |   \}}|d |d  }|d |d  }t||dd }||krX|| |krXdS dS )	NrE   rF   rG   rH   rI   r   TF)rg  rV   r  )r9   r7  rc  rd  rs  rt  r  r   r   r   r  _
  s    z_.is_nar  c       
   	   C   s   t |trt|dkstddddddddd}|  \}}|d |d	  }|d
 |d  }g }||krt}x|D ]2}	|	tkr|t|	| qr|||	| qrW nD|dkrx:|D ]2}	|	|kr|	t	krt	|	d}	|||	d qW |S )Nr   r  r  ri  rj  rk  )r  r  ri  rj  rl  rm  rn  ro  rE   rF   rG   rH   r   r  )
r   rW   rV   r   rg  r   rp  r=   r  rq  )
r9   r  rr  rc  rd  rs  rt  r  ru  rf  r   r   r   r  j
  s4    


z_.as_sequencerv  c       	      C   s   |   }t|}|dkrd S dg}|  }|d k	rB|d|j  |d| j  d|g}d}x8t||| }||kr|P |d|||  |}qdW |S )Nr   rw  zchain "%2s"zconformer "%s"r  r;   )r  rV   r1   r=   r2   r   rZ   r]  )	r9   rx  r  r   ry  r  rw   r:  r;  r   r   r   r  
  s$      z_.format_fastaFTc             C   s   |   }g }d}d}	d}
xt|  D ]t\}
}|r>|jdkr>q&| }|r||d krx0t|| d D ]}|dkrz|szP || qhW |}|||
  q&W d|S )Nr   r  r   r;   )r  r5  r  r  rA  r   r=   rZ   )r9   r{  r|  r}  r  r~  r  r  r  r  r:  r  r  r`  r   r   r   r  
  s"     z_.as_padded_sequencec             C   s   g }d}xt |  D ]\}}	|	 }
|rh|
|d krhx0t|
| d D ]}|dkrZ|sZP || qHW d}xF|	 D ]:}||j r|d d}P qv||j rv|d d}P qvW |s|d |
}qW d|S )	Nr   r   FrE  Tr  Lr;   )r5  r  rA  r   r=   r/   r  rZ   )r9   Z
helix_seleZ
sheet_seler{  r}  r~  Zss_seqr  r:  r  r  r`  rs  r$   r   r   r   as_sec_str_sequence
  s.     




z_.as_sec_str_sequencec             C   s   g }d}d}xt |  D ]t\}}|r2|jdkr2q| }	|r||	|d kr|x0t|	| d D ]}
|dkrn|snP |d  q\W |	}||  qW |S )Nr   r  r   )r5  r  r  rA  r   r=   r3   )r9   r|  r}  r~  r  r  r  r:  r  r  r`  r   r   r   r  
  s     z_.get_residue_idsc             C   s   g }d}d}xt |  D ]r\}}|r2|jdkr2q| }	|r||	|d kr|x0t|	| d D ]}
|dkrn|snP |d  q\W |	}||j qW |S )Nr   r  r   )r5  r  r  rA  r   r=   rx   )r9   r|  r}  r~  re   r  r  r:  r  r  r`  r   r   r   r  
  s     z_.get_residue_names_paddedN)r  )r  )r  )rv  )r  FTr  T)r  TT)FTT)FTT)r   r   r   rS  r   r   rg  r  r  r  r  r  r  r  r  r   r   r   r   r   3
  s   


!
 
 

c               @   s.   e Zd Zdd Zdd Zdd Zdd	d
ZdS )r   c       
      C   s   |   }|d kr|  }|d k	s$t| }|d k	s8t| }|d k	sLtt|j| jd}t| j| jd}t	|j
d}t|j
d}t  }|| || || || x|  D ]}	||	  qW |fS )N)r   rx   )r  r  )r2   )r   r1   r   r#   r   rx   r"   r  r  r!   r2   r    r   rK  Zappend_residue_groupZappend_atom_groupr/   r  r   )
r9   Zresult_rootZorig_conformerZ
orig_chainZ
orig_modelZresult_atom_groupZresult_residue_groupZresult_chainZresult_modelr$   r   r   r   __getinitargs__
  s,    



z_.__getinitargs__c             C   s   t |  d dS )Nr   )r   )r  r  )r9   r   r   r   standalone_copy  s    z_.standalone_copyc             C   s   |   dkst|  d S )Nr   r   )r   r   r/   )r9   r   r   r   r     s    z_.only_atomNFc             C   s,   ddl m} || jdd |  D ||dS )Nr   )r_  c             S   s   g | ]
}|j qS r   )rN   )rO   r$   r   r   r   rQ     s    z>_.residue_name_plus_atom_names_interpreter.<locals>.<listcomp>)r`  ra  #translate_cns_dna_rna_residue_namesreturn_mon_lib_dna_name)r   r_  rx   r/   )r9   r  r  r_  r   r   r   r_    s    z*_.residue_name_plus_atom_names_interpreter)NF)r   r   r   r  r  r   r_  r   r   r   r   r   
  s
    c               @   s(   e Zd ZdZdd Zdd Zdd ZdS )	r   z
  Stand-in for atom object, which explicitly records the attributes normally
  reserved for parent classes such as residue name, chain ID, etc.
  c             C   s&   i }xdD ]}t | |d ||< q
W |S )N)rQ  sigxyzr   sigoccr  sigbr  r   r  r  rN   r  r-  r.  r  r  r  r  r   rx   )r"  )r9   Zlabels_dictr*  r   r   r   r   (  s    
z_.__getstate__c             C   s$   ddl m} t|}|| f| d S )Nr   )make_atom_with_labels)r   r  dict)r9   r   r  r   r   r   r   1  s    z_.__setstate__c             C   s   | S )Nr   )r9   r   r   r   fetch_labels6  s    z_.fetch_labelsN)r   r   r   rS  r   r   r  r   r   r   r   r   !  s   	c               @   sF   e Zd ZdddZdd Zdd Zd	d
 Zdd Zdd Zdd Z	dS )input_hierarchy_pairNFc             C   s(   || _ |d kr| j jd|d}|| _d S )NT)Zset_atom_i_seqr   )r   r   r   )r9   r   r   r   r   r   r   r:   B  s
    
zinput_hierarchy_pair.__init__c             C   s   ddl m} |d S )Nr   )PicklingError)r   r  )r9   r  r   r   r   r  M  s    z$input_hierarchy_pair.__getinitargs__c             C   s&   | j  }|jddd}| j  S )z
    Return the permutation selection
    (:py:class:`scitbx.array_family.flex.size_t`) mapping the atoms as ordered
    by the hierarchy to their original positions in the PDB/mmCIF file.
    r   r   )r  r  )r   r/   r  r   extract_tmp_as_size_t)r9   r  r  r   r   r   #hierarchy_to_input_atom_permutationQ  s    
z8input_hierarchy_pair.hierarchy_to_input_atom_permutationc             C   s&   | j  }|jddd}| j  S )z
    Return the permutation selection
    (:py:class:`scitbx.array_family.flex.size_t`) mapping the atoms as ordered
    in the original PDB/mmCIF file to their positions in the hierarchy.
    r   r   )r  r  )r   r/   r  r   r  )r9   Zi_atomsr  r   r   r   #input_to_hierarchy_atom_permutation[  s    
z8input_hierarchy_pair.input_to_hierarchy_atom_permutationc             O   s    |   }| jj||}||S )z
    Wrapper for the equivalent method of the input object - extracts the
    :py:class:`cctbx.xray.structure` with scatterers in the same order as in
    the hierarchy.
    )r  r   r  r   )r9   argskwdspermZxrsr   r   r   r  e  s    z*input_hierarchy_pair.xray_structure_simplec             O   s   t dt | jS )z
    Returns a reference to the existing hierarchy.  For backwards compatibility
    only, and issues a :py:class:`warnings.DeprecationWarning`.
    z'Please access input.hierarchy directly.)r{   warnDeprecationWarningr   )r9   r  r  r   r   r   r   o  s    z(input_hierarchy_pair.construct_hierarchyc             O   s   | j j||S )N)r   r   )r9   r  r  r   r   r   r   x  s    z%input_hierarchy_pair.crystal_symmetry)NF)
r   r   r   r:   r  r  r  r  r   r   r   r   r   r   r  @  s    



	r  c                   s*   e Zd ZdZddedf fdd	Z  ZS )r   a  
  Class used for reading a PDB hierarchy from a file or string.

  Attributes
  ----------
  input : iotbx.pdb.pdb_input_from_any
  hierarchy : iotbx.pdb.hierarchy.root

  Examples
  --------
  >>> import iotbx.pdb.hierarchy
  >>> pdb_in = iotbx.pdb.hierarchy.input(pdb_string='''
  ... ATOM      1  N   ASP A  37      10.710  14.456   9.568  1.00 15.78           N
  ... ATOM      2  CA  ASP A  37       9.318  14.587   9.999  1.00 18.38           C
  ... ''')
  >>> print pdb_in.hierarchy.atoms_size()
  2
  "")
  NTc                s|   ||g ddkstddl}|dk	rB|tks2t|jj|d}n"|tkrNd}|jj|t|d}tt| j	||d dS )z
    Initializes an input from a file or string.

    Parameters
    ----------
    file_name : str, optional
    pdb_string : str, optional
    source_info : str, optional
        Indicates where this PDB came from (i.e. "string")
    Nr   r   )rv  re  )r   r   )r   r   )
ry   r   r   r   r   r   r   r   superr:   )r9   rv  r   r   r   r   r  )r   r   r   r:     s     zinput.__init__)r   r   r   rS  r   r:   __classcell__r   r   )r   r   r   {  s   r   c             C   s   | t krd} | S )NZ=123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz)r   )suffixesr   r   r   suffixes_for_chain_ids  s    r  c             C   s   t |d}t| t|kstxdt| |D ]V\}}xL| D ]@}x:| D ].}t|jdksjtt|j| j|7  _qJW q<W q*W d S )N)r  r   )r  rV   r   r   r   r   r2   )rH  r  r   suffixr    r!   r   r   r   append_chain_id_suffixes  s    
r  c             C   s8   |dk	rt | |d t }x| D ]}|j|d q W |S )z
  Combine two root objects.
  N)rH  r  )rL   )r  r   r  )rH  Zchain_id_suffixesrw   rtr   r   r   rL    s    
rL  c             C   s<   ddl }|jj }|jj }||   || |S )zs
  Given a chain object, create an entirely new hierarchy object contaning only
  this chain (using a new copy).
  r   N)iotbx.pdb.hierarchyr   r   r   r    rK  r   r   )r!   r   r   r    r   r   r   new_hierarchy_from_chain  s    
r  c       	      C   s   x|   D ]}x|  D ]}|j|jkrd}x|t| k r| | }d}xZ|t| k r| | }|j|jkr|| |||  || P |d7 }qPW |d7 }q.W qW q
W dS )a  
  Delete and replace the first chain in the original hierarchy corresponding
  to each model/ID combination in the partial hierarchy.  Note that this means
  that if waters and heteroatoms are given the same ID as a protein chain
  (separated by other chains or TER record(s)), but the partial hierarchy only
  contains a substitute protein chain, the heteroatom chain will be kept.
  r   r   N)r   r2   rV   r   r  Zinsert_chainr   )	Zoriginal_hierarchyZpartial_hierarchylogZoriginal_modelZpartial_modelr:  Zoriginal_chainr;  Zpartial_chainr   r   r   find_and_replace_chains  s     	

r  c             C   s.  t |  dkstg }x|  d  D ]}g }d }d }tj }xh| D ]\}| }| }	||kr||d kr|d k	r|	||f |	}|	}n|d kr|	}|	}|}qNW |d k	r|	||f g }
x:|D ]2\}}||kr|
	d|  q|
	d||f  qW dd
|
 d |j }|	| q,W |S )Nr   r   zresid %szresid %s through %szchain '%s' and ((z) or (z)))rV   r   r   r   rT   maxsizer   rA  r3   r=   rZ   r2   )r   Zchain_clausesr!   Zresid_rangesZstart_residZ
last_residr  r"   r  r3   Zresid_clausesr1r2Zseler   r   r   get_contiguous_ranges  s<    r  c          	   C   sB  ddl m} | d k	rdt }|j| d}| }W d Q R X |t| tj	j
| |d}| }|d k	spt| }t|dkrdS |d  }t|dkrdS d}	d}
d}x|D ]~}d}xt| d  D ]`}t|jdd}d	|ksd
|kr |	d7 }	| }||d kr|
d7 }
|}qd|kr|d7 }qW qW |	|
|fS )Nr   )
smart_open)rv  )r   r   )r   r   r   iT)Zconsider_ccp4_mon_lib_rna_dnaZ
amino_acidZrna_dnar   r   )libtbxr  r   r  for_readingreadr  r   r   r   r   r   r   r   rV   r   r   r  r   rx   rA  )Zpdb_filer  r  raw_recordsr|  r   pdb_inr   r   Zn_resZn_fragZn_h2or!   r:  resZresidue_typer  r   r   r   get_residue_and_fragment_count  s@    
r  Tc             C   s  |dkrt  }i }t| d}xB|  D ]6}|jdkr@|r@q,| }||krXtd|j||< q,W xt	| D ]t\}	}|jdkr|rqt| }||krt|| \}
}}|j\}}}t
||
 d || d  || d  }|||	< qtW |r| }| | |S |S dS )z
  Given two PDB hierarchies, calculate the shift of each atom (accounting for
  possible insertions/deletions) and (optionally) apply it to the B-factor for
  display in PyMOL, plotting in PHENIX GUI, etc.
  Ng      )rM  ZWATz.Duplicate atom ID - can't extract coordinates.r-   )r
   r   r  r   r@  rx   rC  r   rQ  r5  mathsqrtr  r/   r&  )Zhierarchy_1Zhierarchy_2Zexclude_watersZreturn_hierarchyr  Zatom_lookupZdeltasr$   atom_idr  x1Zy1z1x2Zy2z2r  Zhierarchy_newr   r   r   
sites_diff:  s2    
 *r  c                sd  ddl m} |  }t }t }t| j}|dkrBtdddddg dddd	g} 	  |	   fd
d}||  }	|| }
|	 ks|
 krd}x |  D ]}|d|
  7 }qW td| x^t|  D ]N\}}|j|krqx8t| D ](\}}|j|jkr|| || qW qW |   |}| |}| | ksbt|j||d}| }|jj| |jj }|| i }i }t|   }x@|  D ]4}|j kr| | |j||j< |j||j< qW xd|D ]\}|j kr|j|kr"||j |_n||_|j|krB||j |_nd|_| | qW |j| _| S )z
  Substitute sidechain atoms from one residue for another, using
  least-squares superposition to align the backbone atoms.
  Limited functionality:
    1) Amino-acids only, 2) side chain atoms only.
  r   )	superposerE   z*Only common amino-acid residues supported.z CA z C  z N  z O  z CB c                s8   g }x&|   D ]}|j kr||j qW |  |S )N)r/   rN   r=   r  )current_groupZaa_backbone_atomsrw   r$   )aa_backbone_atoms_1r   r   get_bb_atomss  s    
z+substitute_atom_group.<locals>.get_bb_atomsr;   z
%szMain chain must be complete. %s)Zreference_sitesZother_sitesg      ?)r   r  r   r/   r   r  r   rx   r   r  r  r5  rN   r=   rI  r   r0   r   Zleast_squares_fitr9  rJ  rP  r"  r   	extract_br  r  r   r  )r  Z	new_groupr  Z	new_atomsZselection_fixedZselection_moving	res_classZaa_backbone_atoms_2r  Zaa_backbone_atoms_currentZaa_backbone_atoms_newoutlr$   r  Zj_seqZ
other_atomZsites_fixedZsites_movingZlsq_fitZ	sites_newZ
atom_b_isoZatom_occmean_br   )r  r   substitute_atom_group^  sn    	



 



  r  c       	         s  dd }t | dk r| S t| }t|ddkr2| S  fdd| D }tt| dkrZ|S td fd	d| D }|9 }td
d |D }t|| }|dd |D }x.t|D ]"}t|||   ||| < qW x4t|t |D ]"}t|||   ||| < qW |S )a  Round values to number of digits after the dot maintaining the sum of 1.
  Currently used for rounding occupancies, so when the total sum is != 1,
  no rounding occurs.
  Taken from: https://explainextended.com/2009/09/21/rounding-numbers-preserving-their-sum/

  Args:
      values (list): list of occupancies
      digits (integer): how many digits after the dot should be left

  Returns:
      list: rounded occupancies
  c                s   t tt  fddddS )Nc                s    |  S )Nr   )k)r   r   r   r\    r]  z4group_rounding.<locals>.sort_index.<locals>.<lambda>T)r  r  )r  r   rV   )r   r   )r   r   
sort_index  s    z"group_rounding.<locals>.sort_indexr-      r   c                s   g | ]}t | qS r   )round)rO   r:  )digitsr   r   rQ     s    z"group_rounding.<locals>.<listcomp>r<   c                s   g | ]}|  qS r   r   )rO   r:  )p_10r   r   rQ     s    c             S   s   g | ]}t |qS r   )r  floor)rO   r:  r   r   r   rQ     s    c             S   s   g | ]}|d  qS )r   r   )rO   r:  r   r   r   rQ     s    )	rV   r  r  powr^  r   r  ceilr  )	rT  r  r  Z
total_occsrw   Zsum_all_floorZ	n_to_ceilZsorted_indexr:  r   )r  r  r   r    s(    
""r  )NN)TTN)S
__future__r   r   r   boost_adaptbx.boost.pythonboostpythonbp
import_extextr   libtbx.str_utilsr   libtbx.utilsr   r	   r
   r  r   r   r   r   r   r   r   iotbx.pdb.amino_acid_codesr   Ziotbx.pdb.modified_aa_namesr   rp  Z iotbx.pdb.modified_rna_dna_namesrq  Ziotbx.pdb.utilsr   r   Ziotbx.cif.modelr   r>  r   cctbx.array_familyr   r   	six.movesr   rr   r   r   r   r  r{   r  rT   objectr   r   r5   r6   r   injectr   inject_intor   r    r!   r"   r#   r$   	conformerr  r  r  r   r  r  rL  r  rU   r  r  r  r  r  r  r   r   r   r   <module>   s   
  *              <  v"R B-;3	#
$  
 F