B
    bkF                 @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZmZmZ ddlmZ G d	d
 d
eZdddZdd Zdd Zdd Zdd Zdd Zdd ZedkrddlmZ e  dS )zCode for dealing with Codon Alignment.

CodonAlignment class is inherited from MultipleSeqAlignment class. This is
the core class to deal with codon alignment in biopython.
    N)MultipleSeqAlignment)	SeqRecord)
CodonTable)BiopythonWarning)_get_codon_listCodonSeq	cal_dn_ds)	chisqprobc               @   sb   e Zd ZdZdddZdd Zdd	 Zd
d Zdd Zdd Z	dddZ
dddZedd ZdS )CodonAlignmenta  Codon Alignment class that inherits from MultipleSeqAlignment.

    >>> from Bio.SeqRecord import SeqRecord
    >>> a = SeqRecord(CodonSeq("AAAACGTCG"), id="Alpha")
    >>> b = SeqRecord(CodonSeq("AAA---TCG"), id="Beta")
    >>> c = SeqRecord(CodonSeq("AAAAGGTGG"), id="Gamma")
    >>> print(CodonAlignment([a, b, c]))
    CodonAlignment with 3 rows and 9 columns (3 codons)
    AAAACGTCG Alpha
    AAA---TCG Beta
    AAAAGGTGG Gamma

     Nc             C   sJ   t | | x | D ]}t|jtstdqW |  d dkrFtddS )zInitialize the class.zACodonSeq objects are expected in each SeqRecord in CodonAlignment   r   zKAlignment length is not a multiple of three (i.e. a whole number of codons)N)r   __init__
isinstanceseqr   	TypeErrorget_alignment_length
ValueError)selfZrecordsnamerec r   <lib/python3.7/site-packages/Bio/codonalign/codonalignment.pyr   '   s    

zCodonAlignment.__init__c                s   t  j}d|    f g}|dkrF| fdd jD  nF| fdd jdd D  |d | j jd	 dd
 d|S )aD  Return a multi-line string summary of the alignment.

        This output is indicated to be readable, but large alignment
        is shown truncated. A maximum of 20 rows (sequences) and
        60 columns (20 codons) are shown, with the record identifiers.
        This should fit nicely on a single screen. e.g.

        z6CodonAlignment with %i rows and %i columns (%i codons)<   c                s   g | ]} j |d dqS )r   )length)	_str_line).0r   )r   r   r   
<listcomp>H   s    z*CodonAlignment.__str__.<locals>.<listcomp>c                s   g | ]} j |d dqS )r   )r   )r   )r   r   )r   r   r   r   J   s    N   z...)r   
)len_recordsr   get_aln_lengthextendappendr   join)r   Zrowslinesr   )r   r   __str__8   s    	
"
zCodonAlignment.__str__c                s   t |tr| j| S t |tr,t| j| S t|dkr@td|\} t |tr`| j|   S t  trd fdd| j| D S t fdd| j| D S dS )z3Return a CodonAlignment object for single indexing.   zInvalid index type.r   c             3   s   | ]}t |  V  qd S )N)str)r   r   )	col_indexr   r   	<genexpr>\   s    z-CodonAlignment.__getitem__.<locals>.<genexpr>c             3   s   | ]}|  V  qd S )Nr   )r   r   )r*   r   r   r+   _   s    N)	r   intr!   slicer
   r    r   r%   r   )r   indexZ	row_indexr   )r*   r   __getitem__O   s    




zCodonAlignment.__getitem__c             C   s   t |trJt| t|kr"tdtdt dd t| |D }t|S t |trxt| t|krltd| 	 | S t
dt| ddS )ah  Combine two codonalignments with the same number of rows by adding them.

        The method also allows to combine a CodonAlignment object with a
        MultipleSeqAlignment object. The following rules apply:

            * CodonAlignment + CodonAlignment -> CodonAlignment
            * CodonAlignment + MultipleSeqAlignment -> MultipleSeqAlignment
        zTWhen adding two alignments they must have the same length (i.e. same number or rows)zsPlease make sure the two CodonAlignment objects are sharing the same codon table. This is not checked by Biopython.c             s   s(   | ] \}}t t|j|j d V  qdS ))r   N)r   r   r   )r   leftrightr   r   r   r+   v   s   z)CodonAlignment.__add__.<locals>.<genexpr>z^Only CodonAlignment or MultipleSeqAlignment object can be added with a CodonAlignment object. z
 detected.N)r   r
   r    r   warningswarnr   zipr   toMultipleSeqAlignmentr   object)r   otherZmergedr   r   r   __add__b   s"    	

zCodonAlignment.__add__c             C   s   |   d S )zGet alignment length.r   )r   )r   r   r   r   r"      s    zCodonAlignment.get_aln_lengthc             C   s   dd | j D }t|S )zConvert the CodonAlignment to a MultipleSeqAlignment.

        Return a MultipleSeqAlignment containing all the
        SeqRecord in the CodonAlignment using Seq to store
        sequences
        c             S   s    g | ]}t |j |jd qS ))id)r   r   ZtoSeqr9   )r   r   r   r   r   r      s    z9CodonAlignment.toMultipleSeqAlignment.<locals>.<listcomp>)r!   r   )r   Z
alignmentsr   r   r   r5      s    z%CodonAlignment.toMultipleSeqAlignmentNG86c             C   s   ddl m} |dkrtjd }dd | jD }t| j}g }g }xt|D ]}|g  |g  xvt|d D ]f}	||	krt| j| | j|	 ||d\}
}|| |
 || | qp|| d || d qpW qJW |||d	}|||d	}||fS )
zAvailable methods include NG86, LWL85, YN00 and ML.

        Argument:
         - method       - Available methods include NG86, LWL85, YN00 and ML.
         - codon_table  - Codon table to use for forward translation.

        r   )DistanceMatrixN   c             S   s   g | ]
}|j qS r   )r9   )r   ir   r   r   r      s    z3CodonAlignment.get_dn_ds_matrix.<locals>.<listcomp>)methodcodon_tableg        )Zmatrix)	Bio.Phylo.TreeConstructionr;   r   generic_by_idr!   r    ranger$   r   )r   r>   r?   ZDMnamessizeZ	dn_matrixZ	ds_matrixr=   jZdnZdsdn_dmds_dmr   r   r   get_dn_ds_matrix   s0    



zCodonAlignment.get_dn_ds_matrixUPGMAc             C   s   ddl m} |dkrtjd }| j||d\}}| }| }|dkrZ||}	||}
n.|dkrx||}	||}
ntd| d	|	|
fS )
zConstruct dn tree and ds tree.

        Argument:
         - dn_ds_method - Available methods include NG86, LWL85, YN00 and ML.
         - tree_method  - Available methods include UPGMA and NJ.

        r   )DistanceTreeConstructorNr<   )r>   r?   rI   ZNJzUnknown tree method (z"). Only NJ and UPGMA are accepted.)r@   rJ   r   rA   rH   ZupgmaZnjRuntimeError)r   Zdn_ds_methodZtree_methodr?   rJ   rF   rG   Zdn_constructorZds_constructorZdn_treeZds_treer   r   r   get_dn_ds_tree   s     



zCodonAlignment.get_dn_ds_treec             C   s   dd |j D }| |S )zConvert a MultipleSeqAlignment to CodonAlignment.

        Function to convert a MultipleSeqAlignment to CodonAlignment.
        It is the user's responsibility to ensure all the requirement
        needed by CodonAlignment is met.
        c             S   s$   g | ]}t tt|j|jd qS ))r9   )r   r   r)   r   r9   )r   r=   r   r   r   r      s    z+CodonAlignment.from_msa.<locals>.<listcomp>)r!   )clsZalignr   r   r   r   from_msa   s    zCodonAlignment.from_msa)r   N)r:   N)r:   rI   N)__name__
__module____qualname____doc__r   r'   r/   r8   r"   r5   rH   rL   classmethodrN   r   r   r   r   r
      s   
%

%
r
   皙?c                s  ddl }|dkrtjd }tdd | D s4tddd | D }tt|dkrZtd	|d d
 }||j	}x|j
D ]}d||< qzW g }x8| D ]0}	|g  x |	D ] |d t j qW qW g }
xHt|D ]< g }x(|D ] } fdd|D }|| qW |
| qW d\}}}}t|d\}}x|
D ]ʉ  d j dd  }d|ks6t|dkrlq6tdd  D }|rt||}t||}t||}t||| }||7 }||7 }n<t||}t||}t||}t||| }||7 }||7 }q6W t||||gS )a  McDonald-Kreitman test for neutrality.

    Implement the McDonald-Kreitman test for neutrality (PMID: 1904993)
    This method counts changes rather than sites
    (http://mkt.uab.es/mkt/help_mkt.asp).

    Arguments:
     - codon_alns  - list of CodonAlignment to compare (each
       CodonAlignment object corresponds to gene sampled from a species)

    Return the p-value of test result.
    r   Nr<   c             s   s   | ]}t |tV  qd S )N)r   r
   )r   r=   r   r   r   r+      s    zmktest.<locals>.<genexpr>z#mktest accepts CodonAlignment list.c             S   s   g | ]}|  qS r   )r   )r   r=   r   r   r   r      s    zmktest.<locals>.<listcomp>z;CodonAlignment object for mktest should be of equal length.r   stopr   c                s   h | ]}|  qS r   r   )r   k)r=   r   r   	<setcomp>  s    zmktest.<locals>.<setcomp>)r   r   r   r   )r?   -c             s   s   | ]}t |d kV  qdS )r<   N)r    )r   rV   r   r   r   r+     s    )copyr   rA   allr   r    setrK   deepcopyforward_tablestop_codonsr$   r   r   rB   _get_codon2codon_matrixunion_get_subgraph_count_replacement_G_test)Z
codon_alnsr?   ZalpharY   Zcodon_aln_lenZ	codon_num
codon_dictrU   Z	codon_lstZ	codon_aln	codon_setZuniq_codonsrE   Z
uniq_codonZsyn_fixZ
nonsyn_fixZsyn_polyZnonsyn_polyGnonsyn_GZ	all_codonZ
fix_or_notZnonsyn_subgraphsubgraphZthis_nonZthis_synr   )r=   r   mktest   s\    












ri   c             C   s  d}dd t | j | j D }| j}x| jD ]}d||< q0W t|}i }i }i }i }	xt|D ]\}
}i ||< i |	|< xt|D ]\}}xv|D ]n}|d| | ||d d  }|| || krd|	| |< d|| |< q||krd|	| |< d|| |< qW qW qbW xz|D ]r}i ||< i ||< xZ|D ]R}||krTd|| |< d|| |< n(t|	|||| |< t||||| |< q,W qW ||fS )	zGet codon codon substitution matrix (PRIVATE).

    Elements in the matrix are number of synonymous and nonsynonymous
    substitutions required for the substitution.
    )ATCrf   c             S   s   g | ]}d |kr|qS )Ur   )r   r=   r   r   r   r   1  s   z+_get_codon2codon_matrix.<locals>.<listcomp>rU   r   r<   Ng?)listr]   keysr^   r    	enumerate	_dijkstra)r?   Z
base_tuplecodonsrd   rU   Znumrf   rg   graphZgraph_nonsynr=   ZcodonpbrE   Z	tmp_codonZcodon1Zcodon2r   r   r   r_   )  sB    
 


 r_   c             C   sz  i }i }x |   D ]}d||< d||< qW d||< t|   }xt|dkrd}d}x:|D ]2}|dkrt|| }|}qZ|| |k rZ|| }|}qZW || xD| |  D ]4\}	}
||	 || |
 kr|| |
 ||	< |||	< qW ||kr@P q@W g }|}d}x6||ks0||dkr,|d| || }qP qW |d| x6tt|d D ]"}|| ||  ||d   7 }qPW |S )a  Dijkstra's algorithm Python implementation (PRIVATE).

    Algorithm adapted from
    http://thomas.pelletier.im/2010/02/dijkstras-algorithm-python-implementation/.
    However, an obvious bug in::

        if D[child_node] >(<) D[node] + child_value:

    is fixed.
    This function will return the distance between start and end.

    Arguments:
     - graph: Dictionary of dictionary (keys are vertices).
     - start: Start vertex.
     - end: End vertex.

    Output:
       List of vertices from the beginning to the end.

    d   r   r   Nr<   )ro   rn   r    removeitemscountinsertrB   )rs   startendDPZnodeZunseen_nodesZshortestZ	temp_nodeZ
child_nodeZchild_valuepathZdistancer=   r   r   r   rq   Y  sH    


"rq   c             C   s\   ddl m} t| dkrdS t| dkrHt| }|||d  |d  S t| }t|S dS )z9Count replacement needed for a given codon_set (PRIVATE).r   )floorr<   )r   r   r(   N)mathr   r    rn   _prim)re   rf   r   rr   r   r   r   rb     s    rb   c             C   s  ddl m} ddlm} ddlm}m}m} g }g }xp|  D ]d}|	| xT| | D ]H}	||	| | |	 f|krV|	|| | |	 f|krV|	||	| | |	 f qVW q>W |t
}
x:|D ]2\}}}|
| 	|||f |
| 	|||f qW g }t|d }|
|d  dd }|| xj|r||\}}}||kr|| |	|||f x*|
| D ]}|d |krZ||| qZW qW d}x|D ]}|||d 7 }qW |S )zPrim's algorithm to find minimum spanning tree (PRIVATE).

    Code is adapted from
    http://programmingpraxis.com/2010/04/09/minimum-spanning-tree-prims-algorithm/
    r   )r   )defaultdict)heapifyheappopheappushNr(   )r   r   collectionsr   heapqr   r   r   ro   r$   rn   r[   add)rf   r   r   r   r   r   ZnodesZedgesr=   rE   ZconnZn1Zn2cZmstZusedZusable_edgesZcoster   rt   r   r   r   r     s<    
, 


r   c             C   sH   i }x>| D ]6}i ||< x(| D ] }||kr|| | || |< qW q
W |S )z<Get the subgraph that contains all codons in list (PRIVATE).r   )rr   rf   rh   r=   rE   r   r   r   ra     s    

ra   c             C   s   ddl m} d}t| }| d | d  }| d | d  }t| dd }t| dd }|| | || | || | || | g}x*t| |D ]\}	}
||	||	|
  7 }qW |d9 }t|dS )zG test for 2x2 contingency table (PRIVATE).

    Arguments:
     - site_counts - [syn_fix, nonsyn_fix, syn_poly, nonsyn_poly]

    >>> print("%0.6f" % _G_test([17, 7, 42, 2]))
    0.004924
    r   )logr(   r<   r   N)r   r   sumr4   r	   )Zsite_countsr   rf   ZtotZtot_synZtot_nonZtot_fixZtot_polyZexpZobsexr   r   r   rc     s    


rc   __main__)run_doctest)NrT   )rR   r2   Z	Bio.Alignr   ZBio.SeqRecordr   ZBio.Datar   ZBior   ZBio.codonalign.codonseqr   r   r   ZBio.codonalign.chisqr	   r
   ri   r_   rq   rb   r   ra   rc   rO   Z
Bio._utilsr   r   r   r   r   <module>
   s&    N
D0D'!