B
    b4                 @   s   d Z ddlZddlZddlZddlmZ ddlmZ ddlmZ G dd dZ	G dd	 d	e	Z
e
ZG d
d dZG dd dZG dd deZG dd dZG dd dZG dd deZG dd deZG dd deZdS )z*Classes and methods for tree construction.    N)BaseTree)MultipleSeqAlignment)substitution_matricesc               @   sT   e Zd ZdZdddZdd Zdd Zd	d
 ZdddZdd Z	dd Z
dd ZdS )_Matrixa  Base class for distance matrix or scoring matrix.

    Accepts a list of names and a lower triangular matrix.::

        matrix = [[0],
                  [1, 0],
                  [2, 3, 0],
                  [4, 5, 6, 0]]
        represents the symmetric matrix of
        [0,1,2,4]
        [1,0,3,5]
        [2,3,0,6]
        [4,5,6,0]

    :Parameters:
        names : list
            names of elements, used for indexing
        matrix : list
            nested list of numerical lists in lower triangular format

    Examples
    --------
    >>> from Bio.Phylo.TreeConstruction import _Matrix
    >>> names = ['Alpha', 'Beta', 'Gamma', 'Delta']
    >>> matrix = [[0], [1, 0], [2, 3, 0], [4, 5, 6, 0]]
    >>> m = _Matrix(names, matrix)
    >>> m
    _Matrix(names=['Alpha', 'Beta', 'Gamma', 'Delta'], matrix=[[0], [1, 0], [2, 3, 0], [4, 5, 6, 0]])

    You can use two indices to get or assign an element in the matrix.

    >>> m[1,2]
    3
    >>> m['Beta','Gamma']
    3
    >>> m['Beta','Gamma'] = 4
    >>> m['Beta','Gamma']
    4

    Further more, you can use one index to get or assign a list of elements related to that index.

    >>> m[0]
    [0, 1, 2, 4]
    >>> m['Alpha']
    [0, 1, 2, 4]
    >>> m['Alpha'] = [0, 7, 8, 9]
    >>> m[0]
    [0, 7, 8, 9]
    >>> m[0,1]
    7

    Also you can delete or insert a column&row of elemets by index.

    >>> m
    _Matrix(names=['Alpha', 'Beta', 'Gamma', 'Delta'], matrix=[[0], [7, 0], [8, 4, 0], [9, 5, 6, 0]])
    >>> del m['Alpha']
    >>> m
    _Matrix(names=['Beta', 'Gamma', 'Delta'], matrix=[[0], [4, 0], [5, 6, 0]])
    >>> m.insert('Alpha', [0, 7, 8, 9] , 0)
    >>> m
    _Matrix(names=['Alpha', 'Beta', 'Gamma', 'Delta'], matrix=[[0], [7, 0], [8, 4, 0], [9, 5, 6, 0]])

    Nc             C   s
  t |trBtdd |D rBtt|t|kr8|| _qJtdntd|dkrvdd tdt| d D }|| _	nt |trtd	d |D rtd
d dd |D D rt|t|krdd |D ttdt| d kr|| _	qtdntdntddS )zInitialize matrix.

        Arguments are a list of names, and optionally a list of lower
        triangular matrix data (zero matrix used by default).
        c             s   s   | ]}t |tV  qd S )N)
isinstancestr).0s r
   9lib/python3.7/site-packages/Bio/Phylo/TreeConstruction.py	<genexpr>Z   s    z#_Matrix.__init__.<locals>.<genexpr>zDuplicate names foundz#'names' should be a list of stringsNc             S   s   g | ]}d g| qS )r   r
   )r   ir
   r
   r   
<listcomp>e   s    z$_Matrix.__init__.<locals>.<listcomp>   c             s   s   | ]}t |tV  qd S )N)r   list)r   lr
   r
   r   r   k   s    c             s   s   | ]}t |tjV  qd S )N)r   numbersNumber)r   nr
   r
   r   r   m   s   c             S   s   g | ]}|D ]}|qqS r
   r
   )r   Zsublistitemr
   r
   r   r   n   s    c             S   s   g | ]}t |qS r
   )len)r   mr
   r
   r   r   t   s    z+'matrix' should be in lower triangle formatz,'names' and 'matrix' should be the same sizez,'matrix' should be a list of numerical lists)
r   r   allr   setnames
ValueError	TypeErrorrangematrix)selfr   r   r
   r
   r   __init__S   s&    

$

z_Matrix.__init__c                s  t |ttfrd t |tr"| n4t |trN|jkrDj| qVtdntd td krntd fddt	d D  fd	dt	 tD  S t|d
krd}d}t
dd |D r|\}}n`t
dd |D r0|\}}|jkr&|jkr&j|}j|}ntdntd|td ks\|td krdtd||kr|j| | S j| | S ntddS )am  Access value(s) by the index(s) or name(s).

        For a _Matrix object 'dm'::

            dm[i]                   get a value list from the given 'i' to others;
            dm[i, j]                get the value between 'i' and 'j';
            dm['name']              map name to index first
            dm['name1', 'name2']    map name to index first

        NzItem not found.zInvalid index type.r   zIndex out of range.c                s   g | ]}j   | qS r
   )r   )r   r   )indexr   r
   r   r      s    z'_Matrix.__getitem__.<locals>.<listcomp>r   c                s   g | ]}j |   qS r
   )r   )r   r   )r!   r   r
   r   r      s       c             s   s   | ]}t |tV  qd S )N)r   int)r   r   r
   r
   r   r      s    z&_Matrix.__getitem__.<locals>.<genexpr>c             s   s   | ]}t |tV  qd S )N)r   r   )r   r   r
   r
   r   r      s    )r   r#   r   r   r!   r   r   r   
IndexErrorr   r   r   )r   r   	row_index	col_indexrow_namecol_namer
   )r!   r   r   __getitem__}   s>    



 

$
z_Matrix.__getitem__c       	      C   s  t |ttfrd}t |tr$|}n4t |trP|| jkrF| j|}qXtdntd|t| d krptdt |t	rt
dd |D rt|t| krx$td|D ]}|| | j| |< qW x2t|t| D ]}|| | j| |< qW ntd	ntd
n
t|dkrd}d}t
dd |D r8|\}}n`t
dd |D r|\}}|| jkr|| jkr| j|}| j|}ntdntd|t| d ks|t| d krtdt |tjr||kr|| j| |< n|| j| |< ntd
ntddS )zSet value by the index(s) or name(s).

        Similar to __getitem__::

            dm[1] = [1, 0, 3, 4]    set values from '1' to others;
            dm[i, j] = 2            set the value from 'i' to 'j'

        NzItem not found.zInvalid index type.r   zIndex out of range.c             s   s   | ]}t |tjV  qd S )N)r   r   r   )r   r   r
   r
   r   r      s    z&_Matrix.__setitem__.<locals>.<genexpr>r   zValue not the same size.zInvalid value type.r"   c             s   s   | ]}t |tV  qd S )N)r   r#   )r   r   r
   r
   r   r      s    c             s   s   | ]}t |tV  qd S )N)r   r   )r   r   r
   r
   r   r      s    )r   r#   r   r   r!   r   r   r   r$   r   r   r   r   r   r   )	r   r   valuer!   r   r%   r&   r'   r(   r
   r
   r   __setitem__   sP    







$

z_Matrix.__setitem__c             C   sp   d}t |tr|}n t |tr,| j|}ntdx&t|d t| D ]}| j| |= qHW | j|= | j|= dS )z.Delete related distances by the index or name.NzInvalid index type.r   )	r   r#   r   r   r!   r   r   r   r   )r   r   r!   r   r
   r
   r   __delitem__   s    

z_Matrix.__delitem__c             C   s   t |tr|dkrt| }t |ts,td| j|| | j|dg|  x(t|t| D ]}| j| |d q^W || |< ntddS )zInsert distances given the name and value.

        :Parameters:
            name : str
                name of a row/col to be inserted
            value : list
                a row/col of values to be inserted

        NzInvalid index type.r   zInvalid name type.)	r   r   r   r#   r   r   insertr   r   )r   namer*   r!   r   r
   r
   r   r-     s    



z_Matrix.insertc             C   s
   t | jS )zMatrix length.)r   r   )r   r
   r
   r   __len__   s    z_Matrix.__len__c             C   s"   | j jdttt| j| jf  S )zReturn Matrix as a string.z(names=%s, matrix=%s))	__class____name__tuplemapreprr   r   )r   r
   r
   r   __repr__$  s    
z_Matrix.__repr__c                s:   d  fddtdt D }|d d  j }|S )z%Get a lower triangular matrix string.
c                s4   g | ],} j | d  d dd  j| D  qS )	c             S   s   g | ]}t |qS r
   )r   )r   r   r
   r
   r   r   .  s    z._Matrix.__str__.<locals>.<listcomp>.<listcomp>)r   joinr   )r   r   )r   r
   r   r   .  s   z#_Matrix.__str__.<locals>.<listcomp>r   z
	r7   )r8   r   r   r   )r   Zmatrix_stringr
   )r   r   __str__*  s
    
z_Matrix.__str__)N)N)r1   
__module____qualname____doc__r    r)   r+   r,   r-   r/   r5   r9   r
   r
   r
   r   r      s   ?
*6B
r   c               @   s2   e Zd ZdZdddZdd Zdd Zd	d
 ZdS )DistanceMatrixzDistance matrix class that can be used for distance based tree algorithms.

    All diagonal elements will be zero no matter what the users provide.
    Nc             C   s   t | || |   dS )zInitialize the class.N)r   r    _set_zero_diagonal)r   r   r   r
   r
   r   r    <  s    zDistanceMatrix.__init__c             C   s   t | || |   dS )zSet Matrix's items to values.N)r   r+   r>   )r   r   r*   r
   r
   r   r+   A  s    zDistanceMatrix.__setitem__c             C   s*   x$t dt| D ]}d| j| |< qW dS )z,Set all diagonal elements to zero (PRIVATE).r   N)r   r   r   )r   r   r
   r
   r   r>   F  s    z!DistanceMatrix._set_zero_diagonalc       	         s   | dtj d tdtttjd }dd tdtjd D }dt| d d	| d }xft	t
jjD ]P\ \}} fd
dt d tjD }t|g||}| |j|  qW dS )a  Write data in Phylip format to a given file-like object or handle.

        The output stream is the input distance matrix format used with Phylip
        programs (e.g. 'neighbor'). See:
        http://evolution.genetics.washington.edu/phylip/doc/neighbor.html

        :Parameters:
            handle : file or file-like object
                A writeable text mode file handle or other object supporting
                the 'write' method, such as StringIO or sys.stdout.

        z    r6      r   c             s   s   | ]}d t | d V  qdS ){z:.4f}N)r   )r   xr
   r
   r   r   [  s    z/DistanceMatrix.format_phylip.<locals>.<genexpr>z{0:zs}z  c             3   s   | ]}j |   V  qd S )N)r   )r   j)r   r   r
   r   r   _  s    N)writer   r   maxr3   r   r   r   r8   	enumeratezip	itertoolschainformat)	r   ZhandleZ
name_widthZ
value_fmtsZrow_fmtr.   valuesZmirror_valuesZfieldsr
   )r   r   r   format_phylipK  s     $zDistanceMatrix.format_phylip)N)r1   r:   r;   r<   r    r+   r>   rK   r
   r
   r
   r   r=   6  s
   
r=   c               @   s   e Zd ZdZedZg Zg Ze	 Z
xRe
D ]JZe	eZedkrFdZne Zeeejrjee q*ee q*W [[[
[dge e ZdddZd	d
 Zdd ZdS )DistanceCalculatora}  Class to calculate the distance matrix from a DNA or Protein.

    Multiple Sequence Alignment(MSA) and the given name of the
    substitution model.

    Currently only scoring matrices are used.

    :Parameters:
        model : str
            Name of the model matrix to be used to calculate distance.
            The attribute ``dna_models`` contains the available model
            names for DNA sequences and ``protein_models`` for protein
            sequences.

    Examples
    --------
    Loading a small PHYLIP alignment from which to compute distances::

        from Bio.Phylo.TreeConstruction import DistanceCalculator
        from Bio import AlignIO
        aln = AlignIO.read(open('TreeConstruction/msa.phy'), 'phylip')
        print(aln)

    Output::

        Alignment with 5 rows and 13 columns
        AACGTGGCCACAT Alpha
        AAGGTCGCCACAC Beta
        CAGTTCGCCACAA Gamma
        GAGATTTCCGCCT Delta
        GAGATCTCCGCCC Epsilon

    DNA calculator with 'identity' model::

        calculator = DistanceCalculator('identity')
        dm = calculator.get_distance(aln)
        print(dm)

    Output::

        Alpha	0
        Beta	0.23076923076923073	0
        Gamma	0.3846153846153846	0.23076923076923073	0
        Delta	0.5384615384615384	0.5384615384615384	0.5384615384615384	0
        Epsilon	0.6153846153846154	0.3846153846153846	0.46153846153846156	0.15384615384615385	0
            Alpha	Beta	Gamma	Delta	Epsilon

    Protein calculator with 'blosum62' model::

        calculator = DistanceCalculator('blosum62')
        dm = calculator.get_distance(aln)
        print(dm)

    Output::

        Alpha	0
        Beta	0.36904761904761907	0
        Gamma	0.49397590361445787	0.25	0
        Delta	0.5853658536585367	0.5476190476190477	0.5662650602409638	0
        Epsilon	0.7	0.3555555555555555	0.48888888888888893	0.2222222222222222	0
            Alpha	Beta	Gamma	Delta	Epsilon

    ZABCDEFGHIKLMNPQRSTVWXYZzNUC.4.4blastnidentityNc             C   sx   |r|| _ n|dkrd| _ nd| _ |dkr2d| _nB|| jkr`|dkrJd}n| }t|| _ntdd| j dS )	z!Initialize with a distance model.rN   r
   )-*NrM   zNUC.4.4z'Model not supported. Available models: z, )skip_lettersscoring_matrixmodelsupperr   loadr   r8   )r   ZmodelrQ   r.   r
   r
   r   r      s    
zDistanceCalculator.__init__c       
   	      s<  d}d} j dkr8t fddt||D }t|}nd}d}xtdt|D ]}|| }|| }	| jksP|	 jkrzqPy| j ||f 7 }W n* tk
r   td||j|f dY nX y| j |	|	f 7 }W n* tk
r   td|	|j|f dY nX | j ||	f 7 }qPW t	||}|dkr,dS d|d |  S )zCalculate pairwise distance from two sequences (PRIVATE).

        Returns a value between 0 (identical sequences) and 1 (completely
        different, or seq1 is an empty string.)
        r   Nc             3   s.   | ]&\}}| j kr| j kr||kV  qd S )N)rQ   )r   l1l2)r   r
   r   r     s   z/DistanceCalculator._pairwise.<locals>.<genexpr>z1Bad letter '%s' in sequence '%s' at position '%s'r   g      ?)
rR   sumrF   r   r   rQ   r$   r   idrD   )
r   seq1seq2scoreZ	max_scoreZ
max_score1Z
max_score2r   rV   rW   r
   )r   r   	_pairwise  sB    






zDistanceCalculator._pairwisec             C   s^   t |tstddd |D }t|}x0t|dD ] \}}| ||||j|jf< q6W |S )zReturn a DistanceMatrix for MSA object.

        :Parameters:
            msa : MultipleSeqAlignment
                DNA or Protein multiple sequence alignment.

        z+Must provide a MultipleSeqAlignment object.c             S   s   g | ]
}|j qS r
   )rY   )r   r	   r
   r
   r   r     s    z3DistanceCalculator.get_distance.<locals>.<listcomp>r"   )r   r   r   r=   rG   combinationsr]   rY   )r   msar   dmrZ   r[   r
   r
   r   get_distance  s    
zDistanceCalculator.get_distance)rN   N)r1   r:   r;   r<   r   Zprotein_alphabetZ
dna_modelsZprotein_modelsr   rU   r   r.   r   lowerissubsetalphabetappendrS   r    r]   ra   r
   r
   r
   r   rL   h  s*   ?


-rL   c               @   s   e Zd ZdZdd ZdS )TreeConstructorz$Base class for all tree constructor.c             C   s   t ddS )zvCaller to built the tree from a MultipleSeqAlignment object.

        This should be implemented in subclass.
        zMethod not implemented!N)NotImplementedError)r   r_   r
   r
   r   
build_tree  s    zTreeConstructor.build_treeN)r1   r:   r;   r<   rh   r
   r
   r
   r   rf     s   rf   c               @   sB   e Zd ZdZddgZdddZdd Zd	d
 Zdd Zdd Z	dS )DistanceTreeConstructora	  Distance based tree constructor.

    :Parameters:
        method : str
            Distance tree construction method, 'nj'(default) or 'upgma'.
        distance_calculator : DistanceCalculator
            The distance matrix calculator for multiple sequence alignment.
            It must be provided if ``build_tree`` will be called.

    Examples
    --------
    Loading a small PHYLIP alignment from which to compute distances, and then
    build a upgma Tree::

        from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
        from Bio.Phylo.TreeConstruction import DistanceCalculator
        from Bio import AlignIO
        aln = AlignIO.read(open('TreeConstruction/msa.phy'), 'phylip')
        constructor = DistanceTreeConstructor()
        calculator = DistanceCalculator('identity')
        dm = calculator.get_distance(aln)
        upgmatree = constructor.upgma(dm)
        print(upgmatree)

    Output::

        Tree(rooted=True)
            Clade(branch_length=0, name='Inner4')
                Clade(branch_length=0.18749999999999994, name='Inner1')
                    Clade(branch_length=0.07692307692307693, name='Epsilon')
                    Clade(branch_length=0.07692307692307693, name='Delta')
                Clade(branch_length=0.11057692307692304, name='Inner3')
                    Clade(branch_length=0.038461538461538464, name='Inner2')
                        Clade(branch_length=0.11538461538461536, name='Gamma')
                        Clade(branch_length=0.11538461538461536, name='Beta')
                    Clade(branch_length=0.15384615384615383, name='Alpha')

    Build a NJ Tree::

        njtree = constructor.nj(dm)
        print(njtree)

    Output::

        Tree(rooted=False)
            Clade(branch_length=0, name='Inner3')
                Clade(branch_length=0.18269230769230765, name='Alpha')
                Clade(branch_length=0.04807692307692307, name='Beta')
                Clade(branch_length=0.04807692307692307, name='Inner2')
                    Clade(branch_length=0.27884615384615385, name='Inner1')
                        Clade(branch_length=0.051282051282051266, name='Epsilon')
                        Clade(branch_length=0.10256410256410259, name='Delta')
                    Clade(branch_length=0.14423076923076922, name='Gamma')

    njupgmaNc             C   s^   |dkst |tr|| _ntdt |tr>|| jkr>|| _ntd| d d| j dS )zInitialize the class.Nz)Must provide a DistanceCalculator object.zBad method: z. Available methods: z, )r   rL   distance_calculatorr   r   methodsmethodr8   )r   rl   rn   r
   r
   r   r    _  s    
z DistanceTreeConstructor.__init__c             C   sF   | j r:| j |}d}| jdkr,| |}n
| |}|S tddS )z7Construct and return a Tree, Neighbor Joining or UPGMA.Nrk   z)Must provide a DistanceCalculator object.)rl   ra   rn   rk   rj   r   )r   r_   r`   treer
   r
   r   rh   q  s    

z"DistanceTreeConstructor.build_treec             C   s  t |tstdt|}dd |jD }d}d}d}xt|dkr|d }xNtdt|D ]<}x6td|D ](}	||||	f krr|||	f }|}|	}qrW qbW || }
|| }|d7 }t	ddt
| }|j|
 |j| |
 r|d	 d
 |
_n|d	 d
 | |
 |
_| r0|d	 d
 |_n|d	 d
 | | |_|||< ||= xTtdt|D ]B}||krf||krf|||f |||f  d	 d
 |||f< qfW dt
| |j|< ||= q<W d|_t|S )a  Construct and return an UPGMA tree.

        Constructs and returns an Unweighted Pair Group Method
        with Arithmetic mean (UPGMA) tree.

        :Parameters:
            distance_matrix : DistanceMatrix
                The distance matrix for tree construction.

        z%Must provide a DistanceMatrix object.c             S   s   g | ]}t d |qS )N)r   Clade)r   r.   r
   r
   r   r     s    z1DistanceTreeConstructor.upgma.<locals>.<listcomp>r   r   )r   r   NInnerg      ?r"   )r   r=   r   copydeepcopyr   r   r   r   rp   r   cladesre   is_terminalbranch_length
_height_ofTree)r   distance_matrixr`   rt   min_imin_jinner_countmin_distr   rB   clade1clade2inner_cladekr
   r
   r   rk   ~  sH    


.
zDistanceTreeConstructor.upgmac             C   s  t |tstdt|}dd |jD }dgt| }d}d}d}t|dkrh|d }tj|ddS t|dkrd}d}|| }	|| }
|||f d	 |	_	|||f |	j	 |
_	t
d
d}|j|	 |j|
 ||d< |d }tj|ddS xt|dkrxjtdt|D ]X}d||< x0tdt|D ]}||  |||f 7  < q2W || t|d  ||< qW |d |d  |d  }d}d}x`tdt|D ]N}xFtd|D ]8}|||f ||  ||  }||kr|}|}|}qW qW || }	|| }
|d7 }t
d
dt| }|j|	 |j|
 |||f ||  ||  d	 |	_	|||f |	j	 |
_	|||< ||= x\tdt|D ]J}||kr||kr|||f |||f  |||f  d	 |||f< qW dt| |j|< ||= qW d
}|d |kr>d|d _	|d |d _	|d j|d  |d }n4|d |d _	d|d _	|d j|d  |d }tj|ddS )zConstruct and return a Neighbor Joining tree.

        :Parameters:
            distance_matrix : DistanceMatrix
                The distance matrix for tree construction.

        z%Must provide a DistanceMatrix object.c             S   s   g | ]}t d |qS )N)r   rp   )r   r.   r
   r
   r   r     s    z.DistanceTreeConstructor.nj.<locals>.<listcomp>r   r   F)rootedr"   g       @Nrq   )r   r   )r   r=   r   rr   rs   r   r   r   rx   rv   rp   rt   re   r   r   )r   ry   r`   rt   Z	node_distrz   r{   r|   rootr~   r   r   r   rB   r}   tempr   r
   r
   r   rj     s    


"



zDistanceTreeConstructor.njc                s4   d}|  r|j}n|t fdd|jD  }|S )zECalculate clade height -- the longest path to any terminal (PRIVATE).r   c             3   s   | ]}  |V  qd S )N)rw   )r   c)r   r
   r   r   -  s    z5DistanceTreeConstructor._height_of.<locals>.<genexpr>)ru   rv   rD   rt   )r   cladeZheightr
   )r   r   rw   '  s
    z"DistanceTreeConstructor._height_of)Nrj   )
r1   r:   r;   r<   rm   r    rh   rk   rj   rw   r
   r
   r
   r   ri   $  s   7
Bgri   c               @   s   e Zd ZdZdd ZdS )Scorerz(Base class for all tree scoring methods.c             C   s   t ddS )ztCaller to get the score of a tree for the given alignment.

        This should be implemented in subclass.
        zMethod not implemented!N)rg   )r   ro   	alignmentr
   r
   r   	get_score7  s    zScorer.get_scoreN)r1   r:   r;   r<   r   r
   r
   r
   r   r   4  s   r   c               @   s   e Zd ZdZdd ZdS )TreeSearcherz*Base class for all tree searching methods.c             C   s   t ddS )znCaller to search the best tree with a starting tree.

        This should be implemented in subclass.
        zMethod not implemented!N)rg   )r   starting_treer   r
   r
   r   searchB  s    zTreeSearcher.searchN)r1   r:   r;   r<   r   r
   r
   r
   r   r   ?  s   r   c               @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )NNITreeSearcherzTree searching with Nearest Neighbor Interchanges (NNI) algorithm.

    :Parameters:
        scorer : ParsimonyScorer
            parsimony scorer to calculate the parsimony score of
            different trees during NNI algorithm.

    c             C   s   t |tr|| _ntddS )zInitialize the class.zMust provide a Scorer object.N)r   r   scorerr   )r   r   r
   r
   r   r    T  s    
zNNITreeSearcher.__init__c             C   s   |  ||S )a5  Implement the TreeSearcher.search method.

        :Parameters:
           starting_tree : Tree
               starting tree of NNI method.
           alignment : MultipleSeqAlignment
               multiple sequence alignment used to calculate parsimony
               score of different NNI trees.

        )_nni)r   r   r   r
   r
   r   r   [  s    zNNITreeSearcher.searchc             C   s\   |}xR| j ||}|}x0| |D ]"}| j ||}||k r$|}|}q$W ||krP qW |S )zESearch for the best parsimony tree using the NNI algorithm (PRIVATE).)r   r   _get_neighbors)r   r   r   Z	best_treeZ
best_scorer   tr\   r
   r
   r   r   h  s    zNNITreeSearcher._nnic             C   s  i }xH|  D ]<}||jkr||}t|dkr>|j||< q|d ||< qW g }g }x|jddD ]}||jkrv|jd }|jd }|| || | s| s|jd }	|jd }
|jd }|jd= |jd= |j| |j|	 t	|}|| |jd= |jd= |j|
 |j| t	|}|| |jd= |jd= |j|	 |j
d|
 qf||krqfqf|jd }|jd }|| }||jd kr^|jd }|jd= |jd= |j| |j| t	|}|| |jd= |jd= |j| |j| t	|}|| |jd= |jd= |j| |j
d| qf|jd }|jd= |jd= |j
d| |j| t	|}|| |jd= |jd= |j
d| |j| t	|}|| |jd= |jd= |j
d| |j
d| qfW |S )zmGet all neighbor trees of the given tree (PRIVATE).

        Currently only for binary rooted trees.
        r   level)orderr   )Zfind_cladesr   Zget_pathr   get_nonterminalsrt   re   ru   rr   rs   r-   )r   ro   parentsr   Z	node_pathZ	neighborsZroot_childsleftrightZ
left_rightZ
right_leftZright_rightZ	temp_treeparentZsisterr
   r
   r   r   x  s    

























zNNITreeSearcher._get_neighborsN)r1   r:   r;   r<   r    r   r   r   r
   r
   r
   r   r   J  s
   r   c               @   s"   e Zd ZdZdddZdd ZdS )ParsimonyScorera	  Parsimony scorer with a scoring matrix.

    This is a combination of Fitch algorithm and Sankoff algorithm.
    See ParsimonyTreeConstructor for usage.

    :Parameters:
        matrix : _Matrix
            scoring matrix used in parsimony score calculation.

    Nc             C   s"   |rt |tr|| _ntddS )zInitialize the class.zMust provide a _Matrix object.N)r   r   r   r   )r   r   r
   r
   r   r      s    zParsimonyScorer.__init__c             C   st  |  std|js|  | }|jdd d |  tdd t||D s^tdd}x
tt	|d D ]}d}|d	d	|f }|t	||d  krqv| j
s$tt|d
d |D }xX|jddD ]H}	|	j}
||
d  }||
d  }||@ }|s||B }|d }|||	< qW n@td}| j
j}t	|}i }xBtt	|D ]2}|g| }||| }d||< |||| < qNW x|jddD ]}	|	j}
||
d  }||
d  }g }xt|D ]}|}|}xjt|D ]^}| j
|| || f ||  }| j
|| || f ||  }||kr&|}||kr|}qW |||  qW |||	< qW t|}|| }qvW |S )zCalculate parsimony score using the Fitch algorithm.

        Calculate and return the parsimony score given a tree and the
        MSA using either the Fitch algorithm (without a penalty matrix)
        or the Sankoff algorithm (with a matrix).
        z(The tree provided should be bifurcating.c             S   s   | j S )N)r.   )Ztermr
   r
   r   <lambda>      z+ParsimonyScorer.get_score.<locals>.<lambda>)keyc             s   s   | ]\}}|j |jkV  qd S )N)r.   rY   )r   r   ar
   r
   r   r     s    z,ParsimonyScorer.get_score.<locals>.<genexpr>zDTaxon names of the input tree should be the same with the alignment.r   Nc             S   s   g | ]
}|hqS r
   r
   )r   r   r
   r
   r   r     s    z-ParsimonyScorer.get_score.<locals>.<listcomp>Z	postorder)r   r   inf)Zis_bifurcatingr   r   Zroot_at_midpointZget_terminalssortr   rF   r   r   r   dictr   rt   floatr   r!   re   min)r   ro   r   Ztermsr\   r   Zscore_iZcolumn_iZclade_statesr   Zclade_childsZ
left_stateZright_statestater   rd   lengthZclade_scoresrB   Zarrayr!   Z
left_scoreZright_scorer   Zmin_lZmin_rr   Zslsrr
   r
   r   r     sp    



zParsimonyScorer.get_score)N)r1   r:   r;   r<   r    r   r
   r
   r
   r   r     s   

r   c               @   s"   e Zd ZdZdddZdd ZdS )ParsimonyTreeConstructora	  Parsimony tree constructor.

    :Parameters:
        searcher : TreeSearcher
            tree searcher to search the best parsimony tree.
        starting_tree : Tree
            starting tree provided to the searcher.

    Examples
    --------
    We will load an alignment, and then load various trees which have already been computed from it::

        from Bio import AlignIO, Phylo
        aln = AlignIO.read(open('TreeConstruction/msa.phy'), 'phylip')
        print(aln)

    Output::

        Alignment with 5 rows and 13 columns
        AACGTGGCCACAT Alpha
        AAGGTCGCCACAC Beta
        CAGTTCGCCACAA Gamma
        GAGATTTCCGCCT Delta
        GAGATCTCCGCCC Epsilon

    Load a starting tree::

        starting_tree = Phylo.read('TreeConstruction/nj.tre', 'newick')
        print(starting_tree)

    Output::

        Tree(rooted=False, weight=1.0)
            Clade(branch_length=0.0, name='Inner3')
                Clade(branch_length=0.01421, name='Inner2')
                    Clade(branch_length=0.23927, name='Inner1')
                        Clade(branch_length=0.08531, name='Epsilon')
                        Clade(branch_length=0.13691, name='Delta')
                    Clade(branch_length=0.2923, name='Alpha')
                Clade(branch_length=0.07477, name='Beta')
                Clade(branch_length=0.17523, name='Gamma')

    Build the Parsimony tree from the starting tree::

        scorer = Phylo.TreeConstruction.ParsimonyScorer()
        searcher = Phylo.TreeConstruction.NNITreeSearcher(scorer)
        constructor = Phylo.TreeConstruction.ParsimonyTreeConstructor(searcher, starting_tree)
        pars_tree = constructor.build_tree(aln)
        print(pars_tree)

    Output::

        Tree(rooted=True, weight=1.0)
            Clade(branch_length=0.0)
                Clade(branch_length=0.19732999999999998, name='Inner1')
                    Clade(branch_length=0.13691, name='Delta')
                    Clade(branch_length=0.08531, name='Epsilon')
                Clade(branch_length=0.04194000000000003, name='Inner2')
                    Clade(branch_length=0.01421, name='Inner3')
                        Clade(branch_length=0.17523, name='Gamma')
                        Clade(branch_length=0.07477, name='Beta')
                    Clade(branch_length=0.2923, name='Alpha')

    Nc             C   s   || _ || _dS )zInitialize the class.N)searcherr   )r   r   r   r
   r
   r   r      s    z!ParsimonyTreeConstructor.__init__c             C   s4   | j dkr$ttdd}||| _ | j| j |S )zBuild the tree.

        :Parameters:
            alignment : MultipleSeqAlignment
                multiple sequence alignment to calculate parsimony tree.

        NrN   rk   )r   ri   rL   rh   r   r   )r   r   Zdtcr
   r
   r   rh     s    

z#ParsimonyTreeConstructor.build_tree)N)r1   r:   r;   r<   r    rh   r
   r
   r
   r   r   G  s   @
r   )r<   rG   rr   r   Z	Bio.Phylor   Z	Bio.Alignr   r   r   r=   Z_DistanceMatrixrL   rf   ri   r   r   r   r   r   r
   r
   r
   r   <module>   s.     &/ 2   f