B
    b*                 @   s   d Z ddlmZ yddlZW n( ek
rD   ddlmZ edY nX G dd dZdd	 Zd
d Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zd#ddZed krdd!lmZ edd" dS )$z8Maximum Entropy code.

Uses Improved Iterative Scaling.
    )reduceN)MissingPythonDependencyErrorz0Install NumPy if you want to use Bio.MaxEntropy.c               @   s   e Zd ZdZdd ZdS )
MaxEntropya  Hold information for a Maximum Entropy classifier.

    Members:
    classes      List of the possible classes of data.
    alphas       List of the weights for each feature.
    feature_fns  List of the feature functions.

    Car data from example Naive Bayes Classifier example by Eric Meisner November 22, 2003
    http://www.inf.u-szeged.hu/~ormandi/teaching

    >>> from Bio.MaxEntropy import train, classify
    >>> xcar = [
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Yellow', 'Sports', 'Domestic'],
    ...     ['Yellow', 'Sports', 'Imported'],
    ...     ['Yellow', 'SUV', 'Imported'],
    ...     ['Yellow', 'SUV', 'Imported'],
    ...     ['Yellow', 'SUV', 'Domestic'],
    ...     ['Red', 'SUV', 'Imported'],
    ...     ['Red', 'Sports', 'Imported']]
    >>> ycar = ['Yes','No','Yes','No','Yes','No','Yes','No','No','Yes']

    Requires some rules or features

    >>> def udf1(ts, cl):
    ...     return ts[0] != 'Red'
    ...
    >>> def udf2(ts, cl):
    ...     return ts[1] != 'Sports'
    ...
    >>> def udf3(ts, cl):
    ...     return ts[2] != 'Domestic'
    ...
    >>> user_functions = [udf1, udf2, udf3]  # must be an iterable type
    >>> xe = train(xcar, ycar, user_functions)
    >>> for xv, yv in zip(xcar, ycar):
    ...     xc = classify(xe, xv)
    ...     print('Pred: %s gives %s y is %s' % (xv, xc, yv))
    ...
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is Yes
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is No
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is Yes
    Pred: ['Yellow', 'Sports', 'Domestic'] gives No y is No
    Pred: ['Yellow', 'Sports', 'Imported'] gives No y is Yes
    Pred: ['Yellow', 'SUV', 'Imported'] gives No y is No
    Pred: ['Yellow', 'SUV', 'Imported'] gives No y is Yes
    Pred: ['Yellow', 'SUV', 'Domestic'] gives No y is No
    Pred: ['Red', 'SUV', 'Imported'] gives No y is No
    Pred: ['Red', 'Sports', 'Imported'] gives No y is Yes
    c             C   s   g | _ g | _g | _dS )zInitialize the class.N)classesalphasfeature_fns)self r	   -lib/python3.7/site-packages/Bio/MaxEntropy.py__init__O   s    zMaxEntropy.__init__N)__name__
__module____qualname____doc__r   r	   r	   r	   r
   r      s   4r   c             C   sl   g }t | jt | jkstxJ| jD ]@}d}x,t| j| jD ]\}}||||| 7 }q<W || q$W |S )zCalculate the log of the probability for each class.

    me is a MaxEntropy object that has been trained.  observation is a vector
    representing the observed data.  The return value is a list of
    unnormalized log probabilities for each class.
    g        )lenr   r   AssertionErrorr   zipappend)meobservationscoresklassZlprobfnalphar	   r	   r
   	calculateV   s    r   c             C   sZ   t | |}|d | jd  }}x6tdt|D ]$}|| |kr.|| | j|  }}q.W |S )z%Classify an observation into a class.r      )r   r   ranger   )r   r   r   Z	max_scorer   ir	   r	   r
   classifyg   s    
r   c             C   sZ   i }xPt t|D ]@}x:t t|D ]*}| || || }|dkr$||||f< q$W qW |S )a>  Evaluate a feature function on every instance of the training set and class (PRIVATE).

    fn is a callback function that takes two parameters: a
    training instance and a class.  Return a dictionary of (training
    set index, class index) -> non-zero value.  Values of 0 are not
    stored in the dictionary.
    r   )r   r   )r   xsr   valuesr   jfr	   r	   r
   _eval_feature_fnq   s    r#   c                s   i  xt |D ]\}}| |< qW  fdd|D }g }t| }xL|D ]D}	d}
x(t|D ]}|
|	||| fd7 }
qXW |t|
|  qFW |S )zCalculate the expectation of each function from the data (PRIVATE).

    This is the constraint for the maximum entropy distribution. Return a
    list of expectations, parallel to the list of features.
    c                s   g | ]} | qS r	   r	   ).0y)class2indexr	   r
   
<listcomp>   s    z+_calc_empirical_expects.<locals>.<listcomp>r   )	enumerater   r   getr   float)r   ysr   featuresindexkeyZys_iZexpectNfeaturesr   r	   )r&   r
   _calc_empirical_expects   s    
r2   c             C   sh   t | |||}g }xP|D ]H}d}x,| D ] \\}}	}
||| |	 |
 7 }q*W ||t|   qW |S )zCalculate the expectation of each feature from the model (PRIVATE).

    This is not used in maximum entropy training, but provides a good function
    for debugging.
    g        )_calc_p_class_given_xitemsr   r   )r   r   r,   r   p_yxZexpectsr0   sumr   r!   r"   r	   r	   r
   _calc_model_expects   s    
r7   c             C   s   t t| t|f}t|t|ks*txHt||D ]:\}}x0| D ]$\\}}}	|| |  ||	 7  < qHW q6W t |}x0tt| D ] }
t||
 }||
 | ||
< qW |S )zCalculate conditional probability P(y|x) (PRIVATE).

    y is the class and x is an instance from the training set.
    Return a XSxCLASSES matrix of probabilities.
    )	numpyzerosr   r   r   r4   expr   r6   )r   r   r,   r   prob_yxr0   r   xr%   r"   r   zr	   r	   r
   r3      s     
r3   c             C   sN   t | |f}x:|D ]2}x,| D ] \\}}}|| |  |7  < q"W qW |S )z/Calculate a matrix of f sharp values (PRIVATE).)r8   r9   r4   )r/   Znclassesr,   f_sharpr0   r   r!   r"   r	   r	   r
   _calc_f_sharp   s
    
r?   c             C   s   d}d}x||k rd }	}
xZ|  D ]N\\}}}|| | | t||| |   }|	|7 }	|
||| |  7 }
q$W ||	|   |
 |   }	}
|	|
 }||8 }t||k rP |d }q
W td|S )z,Solve delta using Newton's method (PRIVATE).g        r   r   z Newton's method did not converge)r4   r8   r:   fabsRuntimeError)r/   r0   r>   Z	empiricalr;   max_newton_iterationsnewton_convergedeltaitersZf_newtonZ	df_newtonr   r!   r"   ZprodZratior	   r	   r
   _iis_solve_delta   s     
&rF   c          	   C   sh   t | |||}t| }	|dd }
x@tt|D ]0}t|	|| ||| |||}|
|  |7  < q0W |
S )zBDo one iteration of hill climbing to find better alphas (PRIVATE).N)r3   r   r   rF   )r   r   r,   r>   r   e_empiricalrB   rC   r5   r/   Z	newalphasr   rD   r	   r	   r
   
_train_iis   s    rH   '  h㈵>d   绽|=c          	      s  st dtt|kr$t d| }}	tt|  fdd|D }
ttt |
}t||	 |
}dgt|
 }d}x||k rt| |
|||||}dd t||D }tt	j
|d}|}t }| |  |_|_|_|dk	r|| ||k rP qW td	|S )
aF  Train a maximum entropy classifier, returns MaxEntropy object.

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations.  results is a list of the
    class assignments for each observation.  feature_fns is a list of
    the features.  These are callback functions that take an
    observation and class and return a 1 or 0.  update_fn is a
    callback function that is called at each training iteration.  It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    The maximum number of iterations and the convergence criterion for IIS
    are given by max_iis_iterations and iis_converge, respectively, while
    max_newton_iterations and newton_converge are the maximum number
    of iterations and the convergence criterion for Newton's method.
    zNo data in the training set.z2training_set and results should be parallel lists.c                s   g | ]}t | qS r	   )r#   )r$   r   )r   training_setr	   r
   r'   +  s    ztrain.<locals>.<listcomp>g        r   c             S   s   g | ]\}}t || qS r	   )r8   r@   )r$   r<   r%   r	   r	   r
   r'   @  s    NzIIS did not converge)
ValueErrorr   sortedsetr?   r2   rH   r   r   r8   addr   r   r   r   rA   )rM   Zresultsr   Z	update_fnZmax_iis_iterationsZiis_convergerB   rC   r   r+   r,   r>   rG   r   rE   ZnalphasZdiffr   r	   )r   rM   r
   train  s@    
rR   __main__)run_doctest)verbose)NrI   rJ   rK   rL   )r   	functoolsr   r8   ImportErrorZBior   r   r   r   r#   r2   r7   r3   r?   rF   rH   rR   r   Z
Bio._utilsrT   r	   r	   r	   r
   <module>
   s2   =

$    
D