B
    ‰°b¹  ã               @   sX   d Z ddlZdd„ Zdd„ Zeƒ Zdd„ Zd	d
„ Zdd„ ZedkrTddl	m
Z
 e
ƒ  dS )z3Functions to calculate assorted sequence checksums.é    Nc             C   s4   yt | ƒ}W n tk
r(   |  ¡ }Y nX t |¡S )zÅReturn the crc32 checksum for a sequence (string or Seq object).

    Note that the case is important:

    >>> crc32("ACGTACGTACGT")
    20049947
    >>> crc32("acgtACGTacgt")
    1688586483

    )ÚbytesÚ	TypeErrorÚencodeÚbinasciiÚcrc32)ÚseqÚs© r	   ú4lib/python3.7/site-packages/Bio/SeqUtils/CheckSum.pyr      s
    r   c              C   sr   g } xht dƒD ]\}|}d}xDt dƒD ]8}|d@ }|dL }|d@ rH|dO }|dL }|r$|dN }q$W |  |¡ qW | S )Né   r   é   é   l        l      0 )ÚrangeÚappend)Ú_table_hÚiZpart_lZpart_hÚjZrflagr	   r	   r
   Ú_init_table_h%   s    r   c             C   sb   d}d}xL| D ]D}|d@ d> }|d? }|d? |B }|t |ƒA d@ }|t| A }|}qW d||f S )zßReturn the crc64 checksum for a sequence (string or Seq object).

    Note that the case is important:

    >>> crc64("ACGTACGTACGT")
    'CRC-C4FBB762C4A87EBD'
    >>> crc64("acgtACGTacgt")
    'CRC-DA4509DC64A87EBD'

    r   éÿ   é   r   zCRC-%08X%08X)Úordr   )r   ZcrclZcrchÚcZshrZtemp1hZtemp1lÚidxr	   r	   r
   Úcrc64:   s    
r   c             C   sF   d }}x4| D ],}|d7 }||t | ¡ ƒ 7 }|dkrd}qW |d S )aá  Return the GCG checksum (int) for a sequence (string or Seq object).

    Given a nucleotide or amino-acid sequence (or any string),
    returns the GCG checksum (int). Checksum used by GCG program.
    seq type = str.

    Based on BioPerl GCG_checksum. Adapted by Sebastian Bassi
    with the help of John Lenton, Pablo Ziliani, and Gabriel Genellina.

    All sequences are converted to uppercase.

    >>> gcg("ACGTACGTACGT")
    5688
    >>> gcg("acgtACGTacgt")
    5688

    r   r   é9   i'  )r   Úupper)r   ÚindexZchecksumÚcharr	   r	   r
   ÚgcgR   s    
r   c             C   st   ddl }ddl}| ¡ }yt| ƒ} W n tk
r@   |  ¡ } Y nX | |  ¡ ¡ | | 	¡ ¡}| 
¡  dd¡ d¡S )a  Return the SEGUID (string) for a sequence (string or Seq object).

    Given a nucleotide or amino-acid sequence (or any string),
    returns the SEGUID string (A SEquence Globally Unique IDentifier).
    seq type = str.

    Note that the case is not important:

    >>> seguid("ACGTACGTACGT")
    'If6HIvcnRSQDVNiAoefAzySc6i4'
    >>> seguid("acgtACGTacgt")
    'If6HIvcnRSQDVNiAoefAzySc6i4'

    For more information about SEGUID, see:
    http://bioinformatics.anl.gov/seguid/
    https://doi.org/10.1002/pmic.200600032
    r   NÚ
Ú ú=)ÚhashlibÚbase64Zsha1r   r   r   Úupdater   ZencodebytesZdigestÚdecodeÚreplaceÚrstrip)r   r"   r#   ÚmZtmpr	   r	   r
   Úseguidm   s    r)   Ú__main__)Úrun_doctest)Ú__doc__r   r   r   r   r   r   r)   Ú__name__Z
Bio._utilsr+   r	   r	   r	   r
   Ú<module>   s   !