B
    b%                 @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ ddl
mZ dd	lmZ d
dlmZ dd Zdd Zdd Zdd Zdd Zdd Zdd ZeeeedZdd Zd$ddZd%d d!ZG d"d# d#eZdS )&zBio.SeqIO support for the SnapGene file format.

The SnapGene binary format is the native format used by the SnapGene program
from GSL Biotech LLC.
    )datetime)sub)unpack)parseString)Seq)FeatureLocation)
SeqFeature)	SeqRecord   )SequenceIteratorc             c   s   x|  d}t|dk rdS td|d }|  d}t|dk rHtdtd|d }|  |}t||k rttd|||fV  qW dS )a?  Iterate over the packets of a SnapGene file.

    A SnapGene file is made of packets, each packet being a TLV-like
    structure comprising:

      - 1 single byte indicating the packet's type;
      - 1 big-endian long integer (4 bytes) indicating the length of the
        packet's data;
      - the actual data.
    r
   Nz>Br      zUnexpected end of packetz>I)readlenr   
ValueError)handlepacket_typelengthdata r   3lib/python3.7/site-packages/Bio/SeqIO/SnapGeneIO.py_iterate   s    


r   c             C   s`   |j rtdtd| d  |\}}t|d|_ d|jd< |d@ rRd|jd< n
d	|jd< d
S )z}Parse a DNA sequence packet.

    A DNA sequence packet contains a single byte flag followed by the
    sequence itself.
    z*The file contains more than one DNA packetz>B%dsr
   ASCIIZDNAZmolecule_typeZcircularZtopologyZlinearN)seqr   r   r   decodeannotations)r   r   recordflagsZsequencer   r   r   _parse_dna_packet6   s    
r   c             C   s   t |d}t|d}|dkr,d|jd< n
d|jd< t|d}|rVt|d|jd	< t|d
}|rj||_t|d}|r|ddd |_||_	|s|j|_dS )zParse a 'Notes' packet.

    This type of packet contains some metadata about the sequence. They
    are stored as a XML string with a 'Notes' root node.
    zUTF-8ZTypeZ	SyntheticZSYNZdata_file_divisionZUNCZLastModifiedz%Y.%m.%ddateZAccessionNumberZComments r
   r   N)
r   r   _get_child_valuer   r   ZstrptimeidsplitnameZdescription)r   r   r   xmltyper   Zacccommentr   r   r   _parse_notes_packetH   s"    




r'   c             C   s,   t d|\}}}}|ddkr(tddS )zParse a SnapGene cookie packet.

    Every SnapGene file starts with a packet of this type. It acts as
    a magic cookie identifying the file as a SnapGene file.
    z>8sHHHr   SnapGenez%The file is not a valid SnapGene fileN)r   r   r   )r   r   r   ZcookieZseq_typeZexp_versionZimp_versionr   r   r   _parse_cookie_packete   s    r)   c             C   sd   dd |  dD \}}|d }||krRt|t||d}td||d}|| }nt|||d}|S )Nc             S   s   g | ]}t |qS r   )int).0xr   r   r   
<listcomp>q   s    z#_parse_location.<locals>.<listcomp>-r
   )strandr   )r"   r   r   )Z	rangespecr/   r   startendl1l2locationr   r   r   _parse_locationp   s    
r5   c             C   s  t |d}x|dD ]}i }t|ddd}d}tt|ddd}|d	krVd
}d}	x>|dD ]0}
t|
d}|	st|||}	qf|	t||| }	qfW |	stdx|dD ]}t|ddd}g }xx|dD ]j}|dr|t	|j
d j q|dr|t	|j
d j q|dr|t|j
d j qW |||< qW t|d}|rd|krt|g|d< n||d kr|g|d< t|	||d}|j| qW dS )zParse a sequence features packet.

    This packet stores sequence features (except primer binding sites,
    which are in a dedicated Primers packet). The data is a XML string
    starting with a 'Features' root node.
    zUTF-8ZFeaturer%   Zmisc_feature)defaultr
   directionality1   NZSegmentrangezMissing feature locationQr#   zMissing qualifier name)errorVtextZpredefr*   label)r%   
qualifiers)r   r   getElementsByTagName_get_attribute_valuer*   r5   r   hasAttributeappend_decode
attributesvaluer   features)r   r   r   r$   featurequalsr%   r/   r7   r4   ZsegmentrngZ	qualifierZqnameZqvaluesrH   r#   r   r   r   _parse_features_packet~   sJ    





rM   c             C   s   t |d}x|dD ]}i }t|d}|r:|g|d< xf|dD ]X}t|ddd}tt|d	d
d}	|	dkrxd}	nd}	tt||	|d|d}
|j|
 qFW qW dS )zParse a Primers packet.

    A Primers packet is similar to a Features packet but specifically
    stores primer binding features. The data is a XML string starting
    with a 'Primers' root node.
    zUTF-8ZPrimerr#   r@   ZBindingSiter4   zMissing binding site location)r=   ZboundStrand0)r6   r
   r:   Zprimer_bind)r%   rA   N)	r   r   rB   rC   r*   r   r5   rI   rE   )r   r   r   r$   ZprimerrK   r#   ZsiterL   r/   rJ   r   r   r   _parse_primers_packet   s$    


rO   )r         
   c             C   s   t dd| S )Nz<[^>]+> )r   )r?   r   r   r   rF      s    rF   Nc             C   s0   |  |rt| j| jS |r(t|n|S d S )N)rD   rF   rG   rH   r   )noder#   r6   r=   r   r   r   rC      s
    

rC   c             C   sP   |  |}|r:|d jr:|d jj| jkr:t|d jjS |rHt|n|S d S )Nr   )rB   Z
childNodesZ
firstChildZnodeTypeZ	TEXT_NODErF   r   r   )rT   r#   r6   r=   Zchildrenr   r   r   r       s    


r    c                   s0   e Zd ZdZ fddZdd Zdd Z  ZS )SnapGeneIteratorzParser for SnapGene files.c                s   t  j|ddd dS )a   Parse a SnapGene file and return a SeqRecord object.

        Argument source is a file-like object or a path to a file.

        Note that a SnapGene file can only contain one sequence, so this
        iterator will always return a single record.
        br(   )modeZfmtN)super__init__)selfsource)	__class__r   r   rY     s    zSnapGeneIterator.__init__c             C   s   |  |}|S )z9Start parsing the file, and return a SeqRecord generator.)iterate)rZ   r   Zrecordsr   r   r   parse  s    
zSnapGeneIterator.parsec             c   s   t d}t|}yt|\}}}W n tk
r@   tddY nX |dkrRtdt||| x0|D ](\}}}t|}|dk	rd|||| qdW |jstd|V  dS )z.Iterate over the records in the SnapGene file.NzEmpty file.	   z5The file does not start with a SnapGene cookie packetzNo DNA packet in file)	r	   r   nextStopIterationr   r)   _packet_handlersgetr   )rZ   r   r   Zpacketsr   r   r   Zhandlerr   r   r   r]     s     
zSnapGeneIterator.iterate)__name__
__module____qualname____doc__rY   r^   r]   __classcell__r   r   )r\   r   rU     s   
rU   )NN)NN)rg   r   rer   Zstructr   Zxml.dom.minidomr   ZBio.Seqr   ZBio.SeqFeaturer   r   ZBio.SeqRecordr	   Z
Interfacesr   r   r   r'   r)   r5   rM   rO   rb   rF   rC   r    rU   r   r   r   r   <module>   s0   :"
	
