B
    bq                 @   sJ   d Z ddlmZ ddlmZ ddlmZ ddlmZ dddZdd	 Z	d
S )a?  Bio.SeqIO support for the "swiss" (aka SwissProt/UniProt) file format.

You are expected to use this module via the Bio.SeqIO functions.
See also the Bio.SwissProt module which offers more than just accessing
the sequences as SeqRecord objects.

See also Bio.SeqIO.UniprotIO.py which supports the "uniprot-xml" format.
    )
SeqFeature)	SwissProt)Seq)	SeqRecordc          	   C   s  | dkrt  S yt td|t|  S  tk
r<   Y nX | dry t td|t| dd  S  tk
r|   Y nX n| dry t td|t| dd  S  tk
r   Y nX nD| dry t 	td|t| dd  S  tk
r   Y nX t
d|  dS )zTurn a Swiss location position into a SeqFeature position object (PRIVATE).

    An offset of -1 is used with a start location to make it pythonic.
    ?r   <   N>zCannot parse location '%s')r   ZUnknownPositionZExactPositionmaxint
ValueError
startswithZBeforePositionZAfterPositionZUncertainPositionNotImplementedError)Zlocation_stringoffset r   0lib/python3.7/site-packages/Bio/SeqIO/SwissIO.py_make_position   s.    
 
 r   c             c   sp  t | }x^|D ]T}tt|j|jd |j|j|jd}xN|j	D ]D}t
|dk rVqD|dd \}}d||f }||jkrD|j| qDW |j}d|d< |j|d< |jr|j|d	< |jr|j\}	}
|	|d
< |
|d< |jr|j\}	}
|	|d< |
|d< |jr|j\}	}
|	|d< |
|d< |jr(|j|d< |jd|d< |j|d< |j|d< |jr^|j|d< |jrp|j|d< |jrd|j|d< |jrNg |d< x|jD ]}t }ddd |jD |_x\|jD ]R\}}|dkr||_n6|dkr||_ n$|dkrn|dkrnt!d | qW |j"|_"|j#|_#|j$|_%|d | qW |j&rb|j&|jd!< |V  qW dS )"a  Break up a Swiss-Prot/UniProt file into SeqRecord objects.

    Argument source is a file-like object or a path to a file.

    Every section from the ID line to the terminating // becomes
    a single SeqRecord with associated annotation and features.

    This parser is for the flat file "swiss" format as used by:
     - Swiss-Prot aka SwissProt
     - TrEMBL
     - UniProtKB aka UniProt Knowledgebase

    For consistency with BioPerl and EMBOSS we call this the "swiss"
    format. See also the SeqIO support for "uniprot-xml" format.

    Rather than calling it directly, you are expected to use this
    parser via Bio.SeqIO.parse(..., format="swiss") instead.
    r   )idnamedescriptionfeatures   Nz%s:%sZproteinZmolecule_type
accessionsprotein_existencedateZsequence_versionZdate_last_sequence_updateZdate_last_annotation_updateZentry_version	gene_name.organismZtaxonomyZ
ncbi_taxidZorganism_hostZhost_ncbi_taxid
comment
references c             s   s   | ]}d | V  qdS )z%s=%s;Nr   ).0Zk_vr   r   r   	<genexpr>|   s    z SwissIterator.<locals>.<genexpr>ZPubMedZMEDLINEZDOIZAGRICOLAz"Unknown key %s found in referenceskeywords)'r   parser   r   Zsequencer   Z
entry_namer   r   Zcross_referenceslenZdbxrefsappendannotationsr   ZcreatedZsequence_updateZannotation_updater   r   rstripZorganism_classificationZtaxonomy_idZhost_organismZhost_taxonomy_idZcommentsjoinr    r   Z	Referencer   Z	pubmed_idZ
medline_idr   ZauthorstitlelocationZjournalr$   )sourceZswiss_recordsZswiss_recordrecordZcross_referenceZdatabaseZ	accessionZdbxrefr(   r   versionZ	referenceZfeaturekeyvaluer   r   r   SwissIterator7   s    
















r2   N)r   )
__doc__ZBior   r   ZBio.Seqr   ZBio.SeqRecordr   r   r2   r   r   r   r   <module>   s   
 