B
    b                 @   s\   d Z ddlmZ ddlZddlmZ G dd deZG dd deZdd	d
Z	dddZ
dS )zCode to work with the BLAST XML output.

The BLAST XML DTD file is on the NCBI FTP site at:
ftp://ftp.ncbi.nlm.nih.gov/blast/documents/xml/NCBI_BlastOutput.dtd
    )RecordN)ContentHandlerc               @   s:   e Zd ZdZdddZdd Zdd Zd	d
 Zdd ZdS )
_XMLparserzGeneric SAX Parser (PRIVATE).

    Just a very basic SAX parser.

    Redefine the methods startElement, characters and endElement.
    r   c             C   s(   g | _ d| _|| _g | _d| _d| _dS )zuInitialize the parser.

        Arguments:
         - debug - integer, amount of debug information to print

            N)_tag_value_debug_debug_ignore_list_method_name_level_method_map)selfdebug r   0lib/python3.7/site-packages/Bio/Blast/NCBIXML.py__init__   s    z_XMLparser.__init__c             C   s   | j | t| j dkr(| | dS d| | }|| jkrd| j|   | jdkrtd|  n,| jdkr|| jkrtd|  | j| | j	
 rtd| j	|f d	| _	dS )
zFound XML start tag.

        No real need of attr, BLAST DTD doesn't use them

        Arguments:
         - name -- name of the tag
         - attr -- tag attributes

        r   NZstart_   zNCBIXML: Parsed:     zNCBIXML: Ignored: z,What should we do with %s before the %r tag?r   )r   appendlen_on_root_node_node_method_namer   r	   printr
   r   strip
ValueError)r   nameattrmethodr   r   r   startElement*   s"    






z_XMLparser.startElementc             C   s   |  j |7  _ dS )zOFound some text.

        Arguments:
         - ch -- characters read

        N)r   )r   Zchr   r   r   
charactersR   s    z_XMLparser.charactersc             C   s   d|  | }|| jkrB| j|   | jdkrttd|| jf  n2| jdkrt|| jkrttd|| jf  | j| d| _| j  dS )zLFound XML end tag.

        Arguments:
         - name -- tag name

        Zend_   zNCBIXML: Parsed:  %s %sr   zNCBIXML: Ignored: %s %sr   N)	r   r   r	   r   r   r
   r   r   pop)r   r   r   r   r   r   
endElement[   s    




z_XMLparser.endElementc             C   s&   | j dkr|S d| j| j  d  S )Nr   /)r   joinr   )r   r   r   r   r   r   x   s    
z_XMLparser._node_method_nameN)r   )	__name__
__module____qualname____doc__r   r   r   r"   r   r   r   r   r   r      s   
(	r   c               @   s  e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Zd7d8 Zd9d: Zd;d< Z d=d> Z!d?d@ Z"dAdB Z#dCdD Z$dEdF Z%dGdH Z&dIdJ Z'dKdL Z(dMdN Z)dOdP Z*dQdR Z+dSdT Z,dUdV Z-dWdX Z.dYdZ Z/d[d\ Z0d]d^ Z1d_d` Z2dadb Z3dcdd Z4dedf Z5dgdh Z6didj Z7dkdl Z8dmdn Z9dodp Z:dqdr Z;dsdt Z<dudv Z=dwdx Z>dydz Z?d{d| Z@d}d~ ZAdd ZBdd ZCdd ZDdS )BlastParsera  Parse XML BLAST data into a Record.Blast object.

    Parses XML output from BLAST (direct use discouraged).
    This (now) returns a list of Blast records.
    Historically it returned a single Blast record.
    You are expected to use this via the parse or read functions.

    All XML 'action' methods are private methods and may be:

    - ``_start_TAG`` called when the start tag is found
    - ``_end_TAG`` called when the end tag is found

    r   c             C   s   t | | tj | _| j|  | jtjjj	d | jtjjj
d | jtjjjd | jtjjjd d| _|   dS )zuInitialize the parser.

        Arguments:
         - debug - integer, amount of debug information to print

        r   r   N)r   r   xmlZsaxZmake_parserZ_parserZsetContentHandlerZ
setFeatureZhandlerZfeature_validationZfeature_namespacesZfeature_external_pesZfeature_external_ges_xml_versionreset)r   r   r   r   r   r      s    zBlastParser.__init__c             C   s&   g | _ t | _t | _d| j_dS )z>Reset all the data allowing reuse of the BlastParser() object.N)_recordsr   ZHeader_headerZ
Parameters_parametersfilter)r   r   r   r   r,      s    

zBlastParser.resetc             C   s4   |dkr|    n|dkr$|   ntd| d S )NZBlastOutputZ	BlastXML2zOInvalid root node name: %s. Root node should be either BlastOutput or BlastXML2)_setup_blast_v1_setup_blast_v2r   )r   r   r   r   r   r      s    

zBlastParser._on_root_nodec          3   C   s   | j | j| j| j| j| j| j| j| j| j	| j
| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j | j!| j"| j#| j$| j%| j&| j'| j(| j)| j*| j+| j,| j-| j.| j/| j0| j1d2| _2d S )N)2Zstart_IterationZend_IterationZend_BlastOutput_programZend_BlastOutput_versionZend_BlastOutput_referenceZend_BlastOutput_dbzend_BlastOutput_query-IDzend_BlastOutput_query-defzend_BlastOutput_query-lenzend_Iteration_query-IDzend_Iteration_query-defzend_Iteration_query-lenend_BlastOutput_hitsZend_Parameters_matrixZend_Parameters_expectzend_Parameters_sc-matchzend_Parameters_sc-mismatchzend_Parameters_gap-openzend_Parameters_gap-extendZend_Parameters_filterZ	start_HitZend_HitZ
end_Hit_idZend_Hit_defZend_Hit_accessionZend_Hit_lenZ	start_HspZend_Hsp_scorezend_Hsp_bit-scoreZend_Hsp_evaluezend_Hsp_query-fromzend_Hsp_query-tozend_Hsp_hit-fromzend_Hsp_hit-tozend_Hsp_query-framezend_Hsp_hit-frameZend_Hsp_identityZend_Hsp_positiveZend_Hsp_gapszend_Hsp_align-lenZend_Hsp_qseqZend_Hsp_hseqZend_Hsp_midlinezend_Statistics_db-numzend_Statistics_db-lenzend_Statistics_hsp-lenzend_Statistics_eff-spaceZend_Statistics_kappaZend_Statistics_lambdaZend_Statistics_entropy)3_start_blast_record_end_blast_record_set_header_application_set_header_version_set_header_reference_set_header_database_set_header_query_id_set_header_query_set_header_query_letters_set_record_query_id_set_record_query_def_set_record_query_letters_set_record_hits_set_parameters_matrix_set_parameters_expect_set_parameters_sc_match_set_parameters_sc_mismatch_set_parameters_gap_penalties_set_parameters_gap_extend_set_parameters_filter
_start_hit_end_hit
set_hit_idset_hit_defset_hit_accessionset_hit_len
_start_hsp_set_hsp_score_set_hsp_bit_score_set_hsp_e_value_set_hsp_query_start_set_hsp_query_end_set_hsp_hit_from_set_hsp_hit_to_set_hsp_query_frame_set_hsp_hit_frame_set_hsp_identity_set_hsp_positive_set_hsp_gaps_set_hsp_align_len_set_hsp_query_seq_set_hsp_subject_seq_set_hsp_midline_set_statistics_db_num_set_statistics_db_len_set_statistics_hsp_len_set_statistics_eff_space_set_statistics_kappa_set_statistics_lambda_set_statistics_entropyr   )r   r   r   r   r1      sd    zBlastParser._setup_blast_v1c          7   C   s   d| _ d| _| j| j| j| j| j| j| j| j	| j
| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j| j | j!| j"| j#| j$| j%| j&| j'| j(| j)| j*| j+| j,| j-| j.| j/| j0| j1| j2| j3| j4| j5| j6| j7d6| _8d S )Nr    )6zstart_report/Reportzend_report/Reportzend_Report/programzend_Report/versionzend_Report/referencezend_Target/dbzend_Search/query-idzend_Search/query-titlezend_Search/query-lenr3   zend_Parameters/matrixzend_Parameters/expectzend_Parameters/sc-matchzend_Parameters/sc-mismatchzend_Parameters/gap-openzend_Parameters/gap-extendzend_Parameters/filterzstart_hits/Hitzend_hits/Hitzstart_description/HitDescrzend_description/HitDescrzend_HitDescr/idzend_HitDescr/accessionzend_HitDescr/titlezend_HitDescr/taxidzend_HitDescr/scinamezend_Hit/lenzstart_hsps/Hspzend_hsps/Hspzend_Hsp/scorezend_Hsp/bit-scorezend_Hsp/evaluezend_Hsp/query-fromzend_Hsp/query-tozend_Hsp/hit-fromzend_Hsp/hit-tozend_Hsp/query-framezend_Hsp/hit-framezend_Hsp/query-strandzend_Hsp/hit-strandzend_Hsp/identityzend_Hsp/positivezend_Hsp/gapszend_Hsp/align-lenzend_Hsp/qseqzend_Hsp/hseqzend_Hsp/midlinezend_Statistics/db-numzend_Statistics/db-lenzend_Statistics/hsp-lenzend_Statistics/eff-spacezend_Statistics/kappazend_Statistics/lambdazend_Statistics/entropy)9r   r+   r4   r5   r6   r7   r8   r9   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   _start_hit_descr_item_end_hit_descr_item_end_description_id_end_description_accession_end_description_title_end_description_taxid_end_description_scinamerM   rN   _end_hsprO   rP   rQ   rR   rS   rT   rU   rV   rW   _set_hsp_query_strand_set_hsp_hit_strandrX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   r   )r   r   r   r   r2      sp    zBlastParser._setup_blast_v2c             C   s   t  | _dS )zStart interaction (PRIVATE).N)r   ZBlast_blast)r   r   r   r   r4   (  s    zBlastParser._start_blast_recordc             C   s<  | j j| j_| j j| j_| j j| j_| j j| j_| j j| j_t| jdrP| jjs\| j j| j_t| jdrp| jj	s|| j j	| j_	t| jdr| jj
s| j j
| j_
| jj
| j_| jj| j_| jj| j_| jj| j_| jj| j_| jj| j_| jj| j_| jj| j_| jj| j_| jj| j_| j| j d| _| jr8td dS )zEnd interaction (PRIVATE).queryquery_idquery_lettersNz&NCBIXML: Added Blast record to results)r.   	referencerp   dateversiondatabaseapplicationhasattrrq   rr   rs   Zquery_lengthnum_letters_in_databaseZdatabase_lengthnum_sequences_in_databaseZdatabase_sequencesr/   matrixZnum_seqs_better_egap_penaltiesr0   expectsc_matchsc_mismatchr-   r   r	   r   )r   r   r   r   r5   ,  s2    zBlastParser._end_blast_recordc             C   s   | j  | j_dS )zsBLAST program, e.g., blastp, blastn, etc. (PRIVATE).

        Save this to put on each blast record object
        N)r   upperr.   rx   )r   r   r   r   r6   d  s    z#BlastParser._set_header_applicationc             C   sh   | j  }|d | j_t|dkrd|d d dkrX|d d dkrX|d dd | j_n|d | j_dS )	zVersion number and date of the BLAST engine (PRIVATE).

        e.g. "BLASTX 2.2.12 [Aug-07-2005]" but there can also be
        variants like "BLASTP 2.2.18+" without the date.

        Save this to put on each blast record object
        r   r   r    r   []N)r   splitr.   rv   r   ru   )r   partsr   r   r   r7   k  s    
 zBlastParser._set_header_versionc             C   s   | j | j_dS )zRecord any article reference describing the algorithm (PRIVATE).

        Save this to put on each blast record object
        N)r   r.   rt   )r   r   r   r   r8     s    z!BlastParser._set_header_referencec             C   s   | j | j_dS )ziRecord the database(s) searched (PRIVATE).

        Save this to put on each blast record object
        N)r   r.   rw   )r   r   r   r   r9     s    z BlastParser._set_header_databasec             C   s   | j | j_dS )zRecord the identifier of the query (PRIVATE).

        Important in old pre 2.2.14 BLAST, for recent versions
        <Iteration_query-ID> is enough
        N)r   r.   rr   )r   r   r   r   r:     s    z BlastParser._set_header_query_idc             C   s   | j | j_dS )zRecord the definition line of the query (PRIVATE).

        Important in old pre 2.2.14 BLAST, for recent versions
        <Iteration_query-def> is enough
        N)r   r.   rq   )r   r   r   r   r;     s    zBlastParser._set_header_queryc             C   s   t | j| j_dS )zRecord the length of the query (PRIVATE).

        Important in old pre 2.2.14 BLAST, for recent versions
        <Iteration_query-len> is enough
        N)intr   r.   rs   )r   r   r   r   r<     s    z%BlastParser._set_header_query_lettersc             C   s   | j | j_dS )z-Record the identifier of the query (PRIVATE).N)r   rp   rr   )r   r   r   r   r=     s    z BlastParser._set_record_query_idc             C   s   | j | j_dS )z2Record the definition line of the query (PRIVATE).N)r   rp   rq   )r   r   r   r   r>     s    z!BlastParser._set_record_query_defc             C   s   t | j| j_dS )z)Record the length of the query (PRIVATE).N)r   r   rp   rs   )r   r   r   r   r?     s    z%BlastParser._set_record_query_lettersc             C   s   t | j| j_dS )zAHits to the database sequences, one for every sequence (PRIVATE).N)r   r   rp   Znum_hits)r   r   r   r   r@     s    zBlastParser._set_record_hitsc             C   s   | j | j_dS )z+Matrix used (-M on legacy BLAST) (PRIVATE).N)r   r/   r|   )r   r   r   r   rA     s    z"BlastParser._set_parameters_matrixc             C   s   | j | j_dS )zExpect values cutoff (PRIVATE).N)r   r/   r~   )r   r   r   r   rB     s    	z"BlastParser._set_parameters_expectc             C   s   t | j| j_dS )z@Match score for nucleotide-nucleotide comparison (-r) (PRIVATE).N)r   r   r/   r   )r   r   r   r   rC     s    z$BlastParser._set_parameters_sc_matchc             C   s   t | j| j_dS )zEMismatch penalty for nucleotide-nucleotide comparison (-r) (PRIVATE).N)r   r   r/   r   )r   r   r   r   rD     s    z'BlastParser._set_parameters_sc_mismatchc             C   s   t | j| j_dS )z"Gap existence cost (-G) (PRIVATE).N)r   r   r/   r}   )r   r   r   r   rE     s    z)BlastParser._set_parameters_gap_penaltiesc             C   s   | j jt| jf| j _dS )z"Gap extension cose (-E) (PRIVATE).N)r/   r}   r   r   )r   r   r   r   rF     s    z&BlastParser._set_parameters_gap_extendc             C   s   | j | j_dS )z(Record filtering options (-F) (PRIVATE).N)r   r/   r0   )r   r   r   r   rG     s    z"BlastParser._set_parameters_filterc             C   s`   | j jt  | jdkr$t nt | _| j j	| j g | j _
| j jd | _d| j_dS )z Start filling records (PRIVATE).r   r   r   N)rp   Z
alignmentsr   r   Z	Alignmentr+   ZDescriptionZDescriptionExt_descrZdescriptionsmultiple_alignment_hitnum_alignments)r   r   r   r   rH     s    zBlastParser._start_hitc             C   s   d| j _d| _d| _dS )zClear variables (PRIVATE).N)rp   r   r   r   )r   r   r   r   rI     s    zBlastParser._end_hitc             C   s   | j | j_| j d | j_dS )z9Record the identifier of the database sequence (PRIVATE). N)r   r   hit_idtitle)r   r   r   r   rJ     s    
zBlastParser.set_hit_idc             C   s,   | j | j_| j j| j 7  _| jj| j_dS )z>Record the definition line of the database sequence (PRIVATE).N)r   r   Zhit_defr   r   )r   r   r   r   rK     s    
zBlastParser.set_hit_defc             C   s   | j | j_| j | j_dS )z>Record the accession value of the database sequence (PRIVATE).N)r   r   	accessionr   )r   r   r   r   rL     s    
zBlastParser.set_hit_accessionc             C   s   t | j| j_dS )zRecord the length of the hit.N)r   r   r   length)r   r   r   r   rM     s    zBlastParser.set_hit_lenc             C   sV   t  | _d | j_| jj| j | j jd7  _| j	j
t   | j	j
d | _d S )Nr   r   )r   ZHSP_hsp	positivesr   Zhspsr   r   r   rp   r   ZMultipleAlignmentZ_mult_al)r   r   r   r   rN   "  s    
zBlastParser._start_hspc             C   s,   | j jr(t| j jdkr(| j  jd7  _d S )Nr   )r   )r   framer   )r   r   r   r   rm   ,  s    zBlastParser._end_hspc             C   s,   t | j| j_| jjdkr(t | j| j_dS )z&Record the raw score of HSP (PRIVATE).N)floatr   r   Zscorer   )r   r   r   r   rO   1  s    zBlastParser._set_hsp_scorec             C   s,   t | j| j_| jjdkr(t | j| j_dS )z&Record the Bit score of HSP (PRIVATE).N)r   r   r   bitsr   )r   r   r   r   rP   7  s    zBlastParser._set_hsp_bit_scorec             C   s,   t | j| j_| jjdkr(t | j| j_dS )z-Record the expect value of the HSP (PRIVATE).N)r   r   r   r~   r   e)r   r   r   r   rQ   =  s    zBlastParser._set_hsp_e_valuec             C   s   t | j| j_dS )zEOffset of query at the start of the alignment (one-offset) (PRIVATE).N)r   r   r   Zquery_start)r   r   r   r   rR   C  s    z BlastParser._set_hsp_query_startc             C   s   t | j| j_dS )zCOffset of query at the end of the alignment (one-offset) (PRIVATE).N)r   r   r   Z	query_end)r   r   r   r   rS   G  s    zBlastParser._set_hsp_query_endc             C   s   t | j| j_dS )zLOffset of the database at the start of the alignment (one-offset) (PRIVATE).N)r   r   r   Zsbjct_start)r   r   r   r   rT   K  s    zBlastParser._set_hsp_hit_fromc             C   s   t | j| j_dS )zJOffset of the database at the end of the alignment (one-offset) (PRIVATE).N)r   r   r   Z	sbjct_end)r   r   r   r   rU   O  s    zBlastParser._set_hsp_hit_toc             C   s:   t | j}|f| j_| jjdkr6|dkr,dndf| j_dS )z+Frame of the query if applicable (PRIVATE).BLASTNr   PlusMinusN)r   r   r   r   r.   rx   strand)r   vr   r   r   rV   [  s    

z BlastParser._set_hsp_query_framec             C   sh   t | j}t| jjdkr(d|f| j_n| j j|f7  _| jjdkrd| j j|dkrZdndf7  _dS )z7Frame of the database sequence if applicable (PRIVATE).r   r   r   r   N)r   r   r   r   r   r.   rx   r   )r   r   r   r   r   rW   b  s    
zBlastParser._set_hsp_hit_framec             C   s4   | j f| j_| jjdkr0| j dkr&dndf| j_dS )z+Frame of the query if applicable (PRIVATE).r   r   r   r   N)r   r   r   r.   rx   r   )r   r   r   r   rn   l  s    z!BlastParser._set_hsp_query_strandc             C   sD   | j  j| jf7  _| jjdkr@| j  j| jdkr6dndf7  _dS )z7Frame of the database sequence if applicable (PRIVATE).r   r   r   r   N)r   r   r   r.   rx   r   )r   r   r   r   ro   r  s    zBlastParser._set_hsp_hit_strandc             C   s*   t | j}|| j_| jjdkr&|| j_dS )z;Record the number of identities in the alignment (PRIVATE).N)r   r   r   Z
identitiesr   )r   r   r   r   r   rX   x  s    
zBlastParser._set_hsp_identityc             C   s   t | j| j_dS )zVRecord the number of positive (conservative) substitutions in the alignment (PRIVATE).N)r   r   r   r   )r   r   r   r   rY     s    zBlastParser._set_hsp_positivec             C   s   t | j| j_dS )z5Record the number of gaps in the alignment (PRIVATE).N)r   r   r   Zgaps)r   r   r   r   rZ     s    zBlastParser._set_hsp_gapsc             C   s   t | j| j_dS )z-Record the length of the alignment (PRIVATE).N)r   r   r   Zalign_length)r   r   r   r   r[     s    zBlastParser._set_hsp_align_lenc             C   s   | j | j_dS )z4Record the alignment string for the query (PRIVATE).N)r   r   rq   )r   r   r   r   r\     s    zBlastParser._set_hsp_query_seqc             C   s   | j | j_dS )z7Record the alignment string for the database (PRIVATE).N)r   r   sbjct)r   r   r   r   r]     s    z BlastParser._set_hsp_subject_seqc             C   sF   | j | j_t| jjt| jjks&tt| jjt| jjksBtdS )zBRecord the middle line as normally seen in BLAST report (PRIVATE).N)r   r   matchr   rq   AssertionErrorr   )r   r   r   r   r^     s    
zBlastParser._set_hsp_midlinec             C   s   t | j| j_dS )z9Record the number of sequences in the database (PRIVATE).N)r   r   rp   r{   )r   r   r   r   r_     s    z"BlastParser._set_statistics_db_numc             C   s   t | j| j_dS )z7Record the number of letters in the database (PRIVATE).N)r   r   rp   rz   )r   r   r   r   r`     s    z"BlastParser._set_statistics_db_lenc             C   s   t | j| j_dS )z*Record the effective HSP length (PRIVATE).N)r   r   rp   Zeffective_hsp_length)r   r   r   r   ra     s    z#BlastParser._set_statistics_hsp_lenc             C   s   t | j| j_dS )z,Record the effective search space (PRIVATE).N)r   r   rp   Zeffective_search_space)r   r   r   r   rb     s    z%BlastParser._set_statistics_eff_spacec             C   s   t | j| j_dS )z&Karlin-Altschul parameter K (PRIVATE).N)r   r   rp   	ka_params)r   r   r   r   rc     s    z!BlastParser._set_statistics_kappac             C   s   t | j| jjf| j_dS )z+Karlin-Altschul parameter Lambda (PRIVATE).N)r   r   rp   r   )r   r   r   r   rd     s    z"BlastParser._set_statistics_lambdac             C   s   | j jt| jf | j _dS )z&Karlin-Altschul parameter H (PRIVATE).N)rp   r   r   r   )r   r   r   r   re     s    z#BlastParser._set_statistics_entropyc             C   s   t  | _dS )z#XML v2. Start hit description item.N)r   ZDescriptionExtItem_hit_descr_item)r   r   r   r   rf     s    z!BlastParser._start_hit_descr_itemc             C   s.   | j | j | jjs$t| j| j_d| _dS )z#XML v2. Start hit description item.N)r   Zappend_itemr   r   r   str)r   r   r   r   rg     s    zBlastParser._end_hit_descr_itemc             C   s    | j | j_| jjs| j | j_dS )z9XML v2. The identifier of the database sequence(PRIVATE).N)r   r   idr   r   )r   r   r   r   rh     s    
zBlastParser._end_description_idc             C   s&   | j | j_t| jdds"| j | j_dS )z?XML v2. The accession value of the database sequence (PRIVATE).r   N)r   r   r   getattrr   )r   r   r   r   ri     s    
z&BlastParser._end_description_accessionc             C   s   | j | j_dS )z,XML v2. The hit description title (PRIVATE).N)r   r   r   )r   r   r   r   rj     s    z"BlastParser._end_description_titlec             C   s,   yt | j| j_W n tk
r&   Y nX d S )N)r   r   r   Ztaxidr   )r   r   r   r   rk     s    z"BlastParser._end_description_taxidc             C   s   | j | j_d S )N)r   r   Zsciname)r   r   r   r   rl     s    z$BlastParser._end_description_scinameN)r   )Er%   r&   r'   r(   r   r,   r   r1   r2   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rm   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rn   ro   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   r   r   r   r   r)   ~   s   
6<8	

r)   c             C   sd   t | |}yt|}W n tk
r4   tddY nX yt| tdW n tk
r^   Y nX |S )aO  Return a single Blast record (assumes just one query).

    Uses the BlastParser internally.

    This function is for use when there is one and only one BLAST
    result in your XML file.

    Use the Bio.Blast.NCBIXML.parse() function if you expect more than
    one BLAST record (i.e. if you have more than one query sequence).
    zNo records found in handleNz$More than one record found in handle)parsenextStopIterationr   )handler   iteratorrecordr   r   r   read  s    
r   c             c   s  ddl m} d}d}d}d}d}d}| |}	t|	trHd}d	}d
}d
}|	sTtdx|	r|	|std||	dd f | }
t|}|j	|
_
|j|
_|j|
_|
|	d x*|jr|jd }|jdd |_|V  qW x|| | d }	}|	s
|
|d P | |}|| |	| krR|
|	d xt|jrN|jdV  q4W q|	| || d\}	}|| }|
|	d x|jr|jdV  qW || }	}P qW |rt|t|jdksXtt|jqXW |	rt|	|rt|t|jdkstt|jdS )a  Return an iterator a Blast record for each query.

    Incremental parser, this is an iterator that returns
    Blast records.  It uses the BlastParser internally.

    handle - file handle to and XML file to parse
    debug - integer, amount of debug information to print

    This is a generator function that returns multiple Blast records
    objects - one for each query sequence given to blast.  The file
    is read incrementally, returning complete records as they are read
    in.

    Should cope with new BLAST 2.2.14+ which gives a single XML file
    for multiple query records.

    Should also cope with XML output from older versions BLAST which
    gave multiple XML files concatenated together (giving a single file
    which strictly speaking wasn't valid XML).
    r   )expati   
   z<?xml
r   s   <?xml   
    zYour XML file was emptyz5Your XML file did not start with %r... but instead %rN   Fr   T)Zxml.parsersr   r   
isinstancebytesr   
startswithZParserCreater)   r   ZStartElementHandlerr"   ZEndElementHandlerr   ZCharacterDataHandlerZParser-   r!   r   r   r   )r   r   r   ZBLOCKZMARGINZ	XML_STARTZNEW_LINEZNULLpendingtextZexpat_parserZblast_parserr   r   r   r   r     sh    









 r   )r   )r   )r(   Z	Bio.Blastr   Zxml.saxr*   Zxml.sax.handlerr   r   r)   r   r   r   r   r   r   <module>   s   j    e
