B
    b3              -   @   s   d Z ddlZddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ dZeddddd	d
ddddddddddddddddddddddddddddddddddddddf-ddZde_dd ZdS )zCode to invoke the NCBI BLAST server over the internet.

This module provides code to work with the WWW version of BLAST
provided by the NCBI. https://blast.ncbi.nlm.nih.gov/
    N)StringIO)urlopen)	urlencode)Request)BiopythonWarningz(https://blast.ncbi.nlm.nih.gov/Blast.cgiz(none)g      $@2   i  ZXMLc0       ?   $   C   s  dddddg}0| |0kr,t d| d|0f | rh| dkrhd} t|d	k rhd
}	d}d}d}
d}tdt d|fd|fd|fd|fd|fd|fd|	fd|
fd|fd|fd|fd|fd|fd|fd|-fd|fd|fd|fd |fd!|fd"|fd#| fd$|fd%|fd&|fd'|fd(|fd)|fd*|fd+| fd,|.fd-|/fd.|fd/|fd0|fd1g$}1d2d3 |1D }2t|2 }3t||3d4d5i}4t	|4}5t
|5\}6}7d6|!fd7|"fd8|#fd9|$fd:|%fd;|&fd<|'fd=|(fd>|)fd?|*fd@|6fdA|+fd*|fdB|,fdCg}1dDd3 |1D }2t|2 }3dE}8xt }9tj|8 |9 }:|:dFkr.t|: |9|: t_n|9t_|8dGk rL|tkrLdG}8t||3d4d5i}4t	|4}5|5  };|;dHkr~qdI|;krP |;dI}<|;dJ|<}=|;|<tdI |=  }>|> dKkrP qW t|;S )La 	  BLAST search using NCBI's QBLAST server or a cloud service provider.

    Supports all parameters of the old qblast API for Put and Get.

    Please note that NCBI uses the new Common URL API for BLAST searches
    on the internet (http://ncbi.github.io/blast-cloud/dev/api.html). Thus,
    some of the parameters used by this function are not (or are no longer)
    officially supported by NCBI. Although they are still functioning, this
    may change in the future.

    The Common URL API (http://ncbi.github.io/blast-cloud/dev/api.html) allows
    doing BLAST searches on cloud servers. To use this feature, please set
    ``url_base='http://host.my.cloud.service.provider.com/cgi-bin/blast.cgi'``
    and ``format_object='Alignment'``. For more details, please see
    https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=CloudBlast

    Some useful parameters:

     - program        blastn, blastp, blastx, tblastn, or tblastx (lower case)
     - database       Which database to search against (e.g. "nr").
     - sequence       The sequence to search.
     - ncbi_gi        TRUE/FALSE whether to give 'gi' identifier.
     - descriptions   Number of descriptions to show.  Def 500.
     - alignments     Number of alignments to show.  Def 500.
     - expect         An expect value cutoff.  Def 10.0.
     - matrix_name    Specify an alt. matrix (PAM30, PAM70, BLOSUM80, BLOSUM45).
     - filter         "none" turns off filtering.  Default no filtering
     - format_type    "HTML", "Text", "ASN.1", or "XML".  Def. "XML".
     - entrez_query   Entrez query to limit Blast search
     - hitlist_size   Number of hits to return. Default 50
     - megablast      TRUE/FALSE whether to use MEga BLAST algorithm (blastn only)
     - short_query    TRUE/FALSE whether to adjust the search parameters for a
                      short query sequence. Note that this will override
                      manually set parameters like word size and e value. Turns
                      off when sequence length is > 30 residues. Default: None.
     - service        plain, psi, phi, rpsblast, megablast (lower case)

    This function does no checking of the validity of the parameters
    and passes the values to the server as is.  More help is available at:
    https://ncbi.github.io/blast-cloud/dev/api.html

    ZblastnZblastpZblastxZtblastnZtblastxz+Program specified is %s. Expected one of %sz, N   i        z"SHORT_QUERY_ADJUST" is incorrectly implemented (by NCBI) for blastn. We bypass the problem by manually adjusting the search parameters. Thus, results may slightly differ from web page searches.ZAUTO_FORMATZCOMPOSITION_BASED_STATISTICSZDATABASEZDB_GENETIC_CODEZ	ENDPOINTSZENTREZ_QUERYZEXPECTZFILTERZGAPCOSTSZGENETIC_CODEZHITLIST_SIZEZI_THRESHZLAYOUTZ
LCASE_MASKZ	MEGABLASTZMATRIX_NAMEZNUCL_PENALTYZNUCL_REWARDZOTHER_ADVANCEDZ
PERC_IDENTZPHI_PATTERNZPROGRAMZQUERYZ
QUERY_FILEZQUERY_BELIEVE_DEFLINEZ
QUERY_FROMZQUERY_TOZSEARCHSP_EFFZSERVICEZSHORT_QUERY_ADJUSTZTEMPLATE_TYPEZTEMPLATE_LENGTHZ	THRESHOLDZUNGAPPED_ALIGNMENTZ	WORD_SIZE)CMDZPutc             S   s   g | ]}|d  dk	r|qS )r
   N ).0xr   r   0lib/python3.7/site-packages/Bio/Blast/NCBIWWW.py
<listcomp>   s    zqblast.<locals>.<listcomp>z
User-AgentZBiopythonClientZ
ALIGNMENTSZALIGNMENT_VIEWZDESCRIPTIONSZENTREZ_LINKS_NEW_WINDOWZ
EXPECT_LOWZEXPECT_HIGHZFORMAT_ENTREZ_QUERYZFORMAT_OBJECTZFORMAT_TYPEZNCBI_GIZRIDZRESULTS_FILEZSHOW_OVERVIEW)r   ZGetc             S   s   g | ]}|d  dk	r|qS )r
   Nr   )r   r   r   r   r   r      s       r   <   z

zStatus=
ZREADY)
ValueErrorjoinlenwarningswarnr   r   encoder   r   _parse_qblast_ref_pagetimeqblast	_previousZsleepNCBI_BLAST_URLreaddecodeindexstripupperr   )?ZprogramZdatabaseZsequenceZurl_baseZauto_formatZcomposition_based_statisticsZdb_genetic_codeZ	endpointsZentrez_queryZexpectfilterZgapcostsZgenetic_codeZhitlist_sizeZi_threshZlayoutZ
lcase_maskZmatrix_nameZnucl_penaltyZnucl_rewardZother_advancedZ
perc_identZphi_patternZ
query_fileZquery_believe_deflineZ
query_fromZquery_toZsearchsp_effZserviceZ	thresholdZungapped_alignmentZ	word_sizeZshort_queryZ
alignmentsZalignment_viewZdescriptionsZentrez_links_new_windowZ
expect_lowZexpect_highZformat_entrez_queryZformat_objectZformat_typeZncbi_giZresults_fileZshow_overviewZ	megablastZtemplate_typeZtemplate_lengthZprogramsZ
parametersZquerymessageZrequesthandleridrtoeZdelayZcurrentwaitZresultsijZstatusr   r   r   r   !   s    \	




r   c             C   s  |    }|d}|dkr$d}n$|d|}||td |  }|d}|dkr`d}n$|d|}||td |  }|s|s|d}|dkr||td d  }|ddd	 ddd	  }|rtd
| |d}|dkrH||td d  }|ddd	 ddd	  }|rHtd
| |d}|dkr||d ddd	 ddd	  }td
| tdn&|std| n|std| y|t|fS  tk
r   td| dY nX dS )zExtract a tuple of RID, RTOE from the 'please wait' page (PRIVATE).

    The NCBI FAQ pages use TOE for 'Time of Execution', so RTOE is probably
    'Request Time of Execution' and RID would be 'Request Identifier'.
    zRID =Nr   zRTOE =z<div class="error msInf">z</div>r
   r   zError message from NCBI: %sz<p class="error">z</p>zMessage ID#<zNo RID and no RTOE found in the 'please wait' page, there was probably an error in your request but we could not extract a helpful error message.z<No RID found in the 'please wait' page. (although RTOE = %r)z<No RTOE found in the 'please wait' page. (although RID = %r)z6A non-integer RTOE found in the 'please wait' page, %r)r   r    findr   r"   splitr   int)r&   sr*   r'   r+   r(   msgr   r   r   r     sT    


 

 

(
r   )__doc__r   ior   r   Zurllib.requestr   Zurllib.parser   r   ZBior   r   r   r   r   r   r   r   r   <module>   sp    B