B
    b+R                 @   s   d Z ddlmZmZ ddlmZmZ ddlmZ G dd deZ	dd Z
d	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZG dd deZdS ) a  Implementations of Biopython-like Seq objects on top of BioSQL.

This allows retrieval of items stored in a BioSQL database using
a biopython-like SeqRecord and Seq interface.

Note: Currently we do not support recording per-letter-annotations
(like quality scores) in BioSQL.
    )SeqSequenceDataAbstractBaseClass)	SeqRecord_RestrictedDict)
SeqFeaturec                   s6   e Zd ZdZdZd
 fdd	Zdd Zdd	 Z  ZS )_BioSQLSequenceDataz9Retrieves sequence data from a BioSQL database (PRIVATE).)
primary_idadaptor_lengthstartr   c                s&   || _ || _|| _|| _t   dS )aU  Create a new _BioSQLSequenceData object referring to a BioSQL entry.

        You wouldn't normally create a _BioSQLSequenceData object yourself,
        this is done for you when retrieving a DBSeqRecord object from the
        database, which creates a Seq object using a _BioSQLSequenceData
        instance as the data provider.
        N)r   r	   r
   r   super__init__)selfr   r	   r   length)	__class__ ,lib/python3.7/site-packages/BioSQL/BioSeq.pyr      s
    z_BioSQLSequenceData.__init__c             C   s   | j S )z"Return the length of the sequence.)r
   )r   r   r   r   __len__-   s    z_BioSQLSequenceData.__len__c       
      C   s.  t |tr:|| j\}}}tt|||}|dkrdS nd|}|dk rb|| j7 }|dk rtt|n|| jkrtt|| j| j	| j
| | j
| d }t|S |dkr|dkr|| jkr| j| j	| j
| j
| j }|dS t| j	| j| j
| |S n2| j| j	| j
| | j
| }	|	dd| dS dS )z@Return a subsequence as a bytes or a _BioSQLSequenceData object.r          ASCIIN)
isinstancesliceindicesr
   lenrange
IndexErrorr	   Zget_subseq_as_stringr   r   ordencoder   )
r   keyr   endstepsizeicZsequenceZfullr   r   r   __getitem__1   s2    




z_BioSQLSequenceData.__getitem__)r   r   )	__name__
__module____qualname____doc__	__slots__r   r   r%   __classcell__r   r   )r   r   r      s
   r   c             C   s8   |  d|f}|sd S t|dks&t|d \}t|S )Nz5SELECT length FROM biosequence WHERE bioentry_id = %sr   r   )execute_and_fetchallr   AssertionErrorint)r	   r   seqsgiven_lengthr   r   r   _retrieve_seq_lenZ   s    

r1   c       	      C   s   |  d|f}|sd S t|dks&t|d \}}}y$t|}t|}||ksRtd}W nn tk
r   |d kstt|  d|f}t|dkst|d \}}}|d ks|dkstt|}d}~Y nX ~|rt|| d|d}t|S td |d	S d S )
NzLSELECT alphabet, length, length(seq) FROM biosequence WHERE bioentry_id = %sr   r   TzDSELECT alphabet, length, seq FROM biosequence WHERE bioentry_id = %s F)r   r   )r   )r,   r   r-   r.   	TypeErrorr   r   )	r	   r   r/   Zmoltyper0   r   Zhave_seqseqdatar   r   r   _retrieve_seqf   s8    r6   c             C   sZ   g }|  d|f}xB|D ]:\}}}|r<|dkr<d||f }n|}|d||f  qW |S )zBRetrieve the database cross references for the sequence (PRIVATE).z{SELECT dbname, accession, version FROM bioentry_dbxref join dbxref using (dbxref_id) WHERE bioentry_id = %s ORDER BY "rank"0z%s.%sz%s:%s)r,   append)r	   r   _dbxrefsdbxrefsdbname	accessionversionvr   r   r   _retrieve_dbxrefs   s    r?   c             C   s  d}|  ||f}g }x|D ]\}}}|  d|f}i }	x"|D ]\}
}|	|
g | qBW |  d|f}x.|D ]&\}
}d|
|f }|	dg | qtW |  d|f}g }x|D ]\}}}}|r|d8 }|dkrd }|d	krtd
||f |d k	r:|d k	r:||k r:dd l}ddlm} |d|||f | |d krLt	 }|d kr^t	 }|||||f qW |  d|f}i }xR|D ]J\}}}}|r|dkrd||f }n|}|dkrd }||f||< qW tj|d}||_
|	|_t|dkrnt|dkr^|d \}}}}t| ||_||d\}}t|||_||_||_||_ng }xD|D ]<}|\}}}}||d\}}|tj|||||d qhW dd |D }t|dkrd|kr|d d d }t|d|_|| qW |S )NzSELECT seqfeature_id, type.name, "rank" FROM seqfeature join term type on (type_term_id = type.term_id) WHERE bioentry_id = %s ORDER BY "rank"zvSELECT name, value FROM seqfeature_qualifier_value  join term using (term_id) WHERE seqfeature_id = %s ORDER BY "rank"zSELECT dbxref.dbname, dbxref.accession FROM dbxref join seqfeature_dbxref using (dbxref_id) WHERE seqfeature_dbxref.seqfeature_id = %s ORDER BY "rank"z%s:%sZdb_xrefzeSELECT location_id, start_pos, end_pos, strand FROM location WHERE seqfeature_id = %s ORDER BY "rank"r   r   )r   Nz8Invalid strand %s found in database for seqfeature_id %s)BiopythonWarningz<Inverted location start/end (%i and %i) for seqfeature_id %szsSELECT location_id, dbname, accession, version FROM location join dbxref using (dbxref_id) WHERE seqfeature_id = %sr7   z%s.%sr2   )type)NN)strandrefref_dbc             S   s   h | ]
}|j qS r   )rC   ).0lr   r   r   	<setcomp>  s    z%_retrieve_features.<locals>.<setcomp>r@   join)r,   
setdefaultr8   
ValueErrorwarningsBiorA   warnr   ZUnknownPositionZ_seqfeature_id
qualifiersr   "_retrieve_location_qualifier_valueZlocation_operatorgetFeatureLocationlocationrC   rE   rD   ZCompoundLocation)r	   r   ZsqlresultsZseq_feature_listZseqfeature_idZseqfeature_typeZseqfeature_rankqvsrO   Zqv_nameZqv_valuevalueZ	locationslocation_idr   r    rC   rL   rA   Zremote_resultslookupr;   r<   r=   r>   ZfeatureZlocsrS   Zstrandsr   r   r   _retrieve_features   s    





rY   c             C   s0   |  d|f}y|d S  tk
r*   dS X d S )NzASELECT value FROM location_qualifier_value WHERE location_id = %sr   r2   )execute_and_fetch_col0r   )r	   rW   rV   r   r   r   rP   -  s    rP   c             C   sZ   i }| t| | | t| | | t| | | t| || | t| | |S )N)update_retrieve_alphabet_retrieve_qualifier_value_retrieve_reference_retrieve_taxon_retrieve_comment)r	   r   taxon_idannotationsr   r   r   _retrieve_annotations8  s    rc   c             C   s   |  d|f}t|dkst|d }t|dks6t|d }|dkrLd}n |dkrZd}n|dkrhd}nd }|d k	r|d	|iS i S d S )
Nz7SELECT alphabet FROM biosequence WHERE bioentry_id = %sr   r   ZdnaZDNAZrnaZRNAZproteinmolecule_type)r,   r   r-   )r	   r   rT   Z	alphabetsZalphabetrd   r   r   r   r\   B  s     
r\   c             C   sb   |  d|f}i }xJ|D ]B\}}|dkr.d}n|dkr<d}n|dkrHd}||g | qW |S )NzqSELECT name, value FROM bioentry_qualifier_value JOIN term USING (term_id) WHERE bioentry_id = %s ORDER BY "rank"keywordkeywordsZdate_changedZdateZsecondary_accessionZ
accessions)r,   rJ   r8   )r	   r   rU   rO   namerV   r   r   r   r]   X  s    r]   c             C   s   |  d|f}g }x|D ]\}}}}}}	}
t }|d k	sB|d k	rb|d k	rR|d8 }t||g|_|rl||_|rv||_||_|	dkr|
|_n|	dkr|
|_	|
| qW |rd|iS i S d S )NzSELECT start_pos, end_pos,  location, title, authors, dbname, accession FROM bioentry_reference JOIN reference USING (reference_id) LEFT JOIN dbxref USING (dbxref_id) WHERE bioentry_id = %s ORDER BY "rank"r   ZPUBMEDZMEDLINE
references)r,   r   Z	ReferencerR   rS   authorstitleZjournalZ	pubmed_idZ
medline_idr8   )r	   r   Zrefsrh   r   r    rS   rj   ri   r;   r<   Z	referencer   r   r   r^   m  s.    r^   c             C   s   i }|  d|f}|r"|d |d< |  d|f}|r@|d |d< |  d|f}|rr|d rr|d dkrr|d |d< g }x6|r| d	|f\}}	}
||
krP |d| |
}qxW |r||d
< |S )NzVSELECT name FROM taxon_name WHERE taxon_id = %s AND name_class = 'genbank common name'r   sourcezRSELECT name FROM taxon_name WHERE taxon_id = %s AND name_class = 'scientific name'Zorganismz3SELECT ncbi_taxon_id FROM taxon WHERE taxon_id = %sr7   Z
ncbi_taxidzSELECT taxon_name.name, taxon.node_rank, taxon.parent_taxon_id FROM taxon, taxon_name WHERE taxon.taxon_id=taxon_name.taxon_id AND taxon_name.name_class='scientific name' AND taxon.taxon_id = %staxonomy)rZ   execute_oneinsert)r	   r   ra   aZcommon_namesZscientific_namesZncbi_taxidsrl   rg   ZrankZparent_taxon_idr   r   r   r_     s6    
r_   c             C   s0   |  d|f}dd |D }|r(d|iS i S d S )NzESELECT comment_text FROM comment WHERE bioentry_id=%s ORDER BY "rank"c             S   s   g | ]}|d  qS )r   r   )rF   Zcommr   r   r   
<listcomp>  s    z%_retrieve_comment.<locals>.<listcomp>comment)r,   )r	   r   rU   Zcommentsr   r   r   r`     s    r`   c               @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zeeeed
Z	dd Z
dd Zdd Zee
eedZdd Zdd Zdd ZeeeedZdd Zdd Zdd ZeeeedZd S )!DBSeqRecordz4BioSQL equivalent of the Biopython SeqRecord object.c          	   C   sv   || _ || _| j d| jf\| _| _| _}}| _| _| _|rV|dkrVd||f | _	n|| _	t
||}t|d| _dS )a8  Create a DBSeqRecord object.

        Arguments:
         - adaptor - A BioSQL.BioSeqDatabase.Adaptor object
         - primary_id - An internal integer ID used by BioSQL

        You wouldn't normally create a DBSeqRecord object yourself,
        this is done for you when using a BioSeqDatabase object
        zSELECT biodatabase_id, taxon_id, name, accession, version, identifier, division, description FROM bioentry WHERE bioentry_id = %sr7   z%s.%s)r   N)_adaptor_primary_idrm   Z_biodatabase_id	_taxon_idrg   _identifier	_divisionZdescriptionidr1   r   Z_per_letter_annotations)r   r	   r   r<   r=   r   r   r   r   r     s    
&
zDBSeqRecord.__init__c             C   s    t | dst| j| j| _| jS )N_seq)hasattrr6   rs   rt   ry   )r   r   r   r   Z	__get_seq  s    
zDBSeqRecord.__get_seqc             C   s
   || _ d S )N)ry   )r   r4   r   r   r   Z	__set_seq  s    zDBSeqRecord.__set_seqc             C   s   | ` d S )N)ry   )r   r   r   r   Z	__del_seq  s    zDBSeqRecord.__del_seqz
Seq objectc             C   s    t | dst| j| j| _| jS )Nr9   )rz   r?   rs   rt   r9   )r   r   r   r   Z__get_dbxrefs  s    
zDBSeqRecord.__get_dbxrefsc             C   s
   || _ d S )N)r9   )r   r:   r   r   r   Z__set_dbxrefs  s    zDBSeqRecord.__set_dbxrefsc             C   s   | ` d S )N)r9   )r   r   r   r   Z__del_dbxrefs  s    zDBSeqRecord.__del_dbxrefszDatabase cross referencesc             C   s    t | dst| j| j| _| jS )N	_features)rz   rY   rs   rt   r{   )r   r   r   r   Z__get_features   s    
zDBSeqRecord.__get_featuresc             C   s
   || _ d S )N)r{   )r   featuresr   r   r   Z__set_features%  s    zDBSeqRecord.__set_featuresc             C   s   | ` d S )N)r{   )r   r   r   r   Z__del_features(  s    zDBSeqRecord.__del_featuresZFeaturesc             C   sH   t | dsBt| j| j| j| _| jr0| j| jd< | jrB| j| jd< | jS )N_annotationsZgiZdata_file_division)rz   rc   rs   rt   ru   r}   rv   rw   )r   r   r   r   Z__get_annotations-  s    
zDBSeqRecord.__get_annotationsc             C   s
   || _ d S )N)r}   )r   rb   r   r   r   Z__set_annotations8  s    zDBSeqRecord.__set_annotationsc             C   s   | ` d S )N)r}   )r   r   r   r   Z__del_annotations;  s    zDBSeqRecord.__del_annotationsZAnnotationsN)r&   r'   r(   r)   r   Z_DBSeqRecord__get_seqZ_DBSeqRecord__set_seqZ_DBSeqRecord__del_seqpropertyr4   Z_DBSeqRecord__get_dbxrefsZ_DBSeqRecord__set_dbxrefsZ_DBSeqRecord__del_dbxrefsr:   Z_DBSeqRecord__get_featuresZ_DBSeqRecord__set_featuresZ_DBSeqRecord__del_featuresr|   Z_DBSeqRecord__get_annotationsZ_DBSeqRecord__set_annotationsZ_DBSeqRecord__del_annotationsrb   r   r   r   r   rr     s(   (rr   N)r)   ZBio.Seqr   r   ZBio.SeqRecordr   r   rM   r   r   r1   r6   r?   rY   rP   rc   r\   r]   r^   r_   r`   rr   r   r   r   r   <module>   s"   @* 
'7