B
    ‰°bÞ  ã               @   s0   d Z dd„ Zdd„ ZG dd„ deƒZdd„ Zd	S )
a)  Parser for the cellosaurus.txt file from ExPASy.

See https://web.expasy.org/cellosaurus/

Tested with the release of Version 18 (July 2016).

Functions:
 - read       Reads a file containing one cell line entry
 - parse      Reads a file containing multiple cell line entries

Classes:
 - Record     Holds cell line data.

Examples
--------
You need to download the Cellosaurus database for this examples to
run, e.g. from ftp://ftp.expasy.org/databases/cellosaurus/cellosaurus.txt

    >> from Bio.ExPASy import cellosaurus
    >> with open('cellosaurus.txt') as handle:
    ...    records = cellosaurus.parse(handle)
    ...    for record in records:
    ...        if 'Homo sapiens' in record['OX'][0]:
    ...            print(record['ID'])
    ...
    #15310-LN
    #W7079
    (L)PC6
    00136
    ...

c             c   s   xt | ƒ}|sP |V  qW dS )z§Parse cell line records.

    This function is for parsing cell line files containing multiple
    records.

    Arguments:
     - handle   - handle to the file.

    N)Ú__read)ÚhandleÚrecord© r   ú5lib/python3.7/site-packages/Bio/ExPASy/cellosaurus.pyÚparse(   s
    
r   c             C   s    t | ƒ}|  ¡ }|rtdƒ‚|S )z«Read one cell line record.

    This function is for parsing cell line files containing
    exactly one record.

    Arguments:
     - handle   - handle to the file.

    z$More than one cell line record found)r   ÚreadÚ
ValueError)r   r   Z	remainderr   r   r   r   9   s
    
r   c               @   s(   e Zd ZdZdd„ Zdd„ Zdd„ ZdS )	ÚRecorda{  Holds information from an ExPASy Cellosaurus record as a Python dictionary.

    Each record contains the following keys:

     ---------  ---------------------------     ----------------------
     Line code  Content                         Occurrence in an entry
     ---------  ---------------------------     ----------------------
     ID         Identifier (cell line name)     Once; starts an entry
     AC         Accession (CVCL_xxxx)           Once
     AS         Secondary accession number(s)   Optional; once
     SY         Synonyms                        Optional; once
     DR         Cross-references                Optional; once or more
     RX         References identifiers          Optional: once or more
     WW         Web pages                       Optional; once or more
     CC         Comments                        Optional; once or more
     ST         STR profile data                Optional; once or more
     DI         Diseases                        Optional; once or more
     OX         Species of origin               Once or more
     HI         Hierarchy                       Optional; once or more
     OI         Originate from same individual  Optional; once or more
     SX         Sex (gender) of cell            Optional; once
     CA         Category                        Once
     //         Terminator                      Once; ends an entry

    c             C   s†   t  | ¡ d| d< d| d< d| d< d| d< g | d< g | d< g | d< g | d	< g | d
< g | d< g | d< g | d< g | d< d| d< d| d< dS )zInitialize the class.Ú ÚIDÚACÚASÚSYÚDRÚRXÚWWÚCCÚSTÚDIÚOXÚHIÚOIÚSXÚCAN)ÚdictÚ__init__)Úselfr   r   r   r   f   s     
zRecord.__init__c             C   sP   | d r@| d r*d| j j| d | d f S d| j j| d f S nd| j j S d S )Nr   r   z%s (%s, %s)z%s (%s)z%s ( ))Ú	__class__Ú__name__)r   r   r   r   Ú__repr__y   s
    zRecord.__repr__c             C   s  d| d  }|d| d  7 }|d| d  7 }|d| d  7 }|d	t | d
 ƒ 7 }|dt | d ƒ 7 }|dt | d ƒ 7 }|dt | d ƒ 7 }|dt | d ƒ 7 }|dt | d ƒ 7 }|dt | d ƒ 7 }|dt | d ƒ 7 }|dt | d ƒ 7 }|d| d  7 }|d| d  7 }|S )NzID: r   z AC: r   z AS: r   z SY: r   z DR: r   z RX: r   z WW: r   z CC: r   z ST: r   z DI: r   z OX: r   z HI: r   z OI: r   z SX: r   z CA: r   )Úrepr)r   Úoutputr   r   r   Ú__str__‚   s     zRecord.__str__N)r   Ú
__module__Ú__qualname__Ú__doc__r   r   r"   r   r   r   r   r	   K   s   	r	   c             C   sÎ   d }x¸| D ]°}|d d… |dd …   ¡  }}|dkrDtƒ }||d< q
|dkr^||  |7  < q
|dkrv||  |¡ q
|dkr¨| d¡\}}|d  | ¡ | ¡ f¡ q
|dkr
|r
|S q
q
W |rÊtd	ƒ‚d S )
Né   é   r   )r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   ú;z//zUnexpected end of stream)Úrstripr	   ÚappendÚsplitÚstripr   )r   r   ÚlineÚkeyÚvalueÚkÚvr   r   r   r   ˜   s&    

r   N)r%   r   r   r   r	   r   r   r   r   r   Ú<module>%   s   M