B
    b-                 @   s.   d Z dd Zdd ZG dd dZdd Zd	S )
a~  Parser for the prosite dat file from Prosite at ExPASy.

See https://www.expasy.org/prosite/

Tested with:
 - Release 20.43, 10-Feb-2009
 - Release 2017_03 of 15-Mar-2017.

Functions:
 - read                  Reads a Prosite file containing one Prosite record
 - parse                 Iterates over records in a Prosite file.

Classes:
 - Record                Holds Prosite data.

c             c   s   xt | }|sP |V  qW dS )zParse Prosite records.

    This function is for parsing Prosite files containing multiple
    records.

    Arguments:
     - handle   - handle to the file.

    N)__read)handlerecord r   1lib/python3.7/site-packages/Bio/ExPASy/Prosite.pyparse   s
    
r   c             C   s    t | }|  }|rtd|S )zRead one Prosite record.

    This function is for parsing Prosite files containing
    exactly one record.

    Arguments:
     - handle   - handle to the file.

    z"More than one Prosite record found)r   read
ValueError)r   r   Z	remainderr   r   r   r   +   s
    
r   c               @   s   e Zd ZdZdd ZdS )Recordag  Holds information from a Prosite record.

    Main attributes:
     - name           ID of the record.  e.g. ADH_ZINC
     - type           Type of entry.  e.g. PATTERN, MATRIX, or RULE
     - accession      e.g. PS00387
     - created        Date the entry was created.  (MMM-YYYY for releases
       before January 2017, DD-MMM-YYYY since January 2017)
     - data_update    Date the 'primary' data was last updated.
     - info_update    Date data other than 'primary' data was last updated.
     - pdoc           ID of the PROSITE DOCumentation.
     - description    Free-format description.
     - pattern        The PROSITE pattern.  See docs.
     - matrix         List of strings that describes a matrix entry.
     - rules          List of rule definitions (from RU lines).  (strings)
     - prorules       List of prorules (from PR lines). (strings)

    NUMERICAL RESULTS:
     - nr_sp_release  SwissProt release.
     - nr_sp_seqs     Number of seqs in that release of Swiss-Prot. (int)
     - nr_total       Number of hits in Swiss-Prot.  tuple of (hits, seqs)
     - nr_positive    True positives.  tuple of (hits, seqs)
     - nr_unknown     Could be positives.  tuple of (hits, seqs)
     - nr_false_pos   False positives.  tuple of (hits, seqs)
     - nr_false_neg   False negatives.  (int)
     - nr_partial     False negatives, because they are fragments. (int)

    COMMENTS:
     - cc_taxo_range  Taxonomic range.  See docs for format
     - cc_max_repeat  Maximum number of repetitions in a protein
     - cc_site        Interesting site.  list of tuples (pattern pos, desc.)
     - cc_skip_flag   Can this entry be ignored?
     - cc_matrix_type
     - cc_scaling_db
     - cc_author
     - cc_ft_key
     - cc_ft_desc
     - cc_version     version number (introduced in release 19.0)

    The following are all lists if tuples (swiss-prot accession, swiss-prot name).

    DATA BANK REFERENCES:
     - dr_positive
     - dr_false_neg
     - dr_false_pos
     - dr_potential   Potential hits, but fingerprint region not yet available.
     - dr_unknown     Could possibly belong
     - pdb_structs    List of PDB entries.

    c             C   s   d| _ d| _d| _d| _d| _d| _d| _d| _d| _g | _	g | _
g | _g | _d| _d| _d| _d| _d| _d| _d| _d| _d| _d| _g | _d| _g | _g | _g | _g | _g | _g | _dS )zInitialize the class. )NNN)nametype	accessioncreateddata_updateinfo_updatepdocdescriptionpatternmatrixrulesprorulespostprocessingnr_sp_release
nr_sp_seqsnr_totalnr_positive
nr_unknownnr_false_posnr_false_neg
nr_partialcc_taxo_rangecc_max_repeatcc_sitecc_skip_flagdr_positivedr_false_negdr_false_posdr_potential
dr_unknownpdb_structs)selfr   r   r   __init__q   s>    zRecord.__init__N)__name__
__module____qualname____doc__r+   r   r   r   r   r	   =   s   2r	   c             C   s  dd l }d }x| D ]}|d d |dd    }}|dkrt }|d}t|dkrhtd| |d |_|d d|_q|d	kr|d
|_q|dkrP|dd}|d 	dr|d 
ddd |_ntd| |d 	dr|d 
ddd |_ntd| |d 	drB|d 
ddd |_ntd| q|dkrb||_q|dkr|| j|7  _q|dkr|j| q|dkr|j|d
 q|dkr|j| q|dkr|d
}x|D ]}|sqdd |dD \}	}
|	dkr8|
d\}}||_t||_n|	dkrNt|
|_n|	dkrdt|
|_n|	dkr|d|
}|std |
|f ttt| }|	d!kr||_n4|	d"kr||_ n"|	d#kr||_!n|	d$kr||_"ntd%|	|f qW q|d&krT|d
}x|D ].}|r|d d' d(kr@q|#ddkrTqd)d |dD \}	}
|	d*kr~|
|_$n|	d+kr|
|_%n|	d,kr|
d\}}|j&t||f n|	d-kr|
|_'n||	d.kr|
|_(nj|	d/kr|
|_)nX|	d0kr|
|_*nF|	d1kr|
|_+n4|	d2kr*|
|_,n"|	d3kr<|
|_-ntd%|	|f qW q|d4kr6|d
}x|D ]}|s|qnd5d |dD \}}}|d6kr|j.||f n||d7kr|j/||f n`|d8kr|j0||f nD|d9kr|j1||f n(|d:kr"|j2||f ntd;| qnW q|d<krl| }x~|D ]}|j3|d
 qNW q|d=kr|d
}|j4| q|d>kr|d
|_5q|d?kr|sqP qtd@| qW d S |stdA|S )BN          ZIDz; z)I don't understand identification line
%s   .ZAC;ZDT)z
 (CREATED)z CREATED zI don't understand date line
%s)z (DATA UPDATE)z DATA UPDATE)z (INFO UPDATE)z INFO UPDATEZDEZPAZMAZPPZRUZNRc             S   s   g | ]}|  qS r   )lstrip).0wordr   r   r   
<listcomp>   s    z__read.<locals>.<listcomp>=z/RELEASE,z
/FALSE_NEGz/PARTIAL)z/TOTALz	/POSITIVEz/UNKNOWNz
/FALSE_POSz(\d+)\((\d+)\)z!Broken data %s in comment line
%rz/TOTALz	/POSITIVEz/UNKNOWNz
/FALSE_POSz"Unknown qual %s in comment line
%rZCC   zAutomatic scalingc             S   s   g | ]}|  qS r   )r7   )r8   r9   r   r   r   r:      s    z/TAXO-RANGEz/MAX-REPEATz/SITEz
/SKIP-FLAGz/MATRIX_TYPEz/SCALING_DBz/AUTHORz/FT_KEYz/FT_DESCz/VERSIONZDRc             S   s   g | ]}|  qS r   )strip)r8   r9   r   r   r   r:     s    TFNP?zI don't understand type flag %sZ3DZPRZDOz//zUnknown keyword %s foundzUnexpected end of stream.)6rerstripr	   splitlenr   r   r   r   endswithrsplitr   r   r   r   r   r   appendr   extendr   r   intr   r   r   match	Exceptiontuplemapgroupsr   r   r   r   countr    r!   r"   r#   Zcc_matrix_typeZcc_scaling_dbZ	cc_authorZ	cc_ft_keyZ
cc_ft_descZ
cc_versionr$   r&   r%   r'   r(   r)   r   r   )r   rD   r   linekeywordvalueZcolsZdatescolZqualdatareleaseZseqsmhitsposZdescZrefsrefZaccr   r   idr   r   r   r   r      s   











































r   N)r/   r   r   r	   r   r   r   r   r   <module>   s   _