B
    b.                 @   sJ   d Z ddlmZ G dd dejeZG dd deZddd	Zd
d ZdS )zParsing TRANSFAC files.    )motifsc               @   s.   e Zd ZdZdddddddhZd	d
ddhZdS )Motifa  Store the information for one TRANSFAC motif.

    This class inherits from the Bio.motifs.Motif base class, as well
    as from a Python dictionary. All motif information found by the parser
    is stored as attributes of the base class when possible; see the
    Bio.motifs.Motif base class for a description of these attributes. All
    other information associated with the motif is stored as (key, value)
    pairs in the dictionary, where the key is the two-letter fields as found
    in the TRANSFAC file. References are an exception: These are stored in
    the .references attribute.

    These fields are commonly found in TRANSFAC files::

        AC:    Accession number
        AS:    Accession numbers, secondary
        BA:    Statistical basis
        BF:    Binding factors
        BS:    Factor binding sites underlying the matrix
               [sequence; SITE accession number; start position for matrix
               sequence; length of sequence used; number of gaps inserted;
               strand orientation.]
        CC:    Comments
        CO:    Copyright notice
        DE:    Short factor description
        DR:    External databases
               [database name: database accession number]
        DT:    Date created/updated
        HC:    Subfamilies
        HP:    Superfamilies
        ID:    Identifier
        NA:    Name of the binding factor
        OC:    Taxonomic classification
        OS:    Species/Taxon
        OV:    Older version
        PV:    Preferred version
        TY:    Type
        XX:    Empty line; these are not stored in the Record.

    References are stored in an .references attribute, which is a list of
    dictionaries with the following keys::

        RN:    Reference number
        RA:    Reference authors
        RL:    Reference data
        RT:    Reference title
        RX:    PubMed ID

    For more information, see the TRANSFAC documentation.
    BFOVHPBSHCDTDRRXRARTRLN)__name__
__module____qualname____doc__multiple_value_keysreference_keys r   r   2lib/python3.7/site-packages/Bio/motifs/transfac.pyr      s   1r   c               @   s    e Zd ZdZdd Zdd ZdS )RecordzStore the information in a TRANSFAC matrix table.

    The record inherits from a list containing the individual motifs.

    Attributes:
     - version - The version number, corresponding to the 'VV' field
       in the TRANSFAC file;

    c             C   s
   d| _ dS )zInitialize the class.N)version)selfr   r   r   __init__S   s    zRecord.__init__c             C   s   t | S )z'Turn the TRANSFAC matrix into a string.)write)r   r   r   r   __str__W   s    zRecord.__str__N)r   r   r   r   r   r   r   r   r   r   r   H   s   	r   Tc          
   C   s  i }g }d}t  }x| D ]}| }|s.q|dd}|d  }|rft|dkrftd| dt|dkr|d  }	|r|dd std| d|d	kr|	|_n|d
kri }|	 dd ddddgkrtd| d| d}
xdD ]}g ||< qW x| D ]}| }|dd}|d  }t|dkrx|d  }	|rx|dd sxtd| dyt|}W n tk
r   P Y nX |
dkr|dkr|rtd| n|
d7 }
||
krtd| d|r>t|dkr td|dd|dd|ddt|dkr>td| d|	 dd }t|dkrltd| dx*td|D ]\}}|| 	t
| qxW qW |dkrq|dkrl|	d \}}}|d d!krtd"| d#| d|d$ d%krtd"| d&| dt|dd$ }t||d krXtd"|dd't|d dd|dd||	i}|	| q|d(kr|dk	rtd|d)}|| ||_|	| i }g }q|tjkr|	||< q|tjkr||krg ||< || 	|	 q|	||< qW |S )*z4Parse a transfac format handle into a Record object.N   r      zAThe key value of a TRANSFAC motif line should have 2 characters:""z  zKA TRANSFAC motif line should have 2 spaces between key and value columns: "ZVV)P0ZPO   ACGTzA TRANSFAC matrix "z(" line should be followed by "A C G T": ZACGTzaA TRANSFAC matrix should start with "01" as first row of the matrix, but this matrix uses "00": "zKThe TRANSFAC matrix row number does not match the position in the matrix: "zLA TRANSFAC matrix line should have a 2 digit key at the start of the line ("Z02dz"), but this matrix uses "dz": "sz".z7A TRANSFAC matrix line should have a key and a value: "zQA TRANSFAC matrix line should have a value for each nucleotide (A, C, G and T): "XXRN;[zThe index "z2" in a TRANSFAC RN line should start with a "[": "]z0" in a TRANSFAC RN line should end with a "]": "zP" of the TRANSFAC RN line does not match the current number of seen references "z//)alphabetcounts)r   stripsplitlen
ValueError	partitionr   intzipappendfloatr   update
referencesr   r   )ZhandlestrictZannotationsr:   r/   recordline	key_valuekeyvaluelengthcivaluesvindexZ	separatorZ	accession	referencemotifr   r   r   read\   s    



 

(





rI   c                s  g }y
| j }W n tk
r"   Y nX |dk	r>d| }|| tj}d}x&| D ]g }x|D ]}d}x|D ]}	|	dkr.j}
|
dkrqtj}tj}d	dg| }|| xht
|
D ]\ d	d	gd
d |D  d }|t d g fdd|D  |  g  }|| qW d}ny|	}W n tk
rV   d}Y nX |dk	r|	|krx<|D ]}d|	|f }|| qrW nd|	|f }|| d}|	dkrty
j}W n tk
r   Y qtX d}xP|D ]H}x@|D ]8}	||	}|dkrqd|	|f }|| d}qW qW qtW |rbd}|| qbW d}|| d	|d }|| qPW d	|}|S )z7Write the representation of a motif in TRANSFAC format.NzVV  %s
XX
//
))ZACZAS)ZID)r	   ZCO)ZNA)ZDE)ZTY)ZOSZOC)r   r   )r   )r    )ZBA)r   )ZCC)r
   )r   PVFr    r   z       z%02.dc             S   s   g | ]}d qS )z%6.20gr   ).0_r   r   r   
<listcomp>  s    zwrite.<locals>.<listcomp>z      %sr   c                s   g | ]}j |   qS r   )r/   )rL   l)rC   rH   r   r   rN     s    Tz%s  %srJ   )r)   r   r   r   r   r(   z//
 )r   AttributeErrorr7   r   r   rA   Zdegenerate_consensussortedr.   joinrangetuplegetr:   )r   Zblocksr   blockr   ZsectionslinesZsectionZblankr?   rA   ZsequenceZlettersr=   r@   rE   r:   keysrG   textr   )rC   rH   r   r      s    




,












r   N)T)	r   ZBior   r   dictlistr   rI   r   r   r   r   r   <module>   s   :
 