B
    ž3RcÀ	  ã               @   sr   d Z ddlZddlZG dd„ dƒZddejfdd„ZedkrnddlZejd	 Z	ee	ƒ\Z
Zed
e
ƒ edeƒ dS )z" Generic file manipulation stuff

é    Nc               @   s2   e Zd ZdZdd„ Zdd„ Zdd„ Zddd„ZdS )ÚReFilezÛconvenience class for dealing with files with comments

  blank (all whitespace) lines, and lines beginning with comment
    characters are skipped.

  anything following a comment character on a line is stripped off
  c             C   s@   d}x6|dkr:| j  ¡ }|dkr$dS | j |¡d  ¡ }qW |S )z> read the next line and return it.

    return '' on EOF

    Ú r   )ÚinFileÚreadlineÚregExpÚsplitÚstrip)ÚselfÚresultZinLine© r   ú-lib/python3.7/site-packages/rdkit/ML/files.pyr      s    

zReFile.readlinec             C   sF   g }| j  ¡ }x2|D ]*}| j |¡d  ¡ }|dkr| |¡ qW |S )zW return a list of all the lines left in the file

    return [] if there are none

    r   r   )r   Ú	readlinesr   r   r   Úappend)r	   ZresZinLinesÚliner
   r   r   r   r   #   s    

zReFile.readlinesc             C   s   | j  d¡ dS )z0 rewinds the file (seeks to the beginning)

    r   N)r   Úseek)r	   r   r   r   Úrewind2   s    zReFile.rewindÚrú#ú\nc             C   s8   |d k	r|dkr|d | }t  |¡| _t||ƒ| _d S )Nr   ú|)ÚreÚcompiler   Úopenr   )r	   ÚfileNameÚmodeÚcommentZtrailerr   r   r   Ú__init__8   s    zReFile.__init__N)r   r   r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   r   r   r   r      s
   r   r   c             C   s¸   t | ƒ}| ¡ }t|ƒ}|tjtjtjgkr2t}nt}t|d  ¡ ƒd }t 	||f|¡}	t 	||¡}
xHt
|ƒD ]<}||  ¡ }||| ƒ|
|< ||= t||ƒ|	|dd…f< qpW |	|
fS )a±   read in the data file and return a tuple of two Numeric arrays:
  (independent variables, dependant variables).

  **ARGUMENTS:**

  - fileName: the fileName

  - comment: the comment character for the file

  - depVarcol: the column number containing the dependant variable

  - dataType: the Numeric short-hand for the data type

  RETURNS:

   a tuple of two Numeric arrays:

    (independent variables, dependant variables).

  r   é   N)r   r   ÚlenÚnumpyÚfloatZfloat32Zfloat64Úintr   ZzerosÚrangeÚmap)r   r   Z	depVarColZdataTyper   Z	dataLinesZnPtsZ	_convfuncZnIndVarsZ	indVarMatZ
depVarVectÚiZ	splitLiner   r   r   ÚReadDataFile?   s    r)   Ú__main__r!   ziV:zdV:)r    r   r#   r   r$   r)   r   ÚsysÚargvZfileNZiVZdVÚprintr   r   r   r   Ú<module>   s   3*

