B
    ̵aKB                 @  sf  d Z ddlmZ ddlZddlZddlmZmZ ddlm	Z	 ddl
mZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZmZmZmZm Z  dddddZ!d2dddddddddZ"G dd dZ#G dd de#Z$G d d! d!e#Z%eej&d" d#d3d&d'dd(d)dd*d+d,d-d.Z'eej&d" d#d4dddd/d0d1Z(dS )5z parquet compat     )annotationsN)AnyAnyStr)catch_warnings)FilePathOrBufferStorageOptions)import_optional_dependency)AbstractMethodError)doc)	DataFrame
MultiIndex
get_option)generic)Version)	IOHandles
get_handleis_fsspec_urlis_urlstringify_pathstrBaseImpl)enginereturnc             C  s   | dkrt d} | dkrzttg}d}xF|D ]>}y| S  tk
rf } z|dt| 7 }W dd}~X Y q*X q*W td| | dkrt S | dkrt S td	dS )
zreturn our implementationautozio.parquet.engine z
 - NzUnable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:pyarrowfastparquetz.engine must be one of 'pyarrow', 'fastparquet')r   PyArrowImplFastParquetImplImportErrorr   
ValueError)r   Zengine_classesZ
error_msgsZengine_classerr r"   0lib/python3.7/site-packages/pandas/io/parquet.py
get_engine%   s"    
&	r$   rbFr   r   r   boolz.tuple[FilePathOrBuffer, IOHandles | None, Any])pathfsstorage_optionsmodeis_dirr   c             C  s   t | }t|r:|dkr:td}|jj|f|p0i \}}n|rVt|rN|dkrVtdd}|s|st|trt	j
|st||d|d}d}|j}|||fS )zFile handling for PyArrow.Nfsspecr%   z8storage_options passed with buffer, or non-supported URLF)is_textr)   )r   r   r   ZcoreZ	url_to_fsr   r    
isinstancer   osr'   isdirr   handle)r'   r(   r)   r*   r+   path_or_handler,   handlesr"   r"   r#   _get_path_or_handleG   s"    
r4   c               @  s6   e Zd ZeddddZddddZd
dd	ZdS )r   r   )dfc             C  sx   t | tstdt | jtr>tdd | jjD sRtdn| jjdkrRtdtdd | jj	D }|sttdd S )	Nz+to_parquet only supports IO with DataFramesc             s  s   | ]}|j d kV  qdS )>   emptystringN)inferred_type).0xr"   r"   r#   	<genexpr>w   s    z.BaseImpl.validate_dataframe.<locals>.<genexpr>z
                    parquet must have string column names for all values in
                     each level of the MultiIndex
                    >   r6   r7   z%parquet must have string column namesc             s  s    | ]}|d k	rt |tV  qd S )N)r.   r   )r9   namer"   r"   r#   r;      s    z!Index level names must be strings)
r.   r   r    columnsr   allZlevelsr8   indexnames)r5   Zvalid_namesr"   r"   r#   validate_dataframen   s    
zBaseImpl.validate_dataframec             K  s   t | d S )N)r	   )selfr5   r'   compressionkwargsr"   r"   r#   write   s    zBaseImpl.writeNc             K  s   t | d S )N)r	   )rB   r'   r=   rD   r"   r"   r#   read   s    zBaseImpl.read)N)__name__
__module____qualname__staticmethodrA   rE   rF   r"   r"   r"   r#   r   m   s   c               @  s>   e Zd Zdd Zdddddd	d
dddZdd	dddZdS )r   c             C  s&   t ddd dd l}dd l}|| _d S )Nr   z(pyarrow is required for parquet support.)extrar   )r   Zpyarrow.parquetZpandas.core.arrays._arrow_utilsapi)rB   r   pandasr"   r"   r#   __init__   s
    
zPyArrowImpl.__init__snappyNr   zFilePathOrBuffer[AnyStr]z
str | Nonezbool | Noner   zlist[str] | None)r5   r'   rC   r?   r)   partition_colsc             K  s   |  | d|dd i}|d k	r*||d< | jjj|f|}	t||dd |d|d k	d\}
}|d< zH|d k	r| jjj|	|
f||d| n| jjj|	|
fd|i| W d |d k	r|	  X d S )NZschemaZpreserve_index
filesystemwb)r)   r*   r+   )rC   rP   rC   )
rA   poprL   ZTableZfrom_pandasr4   parquetZwrite_to_datasetZwrite_tableclose)rB   r5   r'   rC   r?   r)   rP   rD   Zfrom_pandas_kwargstabler2   r3   r"   r"   r#   rE      s.    



zPyArrowImpl.writeF)r)   c             K  sD  d|d< i }|rdd l }| j | | j | | j | | j |	 | j
 | | j | | j | | j | | j | | j | i
}|j|d< td}	|	dkrd|d< t||dd |d	d
\}
}|d< z>| jjj|
fd|i|jf |}|	dkr(|jddd}|S |d k	r>|  X d S )NTZuse_pandas_metadatar   Ztypes_mapperzmode.data_managerZarrayZsplit_blocksrQ   r%   )r)   r*   r=   F)copy)rM   rL   Zint8Z	Int8DtypeZint16Z
Int16DtypeZint32Z
Int32DtypeZint64Z
Int64DtypeZuint8Z
UInt8DtypeZuint16ZUInt16DtypeZuint32ZUInt32DtypeZuint64ZUInt64DtypeZbool_ZBooleanDtyper7   ZStringDtypegetr   r4   rS   rT   Z
read_table	to_pandasZ_as_managerrU   )rB   r'   r=   use_nullable_dtypesr)   rD   Zto_pandas_kwargsZpdmappingZmanagerr2   r3   resultr"   r"   r#   rF      s@    



zPyArrowImpl.read)rO   NNN)NFN)rG   rH   rI   rN   rE   rF   r"   r"   r"   r#   r      s      (  r   c               @  s6   e Zd Zdd Zdddddd	Zddd
ddZdS )r   c             C  s   t ddd}|| _d S )Nr   z,fastparquet is required for parquet support.)rK   )r   rL   )rB   r   r"   r"   r#   rN      s    
zFastParquetImpl.__init__rO   Nr   r   )r5   r)   c          	     s   |  | d|kr$|d k	r$tdnd|kr6|d}|d k	rFd|d< t|}t|rrtd  fdd|d< nr~td	td
d$ | jj||f|||d| W d Q R X d S )Npartition_onzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning dataZhiveZfile_schemer,   c               s    j | dfpi   S )NrR   )open)r'   _)r,   r)   r"   r#   <lambda>#  s   z'FastParquetImpl.write.<locals>.<lambda>	open_withz?storage_options passed with file object or non-fsspec file pathT)record)rC   Zwrite_indexr]   )	rA   r    rS   r   r   r   r   rL   rE   )rB   r5   r'   rC   r?   rP   r)   rD   r"   )r,   r)   r#   rE     s.    


zFastParquetImpl.write)r)   c       
        s   i }| dd}t| jjtdkr,d|d< |r8tdt|}d }t|rtd t| jjtdkr j|dfpxi j	|d	< qć fd
d|d< n,t
|trtj|st|ddd}|j}| jj|f|}|jf d|i|}	|d k	r|  |	S )NrZ   Fz0.7.1Zpandas_nullszNThe 'use_nullable_dtypes' argument is not supported for the fastparquet enginer,   z0.6.1r%   r(   c               s    j | dfpi   S )Nr%   )r^   )r'   r_   )r,   r)   r"   r#   r`   L  s   z&FastParquetImpl.read.<locals>.<lambda>ra   )r-   r)   r=   )rS   r   rL   __version__r    r   r   r   r^   r(   r.   r   r/   r'   r0   r   r1   ZParquetFilerY   rU   )
rB   r'   r=   r)   rD   Zparquet_kwargsrZ   r3   Zparquet_filer\   r"   )r,   r)   r#   rF   5  s0    zFastParquetImpl.read)rO   NNN)NN)rG   rH   rI   rN   rE   rF   r"   r"   r"   r#   r      s      ,r   r)   )r)   r   rO   r   zFilePathOrBuffer | Nonez
str | Nonezbool | Nonezlist[str] | Nonezbytes | None)r5   r'   r   rC   r?   r)   rP   r   c       
      K  sr   t |tr|g}t|}|dkr(t n|}	|j| |	f||||d| |dkrjt |	tjsbt|	 S dS dS )a  
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : str or file-like object, default None
        If a string, it will be used as Root Directory path
        when writing a partitioned dataset. By file-like object,
        we refer to objects with a write() method, such as a file handle
        (e.g. via builtin open function) or io.BytesIO. The engine
        fastparquet does not accept file-like objects. If path is None,
        a bytes object is returned.

        .. versionchanged:: 1.2.0

    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    compression : {{'snappy', 'gzip', 'brotli', None}}, default 'snappy'
        Name of the compression to use. Use ``None`` for no compression.
    index : bool, default None
        If ``True``, include the dataframe's index(es) in the file output. If
        ``False``, they will not be written to the file.
        If ``None``, similar to ``True`` the dataframe's index(es)
        will be saved. However, instead of being saved as values,
        the RangeIndex will be stored as a range in the metadata so it
        doesn't require much space and is faster. Other indexes will
        be included as columns in the file output.
    partition_cols : str or list, optional, default None
        Column names by which to partition the dataset.
        Columns are partitioned in the order they are given.
        Must be None if path is not a string.
    {storage_options}

        .. versionadded:: 1.2.0

    kwargs
        Additional keyword arguments passed to the engine

    Returns
    -------
    bytes if no path argument is provided else None
    N)rC   r?   rP   r)   )r.   r   r$   ioBytesIOrE   AssertionErrorgetvalue)
r5   r'   r   rC   r?   r)   rP   rD   implZpath_or_bufr"   r"   r#   
to_parqueta  s     9
ri   )r   r)   rZ   c             K  s"   t |}|j| f|||d|S )a   
    Load a parquet object from the file path, returning a DataFrame.

    Parameters
    ----------
    path : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.parquet``.
        A file URL can also be a path to a directory that contains multiple
        partitioned parquet files. Both pyarrow and fastparquet support
        paths to directories as well as file URLs. A directory path could be:
        ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handle (e.g. via builtin ``open`` function)
        or ``StringIO``.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    columns : list, default=None
        If not None, only these columns will be read from the file.

    {storage_options}

        .. versionadded:: 1.3.0

    use_nullable_dtypes : bool, default False
        If True, use dtypes that use ``pd.NA`` as missing value indicator
        for the resulting DataFrame. (only applicable for the ``pyarrow``
        engine)
        As new dtypes are added that support ``pd.NA`` in the future, the
        output with this option will change to use those dtypes.
        Note: this is an experimental option, and behaviour (e.g. additional
        support dtypes) may change without notice.

        .. versionadded:: 1.2.0

    **kwargs
        Any additional kwargs are passed to the engine.

    Returns
    -------
    DataFrame
    )r=   r)   rZ   )r$   rF   )r'   r   r=   r)   rZ   rD   rh   r"   r"   r#   read_parquet  s    <rj   )Nr%   F)Nr   rO   NNN)r   NNF))__doc__Z
__future__r   rd   r/   typingr   r   warningsr   Zpandas._typingr   r   Zpandas.compat._optionalr   Zpandas.errorsr	   Zpandas.util._decoratorsr
   rM   r   r   r   Zpandas.corer   Zpandas.util.versionr   Zpandas.io.commonr   r   r   r   r   r$   r4   r   r   r   Z_shared_docsri   rj   r"   r"   r"   r#   <module>   s@   	%  !$ig      H   