
    \
qi_                       d Z ddlmZ ddlZddlZddlZddlmZmZm	Z	 ddl
mZmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZ ddlmZmZm Z m!Z!m"Z" erddl#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d>dZ*	 	 	 d?d@d Z+ G d! d          Z, G d" d#e,          Z- G d$ d%e,          Z.	 	 	 	 	 	 	 dAdBd3Z/ ed4          d&ddej0        dddfdCd=            Z1dS )Dzparquet compat    )annotationsN)TYPE_CHECKINGAnyLiteral)catch_warningsfilterwarnings)lib)import_optional_dependency)AbstractMethodErrorPandas4Warning)
set_module)check_dtype_backend)	DataFrame
get_option)arrow_table_to_pandas)	IOHandles
get_handleis_fsspec_urlis_urlstringify_path)DtypeBackendFilePathParquetCompressionOptions
ReadBufferStorageOptionsWriteBufferenginestrreturnBaseImplc                d   | dk    rt          d          } | dk    r_t          t          g}d}|D ]:}	  |            c S # t          $ r}|dt	          |          z   z  }Y d}~3d}~ww xY wt          d|           | dk    rt                      S | dk    rt                      S t          d	          )
zreturn our implementationautozio.parquet.engine z
 - NzUnable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:pyarrowfastparquetz.engine must be one of 'pyarrow', 'fastparquet')r   PyArrowImplFastParquetImplImportErrorr   
ValueError)r   engine_classes
error_msgsengine_classerrs        q/var/www/html/bestrading.cuttalo.com/services/ml-inference/venv/lib/python3.11/site-packages/pandas/io/parquet.py
get_enginer/   4   s    /00%7
* 	1 	1L1#|~~%%% 1 1 1gC00





1   
 
 	
 }}	=	 	    
E
F
FFs   	=
A&A!!A&rbFpath1FilePath | ReadBuffer[bytes] | WriteBuffer[bytes]fsr   storage_optionsStorageOptions | Nonemodeis_dirboolVtuple[FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], IOHandles[bytes] | None, Any]c                `   t          |           }|t          dd          }t          dd          }|'t          ||j                  r|rt	          d          nA|t          ||j        j                  rn$t          dt          |          j	                   t          |          r||Tt          d          }t          d          }	 |j                            |           \  }}n# t          |j        f$ r Y nw xY w|'t          d          } |j        j        |fi |pi \  }}n&|r$t!          |          r|d	k    rt          d
          d}	|sR|sPt          |t"                    r;t$          j                            |          st+          ||d|          }	d}|	j        }||	|fS )zFile handling for PyArrow.Nz
pyarrow.fsignore)errorsfsspecz8storage_options not supported with a pyarrow FileSystem.z9filesystem must be a pyarrow or fsspec FileSystem, not a r$   r0   z8storage_options passed with buffer, or non-supported URLFis_textr4   )r   r
   
isinstance
FileSystemNotImplementedErrorspecAbstractFileSystemr)   type__name__r   from_uri	TypeErrorArrowInvalidcore	url_to_fsr   r   osr1   isdirr   handle)
r1   r3   r4   r6   r7   path_or_handlepa_fsr=   pahandless
             r.   _get_path_or_handlerS   V   s2    $D))N	~*<III+HXFFFB0@!A!A )N   Jr6;3Q$R$R-b*- -   ^$$ U"+I66B.|<<E%*%5%>%>t%D%D"NNr/   :/99F!6!6" "#2#8b" "B 
 U&"8"8 UDDLL STTTG(( ~s++( n--	( D%
 
 
  7B&&s   C. .DDc                  8    e Zd Zed	d            Zd	dZd
ddZdS )r    dfr   r   Nonec                N    t          | t                    st          d          d S )Nz+to_parquet only supports IO with DataFrames)r@   r   r)   )rU   s    r.   validate_dataframezBaseImpl.validate_dataframe   s0    "i(( 	LJKKK	L 	L    c                     t          |           Nr   )selfrU   r1   compressionkwargss        r.   writezBaseImpl.write       !$'''rY   Nc                     t          |           r[   r\   )r]   r1   columnsr_   s       r.   readzBaseImpl.read   ra   rY   )rU   r   r   rV   r[   )r   r   )rF   
__module____qualname__staticmethodrX   r`   rd    rY   r.   r    r       sc        L L L \L( ( ( (( ( ( ( ( ( (rY   c                  J    e Zd ZddZ	 	 	 	 	 dddZddej        dddfddZdS )r&   r   rV   c                F    t          dd           dd l}dd l}|| _        d S )Nr$   z(pyarrow is required for parquet support.extrar   )r
   pyarrow.parquet(pandas.core.arrays.arrow.extension_typesapi)r]   r$   pandass      r.   __init__zPyArrowImpl.__init__   sF    "G	
 	
 	
 	
 	 	8777rY   snappyNrU   r   r1   FilePath | WriteBuffer[bytes]r^   r   indexbool | Noner4   r5   partition_colslist[str] | Nonec                R   |                      |           d|                    dd           i}	|||	d<    | j        j        j        |fi |	}
|j        rBdt          j        |j                  i}|
j        j	        }i ||}|

                    |          }
t          |||d|d u          \  }}}t          |t          j                  rlt          |d          r\t          |j        t"          t$          f          r;t          |j        t$                    r|j                                        }n|j        }	 | | j        j        j        |
|f|||d| n | j        j        j        |
|f||d| ||                                 d S d S # ||                                 w w xY w)	Nschemapreserve_indexPANDAS_ATTRSwb)r4   r6   r7   name)r^   rv   
filesystem)r^   r~   )rX   popro   Tablefrom_pandasattrsjsondumpsry   metadatareplace_schema_metadatarS   r@   ioBufferedWriterhasattrr}   r   bytesdecodeparquetwrite_to_datasetwrite_tableclose)r]   rU   r1   r^   rt   r4   rv   r~   r_   from_pandas_kwargstabledf_metadataexisting_metadatamerged_metadatarO   rR   s                   r.   r`   zPyArrowImpl.write   s(    	###.6

8T8R8R-S38/0**2DD1CDD8 	C)4:bh+?+?@K % 5B!2BkBO11/BBE.A+!-/
 /
 /
+ ~r'899	5//	5 >.e==	5
 .-u55 5!/!4!;!;!=!=!/!4	 )1 1" !,#1)      - ," !,)	 
    " #"w" #s   7<F F&dtype_backendDtypeBackend | lib.NoDefaultto_pandas_kwargsdict[str, Any] | Nonec                   d|d<   t          |||d          \  }	}
}	  | j        j        j        |	f|||d|}t	                      5  t          ddt                     t          |||          }d d d            n# 1 swxY w Y   |j        j	        r9d	|j        j	        v r+|j        j	        d	         }t          j        |          |_        ||
|
                                 S S # |
|
                                 w w xY w)
NTuse_pandas_metadatar0   )r4   r6   )rc   r~   filtersr;   make_block is deprecated)r   r   s   PANDAS_ATTRS)rS   ro   r   
read_tabler   r   r   r   ry   r   r   loadsr   r   )r]   r1   rc   r   r   r4   r~   r   r_   rO   rR   pa_tableresultr   s                 r.   rd   zPyArrowImpl.read   s    )-$%.A+	/
 /
 /
+	 2tx'2%	 
  H  !! 
 
."  
 /"/%5  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 ' ;"ho&>>>"*/":?"KK#':k#:#:FL" #w" #s0   *C& )A=1C& =BC& BA	C& &C?r   rV   rr   NNNN)rU   r   r1   rs   r^   r   rt   ru   r4   r5   rv   rw   r   rV   )r   r   r4   r5   r   r   r   r   )rF   re   rf   rq   r`   r	   
no_defaultrd   rh   rY   r.   r&   r&      s        	 	 	 	 2:!15+/@  @  @  @  @ J 69n1526.  .  .  .  .  .  . rY   r&   c                  >    e Zd ZddZ	 	 	 	 	 dddZ	 	 	 	 	 dddZdS )r'   r   rV   c                6    t          dd          }|| _        d S )Nr%   z,fastparquet is required for parquet support.rk   )r
   ro   )r]   r%   s     r.   rq   zFastParquetImpl.__init__"  s+     1!O
 
 
 rY   rr   NrU   r   r^   *Literal['snappy', 'gzip', 'brotli'] | Noner4   r5   c                  	 |                      |           d|v r|t          d          d|v r|                    d          }|d|d<   |t          d          t	          |          }t          |          rt          d          		fd|d<   nrt          d	          t          d
          5   | j        j	        ||f|||d| d d d            d S # 1 swxY w Y   d S )Npartition_onzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datahivefile_scheme9filesystem is not implemented for the fastparquet engine.r=   c                J     j         | dfi pi                                  S )Nr|   )open)r1   _r=   r4   s     r.   <lambda>z'FastParquetImpl.write.<locals>.<lambda>M  s8    +&+d3 3.4"3 3dff rY   	open_withz?storage_options passed with file object or non-fsspec file pathT)record)r^   write_indexr   )
rX   r)   r   rB   r   r   r
   r   ro   r`   )
r]   rU   r1   r^   rt   rv   r4   r~   r_   r=   s
         `  @r.   r`   zFastParquetImpl.write*  s    	###V##(BK   V###ZZ77N%$*F=!!%K  
 d## 
	/99F# # # # #F;  	Q   4((( 	 	DHN (!+    	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   6CC #C r   dict | Nonec                X   i }|                     dt          j                  }	d|d<   |	t          j        urt          d          |t	          d          |t	          d          t          |          }d }
t          |          r)t          d          } |j        |dfi |pi j	        |d	<   nNt          |t                    r9t          j                            |          st          |dd|
          }
|
j        }	  | j        j        |fi |}t'                      5  t)          ddt*                      |j        d||d|cd d d            |
|
                                 S S # 1 swxY w Y   	 |
|
                                 d S d S # |
|
                                 w w xY w)Nr   Fpandas_nullszHThe 'dtype_backend' argument is not supported for the fastparquet enginer   z?to_pandas_kwargs is not implemented for the fastparquet engine.r=   r0   r3   r>   r;   r   )rc   r   rh   )r   r	   r   r)   rB   r   r   r
   r   r3   r@   r   rL   r1   rM   r   rN   ro   ParquetFiler   r   r   	to_pandasr   )r]   r1   rc   r   r4   r~   r   r_   parquet_kwargsr   rR   r=   parquet_files                r.   rd   zFastParquetImpl.read_  sY    *,

?CNCC).~&..%   !%K   '%Q   d## 	"/99F#.6;tT#U#Uo>SQS#U#U#XN4  c"" 	"27==+>+> 	" !dE?  G >D	 /48/GGGGL!!  ."  
 .|- #W 8>        " #         " #"w" #s0   ?!F  &E*F *E..F 1E.2F F)r   r   )rU   r   r^   r   r4   r5   r   rV   )NNNNN)r4   r5   r   r   r   r   )rF   re   rf   rq   r`   rd   rh   rY   r.   r'   r'   !  s            CK153 3 3 3 3p 15(,7  7  7  7  7  7  7 rY   r'   r"   rr   rU   r   $FilePath | WriteBuffer[bytes] | Noner^   r   rt   ru   rv   rw   r~   bytes | Nonec           	        t          |t                    r|g}t          |          }	|t          j                    n|}
 |	j        | |
f|||||d| |0t          |
t          j                  sJ |
                                S dS )a  
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : str, path object, file-like object, or None, default None
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``write()`` function. If None, the result
        is returned as bytes. If a string, it will be used as Root Directory
        path when writing a partitioned dataset. The engine fastparquet does
        not accept file-like objects.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.

        When using the ``'pyarrow'`` engine and no storage options are provided
        and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
        (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
        Use the filesystem keyword with an instantiated fsspec filesystem
        if you wish to use its implementation.
    compression : {{'snappy', 'gzip', 'brotli', 'lz4', 'zstd', None}},
        default 'snappy'. Name of the compression to use. Use ``None``
        for no compression.
    index : bool, default None
        If ``True``, include the dataframe's index(es) in the file output. If
        ``False``, they will not be written to the file.
        If ``None``, similar to ``True`` the dataframe's index(es)
        will be saved. However, instead of being saved as values,
        the RangeIndex will be stored as a range in the metadata so it
        doesn't require much space and is faster. Other indexes will
        be included as columns in the file output.
    partition_cols : str or list, optional, default None
        Column names by which to partition the dataset.
        Columns are partitioned in the order they are given.
        Must be None if path is not a string.
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection, e.g.
        host, port, username, password, etc. For HTTP(S) URLs the key-value
        pairs are forwarded to ``urllib.request.Request`` as header options.
        For other URLs (e.g. starting with "s3://", and "gcs://") the
        key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec``
        and ``urllib`` for more details, and for more examples on storage
        options refer `here <https://pandas.pydata.org/docs/user_guide/io.html?
        highlight=storage_options#reading-writing-remote-files>`_.
    filesystem : fsspec or pyarrow filesystem, default None
        Filesystem object to use when reading the parquet file. Only implemented
        for ``engine="pyarrow"``.

        .. versionadded:: 2.1.0

    **kwargs
        Additional keyword arguments passed to the engine:

        * For ``engine="pyarrow"``: passed to :func:`pyarrow.parquet.write_table`
          or :func:`pyarrow.parquet.write_to_dataset` (when using partition_cols)
        * For ``engine="fastparquet"``: passed to :func:`fastparquet.write`

    Returns
    -------
    bytes if no path argument is provided else None
    N)r^   rt   rv   r4   r~   )r@   r   r/   r   BytesIOr`   getvalue)rU   r1   r   r^   rt   r4   rv   r~   r_   implpath_or_bufs              r.   
to_parquetr     s    V .#&& *()fDAESWKDJ
	  %'	 	 	 	 	 |+rz22222##%%%trY   rp   FilePath | ReadBuffer[bytes]rc   r   r   r   &list[tuple] | list[list[tuple]] | Noner   r   c           
     h    t          |          }	t          |            |	j        | f||||||d|S )aI  
    Load a parquet object from the file path, returning a DataFrame.

    The function automatically handles reading the data from a parquet file
    and creates a DataFrame with the appropriate structure.

    Parameters
    ----------
    path : str, path object or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function.
        The string could be a URL. Valid URL schemes include http, ftp, s3,
        gs, and file. For file URLs, a host is expected. A local file could be:
        ``file://localhost/path/to/table.parquet``.
        A file URL can also be a path to a directory that contains multiple
        partitioned parquet files. Both pyarrow and fastparquet support
        paths to directories as well as file URLs. A directory path could be:
        ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.

        When using the ``'pyarrow'`` engine and no storage options are provided
        and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
        (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
        Use the filesystem keyword with an instantiated fsspec filesystem
        if you wish to use its implementation.
    columns : list, default=None
        If not None, only these columns will be read from the file.
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection, e.g.
        host, port, username, password, etc. For HTTP(S) URLs the key-value
        pairs are forwarded to ``urllib.request.Request`` as header options.
        For other URLs (e.g. starting with "s3://", and "gcs://") the
        key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec``
        and ``urllib`` for more details, and for more examples on storage
        options refer `here <https://pandas.pydata.org/docs/user_guide/io.html?
        highlight=storage_options#reading-writing-remote-files>`_.
    dtype_backend : {{'numpy_nullable', 'pyarrow'}}
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). If not specified, the default behavior
        is to not use nullable data types. If specified, the behavior
        is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
        * ``"pyarrow"``: returns pyarrow-backed nullable
          :class:`ArrowDtype` :class:`DataFrame`

        .. versionadded:: 2.0

    filesystem : fsspec or pyarrow filesystem, default None
        Filesystem object to use when reading the parquet file. Only implemented
        for ``engine="pyarrow"``.

        .. versionadded:: 2.1.0

    filters : List[Tuple] or List[List[Tuple]], default None
        To filter out data.
        Filter syntax: [[(column, op, val), ...],...]
        where op is [==, =, >, >=, <, <=, !=, in, not in]
        The innermost tuples are transposed into a set of filters applied
        through an `AND` operation.
        The outer list combines these sets of filters through an `OR`
        operation.
        A single list of tuples can also be used, meaning that no `OR`
        operation between set of filters is to be conducted.

        Using this argument will NOT result in row-wise filtering of the final
        partitions unless ``engine="pyarrow"`` is also specified.  For
        other engines, filtering is only performed at the partition level, that is,
        to prevent the loading of some row-groups and/or files.

        .. versionadded:: 2.1.0

    to_pandas_kwargs : dict | None, default None
        Keyword arguments to pass through to :func:`pyarrow.Table.to_pandas`
        when ``engine="pyarrow"``.

        .. versionadded:: 3.0.0

    **kwargs
        Additional keyword arguments passed to the engine:

        * For ``engine="pyarrow"``: passed to :func:`pyarrow.parquet.read_table`
        * For ``engine="fastparquet"``: passed to
          :meth:`fastparquet.ParquetFile.to_pandas`

    Returns
    -------
    DataFrame
        DataFrame based on parquet file.

    See Also
    --------
    DataFrame.to_parquet : Create a parquet object that serializes a DataFrame.

    Examples
    --------
    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> df_parquet_bytes = original_df.to_parquet()
    >>> from io import BytesIO
    >>> restored_df = pd.read_parquet(BytesIO(df_parquet_bytes))
    >>> restored_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> restored_df.equals(original_df)
    True
    >>> restored_bar = pd.read_parquet(BytesIO(df_parquet_bytes), columns=["bar"])
    >>> restored_bar
        bar
    0    5
    1    6
    2    7
    3    8
    4    9
    >>> restored_bar.equals(original_df[["bar"]])
    True

    The function uses `kwargs` that are passed directly to the engine.
    In the following example, we use the `filters` argument of the pyarrow
    engine to filter the rows of the DataFrame.

    Since `pyarrow` is the default engine, we can omit the `engine` argument.
    Note that the `filters` argument is implemented by the `pyarrow` engine,
    which can benefit from multithreading and also potentially be more
    economical in terms of memory.

    >>> sel = [("foo", ">", 2)]
    >>> restored_part = pd.read_parquet(BytesIO(df_parquet_bytes), filters=sel)
    >>> restored_part
        foo  bar
    0    3    8
    1    4    9
    )rc   r   r4   r   r~   r   )r/   r   rd   )
r1   r   rc   r4   r   r~   r   r   r_   r   s
             r.   read_parquetr     s_    @ fD&&&49	'#)	 	 	 	 	rY   )r   r   r   r    )Nr0   F)r1   r2   r3   r   r4   r5   r6   r   r7   r8   r   r9   )Nr"   rr   NNNN)rU   r   r1   r   r   r   r^   r   rt   ru   r4   r5   rv   rw   r~   r   r   r   )r1   r   r   r   rc   rw   r4   r5   r   r   r~   r   r   r   r   r   r   r   )2__doc__
__future__r   r   r   rL   typingr   r   r   warningsr   r   pandas._libsr	   pandas.compat._optionalr
   pandas.errorsr   r   pandas.util._decoratorsr   pandas.util._validatorsr   rp   r   r   pandas.io._utilr   pandas.io.commonr   r   r   r   r   pandas._typingr   r   r   r   r   r   r/   rS   r    r&   r'   r   r   r   rh   rY   r.   <module>r      s>     " " " " " " 				  				         
       
       > > > > > >        / . . . . . 7 7 7 7 7 7       
 2 1 1 1 1 1                              G G G GJ .2<' <' <' <' <'~
( 
( 
( 
( 
( 
( 
( 
(|  |  |  |  | ( |  |  | ~u  u  u  u  u h u  u  u t 26-5-1'+` ` ` ` `F H  $-125.6:$(k k k k k k krY   