a
    GGb                     @   s  d Z ddlZddlmZ ddlZddlZddlmZmZ ddl	Z
ddlZddlmZ ddlmZmZmZ ddlm  mZ ddlZddlmZ ddlmZ ddlmZmZm Z m!Z!m"Z" zddl#Z#d	Z$W n e%y   d
Z$Y n0 zTe 4 edde&d edde'd ddl(Z(W d   n1 s"0    Y  d	Z)W n e%yJ   d
Z)Y n0 ej*dZ+ej,ej-dej*j.e) pzeddkdddej-dej*j.e$ dddgddd Z/ej,dd Z0ej,dd Z1ej,dd  Z2ej,d!d" Z3ej,d#d$ Z4ej,ej5ej6j7ej5ej6j8ej5ej6j9ej:d%d&ej:d'd&ej:d(d&ej:d)d&gdd*d+ Z;dJd-d.Z<d/d0 Z=d1d2 Z>d3d4 Z?d5d6 Z@d7d8 ZAd9d: ZBd;d< ZCd=d> ZDd?d@ ZEG dAdB dBZFG dCdD dDeFZGej*dEG dFdG dGeFZHG dHdI dIeFZIdS )Kz test parquet compat     N)BytesIO)catch_warningsfilterwarnings)
get_option)pa_version_under2p0pa_version_under5p0pa_version_under6p0)Version)FastParquetImplPyArrowImpl
get_engineread_parquet
to_parquetTFignorez	`np.bool`)categoryz.*Int64Index.*z4ignore:RangeIndex.* is deprecated:DeprecationWarningfastparquetmode.data_managerarrayz4fastparquet is not installed or ArrayManager is usedreason)Zmarkspyarrowpyarrow is not installed)paramsc                 C   s   | j S Nparamrequest r   ;lib/python3.9/site-packages/pandas/tests/io/test_parquet.pyengine@   s    r    c                   C   s   t std dS )Nr   r   )_HAVE_PYARROWpytestskipr   r   r   r   paU   s    
r$   c                   C   s*   t std ntddkr&td dS )Nzfastparquet is not installedr   r   z.ArrayManager is not supported with fastparquetr   )_HAVE_FASTPARQUETr"   r#   r   r   r   r   r   fp\   s
    
r&   c                   C   s   t g dddS )N         foo)AB)pd	DataFramer   r   r   r   	df_compate   s    r0   c               
   C   sB   t tdttddtjddddg dt jd	d
dd} | S )Nabcr(            @      @float64dtypeTFT20130101r*   periods)abdef)r.   r/   listrangenparange
date_range)dfr   r   r   df_cross_compatj   s    rG   c                   C   s   t tddtjdgg dg dg dttddtd	d
dtjdddddtjdgg dt jdd	dt 	dt j
t 	dgdS )Nr1   r<   cr<   NrH   )   foo   bars   bazr+   barbazr(   r2   r*      Zu1r3   r4   r5   r6          @g      @r8   r9   r:   Z20130103)stringZstring_with_nanZstring_with_nonebytesZunicodeintZuintfloatZfloat_with_nanbooldatetimeZdatetime_with_nat)r.   r/   rA   rC   nanrB   rD   astyperE   	TimestampZNaTr   r   r   r   df_full|   s$    

rZ   z2019-01-04T16:41:24+0200z%Y-%m-%dT%H:%M:%S%zz2019-01-04T16:41:24+0215z2019-01-04T16:41:24-0200z2019-01-04T16:41:24-0215c                 C   s   | j S r   r   r   r   r   r   timezone_aware_date_list   s    r[   r)   c
                    s   p
ddipi du r |r4|d< |d<  fdd}
du rt  |
|	 W d   q1 s|0    Y  n|
|	 dS )a  Verify parquet serializer and deserializer produce the same results.

    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.

    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    compressionNr    c              	      sr   t | D ]d}jfi  tdd  tfi }W d    n1 sN0    Y  tj| d qd S )NTrecord)check_names
check_likecheck_dtype)rB   r   r   r   tmassert_frame_equal)repeat_Zactualra   r`   r_   rF   expectedpathread_kwargswrite_kwargsr   r   compare   s    .z!check_round_trip.<locals>.compare)rb   ensure_clean)rF   r    rh   rj   ri   rg   r_   r`   ra   rd   rk   r   rf   r   check_round_trip   s    "
(rm   c                 C   s|   t rLddlm} |j| dd}t|jjt|ks6J |jjt|ksxJ n,ddlm	} |j	| dd}|j
jj|ksxJ dS )zCheck partitions of a parquet file are as expected.

    Parameters
    ----------
    path: str
        Path of the dataset.
    expected: iterable of str
        Expected partition names.
    r   NF)Zvalidate_schemaZhive)partitioning)r   pyarrow.parquetparquetZParquetDatasetlenZ
partitionsZpartition_namessetZpyarrow.datasetdatasetrn   schemanames)rh   rg   pqrs   Zdsr   r   r   check_partition_names   s    
rw   c                 C   sB   d}t jt|d t| dd W d    n1 s40    Y  d S )Nz.engine must be one of 'pyarrow', 'fastparquet'matchr+   rM   )r"   raises
ValueErrorrm   )r0   msgr   r   r   test_invalid_engine   s    r}   c                 C   s8   t dd t|  W d    n1 s*0    Y  d S )Nio.parquet.enginer   r.   option_contextrm   )r0   r$   r   r   r   test_options_py  s    r   c                 C   s8   t dd t|  W d    n1 s*0    Y  d S )Nr~   r   r   )r0   r&   r   r   r   test_options_fp  s    r   c                 C   s8   t dd t|  W d    n1 s*0    Y  d S )Nr~   autor   )r0   r&   r$   r   r   r   test_options_auto  s    r   c                 C   sV  t tdtsJ t tdts$J tddF t tdtsDJ t tdtsVJ t tdtshJ W d    n1 s|0    Y  tddF t tdtsJ t tdtsJ t tdtsJ W d    n1 s0    Y  tddL t tdts
J t tdtsJ t tdts2J W d    n1 sH0    Y  d S )Nr   r   r~   r   )
isinstancer   r   r
   r.   r   )r&   r$   r   r   r   test_options_get_engine  s    00r   c                  C   s  ddl m}  | d}| d}ts(dnttjt|k }tsBdnttjt|k }to\| }tof| }|s~|s~|rd| d}t	j
t|d td	 W d    q1 s0    Y  n:d
}t	j
t|d td	 W d    n1 s0    Y  |rBd| d}t	j
t|d td	 W d    n1 s60    Y  n<d}t	j
t|d td	 W d    n1 st0    Y  d S )Nr   )VERSIONSr   r   FzPandas requires version .z. or newer of .pyarrow.rx   r   z%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.)Zpandas.compat._optionalr   getr!   r	   r   __version__r%   r   r"   rz   ImportErrorr   )r   Z
pa_min_verZ
fp_min_verZhave_pa_bad_versionZhave_fp_bad_versionZhave_usable_paZhave_usable_fpry   r   r   r   "test_get_engine_auto_error_message*  s8    



(&*r   c                 C   s~   | }t  ^}|j||d d t||d}t || t||ddgd}t ||ddg  W d    n1 sp0    Y  d S )Nr    r\   r    r<   r>   r    columns)rb   rl   r   r   rc   )rG   r$   r&   rF   rh   resultr   r   r   test_cross_engine_pa_fpU  s    
r   c              	   C   s   |}t  }|j||d d tddN t||d}t || t||ddgd}t ||ddg  W d    n1 s|0    Y  W d    n1 s0    Y  d S )Nr   Tr]   r   r<   r>   r   )rb   rl   r   r   r   rc   )r   rG   r$   r&   rF   rh   r   r   r   r   test_cross_engine_fp_pac  s    
r   c                   @   s:   e Zd Zdd Zdd Zejjejddddd	 Z	d
S )Basec              	   C   sj   t  N}tj||d  t|||d d W d    n1 s>0    Y  W d    n1 s\0    Y  d S )Nrx   r\   )rb   rl   r"   rz   r   )selfrF   r    excerr_msgrh   r   r   r   check_error_on_writer  s    
zBase.check_error_on_writec              	   C   sf   t  J}t |  t|||d d W d    n1 s:0    Y  W d    n1 sX0    Y  d S )Nr   )rb   rl   Zexternal_error_raisedr   )r   rF   r    r   rh   r   r   r   check_external_error_on_writex  s    
z"Base.check_external_error_on_writedhttps://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/parquet/simple.parquetT)urlZcheck_before_testc                 C   s.   |dkrt | d}t|}t|| d S )Nr   r   )r"   importorskipr   rb   rc   )r   r0   r    r   rF   r   r   r   test_parquet_read_from_url~  s    	
zBase.test_parquet_read_from_urlN)
__name__
__module____qualname__r   r   r"   markZnetworkrb   r   r   r   r   r   r   q  s   r   c                   @   s   e Zd Zdd Zdd Zdd Zejdg dd	d
 Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zejd!g d"d#d$ Zd%S )&	TestBasicc                 C   sF   t g dddt dtg dfD ]}d}| ||t| q(d S )Nr'   r(   r+   r9   z+to_parquet only supports IO with DataFrames)r.   SeriesrY   rC   r   r   r{   )r   r    objr|   r   r   r   
test_error  s    zTestBasic.test_errorc                 C   s6   t tdttddd}ddg|_t|| d S )Nr1   r(   r2   rQ   rS   r+   rM   )r.   r/   rA   rB   r   rm   )r   r    rF   r   r   r   test_columns_dtypes  s    
zTestBasic.test_columns_dtypesc                 C   s   t tdttddd}d}ddg|_| ||t| ddg|_| ||t| td	ddddtd	ddddg|_| ||t| d S )
Nr1   r(   r2   r   %parquet must have string column namesr   rJ   rK   i  )r.   r/   rA   rB   r   r   r{   rV   )r   r    rF   r|   r   r   r   test_columns_dtypes_invalid  s    

z%TestBasic.test_columns_dtypes_invalidr\   )NZgzipsnappybrotlic                 C   sN   |dkrt d n|dkr&t d tdg di}t||d|id d S )Nr   r   r,   r'   r\   rj   )r"   r   r.   r/   rm   )r   r    r\   rF   r   r   r   test_compression  s    
zTestBasic.test_compressionc                 C   sJ   t tdttddd}t dtdi}t|||ddgid d S )Nr1   r(   r2   r   rQ   r   rg   ri   )r.   r/   rA   rB   rm   )r   r    rF   rg   r   r   r   test_read_columns  s
    zTestBasic.test_read_columnsc                 C   s   |dk}t dg di}t|| g dt jdddtdg d	g}|D ]2}||_t|t jrn|jd |_t|||d
 qJg d|_d|j_	t|| d S )Nr   r,   r'   )r)   r*   r2   r9   r*   r:   r1   )r(   r*   r2   )r_   )r   r(   r)   r+   )
r.   r/   rm   rE   rA   indexr   ZDatetimeIndex
_with_freqname)r   r    r_   rF   Zindexesr   r   r   r   test_write_index  s     

zTestBasic.test_write_indexc                 C   s:   |}t dg di}t jg d}||_t|| d S )Nr,   r'   )r<   r(   )r<   r)   )r=   r(   )r.   r/   
MultiIndexfrom_tuplesr   rm   )r   r$   r    rF   r   r   r   r   test_write_multiindex  s
    zTestBasic.test_write_multiindexc                 C   s   |}t jdddd}t jtjdt| dtdd}t jj	d	d
g|gddgd}|j
d d}||fD ]4}||_t|| t||dddgi|ddg d qhd S )Nz01-Jan-2018z01-Dec-2018ZMS)freqr)   r*   ABCr   Level1Level2leveldate)ru   r   r,   r-   )ri   rg   )r.   rE   r/   rC   randomrandnrq   rA   r   Zfrom_productcopyr   rm   )r   r$   r    ZdatesrF   Zindex1Zindex2r   r   r   r   test_multiindex_with_columns  s    $
z&TestBasic.test_multiindex_with_columnsc                 C   s   t g dg dd}d dd}|jdd}t||||d t jg dg ddg d	d
}t||||d g dg dg}t jttddd tdD d|d
}|jdd}t||||d d S )Nr'   )qrs)r<   r=   F)r\   r   T)droprj   rg   )ZzyxZwvuZtsr)r   rM   rM   rN   rN   r+   r+   quxr   onetwor   r   r   r   r   r      c                 S   s   g | ]
}| qS r   r   ).0ir   r   r   
<listcomp>      z7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)r   r   )r.   r/   Zreset_indexrm   rA   rB   )r   r    rF   rj   rg   arraysr   r   r   test_write_ignoring_index  s     
 z#TestBasic.test_write_ignoring_indexc                 C   s@   t jg d}t jtjdd|d}d}| ||t| d S )Nr   r2   r*   r   Y\s*parquet must have string column names for all values in\s*each level of the MultiIndex)	r.   r   r   r/   rC   r   r   r   r{   )r   r    Z
mi_columnsrF   r|   r   r   r   test_write_column_multiindex  s
    z&TestBasic.test_write_column_multiindexc                 C   sP   |}g dg dg}t jtjdd|d}ddg|j_d}| ||t| d S )Nr   )r(   r)   r(   r)   r(   r)   r(   r)   r   r   r   r   r   )	r.   r/   rC   r   r   r   ru   r   r{   r   r$   r    r   rF   r|   r   r   r   &test_write_column_multiindex_nonstring'  s    z0TestBasic.test_write_column_multiindex_nonstringc                 C   sF   |}g dg dg}t jtjdd|d}ddg|j_t|| d S )Nr   r   r   r   Z	ColLevel1Z	ColLevel2)r.   r/   rC   r   r   r   ru   rm   r   r$   r    r   rF   r   r   r   #test_write_column_multiindex_string9  s    z-TestBasic.test_write_column_multiindex_stringc                 C   s:   |}g d}t jtjdd|d}d|j_t|| d S )N)rM   rN   r+   r   r   r2   r   Z	StringCol)r.   r/   rC   r   r   r   r   rm   r   r   r   r   test_write_column_index_stringH  s
    z(TestBasic.test_write_column_index_stringc                 C   sD   |}g d}t jtjdd|d}d|j_d}| ||t| d S )Nr(   r)   r*   r2   r   r2   r   ZNonStringColr   )	r.   r/   rC   r   r   r   r   r   r{   r   r   r   r   !test_write_column_index_nonstringT  s    z+TestBasic.test_write_column_index_nonstringc           
   
   C   st  dd l m} |dkr.tjjdd}|j| tt	g ddt	g ddt	g dt	g d	t	g d
dd}t
 6}||| t||d}t||dd}W d    n1 s0    Y  |d jtdksJ ttj	g dddtj	g dddtj	g dddtj	g d	ddtj	g d
ddd}	|dkrd|jddd}|	jddd}	t
||	 d S )Nr   r   z.Fastparquet nullable dtype support is disabledr   r(   r)   r*   NZint64Zuint8)r<   r=   rH   N)TFTNr   )r<   r=   rH   r>   r?   r   Tr    use_nullable_dtypesr<   r5   Int64r6   UInt8rQ   booleanrH   r(   )Zaxis)ro   rp   r"   r   ZxfailZnodeZ
add_markerr   tabler   rb   rl   Zwrite_tabler   r7   rC   r.   r/   r   rc   )
r   r    r   rv   r   r   rh   Zresult1Zresult2rg   r   r   r   test_use_nullable_dtypes`  s>    

,	
z"TestBasic.test_use_nullable_dtypesr7   )	r   r   r   objectzdatetime64[ns, UTC]rT   z	period[D]ZFloat64rQ   c                 C   s.   t dt jg |di}t||ddid d S )Nvaluer6   r   T)ri   )r.   r/   r   rm   )r   r$   r7   rF   r   r   r   test_read_empty_array  s
    zTestBasic.test_read_empty_arrayN)r   r   r   r   r   r   r"   r   parametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s*   
		 
,r   z8ignore:CategoricalBlock is deprecated:DeprecationWarningc                   @   sR  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Ze	j
jdd Ze	j
jdd Ze	j
jede	j
ddgg gdd Zeddd Zeddd Zdd Zdd Ze	j
deejgd d! Zd"d# Zd$d% Zedd&d' Zejdd(d)d*d+ Zedd,d- Zd.d/ Zd0d1 Z ejdd(d)d2d3 Z!d4d5 Z"d6S )7TestParquetPyArrowc                 C   s@   |}t jdddd}|d }||d< g d|d< t|| d S )Nr9   r*   Europe/Brusselsr;   Ztzdatetime_tz)TNTZbool_with_none)r.   rE   r   rm   )r   r$   rZ   rF   dtir   r   r   
test_basic  s    
zTestParquetPyArrow.test_basicc                 C   s<   |}t jdddd|d< t|||ddg dddgid	 d S )
Nr9   r*   r   r   r   rQ   rS   r   r   )r.   rE   rm   )r   r$   rZ   rF   r   r   r   test_basic_subset_columns  s    

z,TestParquetPyArrow.test_basic_subset_columnsc                 C   s:   |j |d}t|tsJ t|}t|}t|| d S )Nr   )r   r   rR   r   r   rb   rc   )r   r$   rZ   Z	buf_bytesZ
buf_streamresr   r   r   *test_to_bytes_without_path_or_buf_provided  s
    z=TestParquetPyArrow.test_to_bytes_without_path_or_buf_providedc                 C   s8   t jtdddtdd }| ||td d S )N   r2   r*   aaar   zDuplicate column names found	r.   r/   rC   rD   ZreshaperA   r   r   r{   r   r$   rF   r   r   r   test_duplicate_columns  s    $z)TestParquetPyArrow.test_duplicate_columnsc                 C   sL   t dt jdddi}| ||t t dg di}| ||tj d S )Nr<   1 dayr*   r:   r<   r(   rP   )r.   r/   timedelta_ranger   NotImplementedErrorr   ZArrowExceptionr   r   r   r   test_unsupported  s    z#TestParquetPyArrow.test_unsupportedc                 C   sd   t  }t td|d< t jg dt g dd|d< t jg dg dd	d
|d< t|| d S )NZabcdefr<   )rM   r+   r+   rM   NrM   rL   r6   r=   )r<   r=   rH   r<   rH   r=   )r=   rH   r>   T)Z
categoriesZorderedrH   )r.   r/   CategoricalrA   ZCategoricalDtyperm   r   r   r   r   test_categorical  s    

z#TestParquetPyArrow.test_categoricalc                 C   s8   t d}|jf i |}d|i}t||d||d d S )Ns3fsZ
filesystemzpandas-test/pyarrow.parquetrh   ri   rj   )r"   r   ZS3FileSystemrm   )r   r0   s3_resourcer$   s3sor   Zs3kwr   r   r   test_s3_roundtrip_explicit_fs  s    
z0TestParquetPyArrow.test_s3_roundtrip_explicit_fsc                 C   s   d|i}t ||d||d d S )Nstorage_optionsz s3://pandas-test/pyarrow.parquetr   rm   )r   r0   r   r$   r  r   r   r   test_s3_roundtrip  s    z$TestParquetPyArrow.test_s3_roundtripr   partition_colr,   c              
   C   sR   |  }|r*trdnd}|| |||< t|||dd|i|d |dddd d S )	NZint32r   zs3://pandas-test/parquet_dirr  )partition_colsr\   r  Tr(   )rg   rh   ri   rj   r`   rd   )r   r   rX   rm   )r   r0   r   r$   r  r  Zexpected_dfZpartition_col_typer   r   r   test_s3_roundtrip_for_dir  s&    z,TestParquetPyArrow.test_s3_roundtrip_for_dirr   c                 C   s(   t  }|| t|}t|| d S r   )r   r   r   rb   rc   )r   r0   bufferZdf_from_bufr   r   r   test_read_file_like_obj_support2  s    
z2TestParquetPyArrow.test_read_file_like_obj_supportc                 C   s   | dd | dd tjtdd td W d    n1 sD0    Y  tjtdd |d W d    n1 s|0    Y  d S )NHOMEZTestingUserZUSERPROFILEz.*TestingUser.*rx   z~/file.parquet)Zsetenvr"   rz   OSErrorr   r   )r   r0   monkeypatchr   r   r   test_expand_user9  s    &z#TestParquetPyArrow.test_expand_userc                 C   sf   ddg}|}t  >}|j||d d t|| t|j|jksDJ W d    n1 sX0    Y  d S )NrU   rS   r  r\   rb   ensure_clean_dirr   rw   r   shape)r   r$   rZ   r  rF   rh   r   r   r   test_partition_cols_supportedB  s    

z0TestParquetPyArrow.test_partition_cols_supportedc                 C   sh   d}|g}|}t  >}|j||d d t|| t|j|jksFJ W d    n1 sZ0    Y  d S )NrU   r  r  )r   r$   rZ   r  partition_cols_listrF   rh   r   r   r   test_partition_cols_stringK  s    

z-TestParquetPyArrow.test_partition_cols_string	path_typec           	      C   sd   d}|g}|}t  :}||}|j||d t|j|jksBJ W d    n1 sV0    Y  d S )Nr-   )r  )rb   r  r   r   r  )	r   r$   r0   r  r  r  rF   Zpath_strrh   r   r   r   test_partition_cols_pathlibU  s    
z.TestParquetPyArrow.test_partition_cols_pathlibc                 C   s   t  }t|| d S r   )r.   r/   rm   r   r   r   r   test_empty_dataframeb  s    z'TestParquetPyArrow.test_empty_dataframec                 C   sV   dd l }tdddgi}||jd| dg}|t}t||d|i|d d S )Nr   xr(   )typert   r   )	r   r.   r/   rt   ZfieldZbool_rX   rU   rm   )r   r$   r   rF   rt   Zout_dfr   r   r   test_write_with_schemag  s
    
z)TestParquetPyArrow.test_write_with_schemac                 C   sp   t t jg dddt jg dddt jg dddd}t|| t dt jg d	ddi}t|| d S )
Nr'   r   r6   ZUInt32rI   rQ   )r<   r=   rH   r<   r   )r.   r/   r   rm   r   r   r   r    test_additional_extension_arrayso  s    
z3TestParquetPyArrow.test_additional_extension_arraysz1.0.0)Zmin_versionc              	   C   sh   t dt jg dddi}t d|, t|||d| dd W d    n1 sZ0    Y  d S )	Nr<   rI   zstring[pyarrow]r6   string_storagezstring[]rg   )r.   r/   r   r   rm   rX   )r   r$   r  rF   r   r   r    test_pyarrow_backed_string_array  s    z3TestParquetPyArrow.test_pyarrow_backed_string_arrayc                 C   s(   t dt jddddi}t|| d S )Nr>   z
2012-01-01r*   D)r;   r   )r.   r/   period_rangerm   r   r   r   r   test_additional_extension_types  s
    z2TestParquetPyArrow.test_additional_extension_typesc                 C   s>   t s
d}nd}tdtjddddi}t||d|id	 d S )
Nz2.6z2.0r<   z
2017-01-01Z1n
   r   r;   versionr   )r   r.   r/   rE   rm   )r   r$   ZverrF   r   r   r   test_timestamp_nanoseconds  s
    z-TestParquetPyArrow.test_timestamp_nanosecondsc                 C   s:   t st  d|g }tj|d|id}t||dd d S )N   index_as_colr   dataF)ra   )r   r"   r#   r.   r/   rm   )r   r$   r[   idxrF   r   r   r   test_timezone_aware_index  s
    
z,TestParquetPyArrow.test_timezone_aware_indexc                 C   sr   t dttddi}t .}||| t||dgdd}W d    n1 sT0    Y  t|dksnJ d S )Nr<   r   r*   r<   z==r   F)filtersZuse_legacy_datasetr(   )	r.   r/   rA   rB   rb   rl   r   r   rq   )r   r$   rF   rh   r   r   r   r   test_filter_row_groups  s    

$z)TestParquetPyArrow.test_filter_row_groupsc                 C   s   t jtjddg dd}t &}||| t||}W d    n1 sP0    Y  |rvt	|j
t jjjsJ nt	|j
t jjjsJ d S )Nr%  r*   )r,   r-   Cr   )r.   r/   rC   r   r   rb   rl   r   r   r   Z_mgrcoreZ	internalsZArrayManagerZBlockManager)r   r$   Zusing_array_managerrF   rh   r   r   r   r   test_read_parquet_manager  s    
(z,TestParquetPyArrow.test_read_parquet_managerN)#r   r   r   r   r   r   r   r   r   r"   r   
single_cpur  r  tdZ
skip_if_nor   r	  r  r  r  r  strpathlibPathr  r  r  r  r!  r$  r(  r.  r1  r4  r   r   r   r   r     sP   

"

	






r   c                   @   s   e Zd Zdd Zejjdddd Zdd Zd	d
 Z	dd Z
dd Zejjdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS ) TestParquetFastParquetc                 C   sF   |}t jdddd}|d }||d< t jddd|d< t|| d S )	Nr9   r*   z
US/Easternr   r   r   r:   Z	timedelta)r.   rE   r   r   rm   )r   r&   rZ   rF   r   r   r   r   r     s    
z!TestParquetFastParquet.test_basicznot supportedr   c                 C   s<   t jtdddtdd }d}| ||t| d S )Nr   r2   r*   r   r   z9Cannot create parquet dataset with duplicate column namesr   r   r&   rF   r|   r   r   r   r     s    $z-TestParquetFastParquet.test_duplicate_columnsc                 C   s@   t dg di}t jddtjdgidd}t|||dd d S )	Nr<   )TNFg      ?g        Zfloat16r6   F)rg   ra   )r.   r/   rC   rW   rm   r   r&   rF   rg   r   r   r   test_bool_with_none  s    z*TestParquetFastParquet.test_bool_with_nonec                 C   sT   t dt jddddi}| ||td  t dg di}d}| ||t| d S )Nr<   Z2013Mr*   r&  r   z"Can't infer object conversion type)r.   r/   r#  r   r{   r;  r   r   r   r     s
    z'TestParquetFastParquet.test_unsupportedc                 C   s&   t dt tdi}t|| d S )Nr<   r1   )r.   r/   r   rA   rm   )r   r&   rF   r   r   r   r     s    z'TestParquetFastParquet.test_categoricalc                 C   sz   dt tddi}t|}t 2}|j||d dd t||dgd}W d    n1 s\0    Y  t|dksvJ d S )Nr<   r   r*   r(   )r\   Zrow_group_offsetsr/  )r0  )	rA   rB   r.   r/   rb   rl   r   r   rq   )r   r&   r>   rF   rh   r   r   r   r   r1    s    

.z-TestParquetFastParquet.test_filter_row_groupsc                 C   s    t ||dd|id |dd d S )Nz$s3://pandas-test/fastparquet.parquetr  )r\   r  r   r  )r   r0   r   r&   r  r   r   r   r    s    z(TestParquetFastParquet.test_s3_roundtripc                 C   s   ddg}|}t  X}|j|d|d d tj|s8J dd l}||dj}t	|dks^J W d    n1 sr0    Y  d S )NrU   rS   r   r    r  r\   r   Fr)   
rb   r  r   osrh   existsr   ZParquetFileZcatsrq   r   r&   rZ   r  rF   rh   r   Zactual_partition_colsr   r   r   r    s    
z4TestParquetFastParquet.test_partition_cols_supportedc                 C   s|   d}|}t  X}|j|d|d d tj|s4J dd l}||dj}t	|dksZJ W d    n1 sn0    Y  d S )NrU   r   r?  r   Fr(   r@  rC  r   r   r   r    s    
z1TestParquetFastParquet.test_partition_cols_stringc                 C   s   ddg}|}t  X}|j|dd |d tj|s8J dd l}||dj}t	|dks^J W d    n1 sr0    Y  d S )NrU   rS   r   )r    r\   partition_onr   Fr)   r@  rC  r   r   r   test_partition_on_supported'  s    
z2TestParquetFastParquet.test_partition_on_supportedc              	   C   s~   ddg}|}d}t jt|dL t $}|j|dd ||d W d    n1 sR0    Y  W d    n1 sp0    Y  d S )NrU   rS   zYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datarx   r   )r    r\   rD  r  )r"   rz   r{   rb   r  r   )r   r&   rZ   r  rF   r|   rh   r   r   r   3test_error_on_using_partition_cols_and_partition_on8  s    
zJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_onc                 C   s*   t  }| }d|j_t|||d d S )Nr   r   r.   r/   r   r   r   rm   r<  r   r   r   r  J  s    z+TestParquetFastParquet.test_empty_dataframec                 C   s>   d|g }t j|d|id}| }d|j_t|||d d S )Nr)  r*  r+  r   r   rG  )r   r&   r[   r-  rF   rg   r   r   r   r.  Q  s
    
z0TestParquetFastParquet.test_timezone_aware_indexc              	   C   s   t dddgi}t V}|| tjtdd t|ddd W d    n1 sX0    Y  W d    n1 sv0    Y  d S )	Nr<   r(   r)   z!not supported for the fastparquetrx   r   Tr   )	r.   r/   rb   rl   r   r"   rz   r{   r   )r   r  r&   rF   rh   r   r   r   &test_use_nullable_dtypes_not_supportedZ  s
    

z=TestParquetFastParquet.test_use_nullable_dtypes_not_supportedN)r   r   r   r   r"   r   r#   r   r=  r   r   r1  r5  r  r  r  rE  rF  r  r.  rH  r   r   r   r   r:    s    	


	r:  )	NNNNNTFTr)   )J__doc__rV   ior   rA  r8  warningsr   r   ZnumpyrC   r"   Zpandas._configr   Zpandas.compat.pyarrowr   r   r   Zpandas.util._test_decoratorsutilZ_test_decoratorsr6  Zpandasr.   Zpandas._testingZ_testingrb   Zpandas.util.versionr	   Zpandas.io.parquetr
   r   r   r   r   r   r!   r   DeprecationWarningFutureWarningr   r%   r   Z
pytestmarkZfixturer   Zskipifr    r$   r&   r0   rG   rZ   ZnowtimezoneZutcminmaxstrptimer[   rm   rw   r}   r   r   r   r   r   r   r   r   r   r   r:  r   r   r   r   <module>   s   
(







         
A+   
  &