a
    GGb>%                     @   s  d Z ddlmZ ddlZddlZddlZddlZddlm	Z	m
Z
 ddlmZ ejdZedd Zedd	 Zeejd
ddgejdg ddd Zedd Zedd Zeejddi e	ddgifdddie	ddgifdddgie	dddgifd dgd!d"e	ddgifd dgd#d"e	dejdgifgd$d% Zed&d' Zeejd(g d)d*d+ Zeejd,d!d#gd-d. Zed/d0 Zejdg d1d2d3 Zeejdg d4d5d6 Zed7d8 Zed9d: Z ejd;ejd<d=d>gd?d@ Z!dS )AzZ
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
    )BytesION)	DataFrameread_csvZpyarrow_skipc                 C   sL   d}| }t d|}|j|d|d}tddggddgd	}t|| d S )
Ncp1255u   שלום:1234
562:123:)sepencodingi2  {   u   שלוםZ1234columnsr   encoder   r   tmassert_frame_equal)all_parsersr   parserdataresultexpected r   Clib/python3.9/site-packages/pandas/tests/io/parser/test_encoding.pytest_bytes_io_input   s    r   c                 C   s@   | }t d }|j|ddd d}tddgg}t|| d S )Nu   Łaski, Jan;1;utf-8)r   r   headeru   Łaski, Jan   r   )r   r   r   r   r   r   r   r   test_read_csv_unicode"   s
    r   r   ,	r   )utf-16zutf-16lezutf-16bec              	   C   s  | }d d|}dtd d}|dd}d}t|}d	d
lm} ||}	t|d}
|
|	 W d    n1 s|0    Y  t	||}|||d}|j
|fd|i|}|j
|fd|i|}|  t|| W d    n1 s0    Y  d S )Nz)skip this
skip this too
A,B,C
1,2,3
4,5,6r   __
   z__.csv   )r   Zskiprowsr   r   )TextIOWrapperwbr   r   )replacer   Zrandsensure_cleanior#   r   openwriter   r   closer   )r   r   r   r   r   pathkwargsutf8r#   
bytes_datafZbytes_bufferr   r   r   r   r   test_utf16_bom_skiprows,   s$    

(r1   c                 C   s6   t j|d}| }|j|ddd}t|dks2J d S )Nzutf16_ex.txtr   r   )r   r   2   )osr,   joinr   len)r   csv_dir_pathr,   r   r   r   r   r   test_utf16_exampleO   s    r7   c                 C   sL   t j|d}| }|j|d dd}|d}|d d }d}||ksHJ d S )Nunicode_series.csvlatin-1)r   r   r   r   i`  u$   Á köldum klaka (Cold Fever) (1994))r3   r,   r4   r   Z	set_index)r   r6   r,   r   r   Zgotr   r   r   r   test_unicode_encodingW   s    
r:   zdata,kwargs,expectedza
1ar   z"a"
1Z	quotechar"zb
1namesb1z
1T)r=   Zskip_blank_linesFc                    sD   | }d d fdd}|j ||fdi|}t|| d S )Nu   ﻿r   c                    s    |   }t|S )N)r   r   )_dataZbom_dataZbomr.   r   r   _encode_data_with_bom~   s    z,test_utf8_bom.<locals>._encode_data_with_bomr   )r   r   r   )r   r   r-   r   r   rB   r   r   rA   r   test_utf8_bomd   s    rC   c                 C   sL   t dgdgd}| }||}d|}|jt||d}t|| d S )Ng333333@test)Zmb_numZ	multibytezmb_num,multibyte
4.8,testr%   )r   formatr   r   r   r   r   )r   	utf_valueencoding_fmtr   r   r   r   r   r   r   r   test_read_csv_utf_aliases   s    

rH   zfile_path,encoding)))r(   r   Zcsvz	test1.csvr   ))r(   r   r   r8   r9   ))r(   r   r   zsauron.SHIFT_JIS.csvshiftjisc                 C   s  | }|| }|j ||d}t||d$}| |}	|jr<J W d    n1 sP0    Y  t||	 t|dd(}
|j |
|d}	|
jrJ W d    n1 s0    Y  t||	 t|ddd(}
|j |
|d}	|
jrJ W d    n1 s0    Y  t||	 d S )Nr%   rbmoder   )rL   	buffering)r   r)   closedr   r   )r   r6   Z	file_pathr   datapathr   Zfpathr   Zfar   Zfbr   r   r   test_binary_mode_file_buffers   s    
(((rP   pass_encodingc           	      C   s   | }| |}tddgi}tjd|ddF}|d |d |j||rP|nd d}t|| W d    n1 sx0    Y  d S )	NZfoobarzw+T)rL   r   Zreturn_filelikezfoo
barr   r%   )rE   r   r   r'   r*   seekr   r   )	r   rF   rG   rQ   r   r   r   r0   r   r   r   r   test_encoding_temp_file   s    


rT   c                 C   s   | }d}d}d}t ||gi}t X}|| d| | |d |j||d}t|| |j	rpJ W d    n1 s0    Y  d S )Nz	shift-jisu	   てすとu   こむ
r   r%   )
r   tempfileZNamedTemporaryFiler*   r   rS   r   r   r   rN   )r   r   r   titler   r   r0   r   r   r   r   test_encoding_named_temp_file   s    

rX   )r   r   z	utf-16-bez	utf-16-lezutf-32c                 C   sR   d}t || }t|d| d}tddgddgdd	ggd
dgd}t|| d S )Nu   a	b
：foo	0
bar	1
baz	2r   )Z	delimiterr   u   ：foor   rR   r   Zbazr"   r;   r>   )r   r   r   )r   r   Zencoded_datar   r   r   r   r   %test_parse_encoded_special_characters   s
    "rY   )r   Nr   r   r9   c                 C   sx   | }t g dg dg dd}t 0}|j|d|d |j||dd}W d    n1 s^0    Y  t|| d S )	N)ZRaphaelZ	DonatellozMiguel AngelZLeonardo)ZredZpurpleZorangeZblue)Zsaizbo staffZnunchunkZkatana)namemaskZweaponF)indexr   T)r   
memory_map)r   r   r'   to_csvr   r   )r   r   r   r   filedfr   r   r   test_encoding_memory_map   s    
.ra   c                 C   s|   | }t dgd d}d|jd< td4}|j|dddd	 |j|d
ddd}W d
   n1 sb0    Y  t|| d
S )zO
    Chunk splits a multibyte character with memory_map=True

    GH 43540
    Zaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaai   )r   u   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaąi  zbug-gh43540.csvFr   r\   r   r   NTc)r   r]   engine)r   Zilocr   r'   r^   r   r   )r   r   r`   fnamedfrr   r   r    test_chunk_splits_multibyte_char   s    
0rg   c              	   C   s   g }d}d}d}t t|t||D ]V}ddd t ||d D d }z|d W n tyn   Y q$Y n0 || q$| }t|}td	6}	|j	|	d
d
dd |j
|	ddddd}
W d   n1 s0    Y  t||
 dS )zg
    GH 43787

    Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
        u   𐂀 c                 S   s   g | ]}t |qS r   )chr).0rc   r   r   r   
<listcomp>      z,test_readcsv_memmap_utf8.<locals>.<listcomp>rU   r   zutf8test.csvFrb   NTrc   )r   r]   rd   r   )rangeordr4   r   UnicodeEncodeErrorappendr   r   r'   r^   r   r   )r   linesZline_lengthZ
start_charZend_charlnumliner   r`   re   rf   r   r   r   test_readcsv_memmap_utf8  s&    "

$rv   Zpyarrow_xfailrL   zw+bzw+tc                 C   s|   | }d}d|v rd}t j|d.}|| |d ||}W d    n1 sT0    Y  tg dgd}t|| d S )Ns   abcdtZabcdrK   r   r
   )rV   ZSpooledTemporaryFiler*   rS   r   r   r   r   )r   rL   r   ZcontentZhandler`   r   r   r   r   test_not_readable.  s    

(rx   )"__doc__r(   r   r3   rV   ZnumpyZnpZpytestZpandasr   r   Zpandas._testingZ_testingr   ZmarkZusefixturesZskip_pyarrowr   r   Zparametrizer1   r7   r:   nanrC   rH   rP   rT   rX   rY   ra   rg   rv   rx   r   r   r   r   <module>   sz   

	 








