a
    GbG                     @   sJ  d Z ddlmZ ddlmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ejdZedd	 Zd
d Zejdg dddggdd Zejdddgdd Zdd Zedd Zdd Zedd Zejdddd gid!fdd"gdd gd#d$fdd"gdd gd%d&fgd'd( Zed)d*d+gZeejd,ddd"gid-g d.d/d-ed0d1ed0d2ed0d3ed4d5ed6d7ed6d8gd/gd9d: Zeejd,ddd"gid;g d.d/d;ed0d1ed0d2ed0d3ed4d5ed6d7ed6d8gd/gd<d= Z eejd,ddd"gid;g d.d/d;ed0d1ed0d2ed0d3ed4d5ed6d7ed6d8gd/gd>d? Z!ed@dA Z"edBdC Z#edDdE Z$edFdG Z%eejdHdIdJgdKdL Z&eejd,i ddigdMdN Z'ejdOi g dPfdQdRig dSfdTg dUig dUfgdVdW Z(ejdd0d4gdXgdYdZ Z)ed[d\ Z*eejd]d^eg d_ge+g d`dafdbeg d_ge+g dcdafddeg dege+g dfdafgdgdh Z,eejdddggejdiddjdkgdkdjgdkdlggdmdn Z-edodp Z.edqdr Z/edsdt Z0edudv Z1edwdx Z2edydz Z3dS ){zx
Tests that the file header is properly handled or inferred
during parsing for all of the parsers defined in parsers.py
    )
namedtuple)StringION)ParserError)	DataFrameIndex
MultiIndexZpyarrow_skipc                 C   sR   | }d}t jt|d( td}|j|dgd W d    n1 sD0    Y  d S )Nzbut only \d+ lines in filematchz,,
   header)pytestraises
ValueErrorr   read_csv)all_parsersparsermsgs r   Alib/python3.9/site-packages/pandas/tests/io/parser/test_header.pytest_read_with_bad_header   s
    r   c                 C   sL   | }d}t jtdd" |jt|dd W d    n1 s>0    Y  d S )N$1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
zUPassing negative integer to header is invalid. For no header, use header=None insteadr   r   r   r   r   r   r   r   r   datar   r   r   test_negative_header#   s    r   r   )r         c                 C   sL   | }d}t jtdd" |jt||d W d    n1 s>0    Y  d S )Nz<1,2,3,4,5
        6,7,8,9,10
        11,12,13,14,15
        z8cannot specify multi-index header with negative integersr   r   r   )r   r   r   r   r   r   r    test_negative_multi_index_header2   s    r!   TFc                 C   sP   | }d}d}t jt|d" |jt||d W d    n1 sB0    Y  d S )NzMyColumn
a
b
a
bz#Passing a bool to header is invalidr   r   )r   r   	TypeErrorr   r   )r   r   r   r   r   r   r   r   test_bool_header_arg@   s
    r#   c                 C   s|   | }d}t jtdd$ |jt|dd d}W d    n1 s@0    Y  tg dg dg dgg d	d
}t || d S )Nr   FZcheck_stacklevelZField)prefixr      r      r               	   r
                  )ZField0ZField1ZField2ZField3ZField4columns)tmassert_produces_warningFutureWarningr   r   r   assert_frame_equalr   r   r   resultexpectedr   r   r   test_no_header_prefixO   s    2r>   c                 C   sZ   | }d}g d}|j t||d}tg dg dg dgg dg dd}t|| d S )	Nzfoo,1,2,3
bar,4,5,6
baz,7,8,9
ABCnamesr'   r   r(   )r   r)   r+   )r,   r-   r.   )foobarbazindexr6   r   r   r   r7   r:   )r   r   r   rD   r<   r=   r   r   r   test_header_with_index_col^   s    rL   c                 C   sD   | }d}d}|j t|ddd}|j t|ddd}t|| d S )Nzggot,to,ignore,this,line
got,to,ignore,this,line
index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
z7index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
r   r   r   	index_colr   r   r7   r:   )r   r   r   Zdata2r<   r=   r   r   r   test_header_not_first_linep   s    rP   c                 C   sF   | }t jddddd}d}|jt|g ddd	gd
}t || d S )Nr)   r(   r   r   )Zr_idx_nlevelsZc_idx_nlevels  C0,,C_l0_g0,C_l0_g1,C_l0_g2

C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
r   r'   r   r(   r   r'   rM   )r7   ZmakeCustomDataframer   r   r:   r   r   r=   r   r<   r   r   r   test_header_multi_index   s
    rT   z
kwargs,msgrN   rF   rG   zLindex_col must only contain row numbers when specifying a multi-index headerr'   )rN   rD   z9cannot specify names when specifying a multi-index header)rN   Zusecolsz;cannot specify usecols when specifying a multi-index headerc                 C   sX   d}| }t jt|d. |jt|fdg di| W d    n1 sJ0    Y  d S )NrQ   r   r   rR   r   )r   kwargsr   r   r   r   r   r   test_header_multi_index_invalid   s    rV   
_TestTuplefirstsecondrU   r(   )aq)r[   r)r[   r   )bt)cu)r`   v)ZskiprowsrD   r[   r\   r]   r   r^   r_   r`   ra   rb   c                 C   sZ   | }t g dg dgddgtg dd}d}|jt|fdd	i|}t|| d S )
Nr'   r   r(   r   r)   r+   r,   r-   r.   r
   r0   r1   onetworZ   rI   zC,a,a,a,b,c,c
,q,r,s,t,u,v
,,,,,,
one,1,2,3,4,5,6
two,7,8,9,10,11,12rN   r   r   r   from_tuplesr   r   r7   r:   r   rU   r   r=   r   r<   r   r   r   &test_header_multi_index_common_format1   s    rj   r   c                 C   sZ   | }t g dg dgddgtg dd}d}|jt|fdd	i|}t|| d S )
Nrc   rd   re   rf   rZ   rI   z<,a,a,a,b,c,c
,q,r,s,t,u,v
one,1,2,3,4,5,6
two,7,8,9,10,11,12rN   r   rg   ri   r   r   r   &test_header_multi_index_common_format2   s    rk   c                 C   sf   | }t g dg dgddgtg dd}|jdd}d	}|jt|fd
d i|}t|| d S )Nrc   rd   re   rf   rZ   rI   T)Zdrop2a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12rN   )r   r   rh   Zreset_indexr   r   r7   r:   ri   r   r   r   &test_header_multi_index_common_format3%  s    rm   c                 C   s   | }t tjg dg dgddtddgtg dg dgg d	g d
gddgdd}d}|jt|ddgdd}t|| d S )Nr   r(   r   r)   r+   r-   r.   r
   r0   r1   int64Zdtyper'   r,   r[   r^   r`   r]   r   r_   ra   rb   r   r   r'   r   r   r   r'   r   r(   r   r[   r\   levelscodesrD   rI   rl   r   rM   	r   nparrayr   r   r   r   r7   r:   rS   r   r   r   0test_header_multi_index_common_format_malformed1U  s    
	r|   c                 C   s   | }t tjg dg dgddtddgtg dg dgg d	g d
gd dgdd}d}|jt|ddgdd}t|| d S )Nrn   ro   rp   rq   r'   r,   rr   rs   rt   ru   r\   rv   rI   1,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12r   rM   ry   rS   r   r   r   0test_header_multi_index_common_format_malformed2j  s    

r~   c                 C   s   | }t tjg dg dgddtddgddggd	dgd	dggd
tg dg dgg dg dgd dgdd}d}|jt|d	dgd	dgd}t|| d S )N)r(   r   r)   r+   )r.   r
   r0   r1   rp   rq   r'   r,   r   r-   r   )rw   rx   rr   )r   r_   ra   rb   )r   r'   r   r   rR   r\   rv   rI   r}   rM   )r   rz   r{   r   r   r   r7   r:   rS   r   r   r   0test_header_multi_index_common_format_malformed3  s    "	r   c                 C   s^   | }d d gddgddgg}t ddg}t||d}d}|jt|d	dgd
}t|| d S )Nr'   r   r(   r   )r[   r@   )r^   rA   r5   za,b
A,B
,
1,2
3,4r   r   )r   rh   r   r   r   r7   r:   )r   r   r   r6   r=   r<   r   r   r   "test_header_multi_index_blank_line  s    r   zdata,header)1,2,3
4,5,6N)zfoo,bar,baz
1,2,3
4,5,6r   c                 C   sB   | }|j tdg dd}|j t|g d|d}t|| d S )Nr   rr   rC   rD   r   rO   )r   r   r   r   r=   r<   r   r   r   !test_header_names_backward_compat  s    r   c                 C   s8   | }t g dd}|jtdfi |}t|| d S )Nrr   r5   za,b,cr   r   r   r7   r:   )r   rU   r   r=   r<   r   r   r   test_read_only_header_no_rows  s    r   zkwargs,namesru   r%   X)ZX0ZX1ZX2ZX3ZX4rD   )rF   rG   rH   ZquuxZpandac                 C   s   | }d}t g dg dg dg|d}d| v r|tjtdd* |jt|fd	d i|}W d    q1 sp0    Y  n|jt|fd	d i|}t|| d S )
Nr   r&   r*   r/   r5   r%   Fr$   r   )r   keysr7   r8   r9   r   r   r:   )r   rU   rD   r   r   r=   r<   r   r   r   test_no_header  s    :r   Zstring_headerc                 C   sP   d}d}| }t jt|d" |jt||d W d    n1 sB0    Y  d S )Nz*header must be integer or list of integersz1,2
3,4r   r   r   )r   r   r   r   r   r   r   r   test_non_int_header  s
    r   c                 C   sH   d}| }t ddgddgddgd}|jt|dgd}t|| d S )Nza,b,c
0,1,2
1,2,3r   r'   r   r(   rr   r   r   )r   r   r   r=   r<   r   r   r   test_singleton_header  s
    r   zdata,expectedz#A,A,A,B
one,one,one,two
0,40,34,0.1)r   (   "   皙?)r@   re   r@   zone.1)r@   zone.2rA   rf   r5   z%A,A,A,B
one,one,one.1,two
0,40,34,0.1)r   r   r@   zone.1.1r   z/A,A,A,B,B
one,one,one.1,two,two
0,40,34,0.1,0.1)r   r   r   r   r   )r   r   r   r   )rA   ztwo.1c                 C   s*   | }|j t|ddgd}t|| d S )Nr   r'   r   rO   )r   r   r=   r   r<   r   r   r   test_mangles_multi_index  s    )r   r6    ZUnnamedZ
NotUnnamedc                 C   s   | }ddg}|d u r,d |p"ddgd }nd dg|p>ddg d }|jt|||d}g }|d u rpg d}t|D ]2\}}	|	sd	|d u r|n|d  d
}	||	 qxtt|ddg}tddgddgg|d}
t	
||
 d S )Nr   r'   ,r   z
0,1
2,3
4,5
z
,0,1
0,2,3
1,4,5
rM   )r   r   r   z	Unnamed: Z_level_001r   r(   r   r)   r5   )joinr   r   	enumerateappendr   rh   zipr   r7   r:   )r   rN   r6   r   r   r   r<   Zexp_columnsicolr=   r   r   r   test_multi_index_unnamed  s     r   c                 C   sL   | }d}|j t|dg dd}tddgddgd	d
gd}t|| d S )Nza, b
1,2,3
5,6,4
r   r?   )r   rD   r'   r)   r   r+   r(   r   rK   r;   r   r   r   6test_names_longer_than_header_but_equal_with_data_rowsA  s
    r   c                 C   s   | }d}d}t g d}tg dg dg|d}|jt|ddgd	}t||jd d  |jt|ddgd	}t|| d S )
NzFMale, Male, Male, Female, Female
R, R, L, R, R
.86, .67, .88, .78, .81z^Male, Male, Male, Female, Female
R, R, L, R, R
.86, .67, .88, .78, .81
.86, .67, .88, .78, .82))ZMaleR) Male R)r   z L) Femaler   )r   z R.1)Q?q=
ףp?)\(?(\?gQ?)r   r   r   r   g=
ףp=?r5   r   r'   r   )r   rh   r   r   r   r7   r:   Ziloc)r   r   s1s2mir=   Zdf1Zdf2r   r   r    test_read_csv_multiindex_columnsN  s    	r   c                 C   sP   | }d}t jtdd& |jt|ddgd W d    n1 sB0    Y  d S )Nz1row11,row12,row13
row21,row22, row23
row31,row32
z1Header rows must have an equal number of columns.r   r   r   r   r   r   r   r   r   )r   r   Zcaser   r   r   'test_read_csv_multi_header_length_checkn  s    r   c                 C   sT   | }d}|j t|ddgd d}tg ddtjtjgdg dd	}t|| d S )
Nzx,1,5
y,2
z,3
r[   r^   r   rE   r)   r[   r^   )xyz)rJ   )r   r   r   rz   nanr7   r:   r;   r   r   r   #test_header_none_and_implicit_index~  s    r   c                 C   sR   | }d}t jtdd( |jt|ddgd d W d    n1 sD0    Y  d S )Nx,1
y,2,5
z,3
z"Expected 2 fields in line 2, saw 3r   r[   r^   r   r   r   r   r   r   1test_header_none_and_implicit_index_in_second_row  s    r   c                 C   sH   | }d}|j t|ddgd dd}tddgdd	gd
}t|| d S )Nr   r[   r^   skip)rD   r   Zon_bad_linesr   r   r'   r(   r   rK   r;   r   r   r   &test_header_none_and_on_bad_lines_skip  s    r   )4__doc__collectionsr   ior   Znumpyrz   r   Zpandas.errorsr   Zpandasr   r   r   Zpandas._testingZ_testingr7   ZmarkZusefixturesZskip_pyarrowr   r   Zparametrizer!   r#   r>   rL   rP   rT   rV   rW   rj   rk   rm   r|   r~   r   r   r   r   r   r   r   rh   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sV  
	




	







		








&"




