a
    ;Zal                     @   s  d dl Z d dlZd dlmZ d dlZd dlmZ d dlm	Z	 d dlm
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d
d Zdd Zdd Zdd Zejdejejejgejdejejejgdd Zejdejejejgdd Zejdejdddgdd Zejdejdddgdd Zd d! Zd"d# Z ejjd$g d%g d&ge!g d'g d(gej!g d)g d*ge"d+ej!g d,d-ej#d.gge"d+ej!g d,d-e$d/d.gge"d+ej!g d0g d1ge"d+ej!g d2d-ej#dgge"d+ej!g d2d-e$d/dgge"d+gg d3d4d5d6 Z%ejd7d8d9gejd:dd;gd<d= Z&ejd7d8d9gejd>d?d@gdAd@gd?d@ggg dBg dCg dBgfdDd-gdEd-gdFdGgdEd-ggg dHg dIg dJgfgdKdL Z'dMdN Z(ejdejdddgejd:g dOejdPg dOdQdR Z)ejdSdTdUgejd$dAd?ge!dVdWggdXdY Z*ejdSdTdUgdZd[ Z+ejjd\d]d@gd^d@ggd]d^gd@ggej,fe!dAd?gd_d?ggdAd_gd?ggej-fej!d`d.gdad.gge"d+d`dagd.ggej,fe!d`d.gdad.ggd`dagd.ggej.fe!dAd?gej#d?ggdAej#gd?ggej/fej!d`ej#gdej#gge"d+d`dgej#ggej,fej!d`e$d/gde$d/gge"d+d`dge$d/ggej,fgg dbd4dcdd Z0ejjdeej!d-dGgge"d+j1ej!d-dfgge"d+j1g dggej,fej!dAd?ggdhd+j1ej!dAdiggdhd+j1g djgej2fej!d-dGgge"d+j1ej!d-dfgge"d+j1e!g dggej,fej!dd-gge"d+j1ej!ddGgge"d+j1g dkge"fej!d-dGgge"d+j1ej!d-ej#gge"d+j1g dlge"fej!d-dgge"d+j1ej!d-ej#gge"d+j1g dmge"fej!d-ej#gge"d+j1ej!d-dgge"d+j1d-ej#dngge"fgg dod4dpdq Z3drds Z4dtdu Z5dvdw Z6ejdejdddgejjdxd;dydzgfd{g d|fg d}d~dgfgg dd4dd Z7dd Z8ejjd$g d&g d%ge!g dg dgej!g d*g d)ge"d+gg dd4dd Z9ejjdeej!d-dGgge"d+j1ej!d-dfgge"d+j1g dggej,fej!dAd?ggdhd+j1ej!dAdiggdhd+j1g djgej2fej!d-dGgge"d+j1ej!d-dfgge"d+j1e!g dggej,fgg dd4dd Z:dd Z;dd Z<ejde$e=gdd Z>ejdddie?dfddie?dfddde?dfddAde@dfddie@dfgdd ZAdd ZBdd ZCdd ZDdd ZEdd ZFdd ZGejdej#de$d/gdd ZHejddgdggd:didfg d&g d%g dgd:ejIdGe"d+idfg d&g d%g dgd:g didfgdd ZJejd:d]d_gg dgdd ZKejjdd9d8gddgd4ejjd:d;g d¢gd;dgd4ddń ZLejdeegddȄ ZMddʄ ZNejdg d̢ejdg d΢ddЄ ZOejdejdddgejdej#dgdd҄ ZPddԄ ZQejdddgddل ZRddۄ ZSdd݄ ZTdd߄ ZUdd ZVdd ZWejdddgdd ZXejjdeej!d-ej#gge"d+j1ej!d-dGgge"d+j1ej!d-ej#dfge"d+gej,fej!d-ej#gge"d+j1ej!d-dGgge"d+j1ej!d-ej#dfge"d+gej,fej!dej#ggejd+j1ej!dVggejd+j1e!ddWej#ggejfgg dd4dd ZYejde!dej#dVggj1e!dej#dggj1e!dWggfe!g dgj1e!g dgj1e!ej#ggfej!dej#dGgge"d+j1e!dej#dggj1ej!dfgge"d+fej!g dge"d+j1e!g dgj1ej!ej#gge"d+fgdd ZZdd Z[dd Z\ejdddaggej!ddaggdd+ej!ddaggdd+gejdd`daggej!d`daggdd+ej!d`daggdd+gdd Z]dd  Z^dS (      Nsparse)NotFittedError)assert_array_equal)assert_allclose)_convert_container)is_scalar_nan)OneHotEncoder)OrdinalEncoderc                  C   s   t g dg dg} t }tdd}|| }|| }|jdksHJ |jdksVJ t|sdJ t|rrJ t| g dg dg t| | d S )N         r   r   r   Fr   r      )              ?r   r   r   )r   r   r   r   r   )	nparrayr	   fit_transformshaper   Zissparser   toarray)XZ
enc_sparseZ	enc_denseX_trans_sparseZX_trans_dense r   Hlib/python3.9/site-packages/sklearn/preprocessing/tests/test_encoders.py!test_one_hot_encoder_sparse_dense   s    


r   c                  C   s  t g dg dg dg} t g dg}tdd}||  tjtdd || W d    n1 sn0    Y  td	d}||  | }t	||
 t g d
g t|| tdd}tjtdd ||  W d    n1 s0    Y  d S )N)r   r   r   )r   r   r   )r   r   r   )   r   r   errorhandle_unknownFound unknown categoriesmatchignore)r   r   r   r   r   r   r   Z42zhandle_unknown should be either)r   r   r	   fitpytestraises
ValueError	transformcopyr   r   r   r   X2ohZ	X2_passedr   r   r   #test_one_hot_encoder_handle_unknown*   s"    

(



r/   c                  C   s`   t dgdgg} tddgd}d}tjt|d ||  W d    n1 sR0    Y  d S )Nab
categorieszqThis OneHotEncoder instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.r#   )r   r   r	   r'   r(   r   r*   r   encmsgr   r   r   test_one_hot_encoder_not_fittedF   s    r7   c                  C   sx   t g dd} t ddgd}tdd}||  | }t|| t g dg dg t|| d S )	N)Z1111111122Z333Z4444)r   Z55555r8   r%   r    )r   r   r   r   r   r   r   r   )	r   r   reshaper	   r&   r+   r   r*   r   r,   r   r   r   +test_one_hot_encoder_handle_unknown_stringsR   s    

r<   output_dtypeinput_dtypec                 C   s   t jddgg| dj}t jddgddgg|d}td|d}t|| | t||| | td|dd}t||| t|||| d S )Nr   r   dtypeauto)r3   r@   F)r3   r@   r   )	r   asarrayTr	   r   r   r   r&   r*   )r>   r=   r   
X_expectedr.   r   r   r   test_one_hot_encoder_dtyped   s    rE   c                 C   s   t d}|ddgddgd}tjg dg dg| d	}t| d	}t|| | t|	|
| | t| d
d}t||| t|	|
|| d S )Npandasr0   r1   r   r   ABr   r   r   r   r   r   r   r   r?   F)r@   r   )r'   importorskip	DataFramer   r   r	   r   r   r   r&   r*   )r=   pdX_dfrD   r.   r   r   r   !test_one_hot_encoder_dtype_pandass   s    

rP   zignore::FutureWarning:sklearn	get_namesget_feature_namesZget_feature_names_outc                 C   s   t  }g dg dg dg dg}|| t||  }| dkrPt|tjsPJ tg d| |g d}t|| g d}tg d| tj	t
d	d
" t|| ddg W d    n1 s0    Y  d S )N)Maler   girlr   r   )Female)   rT   r   
   )rS   3   Zboy   r   )rS   [   rT         rR   )Z	x0_FemaleZx0_MaleZx1_1Zx1_41Zx1_51Zx1_91Zx2_boyZx2_girlZx3_1Zx3_2Zx3_12Zx3_21Zx4_3Zx4_10Zx4_30)onetwothreeZfourZfive)Z
one_FemaleZone_MaleZtwo_1Ztwo_41Ztwo_51Ztwo_91Z	three_boyZ
three_girlZfour_1Zfour_2Zfour_12Zfour_21Zfive_3Zfive_10Zfive_30z!input_features should have lengthr#   r]   r^   )r	   r&   getattr
isinstancer   ndarrayr   rR   r'   r(   r)   )rQ   r5   r   feature_namesZfeature_names2r   r   r   "test_one_hot_encoder_feature_names   s,    
rd   c                 C   sd   t  }tjddggtdj}|| t||  }tddg| t|| dgd}tdd	g| d S )
Nu   c❤t1Zdat2r?   u	   x0_c❤t1Zx0_dat2u   n👍me)Zinput_featuresu   n👍me_c❤t1u   n👍me_dat2)r	   r   r   objectrC   r&   r`   r   )rQ   r5   r   rc   r   r   r   *test_one_hot_encoder_feature_names_unicode   s    
rf   c                  C   s   t ddggj} t }|jg dgd | d g dgksDJ ||  jdks\J |jg dgd ||  jdksJ d S )	Nr   r   )r   r   r   r   r2   r3   )r   r   )r   r   r   r   r   r   )	r   r   rC   r	   
set_paramsZ
get_paramsr   r   r   )r   r.   r   r   r   test_one_hot_encoder_set_params   s    rh   c                 C   sN   t dd}|| }t ddd}|| }t| | t|sFJ | S )NrA   r2   Fr3   r   )r	   r   r   r   r   Zisspmatrix_csr)r   r5   ZXtr1ZXtr2r   r   r   check_categorical_onehot   s    


rj   r   defr   7   abcr   rm   )rW   r   rm   )r   r   rm   )r1   rH   cat)r0   rI   rp   r?   )r1   r   rp   r0   rp   nan)Nr   rp   )r0   r   rp   )Nr   N)mixednumericre   z	mixed-nanzmixed-float-nanz
mixed-Nonezmixed-None-nanzmixed-None-float-nan)Zidsc                 C   s   t t| d d dgf }t|ddgddgg t t| d d ddgf }t|g dg dg tdd| }t| g dg dg d S )	Nr   r   )r   r   r   r   r   r   r   r   rA   r2   )r   r   r   r   r   )r   r   r   r   r   )rj   r   r   r   r	   r   r   )r   Xtrr   r   r   test_one_hot_encoder   s    rv   sparse_FTdropfirstc                 C   s  g dg dg dg}t | |d}||}tj|td}t||| ddgddgd	dgg}t | d
|d}||}t|}t||| |d u rrg dg dg dg}t | dddgddgg dgd}||}tj|td}d |d< t||| ddgddgd	dgg}t | ddgddggdd}||}tj|td}d |d< d |d d df< t||| tg dg dg}td}t	j
t|d || W d    n1 s0    Y  d S )Nrn   rk   )ro   r   rm   r   rx   r?   r   rm   r   r   rA   )r   r3   rx   r%   ro   rl   )6   rm   8   )r   r!   r3   )r   r   r{   r|   )r   r3   r!   r   r   r   r   r   r   )Shape of the passed X data is not correctr#   )r	   r   r   r   re   r   inverse_transformreescaper'   r(   r)   )rw   rx   r   r5   X_trexpr6   r   r   r   test_one_hot_encoder_inverse  sD    






r   z
X, X_transr   rm   r   r   r   r   r   r]   r^   r_   r1   r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   c                 C   s^   t |d| }d}|r"t|d}tjt|d || W d   n1 sP0    Y  dS )zCheck that `inverse_transform` raise an error with unknown samples, no
    dropped feature, and `handle_unknow="error`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/14934
    r   zqSamples \[(\d )*\d\] can not be inverted when drop=None and handle_unknown='error' because they contain all zerosr   r#   N)r	   r&   r   r'   r(   r)   r   )r   X_transrw   r5   r6   r   r   r   ?test_one_hot_encoder_inverse_transform_raise_error_with_unknown=  s    
r   c                  C   sJ   t jddgddgddggtd} tddd	}|| }t|||  d S )
NrS   r   rU   r   r   r?   	if_binaryFrx   r   )r   r   re   r	   r   r   r   )r   oher   r   r   r   &test_one_hot_encoder_inverse_if_binary]  s     
r   )r   ry   N
reset_dropc                 C   s   t jddgddgddggtd}t|dd}|| ||}t||  }|j|d	 t|	|| t
||| tt||  | d S )
NrS   r   rU   r   r   r?   Fr   rx   )r   r   re   r	   r&   r*   r`   rg   r   r   r   )rQ   rx   r   r   r   r   rc   r   r   r   test_one_hot_encoder_drop_resetf  s     

r   methodr&   r         @      @c                 C   sJ   t  }d}tjt|d t|||  W d    n1 s<0    Y  d S )N'Expected 2D array, got 1D array insteadr#   )r	   r'   r(   r)   r`   )r   r   r.   r6   r   r   r   test_X_is_not_1Dv  s    r   c                 C   sb   t d}|g d}t }d}t jt|d t|| | W d    n1 sT0    Y  d S )NrF   )   r   r   r   r   r#   )r'   rL   Seriesr	   r(   r)   r`   )r   rN   r   r.   r6   r   r   r   test_X_is_not_1D_pandas  s    
r   zX, cat_exp, cat_dtypero   rl   r   rH   rI   )rr   rs   re   stringzmissing-floatzmissing-np.nan-objectzmissing-float-nan-objectc                 C   s   | | d d d fD ]}t dd}|| t|jts:J t|j|D ]l\}}| }t|d rt|d srJ |d d |d d ksJ n| |ksJ t	|j
|sFJ qFqd S )Nr9   rA   r2   )r	   r&   ra   categories_listziptolistr   r   
issubdtyper@   )r   Zcat_exp	cat_dtypeZXir5   resr   Zres_listr   r   r   test_one_hot_encoder_categories  s    #

r   zX, X2, cats, cat_dtypedr0   r1   cint64r   )r   r   r   )Nr0   z)r0   r1   r   )r0   Nr   r   )re   rs   zobject-stringzobject-string-nonezobject-string-nanzobject-None-and-nanzobject-nan-and-Nonec                 C   s  t |d}tg dg dg}t||  | t|jd t|d ksRJ |jd 	 t|d kspJ |jd j
|ksJ t |d}tjtdd || W d    n1 s0    Y  t |dd}tg dg d	g}t||| | d S )
Nr2   r   r   r   r   r   r   r   r"   r#   r%   r3   r!   )r   r   r   )r	   r   r   r   r   r   r   r3   r   r   r@   r'   r(   r)   r&   r*   r   r-   catsr   r5   r   r   r   r   )test_one_hot_encoder_specified_categories  s    9

(r   c                  C   sd  t jddggtdj} tg dgd}t g dg dg}t|| |  | t|	|  | |j
d  g dksJ t |j
d jt jsJ t d	d
ggj} tg dgd}d}tjt|d |	|  W d    n1 s0    Y  t d	d
t jggj} td	t jd
ggd}tjt|d |	|  W d    n1 sV0    Y  d S )Nr0   r1   r?   )r1   r0   r   r2   r   r   r   r   r   )r   r   r   z%Unsorted categories are not supportedr#   )r   r   re   rC   r	   r   r&   r*   r   r   r   r   r   r@   object_r'   r(   r)   rq   )r   r5   r   r6   r   r   r   (test_one_hot_encoder_unsorted_categories	  s     (r   c                  C   s   t jddgddggtdj} tg dg dgd}t g d	g d
g}t||  | |jd 	 g dksvJ t 
|jd jt jsJ |jd 	 g dksJ t 
|jd jt jsJ d S )Nr0   r1   r   r   r?   r   )r   r   r   r2   )r   r   r   r   r   r   )r   r   r   r   r   r   r   )r   r   re   rC   r	   r   r   r   r   r   r   r@   r   r   r5   r   r   r   r   7test_one_hot_encoder_specified_categories_mixed_columns!  s    r   c                  C   sD   t d} | ddgddgd}t|}t|g dg dg d S )	NrF   r0   r1   r   r   rG   rJ   rK   )r'   rL   rM   rj   r   )rN   rO   ru   r   r   r   test_one_hot_encoder_pandas.  s    
r   zdrop, expected_namesx0_cx2_br   )r   Zx1_2r   )r   r   r1   x0_bZx2_a)ry   Zbinarymanualc                 C   sV   g dg dg}t |d}|| t||  }| dkrHt|tjsHJ t|| d S )N)r   r   r0   )r1   r   r1   r   rR   )r	   r&   r`   ra   r   rb   r   )rQ   rx   Zexpected_namesr   r   rc   r   r   r   'test_one_hot_encoder_feature_names_drop8  s    

r   c                  C   s   ddgddgddgg} t g dg dg dg}t d d	g}td
dd}|| }t|j| t|| ddgddgddgg} t ddgddgddgg}t d	d g}td
dd}|| }t|j| t|| d S )NrW   Zyes   Znor\   )r   r   r   r   r:   )r   r   r   r   r   r   Fr   truer0   Zfalser   r   )r   r   r	   r   r   	drop_idx_r   )r   expectedZexpected_drop_idxr   resultr   r   r   *test_one_hot_encoder_drop_equals_if_binaryN  s     


r   )rW   r   rm   )r   r   rm   )rr   rs   re   c                 C   sT   t  }tjg dg dgdd}t|| |d t dd}t|| | d S )N)r   r   r   )r   r   r   r   r?   float64)r
   r   r   r   r   Zastyper   r   r   r   test_ordinal_encoderf  s
    

r   )re   rs   zobject-string-catc                 C   s   t |d}tdgdgg}t|| | t|jd t|d ksJJ |jd  t|d kshJ |jd j	|ks|J t |d}t
jtdd || W d    n1 s0    Y  d S )Nr2   r   r   r   r"   r#   )r
   r   r   r   r   r   r3   r   r   r@   r'   r(   r)   r&   r   r   r   r   )test_ordinal_encoder_specified_categoriesw  s    

r   c                  C   s   g dg dg} t  }|| }tj| td}t||| tg dg dg}td}t	j
t|d || W d    n1 s0    Y  d S )Nrn   rk   r?   )r   r   r   r   rJ   r   r#   )r
   r   r   r   re   r   r   r   r   r'   r(   r)   )r   r5   r   r   r6   r   r   r   test_ordinal_encoder_inverse  s    

r   c                  C   s   t ddd} tjddgddgdd	ggtd
}tjddgddgddggtd
}| | | |}tjddgddgddggdd
}t|| | |}tjdd gd dgddggtd
}t|| d S )Nuse_encoded_valuer!   unknown_valuer0   xr1   yr   r   r?   Zxyblar   r   r   r   )r
   r   r   re   r&   r*   r   r   )r5   X_fitr   X_trans_encr   X_trans_invinv_expr   r   r   +test_ordinal_encoder_handle_unknowns_string  s      

 

 r   r@   c                 C   s   t ddd}tjddgddgdd	gg| d
}tjddgddgddgg| d
}|| ||}tjddgddgddggdd
}t|| ||}tjdd gd dgddggtd
}t|| d S )Nr   r   r      r      r   	   r?   rY      r   r   )r
   r   r   r&   r*   r   r   re   )r@   r5   r   r   r   r   r   r   r   r   r   ,test_ordinal_encoder_handle_unknowns_numeric  s      

 

 r   zparams, err_type, err_msgr!   r   zbunknown_value should be an integer or np.nan when handle_unknown is 'use_encoded_value', got None.r   r   zTunknown_value should only be set when handle_unknown is 'use_encoded_value', got -2.r   r   zaunknown_value should be an integer or np.nan when handle_unknown is 'use_encoded_value', got bla.zhThe used value for unknown_value (1) is one of the values already used for encoding the seen categories.r%   zKhandle_unknown should be either 'error' or 'use_encoded_value', got ignore.c                 C   sd   t jddgddggtd}tf i | }tj||d || W d    n1 sV0    Y  d S )Nr0   r   r1   r   r?   r#   )r   r   re   r
   r'   r(   r&   )paramsZerr_typeerr_msgr   encoderr   r   r   *test_ordinal_encoder_handle_unknowns_raise  s    %r   c                  C   s`   t dtjd} tdgdgdgg}| | | dgdgdgg}t|dgdgtjgg d S )Nr   r   r   r   r   r   r   )r
   r   rq   r   r&   r*   r   )r5   r   r   r   r   r   (test_ordinal_encoder_handle_unknowns_nan  s
    
r   c                  C   sb   t dtjtd} tdgdgdgg}tjtdd | | W d    n1 sT0    Y  d S )Nr   )r!   r   r@   r   r   r   z'dtype parameter should be a float dtyper#   )	r
   r   rq   intr   r'   r(   r)   r&   )r5   r   r   r   r   8test_ordinal_encoder_handle_unknowns_nan_non_float_dtype  s    r   c                  C   sh   t jg dgtdj} g d}t|d}d}tjt|d ||  W d    n1 sZ0    Y  d S )N)LowMediumHighr   r   r?   )r   r   r   r2   z*Shape mismatch: if categories is an array,r#   )	r   r   re   rC   r
   r'   r(   r)   r&   )r   r   r5   r6   r   r   r   +test_ordinal_encoder_raise_categories_shape  s    
r   c                     s|  t ddtjg dg dgdd} tjddgd	d
ggddtjddgd	d
ggddtddgddggtddgddggtjddgd	dggddfD ]B   t fddtdD sJ t  |  qddgd	d
gg   tfddtdD sJ t  |  ddgd	dgg   tfddtdD sdJ t  |  d S )NrA   r2   )r   r   r   r   )r   r   r   r   r   r?   r   r   r   r   r   r0   r1   r   r      a   b   c   dre   c                    s   g | ]}j | j jkqS r   r   r@   .0ir   r5   r   r   
<listcomp>(      z'test_encoder_dtypes.<locals>.<listcomp>c                    s"   g | ]}t  j| jt jqS r   )r   r   r   r@   integerr   r5   r   r   r   -  r   c                    s   g | ]} j | jd kqS )re   r   r   r   r   r   r   2  r   )	r	   r   r   r&   allranger   r*   r   )r   r   r   r   test_encoder_dtypes  s&    

 
 
 r   c                     s  t d} tddtjg dg dgdd}| jdd	gd
dgddgddd}| tfddtd	D sxJ t	
| | | dd	gddgddgd}|d j|d j|d jg | t fddtd
D sJ t	
| | d S )NrF   rA   r2   )r   r   r   r   r   r   )r   r   r   r   r   r   r   r?   r   r   r   r   r   r   )rH   rI   Cr   c                    s   g | ]} j | jd kqS )r   r   r   r   r   r   r   B  r   z.test_encoder_dtypes_pandas.<locals>.<listcomp>r0   r1   r   r   rH   rI   r   c                    s    g | ]}j | j | kqS r   r   r   ZX_typer5   r   r   r   H  r   )r'   rL   r	   r   r   rM   r&   r   r   r   r*   r   r@   )rN   r   r   r   r   r   test_encoder_dtypes_pandas6  s    

"

 r   c                  C   s*   t  } ddgddgg}tj| j| d S )NrS   r   rU   r   )r	   r   ZtestingZassert_no_warningsr   )r5   r   r   r   r   test_one_hot_encoder_warningL  s    r   missing_valuec           	      C   sr  dddd| g}t |d}g dg ddddd| gg}|| }g dg d	g d
g}t|| |j|u spJ dd t|j|jD }||}t	j
|td}t|d rZt|d d |d d  t|d sJ t|d sJ t|d d d df |d d d df  t|dd df |dd df  t|d sFJ t|d snJ nt|| t|| d S )Nrl   rY   r   r|   r   )ro   rY   r   rm   r0   )rl   rY   r   rm   r0   )r   r   r   r   r   )r   r   r   r   r   r   c                 S   s   g | ]\}}|| qS r   r   )r   rp   Zfeaturer   r   r   r   `  s   z4test_one_hot_encoder_drop_manual.<locals>.<listcomp>r?   r9   )r9   r9   )r	   r   r   r   rx   r   r   r   r   r   r   re   r   )	r   Zcats_to_dropr5   r   Ztransr   Zdropped_catsZX_inv_transZX_arrayr   r   r    test_one_hot_encoder_drop_manualR  s2    


*"
r   zX_fit, params, err_msgrS   rU   secondz Wrong input for parameter `drop`rl   r   ;   )Zghir   r   z&The following categories were supposedc                 C   sJ   t f i |}tjt|d ||  W d    n1 s<0    Y  d S )Nr#   r	   r'   r(   r)   r&   )r   r   r   r5   r   r   r   #test_one_hot_encoder_invalid_paramsw  s    r   )ro   r   rV   r0   c                 C   s\   t | d}d}tjt|d, |g dg dg dg W d    n1 sN0    Y  d S )Nr   z-`drop` should have length equal to the numberr#   rn   rk   r   r   )rx   r5   r   r   r   r   test_invalid_drop_length  s    
r   densityr   Zdenser0   r   r1   r   c                 C   s   t | d}t | |d}g dg dg}|| || t|j|j |dkr^t|jd n0t||j|jD ]\}}}|t| |ksnJ qnt|jtj	sJ |jj
tksJ d S )Nr   rz   )r   r   r0   r   ry   r   )r	   r&   r   r   r   r   r   ra   r   rb   r@   re   )r   rx   Zohe_baseZohe_testr   Zdrop_catZdrop_idxZcat_listr   r   r   test_categories  s    



r   Encoderc                 C   s   d|    d v sJ d S )NZcategoricalZX_types)Z	_get_tags)r   r   r   r   "test_encoders_has_categorical_tags  s    r   c                  C   s`   t jddggtdj} t | }d}tjt|d |	  W d    n1 sR0    Y  d S )Nrp   dogr?   z&get_feature_names is deprecated in 1.0r#   )
r   r   re   rC   r	   r&   r'   warnsFutureWarningrR   r4   r   r   r   1test_one_hot_encoder_get_feature_names_deprecated  s
    r  zinput_dtype, category_dtype)ZOOZOUZUOZUUZUSZSOZSUZSS
array_type)r   r   Z	dataframec           
      C   s   t jdgdgg| d}t jddg|dg}t|dd|}tdgdgdgdgg|| d}||}t ddgddgddgddgg}t|| t|d|}	|	|}t dgdgdgdgg}t|| d	S )
a"  Check that encoding work with object, unicode, and byte string dtypes.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/15616
    https://github.com/scikit-learn/scikit-learn/issues/15726
    https://github.com/scikit-learn/scikit-learn/issues/19677
    r1   r0   r?   Fri   r   r   r2   N)	r   r   r	   r&   r   r*   r   r
   r   )
r>   Zcategory_dtyper  r   r3   r   X_testr   r   oer   r   r   test_encoders_string_categories  s    
"

r  c                 C   sT   t jdd|d|ggtdj}tddd|}t||  }t|ddd	| g d S )
Nr0   r1   r?   Fr%   r   r!   Zx0_ar   Zx0_)r   r   re   rC   r	   r&   r`   r   )rQ   r   r   r   namesr   r   r   )test_ohe_missing_values_get_feature_names  s    r	  c                  C   sr   t d} | jg dtjdddtjgtdddd	gd
}tg dg dg dg dg}t|}t|| d S )NrF   )r   rp   Nrp   r   r   r   r?   )col1col2r
  r  )columns)r   r   r   r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   )	r'   rL   rM   r   r   rq   floatrj   r   )rN   dfexpected_df_transru   r   r   r   %test_ohe_missing_value_support_pandas  s     
	r  pd_nan_typepd.NAznp.nanc              	   C   s   | dkrt jddd}|j}nt d}tj}|d|jdd|ddgd	d
i}tg dg dg dg dg dg}tddd}|	|}t
|| t|jdksJ t|jd d d g d t|jd d sJ d S )Nr  rF   1.0Z
minversionr
  r   r0   r1   categoryr?   )r   r   r   r   )r   r   r   r   )r   r   r   r   r   r   r   r   Fr%   r  r   r   r9   r   )r'   rL   NAr   rq   rM   r   r   r	   r   r   lenr   r   isnan)r  rN   pd_missing_valuer  r  r   df_transr   r   r   1test_ohe_missing_value_support_pandas_categorical  s.    



r  c                  C   s   ddgddgddgg} t dddd	}|| }tg d
g dg dg}t|| ddgg}tg d
g}d}tjt|d ||}W d   n1 s0    Y  t|| |	|}t
|tjddggtd dS )z@Check drop='first' and handle_unknown='ignore' during transform.r0   r   r1   r   r   ry   Fr%   rx   r   r!   r   r~   )r   r   r   r   r   tFound unknown categories in columns \[0, 1\] during transform. These unknown categories will be encoded as all zerosr#   Nr?   r	   r   r   r   r   r'   r   UserWarningr*   r   r   re   r   r   r   rD   r  warn_msgZX_invr   r   r   /test_ohe_drop_first_handle_unknown_ignore_warns"  s&    


(

r#  c                  C   s   ddgddgddgg} t dddd	}|| }tg d
g dg dg}t|| ddgg}tg dg}d}tjt|d ||}W d   n1 s0    Y  t|| |	|}t
|tjddggtd dS )zDCheck drop='if_binary' and handle_unknown='ignore' during transform.r0   r   r1   r   r   r   Fr%   r  r  rt   rJ   r   r   )r   r   r   r   r  r#   Nr?   r  r!  r   r   r   3test_ohe_drop_if_binary_handle_unknown_ignore_warnsD  s&    


(

r$  c                  C   s   ddgddgddgg} t dddddgddggd	}||  d
dgg}tddgg}d}tjt|d ||}W d   n1 s0    Y  t|| dS )zXCheck drop='first' and handle_unknown='ignore' during fit with
    categories passed in.r0   r   r1   r   r   ry   Fr%   )rx   r   r!   r3   r   zqFound unknown categories in columns \[0\] during transform. These unknown categories will be encoded as all zerosr#   N)	r	   r&   r   r   r'   r   r   r*   r   )r   r   r  rD   r"  r   r   r   r   'test_ohe_drop_first_explicit_categoriesf  s    

(r%  c                  C   sd   t t jdddggj} tt jd}d}tjt|d |	|  W d   n1 sV0    Y  dS )zDTest ordinal encoder with nan passthrough fails when dtype=np.int32.r   r   r?   zThere are missing values in features \[0\]. For OrdinalEncoder to passthrough missing values, the dtype parameter must be a floatr#   N)
r   r   rq   rC   r
   int32r'   r(   r)   r&   )r   r  r6   r   r   r   Btest_ordinal_encoder_passthrough_missing_values_float_errors_dtype  s    r'  c                  C   s   t jt jdddggt jdj} t | }t|jdks<J t	|jd ddt jg |
| }t	|t jgdgdgdgg ||}t	||  dS )z.Test ordinal encoder with nan on float dtypes.r   r   r?   r   r   r   N)r   r   rq   r   rC   r
   r&   r  r   r   r*   r   )r   r  r   	X_inverser   r   r   5test_ordinal_encoder_passthrough_missing_values_float  s    

r)  c              	   C   s0  | dkrt jddd}|j}nt d}tj}|d|jdd|ddgd	d
i}t |}t	|j
dksnJ t|j
d dd g d t|j
d d sJ ||}t|dgdgtjgdgdgg ||}|jdksJ t|dddf ddg t|dddf ddg t|d s,J dS )z0Check ordinal encoder is compatible with pandas.r  rF   r  r  r
  r   r0   r1   r  r?   r   r   Nr   r   r9          @r   r   )r   r   r   r}   )r'   rL   r  r   rq   rM   r   r
   r&   r  r   r   r  r*   r   r   r   )r  rN   r  r  r  r  r(  r   r   r   =test_ordinal_encoder_missing_value_support_pandas_categorical  s(    

 
r+  r*  )zobject-None-missing-valuezobject-nan-missing_valueznumeric-missing-valuec                 C   s   t |d}tdgtjgg}t|| | |jd j|ksBJ t |d}tj	t
dd || W d   n1 sz0    Y  dS )z.Test ordinal encoder for specified categories.r2   r   r   r"   r#   N)r
   r   r   rq   r   r   r   r@   r'   r(   r)   r&   )r   r-   r   r   r  r   r   r   r   =test_ordinal_encoder_specified_categories_missing_passthrough  s    &

r,  zX, expected_X_trans, X_testr   r   )r   r   r   )r   r*  r   r   )r   r0   r1   )r*  r   r   c                 C   s8   t ddd}|| }t|| t||dgg dS )z>Test the interaction between missing values and handle_unknownr   r9   r   g      N)r
   r   r   r*   )r   Zexpected_X_transr  r  r   r   r   r   /test_ordinal_encoder_handle_missing_and_unknown  s    

r-  c                  C   s   t g dg dg} t| }t }d}tjt|d || W d   n1 sX0    Y  tjt|d |	| W d   n1 s0    Y  |	| }t|}tjt|d |
| W d   n1 s0    Y  dS )zCheck that we raise proper error with sparse input in OrdinalEncoder.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19878
    r   r   z6A sparse matrix was passed, but dense data is requiredr#   N)r   r   r   Z
csr_matrixr
   r'   r(   	TypeErrorr&   r   r   )r   ZX_sparser   r   r   r   r   r   r   test_ordinal_encoder_sparse  s    
((

r/  c                  C   s   t g dddt jf } tg dgddd}||  tg dgdd}tjtd	d
 ||  W d   n1 sz0    Y  dS )zCheck OrdinalEncoder.fit works with unseen category when
    `handle_unknown="use_encoded_value"`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19872
    )r   r   r   r   r   r   N)r9   r   r   r   r   )r3   r!   r   r   r   r"   r#   )r   r   Znewaxisr
   r&   r'   r(   r)   )r   r  r   r   r   -test_ordinal_encoder_fit_with_unseen_category/  s    
r0  X_trainZAAOUr  c                 C   s4   t ddd}||  ||}t|ddgg dS )zChecks that `OrdinalEncoder` transforms string dtypes.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19872
    r   ir   r   N)r
   r&   r*   r   )r1  r  r5   r   r   r   r   1test_ordinal_encoder_handle_unknown_string_dtypes@  s    

r4  c                  C   sb   t g ddd} t | }t|jt j| ddj |	| }t|dgdgdgdgg dS )	zCheck that `OrdinalEncoder` accepts Python integers that are potentially
    larger than 64 bits.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20721
    )l   	HP
1& l   	H]viel   	 :?i}Ga l   IRK2e6kr9   r   r   )Zaxisr   r   N)
r   r   r;   r
   r&   r   r   sortrC   r*   )r   r   r   r   r   r   #test_ordinal_encoder_python_integer\  s    
r6  )_r   Znumpyr   Zscipyr   r'   Zsklearn.exceptionsr   Zsklearn.utils._testingr   r   r   Zsklearn.utilsr   Zsklearn.preprocessingr	   r
   r   r/   r7   r<   ZmarkZparametrizer&  Zfloat32r   rE   rP   filterwarningsrd   rf   rh   rj   r   re   rq   r  rv   r   r   r   r   r   r   r   r   Zstr_Zfloat_r   rC   r   r   r   r   r   r   r   r   r   r   r   r   r   r.  r)   r   r   r   r   r   r   r   r   rB   r   r   r   r   r  r  r	  r  r  r#  r$  r%  r'  r)  r+  r,  r-  r/  r0  r4  r6  r   r   r   r   <module>   st  

A



-*
	


 &&* !
,8




		


#
	
$







"""
!		"

