
    h$,fF                        d dl Z d dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZmZmZmZ g dg dg dg dgZej$                  j'                  d	d
g dg dg dg dgdfdg dg dg dg dgdfdg dg dg dg dgdfdg dg dg dg dgg dfdg dg dg dg dgg dfdg dg dg dg dgg dfdg dg dg dg dgg dfdg dg dg dg dgg dfg      ej$                  j)                  d      d               Zd Zej$                  j'                  dd
g      d        Zd Zej$                  j'                  d	d
g dg dg dg dgdfdg dg dg dg dgdfdg dg dg dg dgdfdg dg dg dg dgg dfdg dg dg dg dgg dfdg dg dg dg dgg dfg      ej$                  j)                  d      d                Zej$                  j)                  d!      d"        Zej$                  j)                  d      ej$                  j'                  dddg      d#               Zej$                  j'                  dg d$      d%        Zd& Zej$                  j'                  d' ed(d)            d*        Zd+ Z ej$                  j'                  d,d
g d-g d.g d/fdg d-g d0g d1fdg d2g d0g d3fg      ej$                  j)                  d      d4               Z!ej$                  j'                  d5d
g d6g d7g d8g d9gfdg d:g d:g d;g d<gfdg d=g d>g d?g d?gfg      ej$                  j)                  d      ej$                  j'                  d@g dA      dB                      Z"ej$                  j)                  d      ej$                  j'                  dg d$      dC               Z#dD Z$ej$                  j'                  dEdg dFfdg dGfg      dH        Z%dI Z&ej$                  j'                  dJejN                  ejP                  ejR                  g      ej$                  j'                  dKdejP                  ejR                  g      ej$                  j'                  d@g dA      dL                      Z*ej$                  j'                  dMejN                  ejP                  ejR                  g      ej$                  j'                  d@g dA      dN               Z+dO Z,ej$                  j'                  dPdQ edR      D  cg c]  }  edS      D ]  }dT|  dU e-|         c}} fdV edR      D  cg c]  }  edS      D ]  }dT|  dU e-|         c}} fdW edR      D  cg c]  } dT|  	 c} fg      dX        Z.ej$                  j'                  dg d$      dY        Z/ej$                  j'                  dd
dg      dZ        Z0yc c}} w c c}} w c c} w )[    N)clone)KBinsDiscretizerOneHotEncoder)assert_allcloseassert_allclose_dense_sparseassert_array_almost_equalassert_array_equal      ?)r         @      )r   g      @r         ?)   g      @r      z!strategy, expected, sample_weightuniform)r   r   r   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   kmeans)r   r   r   r   quantile)r   r   r   r   )r   r   r   r   )r   r      r   z0ignore:In version 1.5 onwards, subsample=200_000c                     t        dd|       }|j                  t        |       t        ||j	                  t                     y )Nr   ordinaln_binsencodestrategysample_weight)r   fitXr	   	transform)r   expectedr    ests       Olib/python3.12/site-packages/sklearn/preprocessing/tests/test_discretization.pytest_fit_transformr'      s6    H !I
ICGGA]G+xq!12    c                  L   t        d      j                  t               t        t        j                  dg      d         j                  t               t        d      j                  t              j                  j                  t        j                  t              k(  sJ y )Nr   r   r   )	r   fit_transformr"   nparrayr!   n_bins_dtypeint r(   r&   test_valid_n_binsr2   <   si    A,,Q/BHHaSM!,-;;A>1%))!,44::bhhsmKKKr(   r   c                     t        j                  t        t                    }t	        d|       }d}t        j                  t        |      5  |j                  t        |       ddd       y# 1 sw Y   yxY w)z=Check that we raise an error when the wrong strategy is used.)shaper   )r   r   zK`sample_weight` was provided but it cannot be used with strategy='uniform'.matchr   N)	r,   oneslenr"   r   pytestraises
ValueErrorr!   )r   r    r%   err_msgs       r&   1test_kbinsdiscretizer_wrong_strategy_with_weightsr=   B   s_     GG3q6+M
!h
7CU  
z	1 0/0 0 0s   A//A8c                     t        j                  dd      } t        |       }d}t        j                  t
        |      5  |j                  t               d d d        g d} t        |       }d}t        j                  t
        |      5  |j                  t               d d d        g d} t        |       }d}t        j                  t
        |      5  |j                  t               d d d        g d	} t        |       }d
}t        j                  t
        |      5  |j                  t               d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   mxY w# 1 sw Y   y xY w)N)r             @r*   z:n_bins must be a scalar or array of shape \(n_features,\).r5   )r   r   r   r   r   r   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 3. Number of bins must be at least 2, and must be an int.) @r   rB   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 2. Number of bins must be at least 2, and must be an int.)r,   fullr   r9   r:   r;   r+   r"   )r   r%   r<   s      r&   test_invalid_n_bins_arrayrD   N   s:   WWVS!F
&
)CKG	z	1 ! F
&
)CKG	z	1 ! F
&
)C	 
 
z	1 ! F
&
)C	 
 
z	1 ! ;    s0    E EE!E$ E	EE!$E-)r   r   r   r   rA   )r   r   r   r   )r   r   r   r   )r   r   r   r   c                    t        g dd|       j                  t        |      }t        ||j	                  t                     t        j                  t              j                  d   }|j                  j                  |fk(  sJ t        |j                  |j                        D ]  \  }}|j                  |dz   fk(  rJ  y )Nr   r   r   r   r   r   r   r   )r   r!   r"   r	   r#   r,   r-   r4   
bin_edges_zipr.   )r   r$   r    r%   
n_features	bin_edgesr   s          r&   test_fit_transform_n_bins_arrayrK   t   s    N I	c!=c)  xq!12 !""1%J>>J=000 = 0	66A:-///0r(   z&ignore: Bins whose width are too smallc            	         t        j                  dgdgdgdgdgdgg      } t        ddd	
      }|j                  | g d       t	        |j
                  d   g d       t	        |j                  |       dgdgdgdgdgdgg       y)z;Check the impact of `sample_weight` one computed quantiles.r   r   r   r   i  i  
   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r           g      ?r@   N)r,   r-   r   r!   r   rG   r#   r"   r%   s     r&   *test_kbinsdiscretizer_effect_sample_weightrP      s     	2$qcA3v67A "Y
LCGGA/G0CNN1%~6CMM!$usecUSEC53%&PQr(   c                     t        dd|       }t        j                  g dt        j                        }t        j                  |      }|j                  t        |       t        ||       y)z7Make sure that `sample_weight` is not changed in place.r   r   r   )r   r   r   r   r/   r   N)r   r,   r-   float64copyr!   r"   r   )r   r%   r    sample_weight_copys       r&   /test_kbinsdiscretizer_no_mutating_sample_weightrV      sP     !I
ICHH\<M/GGA]G+M#56r(   )r   r   r   c                    t        j                  d       t        j                  ddgddgddgddgg      }t	        | dd      }d	}t        j                  t        |
      5  |j                  |       d d d        |j                  d   dk(  sJ |j                  |      }t        |d d df   t        j                  |j                  d                y # 1 sw Y   bxY w)Nalwaysr   r   r   r   r   r   )r   r   r   z2Feature 0 is constant and will be replaced with 0.r5   )warningssimplefilterr,   r-   r   r9   warnsUserWarningr!   r.   r#   r	   zerosr4   )r   r"   r%   warning_messageXts        r&   test_same_min_maxr`      s    (#
1b'Ar7QFQF34A
HQy
ICJO	k	9 
;;q>Q	q	Br!Q$x!''!*!56 s   "CCc                     t        j                  d      } t        d      }t        j                  t
              5  |j                  |        d d d        t        d      }|j                  | j                  dd             t        j                  t
              5  |j                  |        d d d        y # 1 sw Y   jxY w# 1 sw Y   y xY w)Nr?   r   r*   r   r   )	r,   aranger   r9   r:   r;   r!   reshaper#   rO   s     r&   test_transform_1d_behaviorrd      s    
		!A
!
$C	z	" 
 !
$CGGAIIb!	z	" a  
 s   B6C6B?Cir   	   c                     t        j                  g d      j                  dd      }t        j                  g d      j                  dd      }|d| z  z  }t        dd      j	                  |      }t        ||       y )	N)r@         @g      @g       @g      $@r   r   )r   r   r   r   r   rM   r   r   r   r   )r,   r-   rc   r   r+   r	   )re   X_initXt_expectedr"   r_   s        r&   test_numeric_stabilityrl      si    XX0199"a@F((?+33B:K 	QA	9	5	C	CA	FB{B'r(   c            
         t        g dd      j                  t              } | j                  t              }t        g dd      j                  t              } | j                  t              }t	        j
                  |      rJ t        t        dD cg c]  }t        j                  |       c}d      j                  |      |       t        g dd      j                  t              } | j                  t              }t	        j
                  |      sJ t        t        dD cg c]  }t        j                  |       c}d      j                  |      j                         |j                                y c c}w c c}w )	NrF   r   ri   onehot-denseF)
categoriessparse_outputonehotT)r   r!   r"   r#   spissparser	   r   r,   rb   r+   toarray)r%   Xt_1Xt_2re   Xt_3s        r&   test_encode_optionsrx      s   
,y
A
E
Ea
HC==D
,~
F
J
J1
MC==D{{4   .:;		!;5	

-
	 ,x
@
D
DQ
GC==D;;t.:;		!;4	
 
t		 < <s   E?)Fz8strategy, expected_2bins, expected_3bins, expected_5bins)r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r?   r?   )r   r   r   r   r   r   )r   r   r   r   r   r?   )r   r   r   r   r   r   )r   r   r   r   r?   r?   c                    t        j                  g d      j                  dd      }t        d| d      }|j	                  |      }t        ||j                                t        d| d      }|j	                  |      }t        ||j                                t        d| d      }|j	                  |      }t        ||j                                y )	N)r   r   r   r   rf   rM   r   r   r   r   r   r   r   r      )r,   r-   rc   r   r+   r	   ravel)r   expected_2binsexpected_3binsexpected_5binsr"   r%   r_   s          r&   test_nonuniform_strategiesr      s     	&'//A6A !hy
IC			1	B~rxxz2 !hy
IC			1	B~rxxz2 !hy
IC			1	B~rxxz2r(   zstrategy, expected_inv)      r@         r   )r         @      r   )r   rh   r   r   )r   rh   r   r   )g      g      @g      g      )g      g      @g      g      ?)g      ?g      @g      g      ?)r   r@   r   g      )r   r   r   rN   )r   rh   r   g      ?r   )r   rq   rn   c                     t        d| |      }|j                  t              }|j                  |      }t	        ||       y )Nr   rz   )r   r+   r"   inverse_transformr   )r   r   expected_invkbdr_   Xinvs         r&   test_inverse_transformr     s=    H !hv
FC			1	B  $DlD1r(   c                 h   t        j                  g d      d d d f   }t        d| d      }|j                  |       t        j                  ddg      d d d f   }|j	                  |      }t        |j                  d      d	z   |j                         t        |j                  d      dg       y )
Nr   r   r   r   r?   r   rz   r   r{   r   )axisr   )	r,   r-   r   r!   r#   r	   maxr.   min)r   r"   r   X2X2ts        r&    test_transform_outside_fit_ranger   G  s     	q$w'A
!hy
ICGGAJ	2q'	1d7	#B
--
CswwAw*CKK8swwAw,r(   c            	      \   t        j                  g d      d d d f   } | j                         }t        dd      }|j	                  |       }t        | |       |j                         }|j                  |      }t        ||       t        |t        j                  dgdgdgdgg             y )Nr   r   r   ri   r   r   r   )r,   r-   rT   r   r+   r	   r   )r"   X_beforer%   r_   	Xt_beforer   s         r&   test_overwriter   T  s    
q$w'AvvxH
!I
6C			1	Bq(#	I  $Dr9%tRXXusecUSE&BCDr(   zstrategy, expected_bin_edges)r   r   r   )r   r   r   c                     dgdgdgdgdgdgg}t        d| d       }d}t        j                  t        |      5  |j	                  |       d d d        t        |j                  d   |       y # 1 sw Y   #xY w)Nr   r   )r   r   	subsample'Consider decreasing the number of bins.r5   )r   r9   r[   r\   r!   r   rG   )r   expected_bin_edgesr"   r   r^   s        r&   test_redundant_binsr   b  sw     qcA3aS1#&A
!h$
GC?O	k	9 
cnnQ/1CD s   A..A7c                     t        j                  g d      j                  dd      } t        j                  g d      }t        j                  g d      j                  dd      }t        ddd	      }d
}t	        j
                  t        |      5  |j                  |        d d d        t        |j                  d   |       t        |j                  |       |       y # 1 sw Y   >xY w)N)皙?r   ffffff?r   r   )r   gq=
ףp?g=
ףp=?gzG?gp=
ף?r   )r   r   r?   rM   r   r   r   r   r5   r   )r,   r-   rc   r   r9   r[   r\   r!   r   rG   r#   )r"   rJ   r_   r   r^   s        r&   !test_percentile_numeric_stabilityr   n  s    
#$,,R3A=>I	)		$	$R	+B
"Y
LC?O	k	9 
 cnnQ/;cmmA.3	 s   C  C)in_dtype	out_dtypec                 >   t        j                  t        |       }t        d||      }|j	                  |       ||}n<|.|j
                  t         j                  k(  rt         j                  }n|j
                  }|j                  |      }|j
                  |k(  sJ y NrR   r   )r   r   r/   )	r,   r-   r"   r   r!   r/   float16rS   r#   )r   r   r   X_inputr   expected_dtyper_   s          r&   test_consistent_dtyper   {  s     hhq)G
!F)
DCGGG "		w}}

: 	w	B88~%%%r(   input_dtypec                 J   t        j                  t        |       }t        d|t         j                        }|j                  |       |j                  |      }t        d|t         j                        }|j                  |       |j                  |      }t        ||       y r   )	r,   r-   r"   r   float32r!   r#   rS   r   )r   r   r   kbd_32Xt_32kbd_64Xt_64s          r&   test_32_equal_64r     s~    
 hhq,G QvRZZHF
JJwW%E QvRZZHF
JJwW%E .r(   c                     t        j                  g d      j                  dd      } t        ddd      }|j	                  |        t        |      }|j                  d        |j	                  |        t        |j                  d	   |j                  d	         D ]%  \  }}t         j                  j                  ||       ' |j                  j                  |j                  j                  k(  sJ y )
Nr
   r   r   rM   r   r   r   r   r   )r,   r-   rc   r   r!   r   
set_paramsrH   rG   testingr   r4   )r"   kbd_defaultkbd_without_subsamplingbin_kbd_defaultbin_kbd_with_subsamplings        r&   'test_kbinsdiscretizer_subsample_defaultr     s    
"#++B2A""YTKOOA#K0&&&6"58q!#:#E#Ea#H6 N11 	

""?4LMN !!''+B+M+M+S+SSSSr(   zencode, expected_namesrq   r   r?   feat_rn   r   c                 6   g dg dg dg dg}t        d|       j                  |      }|j                  |      }t        d      D cg c]  }d| 	 }}|j	                  |      }|j
                  d	   |j
                  d
   k(  sJ t        ||       yc c}w )z[Check get_feature_names_out for different settings.
    Non-regression test for #22731
    )r   r   r   )r   r   r   )r   r   r   )r   r?   r   r?   ri   r   r   r   r   N)r   r!   r#   rangeget_feature_names_outr4   r	   )r   expected_namesr"   r   r_   re   input_featuresoutput_namess           r&   *test_kbinsdiscrtizer_get_feature_names_outr     s    4 
k:z:A
!F
3
7
7
:C	q	B*/(3QQCj3N3,,^<L88A;,,,Q////|^4	 4s   	Bc                 P   t         j                  j                  |      j                  d      dz   }t	        | d|      }|j                  |       t        |      }|j                  d        |j                  |       t        |j                  d   |j                  d   d       y )	N)i r   r   iP  )r   r   random_stater   r   g{Gz?)rtol)
r,   randomRandomStaterandom_sampler   r!   r   r   r   rG   )r   global_random_seedr"   kbd_subsamplingkbd_no_subsamplings        r&   test_kbinsdiscretizer_subsampler     s     			01??LqPA&U9KO /!!D!11 ""1%'9'D'DQ'Gdr(   c                     t         j                  j                  d      j                  d      }t	        | d      }t        j                  t        d      5  |j                  |       d d d        y # 1 sw Y   y xY w)Nr   )d   r   )r   r   z)subsample=200_000 will be used by defaultr5   )	r,   r   r   r   r   r9   r[   FutureWarningr!   )r   r"   r   s      r&   test_kbd_subsample_warningr     s^     			a ..x8A
H1
=C	m+V	W 
  s   A22A;)1rY   numpyr,   r9   scipy.sparsesparserr   sklearnr   sklearn.preprocessingr   r   sklearn.utils._testingr   r   r   r	   r"   markparametrizefilterwarningsr'   r2   r=   rD   rK   rP   rV   r`   rd   r   rl   rx   r   r   r   r   r   r   r   r   rS   r   r   r   floatr   r   r   )col_idbin_ids   00r&   <module>r      s        A  +->P '	\<|LdS	L,lKTR	lL,MtT<|D	
 <|D	
 <|D	
 <|D	
 <|D	
1 D NO3 PE F3L i[10 20#L '	\<|LdS	L,lKTR	lL,MtT<|D	
 <|D	
& <|D	
7 #J NO
0 PK#L
0 DER FR NOh
%;<7 = P7 %FG
7 H
7	 eAqk*( +(2 >	&(:<NO	%'9;MN	');=OP NO3 P3*  ''%%		
 00/*		
 (&&&		
'B NO#HI2 J PCF2 NO%FG- H P-E "j)%<x>U$VEE
4 bjj"**bjj%IJtRZZ&DE#HI& J F K&" RZZ(LM#HI/ J N/$T"   $Ah#Ah  vhaf//	
  $Ah#Ah  vhaf//	
 
58<tF8_<=#,5-,5  %FG H* i%:; <q =s   "W"W3W#