
    h$,fsM                     |   d dl Z d dlZd dlZd dlmZ d dlmZmZ d dl	m
Z
 d dlmZmZmZ d dlmZmZ d Zd fd	Zd
 Zd Zd Zd Zd Zd Zej4                  j7                  ddgez   ez         d        Zd Zd Zej4                  j7                  dddg      d        Zd Z d Z!ej4                  j7                  dejD                  jG                  dd      dddfejD                  jG                  d      ejD                  jG                  d      ddddfejD                  jG                  dd      d  ddfejD                  jG                  d      ejD                  jG                  d      dd! dd"fejD                  jG                  dd      d# dd$fejD                  jG                  d      ejD                  jG                  d      dd% dd&fejD                  jG                  dd      dd&d&fejD                  jG                  d      ejD                  jG                  d      ddddfejD                  jG                  dd      d' d&d(fejD                  jG                  d      ejD                  jG                  d      dd) dd&fg
      ej4                  j7                  d*d+d,g      d-               Z$d. Z%d/ Z&d0 Z'd1 Z(ej4                  j7                  d2dd3d4gfd5 d6d7ggg      ej4                  j7                  d8d+d,g      d9               Z)d: Z*d; Z+ej4                  j7                  d<d=d>g      ej4                  j7                  d?g d@      dA               Z,ej4                  j7                  dBddC g      dD        Z-ej4                  j7                  d<d=d>g      ej4                  j7                  dBddE g      dF               Z.y)G    N)make_pipeline)FunctionTransformerStandardScaler)_get_adapter_from_container)_convert_containerassert_allclose_dense_sparseassert_array_equal)CSC_CONTAINERSCSR_CONTAINERSc                  ,   t        j                  d      } | j                  g dg dd      }t        |      }|j                  dk(  sJ d}t        j
                  t        |      5  t        |j                                ddd       y# 1 sw Y   yxY w)z4Check the behavior fo `_get_adapter_from_container`.pandas         
      d   abzAThe container does not have a registered adapter in scikit-learn.matchN)pytestimportorskip	DataFramer   container_libraises
ValueErrorto_numpy)pdXadaptererr_msgs       Ulib/python3.12/site-packages/sklearn/preprocessing/tests/test_function_transformer.pytest_get_adapter_from_containerr'      sy    			X	&B
9=9:A)!,G  H,,,QG	z	1 2#AJJL12 2 2s   'B

Bc                     | S N )r#   r   ks      r&   <lambda>r,      s         c                       fd}|S )Nc                 z    j                  |        j                  |       j                  |        |       S r)   )appendextendupdate)r#   argskwargs
args_storefunckwargs_stores      r&   _funcz_make_func.<locals>._func   s6    !$F#Awr-   r*   )r5   r7   r6   r8   s   ``` r&   
_make_funcr9      s     Lr-   c                     g } i }t        j                  d      j                  d      }t        t	        t        | |            j                  |      |d       | |gk(  sJ dj                  |              |rJ dj                  |             g | d d  |j                          t	        t        | |            j                  |      }t        ||d       | |gk(  sJ dj                  |              |rJ dj                  |             y )Nr      r   z*transform should have returned X unchangedz5Incorrect positional arguments passed to func: {args})r3   z3Unexpected keyword arguments passed to func: {args})r%   )	nparangereshaper	   r   r9   	transformformatclear)r5   r7   r#   transformeds       r&   test_delegate_to_funcrD   &   sK    JL
		"f%AJz<@AKKAN	4 	  W>EE:EVW 
 W<CCCVW JqM%:|,il  Q L
 	  W>EE:EVW 
 W<CCCVWLr-   c                      t        j                  d      j                  d      } t        t	        t         j
                        j                  |       t        j
                  |              y )Nr   r;   )r=   r>   r?   r	   r   log1pr@   r#   s    r&   test_np_logrH   P   sF    
		"f%A BHH%//2
r-   c                      t        j                  ddd      j                  d      } t        t         j                  t        d            }t        |j                  |       t        j                  | d             y 	Nr   r   r   numr;   r   decimalskw_args)r=   linspacer?   r   arounddictr	   r@   r#   Fs     r&   test_kw_argrV   Z   sS    
Aqb!))&1ABIItQ/?@A q{{1~ryyQ'?@r-   c                     t        j                  ddd      j                  d      } t        t         j                  t        d            }d|j                  d	<   t        |j                  |       t        j                  | d             y )
Nr   r   r   rK   r;   r   rM   rO   rN   	r=   rQ   r?   r   rR   rS   rP   r	   r@   rT   s     r&   test_kw_arg_updaterY   c   sa    
Aqb!))&1ABIItQ/?@AAIIj q{{1~ryyQ'?@r-   c                     t        j                  ddd      j                  d      } t        t         j                  t        d            }t        d      |_        t        |j                  |       t        j                  | d             y rJ   rX   rT   s     r&   test_kw_arg_resetr[   n   s_    
Aqb!))&1ABIItQ/?@Aa AI q{{1~ryyQ'?@r-   c                  \   t        j                  g d      j                  d      } t        t         j                  t         j
                  t        d            }t        |j                  |j                  |             t        j
                  t        j                  |       d             y )Nr      	      r   r   r   rM   )r6   inverse_funcinv_kw_args)
r=   arrayr?   r   sqrtrR   rS   r	   inverse_transformr@   rT   s     r&   test_inverse_transformrg   y   ss    
''/A 	WWYY!$	A
 	AKKN+
		"''!*q)r-   sparse_containerc                    t        j                  g dt         j                        j                  d      }|  | |      }t	        t         j
                  t         j                  | d udd      }d}t        j                  t        |      5  |j                  |       d d d        t	        t         j                  t         j                  | d udd      }t        j                         5  t        j                  dt               |j!                  |      }d d d        t#        ||j%                               y # 1 sw Y   xY w# 1 sw Y   1xY w)	Nr]   dtypera   T)r6   rb   accept_sparsecheck_inversevalidatezThe provided functions are not strictly inverse of each other. If you are sure you want to proceed regardless, set 'check_inverse=False'.r   error)r=   rd   float64r?   r   re   rR   r   warnsUserWarningfitexpm1rF   warningscatch_warningssimplefilterfit_transformr   rf   )rh   r#   transwarning_messageXts        r&   test_check_inverser|      s   
bjj199&AA#QWWYY&d2E	"  
k	9 		!  XXXX&d2E 
	 	 	" $g{3  #$ !E$;$;B$?@ $ $s   D8(,E8EEc                  4   t        j                  g dt         j                        j                  d      } t	        t         j
                  d dd      }t        j                         5  t        j                  dt               |j                  |        d d d        t	        d t         j
                  dd      }t        j                         5  t        j                  dt               |j                  |        d d d        y # 1 sw Y   oxY w# 1 sw Y   y xY w)Nr]   rj   ra   T)r6   rb   rm   rn   ro   )r=   rd   rp   r?   r   rt   ru   rv   rw   rr   rs   )r#   ry   s     r&   /test_check_inverse_func_or_inverse_not_providedr~      s     	bjj199&AAXXDtE 
	 	 	" g{3		!  tE 
	 	 	" g{3		!   s   (,D,DDDc                      t        j                  d      } | j                  t        j                  j                  dd            }t               }|j                  |      }t        |d      sJ y )Nr   r   r   loc)	r   r   r   r=   randomrandnr   rx   hasattr)r"   X_dftransformer
X_df_transs       r&   test_function_transformer_framer      sW    			X	&B<<		R01D%'K**40J:u%%%r-   X_typerd   seriesc                 X   	
 dddddd

j                         D ci c]  \  }}||
 c}}	dg d}t        | d	g
      }
fd} 	fd}t        ||dd      }d}t        j                  t
        |      5  |j                  |       ddd       yc c}}w # 1 sw Y   yxY w)zKCheck that `FunctionTransformer.check_inverse` raises error on mixed dtype.r   r   r   fivesix)onetwothreer<      object)r   r   r   r   r   r<   r   valuecolumns_namerk   c                     t        j                  t        | j                        D cg c]
  }| |       c}t              S c c}w )Nrj   )r=   rd   rangesizer   )r#   imappings     r&   r6   zDtest_function_transformer_raise_error_with_mixed_dtype.<locals>.func   s2    xxaff>11>fMM>s   Ac                 P    t        | D cg c]  }|   	 c}dg      S c c}w )Nr   r   )r   )r#   xr   rk   inverse_mappings     r&   rb   zLtest_function_transformer_raise_error_with_mixed_dtype.<locals>.inverse_func   s1    !)*+A_Q+!	
 	
+s   #FT)r6   rb   rn   rm   L'check_inverse' is only supported when all the elements in `X` is numerical.r   N)itemsr   r   r   r   r    rs   )r   keyr   datar6   rb   r   msgrk   r   r   s   `       @@@r&   6test_function_transformer_raise_error_with_mixed_dtyper      s     A&UCG4;MMODjc5uczDOE6DdF'%PDN
 &uDK YC	z	-  - E, s   B?B  B)c                      t        j                  d      } | j                  g dg dd      }t        d d d      }|j	                  |      }t        ||d	z          y
)z8Check support for dataframes with only numerical values.r   r   )r^   r<   r   r   c                     | dz   S Nr   r*   r   s    r&   r,   z`test_function_transformer_support_all_nummerical_dataframes_check_inverse_True.<locals>.<lambda>   s
    q1u r-   c                     | dz
  S r   r*   r   s    r&   r,   z`test_function_transformer_support_all_nummerical_dataframes_check_inverse_True.<locals>.<lambda>   s
    QU r-   Tr6   rb   rm   r   N)r   r   r   r   rx   r   )r"   dfr   df_outs       r&   Ntest_function_transformer_support_all_nummerical_dataframes_check_inverse_Truer      sV    			X	&B	II6	7B%?$K
 &&r*F a0r-   c                     t        j                  d      } t        d d d      }| j                  g dg dd      }d	}t        j                  t
        |
      5  |j                  |       ddd       y# 1 sw Y   yxY w)zYCheck error is raised when check_inverse=True.

    Non-regresion test for gh-25261.
    r   c                     | S r)   r*   r   s    r&   r,   zQtest_function_transformer_with_dataframe_and_check_inverse_True.<locals>.<lambda>   s    q r-   c                     | S r)   r*   r   s    r&   r,   zQtest_function_transformer_with_dataframe_and_check_inverse_True.<locals>.<lambda>   s     r-   Tr   r   r   r   cr   r   r   N)r   r   r   r   r   r    rs   )r"   r   df_mixedr   s       r&   ?test_function_transformer_with_dataframe_and_check_inverse_Truer      sn    
 
		X	&B%{$K ||)/BCH
XC	z	- "!" " "s   A66A?z.X, feature_names_out, input_features, expectedr   r   
one-to-one)x0x1x2r   c                      y)Nr   r*   r   input_featuress     r&   r,   r,         r-   c                      y)Nr   der*   r   s     r&   r,   r,     r   r-   r   c                     t        |      dz   S )N)r   tupler   s     r&   r,   r,   %      n0E0N r-   )r   r   r   r   c                     t        |      dz   S N)r   r   r   s     r&   r,   r,   ,  r   r-   r   c                     t        |      dz   S )N)r   r   r   s     r&   r,   r,   A  r   r-   )r   r   r   r   c                     t        |      dz   S r   r   r   s     r&   r,   r,   H  r   r-   rn   TFc                 H   t        | t              r&t        j                  d      }|j	                  |       } t        ||      }|j                  |        |j                  |      }t        |t        j                        sJ |j                  t        k(  sJ t        ||       y )Nr   feature_names_outrn   )
isinstancerS   r   r   r   r   rs   get_feature_names_outr=   ndarrayrk   r   r	   )r#   r   r   expectedrn   r"   r   namess           r&   /test_function_transformer_get_feature_names_outr     s    ^ !T  *LLO%+hK OOA--n=EeRZZ(((;;&   uh'r-   c                     t        dd      } t        j                  j                  dd      }| j	                  |       | j                  d      }t        |t        j                        sJ |j                  t        k(  sJ t        |d       y )Nr   Fr   r   r   r   )r   r=   r   randrx   r   r   r   rk   r   r	   )r   r#   r   s      r&   Btest_function_transformer_get_feature_names_out_without_validationr   `  sp    %uUK
		sAAa --j9EeRZZ(((;;&   uj)r-   c                      t               } t        j                  j                  dd      }| j	                  |       d}t        j                  t        |      5  | j                          d d d        y # 1 sw Y   y xY w)Nr   r   zCThis 'FunctionTransformer' has no attribute 'get_feature_names_out'r   )	r   r=   r   r   rx   r   r   AttributeErrorr   )r   r#   r   s      r&   3test_function_transformer_feature_names_out_is_Noner   k  s`    %'K
		sAAa 
OC	~S	1 ,))+, , ,s   A33A<c                     d } d }t        | |t        d      d      }t        j                  d      }|j	                  t
        j                  j                  d      t
        j                  j                  d      d	      }|j                  |       |j                         }t        |t
        j                        sJ |j                  t        k(  sJ t        |d
       y )Nc                     t        j                  | t         j                  j                  t	        |       |      gd      S )Nr   axis)r=   concatenater   r   len)r#   ns     r&   add_n_random_featureszYtest_function_transformer_feature_names_out_uses_estimator.<locals>.add_n_random_featuresv  s+    ~~q"))..Q";<1EEr-   c                 z    | j                   d   }t        |      t        |      D cg c]  }d| 	 c}z   S c c}w )Nr   rnd)rP   listr   )r   r   r   r   s       r&   r   zUtest_function_transformer_feature_names_out_uses_estimator.<locals>.feature_names_outy  s:    $N#%(&CQQCy&CCC&Cs   8r   )r   T)r6   r   rP   rn   r   r   r   )r   r   rnd0rnd1rnd2)r   rS   r   r   r   r=   r   r   rx   r   r   r   rk   r   r	   )r   r   r   r"   r   r   s         r&   :test_function_transformer_feature_names_out_uses_estimatorr   u  s    FD &"+q		K 
		X	&B	BIINN3/biinnS6IJ	KBb!--/EeRZZ(((;;&   u@Ar-   c                  $   d } d }t        j                  ddgddgddgg      }t        | |d      }|j                  |      }|j                  |j
                  d   k(  sJ |j                  |       |j                  |j
                  d   k(  sJ y	)
zSTest that function transformer does not reset estimator in
    `inverse_transform`.c                 |    t        j                  | j                  d   df      }t        j                  | |fd      S )Nr   r   r   )r=   onesshaper   )r#   X_ones     r&   add_constant_featurezHtest_function_transformer_validate_inverse.<locals>.add_constant_feature  s1    Q(~~q%jq11r-   c                     | d d d df   S )Nr*   rG   s    r&   inverse_add_constantzHtest_function_transformer_validate_inverse.<locals>.inverse_add_constant  s    CRCyr-   r   r   r   r^   T)r6   rb   rn   N)r=   rd   r   rx   n_features_in_r   rf   )r   r   r#   ry   X_transs        r&   *test_function_transformer_validate_inverser     s    2 	1a&1a&1a&)*A!)E
 !!!$G1771:---	G$1771:---r-   zfeature_names_out, expectedpetcolorc                 2    |D cg c]  }| d	 c}S c c}w )N_outr*   )estr   r   s      r&   r,   r,     s    7As$Z7 7   pet_out	color_outin_pipelinec                     t        j                  d      }|j                  ddgddgd      } fd}t        |       }|rt	        |      }|j                  |      }t        ||j                        sJ |j                         }t        |t        j                        sJ |j                  t        k(  sJ t        ||       y	)
zHCheck that get_feature_names_out works with DataFrames with string data.r   dogcatredgreen)r   r   c                     dk(  r| S  d | j                         }| j                  t        t        | j                   |                  S )Nr   columns)r  renamerS   zip)r#   namer   s     r&   r6   zCtest_get_feature_names_out_dataframe_with_string_data.<locals>.func  sA    ,H$T1995D88DQYY)=$>8??r-   r6   r   N)r   r   r   r   r   rx   r   r   r=   r   rk   r   r	   )	r   r   r   r"   r#   r6   r   r   r   s	   `        r&   5test_get_feature_names_out_dataframe_with_string_datar    s     
		X	&B
eU^ug6FGHA@ &4CTUK#K0''*Ggr||,,,--/EeRZZ(((;;&   uh'r-   c                     t        j                  d      } | j                  g dg dd      }t        t        j
                  d      }t        j                         5  t        j                  dt               |j                  d       d	d	d	       |j                  |      }t        || j                        sJ t        |j                  d
dg       t        d       }|j                  d       t        j                         5  t        j                  dt               |j                  |      }d	d	d	       t        || j                        sJ t        |j                  d
dg       t        d       }dD ]R  }|j                  |       d| d| d}t        j                  t        |      5  |j                  |       d	d	d	       T |j                  d       t        j                         5  t        j                  dt               |j                  |       d	d	d	       y	# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y	xY w)z5Check behavior of set_output with different settings.r   r   r   r   r   r   ro   r@   Nr   r   c                     d| z  S r   r*   r   s    r&   r,   z&test_set_output_func.<locals>.<lambda>  s
    q1u r-   c                 ,    t        j                  |       S r)   )r=   asarrayr   s    r&   r,   z&test_set_output_func.<locals>.<lambda>  s    "**Q- r-   )r   polarsz'When `set_output` is configured to be 'z'.*z DataFrame.*r   default)r   r   r   r   r=   logru   rv   rw   rr   
set_outputrx   r   r	   r  rq   )r"   r#   ftr   ft_npr@   r   s          r&   test_set_output_funcr    s   			X	&B
9=9:A	RVV|	DB 
	 	 	" *g{3
)* q!Ggr||,,,wc
3	_	-BMMHM% 
	 	 	" &g{3""1%& gr||,,,wc
3   78E) #	9-5i[I; O  	 \\+S1 	#"	# 	## 
y)		 	 	" g{3A A* *& &	# 	#
 s0   -H',H40I2,I'H14H>I
	Ic                  n   t        j                  d      } d }t        t        t        j
                  |      t                     }| j                  ddgddgdd	ggd
dg      }|j                  |      }|j                         j                         ddgk(  sJ t        |t        j                        sJ y)zCheck that we have a consistence between the feature names out of
    `FunctionTransformer` and the feature names in of the next step in the pipeline.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27695
    r   c                 2    |D cg c]  }|dz   	 c}S c c}w N__logr*   _r   r  s      r&   with_suffixz?test_consistence_column_name_between_steps.<locals>.with_suffix      +014w111r   r  r   r   r   r^   r<   r   r   r   r   a__logb__logN)r   r   r   r   r=   rF   r   r   rx   r   tolistr   r   )r"   r  pipeliner   r   s        r&   *test_consistence_column_name_between_stepsr     s     
		X	&B2 BHHDnFVH 
1v1v1v.c
	CB$$R(G))+22488LLLLgrzz***r-   dataframe_libr   r  transform_output)r  r   r  c                    t        j                  |       }|dk7  rt        j                  |       |j                  g dg dd      }d }t        |      j	                  |      }|j                  |      }t        t        j                  |      t        j                  |             |j                         }t        |j                         |d|j                        k(  sJ |j                          |d|j                        k(  sJ y)	z8Check that we overwrite the column names when we should.numpyr   r   r   c                 2    |D cg c]  }|dz   	 c}S c c}w r  r*   r  s      r&   r  zEtest_function_transformer_overwrite_column_names.<locals>.with_suffix  r  r   r  r	  N)r   r   r   r   r  rx   r	   r=   r  r   r   r  r  )r!  r"  libr   r  r   r   feature_namess           r&   0test_function_transformer_overwrite_column_namesr(    s     

m
,C7",-	Y];	<B2 &DOO" P K ''+Grzz'*BJJrN;557M Kbjj$AAAA![rzz%BBBBr-   r   c                 2    |D cg c]  }| d	 c}S c c}w N_logr*   r  s      r&   r,   r,   '      u$EtvT]$E $Er   c                 b   t        j                  d      }|j                  g dg dd      }t        |       }|j	                  |      }t        t        j                  |      t        j                  |             |j                         }t        |j                        t        |      k(  sJ y)zCheck the same as `test_function_transformer_overwrite_column_names`
    but for the specific case of pandas where column names can be numerical.r   r   r   )r   r   r  N)r   r   r   r   rx   r	   r=   r  r   r   r  )r   r"   r   r   r   r'  s         r&   :test_function_transformer_overwrite_column_names_numericalr.  %  s     
		X	&B	)6	7B%8IJK''+Grzz'*BJJrN;557M D$7777r-   c                 2    |D cg c]  }| d	 c}S c c}w r*  r*   r  s      r&   r,   r,   ;  r,  r   c                     t        j                         }|j                  g dg dd      } fd}t        ||      }d}t        j                  t
        |      5  |j                  |      j                   ddd       y# 1 sw Y   yxY w)	zCheck that we raise an error when `func` returns a dataframe with new
    column names that become inconsistent with `get_feature_names_out`.r   r   r   c                 \    dk(  r| j                  ddi      S | j                  ddi      S )Nr   r   r   r   )r  )r   r!  s    r&   r6   zAtest_function_transformer_error_column_inconsistent.<locals>.funcF  s3    H$99c3Z90099c3Z((r-   r  z:The output generated by `func` have different column namesr   N)r   r   r   r   r   r    rx   r  )r!  r   r&  r   r6   r   r%   s   `      r&   3test_function_transformer_error_column_inconsistentr2  8  sx     

m
,C	Y];	<B) &4CTUKJG	z	1 .!!"%--. . .s   BB)/ru   r$  r=   r   sklearn.pipeliner   sklearn.preprocessingr   r   +sklearn.preprocessing._function_transformerr   sklearn.utils._testingr   r   r	   sklearn.utils.fixesr
   r   r'   r9   rD   rH   rV   rY   r[   rg   markparametrizer|   r~   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r(  r.  r2  r*   r-   r&   <module>r:     s      * E S 
 ?2 /C 'WTAAA +dVn-D~-UV A W AF&& GX#67 8:1"  4 IINN3"	
 ))..%BIINN3,?@	
 IINN3":	
 ))..%BIINN3,?@?	
 IINN3"N#	
 ))..%BIINN3,?@N	
 IINN3"	
 ))..%BIINN3,?@	
 IINN3"N 	
 ))..%BIINN3,?@N	
AGJV dE]3( 4WJX("*,B0.0 !	w'(	7)[9QR u6( 7(6+\+. 8X*>?+-LMC N @C, EF8	8 8X*>?EF.	 @
.r-   