a
    ;ZaZN                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dlm
Z
 d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dl m!Z! d dl m"Z" d dl m#Z# d dl m$Z$ d dl%m&Z& d dl'm(Z( e j)*dddge j)*dddgdd Z+e j)*dddge j)*dddgd d! Z,e j)*dddge j)*dddgdFd#d$Z-d%d& Z.d'd( Z/d)d* Z0e j)*dd+dgd,d- Z1e j)*dg d.e j)*dddgd/d0 Z2e j)*d1d2d3gd4d5 Z3d6d7 Z4d8d9 Z5e j)*d:d;d<gd;d<gfd;d<ged;ed<d=fd;d<gd>d? fgd@dA Z6e j)*ddBdCgdDdE Z7dS )G    N)assert_allclose)ColumnTransformer)load_diabetes)	load_iris)make_classification)make_regression)DummyClassifier)RandomForestRegressor)RandomForestClassifier)LinearRegression)LogisticRegression)SimpleImputer)permutation_importance)train_test_split)
get_scorermean_squared_errorr2_score)make_pipeline)KBinsDiscretizer)OneHotEncoder)StandardScaler)scale)parallel_backend)_convert_containern_jobs      max_samples      ?      ?c           	   	   C   s   t jd}d}tdd\}}||jd|jd d dd	}t ||g}td
dd}|	|| t
|||||| |d}|jj|jd	 |fksJ t |jd |jd d ksJ d S )N*      T)Z
return_X_yMbP?r   r   sizer   
   n_estimatorsrandom_state	n_repeatsr)   r   r   )nprandomRandomStater   normalshapereshapehstackr	   fitr   importancesallimportances_mean)	r   r   rngr+   Xyy_with_little_noiseclfresult r=   Slib/python3.9/site-packages/sklearn/inspection/tests/test_permutation_importance.py9test_permutation_importance_correlated_feature_regression!   s$     
r?   c              	   C   s   t d}tjd}d}t }|j|j }}||jd|j	d d 
dd}|j||jd	}||d
< tddd}	|	|| t|	||||| |d}
|
jj	|j	d |fksJ t|
jd |
jd d ksJ d S )Npandasr    r!   r"   r   r#   r%   r   )columnsZcorrelated_featurer&   r'   r*   )pytestimportorskipr,   r-   r.   r   datatargetr/   r0   r1   	DataFrameZfeature_namesr
   r3   r   r4   r5   r6   )r   r   pdr7   r+   Zdatasetr8   r9   r:   r;   r<   r=   r=   r>   @test_permutation_importance_correlated_feature_regression_pandasB   s*    
 
rH   r    c              	      s  t j|}d}d}d}d}d}|| }	t |}
|j|
|d t  fdd|
d | D }|t j}||k sxJ t j||	||gdd}|j
||	fksJ t| d	|d
\}}}}td|d}||| |j}|d | }||d  }| | k sJ t|||||| |d}|jj
|j
d |fks:J |jd | }|j|d  }tt |dksnJ | dk sJ | dksJ d S )Nr!     r   r   )r$   c                    s   g | ]} |k d dqS )r%   r   )r1   ).0cr9   r=   r>   
<listcomp>}       zEtest_robustness_to_high_cardinality_noisy_feature.<locals>.<listcomp>)Zaxisr   )Z	test_sizer)   r'   r*   gHz>g?g333333?)r,   r-   r.   arangeZchoicer2   astypeZfloat32ZconcatenateZrandnr0   r   r
   r3   Zfeature_importances_maxminr   r4   r6   abs)r   r   Zseedr7   r+   	n_samplesZ	n_classesZn_informative_featuresZn_noise_features
n_featuresclassesr8   ZX_trainZX_testZy_trainZy_testr;   Ztree_importancesZinformative_tree_importancesZnoisy_tree_importancesrZinformative_importancesZnoisy_importancesr=   rL   r>   1test_robustness_to_high_cardinality_noisy_featurej   sL    
 
rX   c                  C   s  t jd} d}t dddt jgg dgj}t g d}tt tdd	}|	|| t
||||| d
}|jj|jd |fksJ t |jd |jd d ksJ t jd} t
||||| d
}|jj|jd |fksJ t |j|jrJ t |jd |jd d ksJ d S )Nr       r          @      @)r   r   r   r   r   r   r   r   lbfgsZsolverr+   r)   r   r%   r   )r,   r-   r.   arraynanTr   r   r   r3   r   r4   r0   r5   r6   Zallclose)r7   r+   r8   r9   r;   r<   Zresult2r=   r=   r>   'test_permutation_importance_mixed_types   s    "rc   c            	      C   s   t d} tjd}d}| dddtjgg dd}tg d	}tt	 t
 }td
|dgfdt dgfg}t|tdd}||| t|||||d}|jj|jd |fksJ t|jd |jd d ksJ d S )Nr@   r    r!   r   rZ   r[   )abrd   re   )col1col2r\   Znumrf   catrg   r]   r^   r_   r   r%   )rB   rC   r,   r-   r.   rF   ra   r`   r   r   r   r   r   r   r3   r   r4   r0   r5   r6   )	rG   r7   r+   r8   r9   Znum_preprocessZ
preprocessr;   r<   r=   r=   r>   .test_permutation_importance_mixed_types_pandas   s    
ri   c                  C   sf   t dddd\} }t| } t|}t | |}d|jd  }t|| |ddd}t||jd	d
d d S )N  r&   r   rT   rU   r)   r   2   neg_mean_squared_error)r+   scoringg?gư>)ZrtolZatol)r   r   r   r3   Zcoef_r   r   r6   )r8   r9   lrexpected_importancesZresultsr=   r=   r>   .test_permutation_importance_linear_regresssion   s    

rq   rj   c           	   	   C   s   t dddd\}}t ||}t|||ddd| d}|d  }|d  }|| d	ks^J t|||ddd
d}t|d |d  td$ t|||ddd
d}W d    n1 s0    Y  t|d |d  d S )Nrj   r&   r   rk   r!   r   r*   r4   333333?r   )r+   r)   r   Z	threading)r   r   r3   r   rR   rQ   r   r   )	r   r8   r9   ro   Zimportance_sequentialimp_minimp_maxZimportance_processesZimportance_threadingr=   r=   r>   ;test_permutation_importance_equivalence_sequential_parallel  s*    
$ru   )Nr   r   c              	   C   sJ  t d}tdddd\}}||}tddd}||d	d
}t||g}|j	j
dksbJ t|dr||| }n| }t|j}|||< || j	|j	ksJ tt|t|_tdddd}	|	|| d}
t|	|||
d| |d}|d  }|d  }|| dksJ t|	|||
d| |d}t|d |d  d S )Nr@   d   r!   r   rk      Zordinal)Zn_binsencoder%   r   fCategorical)r(   Z	max_depthr)   r*   r4   rr   )rB   rC   r   rF   r   Zfit_transformr1   r,   r2   Zdtypekindhasattrrz   ZravellenrA   rO   rP   strindexr	   r3   r   rR   rQ   r   )r   r   rG   r8   r9   ZX_dfZbinnerZ
cat_columnZnew_col_idxZrfr+   Zimportance_arrayrs   rt   Zimportance_dataframer=   r=   r>   7test_permutation_importance_equivalence_array_dataframe,  sR    



	r   
input_typer`   Z	dataframec           	      C   s~   t dd }}t||dd\}}|jdks.J t|| }tdd||}d}t||||d	d
}t||f}t	||j
 d S )Ng     j@rY   r   rk   g    .AZprior)Zstrategyr!   r   )r+   r   )intr   nbytesr   r   r3   r   r,   zerosr   r4   )	r   rT   rU   r8   r9   r;   r+   rW   rp   r=   r=   r>   /test_permutation_importance_large_memmaped_datar  s    

r   c               	   C   s  t jd} d}d}|d }| dd||f}t |}d|d |df  |d |df  |d |< ||d df d||d df   ||d < tdd}||| t|||dd	d
d}|jd |jd  }|t	
ddksJ t |}	t|||dd	d
|	d}|jd |jd  }
|
t	
|dks,J t t d|t d|g}	||||	 t|||dd	d
|	d}|jd |jd  }|| t	
ddksJ d S )Nr   rI   r   g        r"   r   F)Zfit_interceptZneg_mean_absolute_error   r)   rn   r+   g{Gz?r)   rn   r+   Zsample_weightg    _Br   )r,   r-   r.   r/   r   r   r3   r   r6   rB   ZapproxZonesr2   repeat)r7   rT   rU   Zn_half_samplesxr9   ro   ZpiZx1_x2_imp_ratio_w_nonewZx1_x2_imp_ratio_w_onesZx1_x2_imp_ratio_wr=   r=   r>   )test_permutation_importance_sample_weight  sT    
,,

		r   c               
   C   s   dd } t ddgddgg}t ddg}t ddg}t }||| zt|||d| dd W n ty   td Y n0 tt& t|||d| d|d	 W d    n1 s0    Y  d S )
Nc                 S   s   dS )Nr   r=   Z	estimatorr8   r9   r=   r=   r>   	my_scorer  s    zJtest_permutation_importance_no_weights_scoring_function.<locals>.my_scorerr   r   rw   rY   r   zpermutation_test raised an error when using a scorer function that does not accept sample_weight even though sample_weight was Noner   )	r,   r`   r   r3   r   	TypeErrorrB   Zfailraises)r   r   r9   r   ro   r=   r=   r>   7test_permutation_importance_no_weights_scoring_function  s     
r   z list_single_scorer, multi_scorerr2rm   r   rm   c                 C   s$   t || |t|| | dS )Nr   )r   Zpredictr   r   r=   r=   r>   <lambda>  s    r   c           	   	   C   s   t dddd\}}t ||}t|||d|dd}t| t| ksLJ | D ].}|| }t|||d|dd}t|j|j qPd S )Nrj   r&   r   rk   r   r   r   )r   r   r3   r   setkeysr   r4   )	Zlist_single_scorerZmulti_scorerr   r9   ro   Zmulti_importanceZscorerZmulti_resultZsingle_resultr=   r=   r>   (test_permutation_importance_multi_metric  s    r   r%   r!   c                 C   st   t dgj}t g d}t }||| d}tjt|d  t|||| d W d   n1 sf0    Y  dS )zjCheck that a proper error message is raised when `max_samples` is not
    set to a valid input value.
    )r   rZ   r[   g      @r\   z'max_samples must be in \(0, n_samples\])match)r   N)	r,   r`   rb   r   r3   rB   r   
ValueErrorr   )r   r8   r9   r;   err_msgr=   r=   r>   -test_permutation_importance_max_samples_error  s    r   )r    )8rB   Znumpyr,   Znumpy.testingr   Zsklearn.composer   Zsklearn.datasetsr   r   r   r   Zsklearn.dummyr   Zsklearn.ensembler	   r
   Zsklearn.linear_modelr   r   Zsklearn.imputer   Zsklearn.inspectionr   Zsklearn.model_selectionr   Zsklearn.metricsr   r   r   Zsklearn.pipeliner   Zsklearn.preprocessingr   r   r   r   Zsklearn.utilsr   Zsklearn.utils._testingr   ZmarkZparametrizer?   rH   rX   rc   ri   rq   ru   r   r   r   r   r   r   r=   r=   r=   r>   <module>   sx   &O
(D
?
