a
    ;Za.                     @   s  d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
l	mZ ddlmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZmZ edZe Zeej j!Z"ej#e" e_#ej e" e_ e Z$ee$j j!Z"e$j#e" e$_#e$j e" e$_ dd Z%dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+ej,-ddd gd!d" Z.d#d$ Z/d%d& Z0d'd( Z1d)d* Z2d+d, Z3ed-ef i d.d/id0ej,-d1d2d3gd4d5 Z4ed-ef i d.d6id0ej,-d1d2d3gd7d8 Z5d9d: Z6d;d< Z7dS )=zD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
    N)assert_array_equal)assert_array_almost_equal)ignore_warnings)assert_allclose)ParameterGrid)IsolationForest)_average_path_length)train_test_split)load_diabetes	load_iris)check_random_state)roc_auc_score)
csc_matrix
csr_matrix)Mockpatchc                  C   s   t ddgddgg} t ddgddgg}tdgg dddgd}t 8 |D ]"}tf d	ti|| | qRW d
   n1 s0    Y  d
S )z6Check Isolation Forest for various parameter settings.r            )      ?      ?r   TF)n_estimatorsmax_samples	bootstraprandom_stateN)nparrayr   r   r   rngfitpredict)X_trainX_testgridparams r$   Blib/python3.9/site-packages/sklearn/ensemble/tests/test_iforest.pytest_iforest.   s    r&   c                  C   s   t d} ttjdd tjdd | d\}}}}tddgddgd	}ttfD ]p}||}||}|D ]V}	tf d
dd|		|}
|

|}tf d
dd|		|}|
|}t|| qhqPdS )z=Check IForest for various parameter settings on sparse input.r   N2   r   r   r   TF)r   r   
   r   )r   r   )r   r	   diabetesdatatargetr   r   r   r   r   r   r   )r   r    r!   y_trainy_testr"   Zsparse_formatZX_train_sparseZX_test_sparser#   Zsparse_classifierZsparse_resultsZdense_classifierZdense_resultsr$   r$   r%   test_iforest_sparse<   s4    

r/   c                  C   s~  t j} tt  tdd|  W d   n1 s60    Y  tt  tdd|  W d   n1 sp0    Y  tt  tdd|  W d   n1 s0    Y  d}tjt|d  tdd|  W d   n1 s0    Y  td }td	d|  W d   n1 s(0    Y  d
d |D }t	|dksRJ td&}tt
dd|  W d   n1 s0    Y  dd |D }t	|dksJ tt  tdd|  W d   n1 s0    Y  tt  tdd|  W d   n1 s"0    Y  tt2 t | | ddddf  W d   n1 sp0    Y  dS )z7Test that it gives proper exception on deficient input.r   N               @3max_samples will be set to n_samples for estimationmatchi  autoc                 S   s   g | ]}t |jtr|qS r$   
issubclasscategoryUserWarning.0Zeachr$   r$   r%   
<listcomp>n       z&test_iforest_error.<locals>.<listcomp>r   r   c                 S   s   g | ]}t |jtr|qS r$   r8   r<   r$   r$   r%   r>   r   r?   Zfoobarg      ?r   )irisr+   pytestZraises
ValueErrorr   r   warnsr;   lenr   Zint64r   )Xwarn_msgrecordZuser_warningsr$   r$   r%   test_iforest_errorX   s0    ....0600rH   c               	   C   sF   t j} t | }|jD ](}|jttt	| j
d ksJ qdS )zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)r@   r+   r   r   estimators_Z	max_depthintr   ZceilZlog2shape)rE   clfestr$   r$   r%   test_recalculate_max_depth   s    
rN   c                  C   s   t j} t | }|j| jd ks&J tdd}d}tjt|d ||  W d    n1 sb0    Y  |j| jd ksJ tdd| }|jd| jd  ksJ d S )Nr   i  r1   r4   r5   g?)	r@   r+   r   r   max_samples_rK   rA   rC   r;   )rE   rL   rF   r$   r$   r%   test_max_samples_attribute   s    
(rP   c            	      C   s   t d} ttjtj| d\}}}}tddd|}|jdd ||}|jdd ||}t	|| tddd|}||}t	|| dS )	zCheck parallel regression.r   r(   r   )n_jobsr   r   )rQ   r   N)
r   r	   r*   r+   r,   r   r   
set_paramsr   r   )	r   r    r!   r-   r.   ZensembleZy1Zy2Zy3r$   r$   r%    test_iforest_parallel_regression   s    




rS   c                  C   s   t d} d| dd }tj|d |d f }|dd }| jdddd	}tj|dd |f }td
gd dgd  }td| d|}|| }t	||dksJ dS )z#Test Isolation Forest performs wellr   g333333?x   Nd      )   r   )ZlowZhighsizer   rX   r   )r   r   g\(\?)
r   randnr   Zr_Zuniformr   r   r   decision_functionr   )r   rE   r    Z
X_outliersr!   r.   rL   Zy_predr$   r$   r%   test_iforest_performance   s    r\   contamination      ?r7   c              	   C   s   ddgddgddgddgddgddgddgddgg}t t| d	}|| || }||}t|dd  t|d d ksJ t|ddg ddg   d S )
Nr0   r   r      r   rV      )r   r]   )	r   r   r   r[   r   r   minmaxr   )r]   rE   rL   Zdecision_funcZpredr$   r$   r%   test_iforest_works   s    4

(rd   c                  C   s&   t j} t | }|j|jks"J d S N)r@   r+   r   r   rO   Z_max_samples)rE   rL   r$   r$   r%   test_max_samples_consistency   s    rf   c                  C   sV   t d} ttjd d tjd d | d\}}}}tdd}||| || d S )Nr   r'   r(   g?)Zmax_features)r   r	   r*   r+   r,   r   r   r   )r   r    r!   r-   r.   rL   r$   r$   r%    test_iforest_subsampled_features   s    
rg   c                  C   s   dt dt j  d } dt dt j  d }ttdgdg ttdgdg ttd	gd
g ttdg| g ttdg|g ttt g ddd
| |g tt d}t|t | d S )Nr3   g      @g?g     0@g}?r   r2   r   r   r        )r   r   rh   ri   )	r   logZeuler_gammar   r   r   Zaranger   sort)Z
result_oneZ
result_twoZavg_path_lengthr$   r$   r%    test_iforest_average_path_length   s    
rl   c                  C   s   ddgddgddgg} t dd| }t  | }t|ddgg|ddgg|j  t|ddgg|ddgg|j  t|ddgg|ddgg d S )Nr   r   g?)r]   r3   )r   r   r   Zscore_samplesr[   Zoffset_)r    Zclf1Zclf2r$   r$   r%   test_score_samples   s    rm   c                  C   sv   t d} | dd}tdd| dd}|| |jd }|jdd || t|jdks`J |jd |u srJ dS )	z/Test iterative addition of iTrees to an iForestr   rX   r   r)   T)r   r   r   Z
warm_start)r   N)r   rZ   r   r   rI   rR   rD   )r   rE   rL   Ztree_1r$   r$   r%   test_iforest_warm_start
  s    


rn   z*sklearn.ensemble._iforest.get_chunk_n_rowsZreturn_valuer   )Zside_effectzcontamination, n_predict_calls)r^   r   )r7   r   c                 C   s   t | | j|ksJ d S re   rd   Z
call_countZmocked_get_chunkr]   Zn_predict_callsr$   r$   r%   test_iforest_chunks_works1!  s    rq   r)   c                 C   s   t | | j|ksJ d S re   ro   rp   r$   r$   r%   test_iforest_chunks_works2,  s    rr   c                  C   s|  t d} t }||  t jd}t|| dks<J t||dddksZJ t|| d dkstJ t|| d dksJ t 	|dddd} t }||  t|| dksJ t||dddksJ t|t ddksJ |dd} t }||  t|| dks:J t||dddksZJ t|t ddksxJ dS )z=Test whether iforest predicts inliers when using uniform data)rU   r)   r   r   rU   r)   N)
r   Zonesr   r   ZrandomZRandomStateallr   rZ   repeat)rE   Ziforestr   r$   r$   r%   test_iforest_with_uniform_data6  s(    



 ru   c                  C   sj   t ddgddgg} t ddg}t | |}tjtdd |j W d    n1 s\0    Y  d S )Nr   r   r   rW   r   z`n_features_` was deprecatedr5   )r   r   r   r   rA   rC   FutureWarningZn_features_)rE   yrM   r$   r$   r%   test_n_features_deprecationY  s
    rx   )8__doc__rA   Znumpyr   Zsklearn.utils._testingr   r   r   r   Zsklearn.model_selectionr   Zsklearn.ensembler   Zsklearn.ensemble._iforestr   r	   Zsklearn.datasetsr
   r   Zsklearn.utilsr   Zsklearn.metricsr   Zscipy.sparser   r   Zunittest.mockr   r   r   r@   Zpermutationr,   rY   Zpermr+   r*   r&   r/   rH   rN   rP   rS   r\   ZmarkZparametrizerd   rf   rg   rl   rm   rn   rq   rr   ru   rx   r$   r$   r$   r%   <module>   sh   '
#