a
    ;ZaP                     @   s  d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZmZ dd	l
mZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZm Z  ddl!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ej*+dZ,ddgddgddgddgddgddggZ-g dZ.g dZ/ddgddgddggZ0g dZ1g d Z2e)3 Z4e,5e4j6j7Z8e%e4j9e4j6e,d!\e4_9e4_6e): Z;e%e;j9e;j6e,d!\e;_9e;_6d"d# Z<d$d% Z=ej>?d&d'd(gd)d* Z@d+d, ZAd-d. ZBej>?d/g d0d1d2 ZCej>?d&d'd(gd3d4 ZDd5d6 ZEd7d8 ZFd9d: ZGd;d< ZHd=d> ZId?d@ ZJdAdB ZKdCdD ZLdEdF ZMdGdH ZNej>?d&d'd(gdIdJ ZOdKdL ZPej>?d&d'd(gdMdN ZQej>?dOe e4j9e4j6fe e;j9e;j6fgdPdQ ZRdS )Rz6Testing for the boost module (sklearn.ensemble.boost).    N)
csc_matrix)
csr_matrix)
coo_matrix)
dok_matrix)
lil_matrix)assert_array_equalassert_array_less)assert_array_almost_equal)BaseEstimator)clone)DummyClassifierDummyRegressor)LinearRegression)train_test_split)GridSearchCV)AdaBoostClassifier)AdaBoostRegressor)_samme_proba)SVCSVR)DecisionTreeClassifierDecisionTreeRegressor)shuffle)NoSampleWeightWrapper)datasets      )foor   r   r   r   r   )r   r   r   r   r   r      )r   r   r   )r   r   r   random_statec                     s   t g dg dg dg dg  t  jddd d t jf   G  fddd} |  }t|d	t  }t|j j t 	|
 sJ tt j|ddg d
 tt j|ddg d d S )N)r   ư>r   )gRQ?333333?皙?)igRQ?g      ?)r#   r   g&.>r   Zaxisc                       s   e Zd Z fddZdS )z'test_samme_proba.<locals>.MockEstimatorc                    s   t |j j  S N)r   shapeselfXZprobs Jlib/python3.9/site-packages/sklearn/ensemble/tests/test_weight_boosting.pypredict_probaB   s    z5test_samme_proba.<locals>.MockEstimator.predict_probaN)__name__
__module____qualname__r/   r-   r,   r-   r.   MockEstimatorA   s   r3   r    )r   r   r   r   )r   r   r   r   )npZarrayabssumnewaxisr   	ones_liker   r(   ZisfiniteallZargminargmax)r3   ZmockZsamme_probar-   r,   r.   test_samme_proba6   s    $r;   c                  C   s>   t tt} t t| }t|tt ttdf d S )Nr   )r4   Zoneslenr+   r   fitr	   r/   )Zy_tclfr-   r-   r.   test_oneclass_adaboost_probaS   s    r?   	algorithmSAMMESAMME.Rc                 C   sz   t | dd}|tt t|tt tt	t
t|j |tjttdfks\J |tjttfksvJ d S )Nr   r@   r"   r   )r   r=   r+   y_classr   predictT	y_t_classr4   uniqueasarrayclasses_r/   r(   r<   decision_function)r@   r>   r-   r-   r.   test_classification_toy\   s    rL   c                  C   s*   t dd} | tt t| tt d S )Nr   r!   )r   r=   r+   y_regrr   rE   rF   y_t_regr)r>   r-   r-   r.   test_regression_toyg   s    
rO   c                  C   s  t tj} d  }}dD ]}t|d}|tjtj t| |j |	tj}|dkr^|}|}|j
d t| kstJ |tjj
d t| ksJ |tjtj}|dksJ d||f t|jdksJ ttdd |jD t|jksJ qd	|_td
t |	tj|  d S )NrA   rB   r@   rA   r   g?z'Failed with algorithm %s and score = %fc                 s   s   | ]}|j V  qd S r'   r!   .0Zestr-   r-   r.   	<genexpr>       ztest_iris.<locals>.<genexpr>rB   r   )r4   rH   iristargetr   r=   datar   rJ   r/   r(   r<   rK   scoreestimators_setr@   r   r5   )classesZ	clf_sammeZ
prob_sammealgr>   probarY   r-   r-   r.   	test_irisn   s(    
r_   loss)ZlinearZsquareZexponentialc                 C   st   t | dd}|tjtj |tjtj}|dks8J t|jdksJJ ttdd |jD t|jkspJ d S )Nr   )r`   r"   r$   r   c                 s   s   | ]}|j V  qd S r'   r!   rR   r-   r-   r.   rT      rU   z test_diabetes.<locals>.<genexpr>)	r   r=   diabetesrX   rW   rY   r<   rZ   r[   )r`   ZregrY   r-   r-   r.   test_diabetes   s    rb   c                 C   s  t jd}|jdtjjd}|jdtjjd}t| dd}|j	tj
tj|d |tj
}dd |tj
D }|tj
}dd |tj
D }|jtj
tj|d}	d	d |jtj
tj|dD }
t|dksJ t||d
  t|dksJ t||d
  t|
dksJ t|	|
d
  tddd}|j	tj
tj|d |tj
}dd |tj
D }|jtj
tj|d}	dd |jtj
tj|dD }
t|dksJ t||d
  t|
dksJ t|	|
d
  d S )Nr   
   )size)r@   n_estimatorssample_weightc                 S   s   g | ]}|qS r-   r-   rS   pr-   r-   r.   
<listcomp>   rU   z'test_staged_predict.<locals>.<listcomp>c                 S   s   g | ]}|qS r-   r-   rh   r-   r-   r.   rj      rU   c                 S   s   g | ]}|qS r-   r-   rS   sr-   r-   r.   rj      s   r   )re   r"   c                 S   s   g | ]}|qS r-   r-   rh   r-   r-   r.   rj      rU   c                 S   s   g | ]}|qS r-   r-   rk   r-   r-   r.   rj      s   )r4   randomRandomStateZrandintrV   rW   r(   ra   r   r=   rX   rE   staged_predictr/   staged_predict_probarY   staged_scorer<   r	   r   )r@   rngZiris_weightsZdiabetes_weightsr>   ZpredictionsZstaged_predictionsr^   Zstaged_probasrY   Zstaged_scoresr-   r-   r.   test_staged_predict   sB    
rs   c                  C   sh   t t d} dddd}t| |}|tjtj tt dd} ddd}t| |}|t	jt	j d S )N)base_estimator)r   r   rP   )re   base_estimator__max_depthr@   r   rt   r"   )re   ru   )
r   r   r   r=   rV   rX   rW   r   r   ra   )boost
parametersr>   r-   r-   r.   test_gridsearch   s    


ry   c                  C   s   dd l } dD ]p}t|d}|tjtj |tjtj}| |}| |}t	||j
ks`J |tjtj}||ksJ qtdd}|tjtj |tjtj}| |}| |}t	||j
ksJ |tjtj}||ksJ d S )Nr   rP   rQ   r!   )pickler   r=   rV   rX   rW   rY   dumpsloadstype	__class__r   ra   )rz   r]   objrY   rl   Zobj2Zscore2r-   r-   r.   test_pickle   s$    





r   c               	   C   s~   t jdddddddd\} }dD ]X}t|d	}|| | |j}|jd dksRJ |d dtjf |dd  k s J q d S )
Ni  rc   r    r   Fr   )	n_samples
n_featuresZn_informativeZn_redundantZ
n_repeatedr   r"   rP   rQ   )	r   Zmake_classificationr   r=   Zfeature_importances_r(   r4   r7   r9   )r+   yr]   r>   Zimportancesr-   r-   r.   test_importances   s    


r   c                   C   s   t t" tddtt W d    n1 s20    Y  t t" tddtt W d    n1 sn0    Y  t t* t jtttdgd W d    n1 s0    Y  d S )Nr   )learning_rater   rQ   rf   )	pytestraises
ValueErrorr   r=   r+   rD   r4   rI   r-   r-   r-   r.   
test_error  s    00r   c                  C   s   ddl m}  t|  }|tt tt dd}|tt ddl m} t	| dd}|tt t	t
 dd}|tt ddgddgddgddgg}g d}tt dd}tjtd	d
 ||| W d    n1 s0    Y  d S )Nr   )RandomForestClassifierrA   rQ   )RandomForestRegressorr!   r   )r   Zbarr   r   zworse than randommatch)sklearn.ensembler   r   r=   r+   rM   r   rD   r   r   r   r   r   r   )r   r>   r   ZX_failZy_failr-   r-   r.   test_base_estimator  s    
r   c                  C   sT   d} t dddd}tjt| d  |tjtj W d    n1 sF0    Y  d S )Nz+Sample weights have reached infinite values   g      @rA   )re   r   r@   r   )r   r   ZwarnsUserWarningr=   rV   rX   rW   )msgr>   r-   r-   r.   test_sample_weights_infinite:  s    r   c                  C   s<  G dd dt } tjddddd\}}t|}t||dd	\}}}}tttt	t
fD ]}||}||}	t| d
dddd||}
t| d
dddd||}|
|	}||}t|| |
|	}||}t|| |
|	}||}t|| |
|	}||}t|| |
|	|}|||}t|| |
|	}||}t||D ]\}}t|| qZ|
|	}||}t||D ]\}}t|| q|
|	}||}t||D ]\}}t|| q|
|	|}|||}t||D ]\}}t|| qdd |
jD }tdd |D sTJ qTd S )Nc                       s"   e Zd ZdZd fdd	Z  ZS )z-test_sparse_classification.<locals>.CustomSVCz8SVC variant that records the nature of the training set.Nc                    s    t  j|||d t|| _| S z<Modification on fit caries data type for later verification.rf   superr=   r}   
data_type_r*   r+   r   rg   r~   r-   r.   r=   G  s    
z1test_sparse_classification.<locals>.CustomSVC.fit)Nr0   r1   r2   __doc__r=   __classcell__r-   r-   r   r.   	CustomSVCD  s   r   r         *   )Z	n_classesr   r   r"   r   r!   T)ZprobabilityrA   )rt   r"   r@   c                 S   s   g | ]
}|j qS r-   r   rS   ir-   r-   r.   rj     rU   z.test_sparse_classification.<locals>.<listcomp>c                 S   s   g | ]}|t kp|tkqS r-   r   r   rS   tr-   r-   r.   rj     rU   )r   r   Zmake_multilabel_classificationr4   Zravelr   r   r   r   r   r   r   r=   rE   r   rK   r	   Zpredict_log_probar/   rY   Zstaged_decision_functionzipro   rp   rq   rZ   r9   )r   r+   r   X_trainX_testy_trainy_testsparse_formatX_train_sparseX_test_sparsesparse_classifierdense_classifiersparse_resultsdense_results
sprase_res	dense_restypesr-   r-   r.   test_sparse_classificationA  sp    	




















r   c                  C   s
  G dd dt } tjddddd\}}t||dd	\}}}}tttttfD ]}||}||}	t	|  dd

||}
t	|  dd

|| }}|
|	}||}t|| |
|	}||}t||D ]\}}t|| qdd |
jD }tdd |D sJJ qJd S )Nc                       s"   e Zd ZdZd fdd	Z  ZS )z)test_sparse_regression.<locals>.CustomSVRz8SVR variant that records the nature of the training set.Nc                    s    t  j|||d t|| _| S r   r   r   r   r-   r.   r=     s    
z-test_sparse_regression.<locals>.CustomSVR.fit)Nr   r-   r-   r   r.   	CustomSVR  s   r   r   2   r   r   )r   r   Z	n_targetsr"   r   r!   rv   c                 S   s   g | ]
}|j qS r-   r   r   r-   r-   r.   rj     rU   z*test_sparse_regression.<locals>.<listcomp>c                 S   s   g | ]}|t kp|tkqS r-   r   r   r-   r-   r.   rj     rU   )r   r   Zmake_regressionr   r   r   r   r   r   r   r=   rE   r	   ro   r   rZ   r9   )r   r+   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r-   r-   r.   test_sparse_regression  s6    	





r   c                  C   sF   G dd dt } t|  dd}|tt t|jt|jksBJ dS )z
    AdaBoostRegressor should work without sample_weights in the base estimator
    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    c                   @   s   e Zd Zdd Zdd ZdS )z=test_sample_weight_adaboost_regressor.<locals>.DummyEstimatorc                 S   s   d S r'   r-   )r*   r+   r   r-   r-   r.   r=     s    zAtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.fitc                 S   s   t |jd S )Nr   )r4   Zzerosr(   r)   r-   r-   r.   rE     s    zEtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.predictN)r0   r1   r2   r=   rE   r-   r-   r-   r.   DummyEstimator  s   r   r    )re   N)r
   r   r=   r+   rM   r<   Zestimator_weights_Zestimator_errors_)r   rw   r-   r-   r.   %test_sample_weight_adaboost_regressor  s    r   c                  C   s   t jd} | ddd}| ddgd}| d}ttdd}||| || |	| t
t }||| || dS )zX
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    r   r   r    r   Zmost_frequent)ZstrategyN)r4   rm   rn   ZrandnZchoicer   r   r=   rE   r/   r   r   )rr   r+   ZycZyrrw   r-   r-   r.   test_multidimensional_X  s    



r   c                 C   sp   t jt j }}tt }t|| d}d|jj}t	j
t|d ||| W d    n1 sb0    Y  d S )N)rt   r@   z {} doesn't support sample_weightr   )rV   rX   rW   r   r   r   formatr~   r0   r   r   r   r=   )r@   r+   r   rt   r>   err_msgr-   r-   r.   -test_adaboostclassifier_without_sample_weight  s    
r   c            
      C   sR  t jd} t jdddd}d| d | |jd d  }|d	d
}|d	  d9  < d|d	< tt d
dd}t	|}t	|}|
|| |
|d d	 |d d	  t |}d|d	< |j
|||d ||d d	 |d d	 }||d d	 |d d	 }||d d	 |d d	 }	||k s,J ||	k s:J |t|	ksNJ d S )Nr   r   d   i  )Znumg?r%   g-C6?r   r   rc   i'  )rt   re   r"   rf   )r4   rm   rn   ZlinspaceZrandr(   Zreshaper   r   r   r=   r8   rY   r   Zapprox)
rr   r+   r   Zregr_no_outlierZregr_with_weightZregr_with_outlierrg   Zscore_with_outlierZscore_no_outlierZscore_with_weightr-   r-   r.   $test_adaboostregressor_sample_weight  s,     
r   c                 C   sZ   t tjddddi\}}}}t| dd}||| ttj||dd|	| d S )NT)Z
return_X_yr"   r   rC   r   r&   )
r   r   Zload_digitsr   r=   r   r4   r:   r/   rE   )r@   r   r   r   r   modelr-   r-   r.    test_adaboost_consistent_predict(  s    
r   zmodel, X, yc                 C   sX   t |}d|d< d}tjt|d  | j|||d W d    n1 sJ0    Y  d S )Nir   z,sample_weight cannot contain negative weightr   rf   )r4   r8   r   r   r   r=   )r   r+   r   rg   r   r-   r-   r.   #test_adaboost_negative_weight_error8  s
    
r   )Sr   Znumpyr4   r   Zscipy.sparser   r   r   r   r   Zsklearn.utils._testingr   r   r	   Zsklearn.baser
   r   Zsklearn.dummyr   r   Zsklearn.linear_modelr   Zsklearn.model_selectionr   r   r   r   r   Z!sklearn.ensemble._weight_boostingr   Zsklearn.svmr   r   Zsklearn.treer   r   Zsklearn.utilsr   Zsklearn.utils._mockingr   Zsklearnr   rm   rn   rr   r+   rD   rM   rF   rG   rN   Z	load_irisrV   ZpermutationrW   rd   ZpermrX   Zload_diabetesra   r;   r?   ZmarkZparametrizerL   rO   r_   rb   rs   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r-   r-   r-   r.   <module>   s   (
	

"

-]0
&
