a
    ;ZaɈ                     @   sd  d dl Z d dlZd dlZd dlZd dlmZmZ d dl	m
Z
 d dl	mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZ d dlmZ eeeegZeeg ZeddgddgddgddgddgddggZeg dZejd ZejddZ ejddd k!e"Z#ej$dddZ%eg dZ&dd Z'dd Z(dd Z)dd Z*dd  Z+d!d" Z,d#d$ Z-d%d& Z.d'd( Z/d)d* Z0d+d, Z1d-d. Z2d/d0 Z3ej45d1ed2d3 Z6ej45d1ed4d5 Z7ej45d1ed6d7 Z8ej45d8ed9d: Z9ee:d;d<d= Z;ej45d1ed>d? Z<ej45d1ed@dA Z=ej45d1edBdC Z>ej45d1edDdE Z?ee:d;ej45d1eeegdFdG Z@ej45d1eej45dHdIdJgej45dKdIdJgdLdM ZAej45dNdOdPdQ ZBdRdS ZCee:d;dTdU ZDdVdW ZEdXdY ZFdZd[ ZGd\d] ZHej45d^d_eg d`g dageg dag daged dgged_d_gfd_dbgeg d`g dageg dcg dcged d_gged_dbgfdedd gddggeddgddgged dggeddgggddde ZIej45dfdgd_dgddbggdhfdigdjdk ZJdldm ZKdndo ZLdpdq ZMej45dredsdt ZNdS )u    N)load_digits	load_iris)train_test_split)cross_val_score)assert_almost_equal)assert_array_equal)assert_array_almost_equal)ignore_warnings)
GaussianNBBernoulliNB)MultinomialNBComplementNB)CategoricalNB      )r   r   r   r   r   r   )
      )sizer      )   d   )r   r   r   r   r   r   c                  C   s   t  } | ttt}t|t | t}| t}tt	
||d tjtdd& t  jttddgd W d    n1 s0    Y  d S )N   z;The target label.* in y do not exist in the initial classesmatchr   r   classes)r
   fitXypredictr   predict_probapredict_log_probar   nplogpytestraises
ValueErrorpartial_fit)clfy_predy_pred_probay_pred_log_proba r.   =lib/python3.9/site-packages/sklearn/tests/test_naive_bayes.pytest_gnb(   s    


r0   c                  C   sR   t  } | tt tjtdd t| j| j	 W d    n1 sD0    Y  d S )Nz!Attribute `sigma_` was deprecatedr   )
r
   r   r   r    r&   warnsFutureWarningr   Zsigma_var_r*   r.   r.   r/   test_gnb_var?   s    r5   c                  C   sL   t  tt} ttddgd | jd t  tt	} t| j
 d d S )Nr         @r   r   )r
   r   r   r    r   r$   arrayclass_prior_X1y1sumr4   r.   r.   r/   test_gnb_priorG   s    r<   c                  C   s   t d} t tt}t tt| }t|j|j t|j|j t	
tjd } t jtt| d}t jttddg| d d}|jtt| d d t|j|j t|j|j t	dtjd d}t j|tjd d}t t| t| }t tt|}t|j|j t|j|j d	S )
z5Test whether sample weights are properly used in GNB.r   r   sample_weightr   r   r   r>      )Z	minlengthN)r$   Zonesr
   r   r   r    r   theta_r3   rngZrandshaper)   randintZbincount)swr*   Zclf_swclf1clf2Zindr>   Zclf_duplr.   r.   r/   test_gnb_sample_weightP   s"    
rH   c                  C   sV   t tddgd} d}tjt|d | tt W d   n1 sH0    Y  dS )z:Test whether an error is raised in case of negative priorsg             @priorszPriors must be non-negativer   N	r
   r$   r7   r&   r'   r(   r   r   r    r*   msgr.   r.   r/   test_gnb_neg_priorsp   s    rO   c                  C   sZ   t tddgdtt} t| ddggtddggd t| jtddg dS )	z6Test whether the class prior override is properly used333333?gffffff?rJ   皙g[9h?gs\?r   N)	r
   r$   r7   r   r   r    r   r"   r8   r4   r.   r.   r/   test_gnb_priorsy   s    rR   c                  C   s|   t ddgddgddgddgddgddgddgddgd	d	gd
d
gg
} t g d}t g d}t|d}|| | d S )Nr   r   r   r   r      r   )
g{Gz?Q?gQ?{Gz?)\(?rX   gQ?rW   rY           )
r   r   r   rV   r   r      r   	   r   rJ   )r$   r7   r
   r   )r   rK   Yr*   r.   r.   r/   test_gnb_priors_sum_isclose   s"    
r^   c                  C   sV   t tg dd} d}tjt|d | tt W d   n1 sH0    Y  dS )z`Test whether an error is raised if the number of prior is different
    from the number of class)      ?r_   r_   r_   rJ   -Number of priors must match number of classesr   NrL   rM   r.   r.   r/   test_gnb_wrong_nb_priors   s    ra   c                  C   sV   t tddgd} d}tjt|d | tt W d   n1 sH0    Y  dS )z?Test if an error is raised if the sum of prior greater than onerI         ?rJ   z!The sum of the priors should be 1r   NrL   rM   r.   r.   r/   test_gnb_prior_greater_one   s    rc   c                  C   sD   t tddgd} | tt | ddggtdgks@J dS )z@Test if good prediction when class prior favor largely one classg{Gz?gGz?rJ   rQ   r   N)r
   r$   r7   r   r   r    r!   r4   r.   r.   r/   test_gnb_prior_large_bias   s    rd   c                  C   sP   d} d}d}t dtjd f}t| |||\}}||ks@J ||ksLJ dS )z4Test when the partial fit is called without any datar   rZ   rb   r   r   N)r$   emptyr   rC   r
   Z_update_mean_variance)Zprev_pointsmeanvarZx_emptyZtmeanZtvarr.   r.   r/   "test_gnb_check_update_with_no_data   s    rh   c                  C   s   t  tt} t  tttt}t| j|j t| j	|j	 t| j
|j
 t  tdd dd d f tdd d tt}|tdd d tdd d  t| j|j t| j	|j	 t| j
|j
 d S )Nr   r   r   )r
   r   r   r    r)   r$   uniquer   rA   r3   r8   )r*   Zclf_pfZclf_pf2r.   r.   r/   test_gnb_partial_fit   s    2 rj   c                     sP   t  } | j| j   fdddD }t|d |d  t|d |d  d S )Nc                    s(   g | ] }t  |  |  qS r.   )r
   r   r!   ).0fr   r    r.   r/   
<listcomp>       z9test_gnb_naive_bayes_scale_invariance.<locals>.<listcomp>)绽|=r   g    _Br   r   r   )r   datatargetr   )irislabelsr.   rm   r/   %test_gnb_naive_bayes_scale_invariance   s
    ru   DiscreteNaiveBayesc              	   C   sP   |   tt}dD ]8}tt t|| W d    q1 s@0    Y  qd S )N)coef_
intercept_)r   X2y2r&   r1   r2   hasattr)rv   estZattr.   r.   r/   )test_discretenb_deprecated_coef_intercept   s    r}   c                 C   s4   |   tt}tttg dd |jd d S )N)r   r   r   r6   r   )r   ry   rz   r   r$   r%   r7   class_log_prior_)rv   r*   r.   r.   r/   test_discretenb_prior   s    r   c                 C   s  |  }| ddgddgddggg d |  }|jddgddgddggg dddgd t|j|j | tu rtt|jD ]}t|j| |j|  q|nt|j|j |  }|jddggdgddgd |ddggdg |ddggdg t|j|j | tu rtt|jD ]J}t|j| j	|j| j	 tt
j|j| ddt
j|j| dd qt|jd d t
ddg t|jd d t
ddg t|jd d t
ddg t|jd d t
ddg nt|j|j d S )Nr   r   r   r   r   r   Zaxisr   )r   r)   r   class_count_r   rangelencategory_count_feature_count_rC   r$   r;   r7   )rv   rF   rG   iclf3r.   r.   r/   test_discretenb_partial_fit   s8    "*
 r   
NaiveBayesc                 C   s   t jtdd |  tt W d    n1 s20    Y  |  }|jttttd t jtdd& |jtttdd W d    n1 s0    Y  d S )Nz8classes must be passed on the first call to partial_fit.r   r   .is not the same as on last call to partial_fit*   )	r&   r'   r(   r)   ry   rz   r$   ri   Zarange)r   r*   r.   r.   r/   $test_NB_partial_fit_no_first_classes  s    ,r   )categoryc                  C   s  g dg dg dg} ddgddgddgg}g d}t ttg| |gD ]v\}}| ||}||d	d  d
ksvJ ||d gjdksJ t||d d
 jddt	
ddgd qFg d}t ttg| |gD ]\}}| ||}||dd jdksJ ||d d
 jdks*J tt	||d gd tt	||d	 gd tt	t	|jd tt	t	|jd qd S )N)r   r   r   r   r   r   )r   r   r   r   r   r   rV   )r   r   r   r   r   )r   r   r   rb   r   r   r   r   r   r   )r   r   )zipr   r   r   r!   r"   rC   r   r;   r$   r7   r   expr~   rx   )ZX_bernoulliZX_multinomialr    rv   r   r*   r.   r.   r/   test_discretenb_predict_proba/  s.    &r   c                 C   sR   |  }|j dd |dgdgdggg d t|j}t|tddg d S )NF)	fit_priorr   r   r   r   r         ?)Z
set_paramsr   r$   r   r~   r   r7   )rv   r*   priorr.   r.   r/   test_discretenb_uniform_priorR  s
    r   c                 C   s   | ddgd}| dgdgdggg d t|j}t|tddg d}tjt|d, | dgdgdggg d	 W d    n1 s0    Y  d
}tjt|d0 |j	dgdggddgg dd W d    n1 s0    Y  d S )Nr   class_priorr   r   r   r`   r   r   r   r   r   r   )
r   r$   r   r~   r   r7   r&   r'   r(   r)   )rv   r*   r   rN   r.   r.   r/   test_discretenb_provide_prior^  s    :r   c           	      C   s   t  }t|j|jddd\}}}}d g dfD ]V}| |d}||j|j | |d}|j||g dd ||| t|j|j q.d S )N皙?i  )Z	test_sizeZrandom_state)rP   rP   r   r   r   r   )r   r   rq   rr   r   r)   r   r~   )	rv   rs   Z
iris_data1Z
iris_data2Ziris_target1Ziris_target2r   Zclf_fullZclf_partialr.   r.   r/   .test_discretenb_provide_prior_with_partial_fitq  s    

r   c                 C   s  g dg dg dg dg}g d}t jg dt jd}||  }|  j|||d}t||g d |  }|j|d d	 |d d	 g d
|d d	 d |j|d	d |d	d |d	d d |j|dd  |dd  |dd  d t||g d d S )Nr   r   r   r   r   )r   r   r   r   r   r   r   r   )Zdtyper=   )r   r   r   r   r   r   r?   r   )r$   r7   Zfloat64r;   r   r   r!   r)   )rv   r   r    r>   r*   r.   r.   r/   (test_discretenb_sample_weight_multiclass  s    .((r   c                 C   sN   g dg dg}ddg}|  }| || |jjdks:J |jjdksJJ d S )Nr   )r   r   r   r   r   r   )r   )r   rw   rC   rx   )rv   r   r    r*   r.   r.   r/   $test_discretenb_coef_intercept_shape  s    r   use_partial_fitFTtrain_on_single_class_yc                 C   s   g dg dg dg}g d}|r:|d d }|d d }t tt|}t|}|  }|rn|j|||d n||| ||d d |d ksJ g d	}|D ]V}	t||	d }
|
d u rqt|
t	j
r|
jd |ksJ q|
D ]}|jd |ksJ qqd S )
Nr   r   r   )r   r   r   r   r   r   r   )Zclasses_r   r~   r   feature_log_prob_)sortedlistsetr   r)   r   r!   getattr
isinstancer$   ZndarrayrC   )rv   r   r   r   r    r   Znum_classesr*   Zattribute_namesZattribute_nameZ	attributeelementr.   r.   r/   )test_discretenb_degenerate_one_class_case  s*    r   kind)densesparsec                 C   s  | dkrt }n| dkr"tjt }t }d}tjt|d || t	 W d    n1 s^0    Y  ||t	
|}t|t	 ||}||}tt||d t }|j|d d t	d d tt	d ||dd t	dd  ||dd  t	dd   |
|}t|t	 ||}	||}
tt|	|
d t|	| t|
| t }|j|t	tt	d |
|}t|t	 ||}||}tt||d t|| t|| d S )	Nr   r   z!Negative values in data passed tor   r   r   r   r   )ry   scipyr   
csr_matrixr   r&   r'   r(   r   rz   r!   r   r"   r#   r   r$   r%   r)   ri   )r   r   r*   rN   r+   r,   r-   rG   Zy_pred2Zy_pred_proba2Zy_pred_log_proba2r   Zy_pred3Zy_pred_proba3Zy_pred_log_proba3r.   r.   r/   	test_mnnb  sB    ,


&










r   c                  C   sb  t ddgddgg} t ddg}t }td $}|j| |g dd W d    n1 s^0    Y  t|dksxJ |ddggdksJ |ddggdksJ |ddggdksJ td $}|ddggdg W d    n1 s0    Y  t|dksJ |ddggdks*J |ddggdksDJ |ddggdks^J d S )Nr   r   r   r   r   )r$   r7   r   r&   r1   r)   r   r!   )r   r    r*   recordr.   r.   r/   !test_mnb_prior_unobserved_targets  s    22r   c                  C   sr   t  } | jddgddgddggg dg dd t| ddggdg t| jd }td| |gddg d S )	Nr   r   r   r   )r   r   rV   r=   UUUUUU?UUUUUU?)r   r   r   r!   r$   r   rx   r   )r*   Zpositive_priorr.   r.   r/   test_mnb_sample_weight6  s
    *r   c                  C   s   t g dg dg dg dg} t g d}tdd}|| | t dd	g}tt |j| t g d
g dg}tt |j| t g dg}t ddgg}|t | }t|	|| d S )Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rb   alphag      ?r_   )r   g?皙?r   r   r   )r   r   r   r   r   r   g@fg;u?gy?)
r$   r7   r   r   r   r   r~   r   r;   r"   )r   r]   r*   r   feature_probZX_testZunnorm_predict_probar"   r.   r.   r/   test_bnb?  s$    
r   c                  C   s   t g dg dg dg dg dg} t g d}tdd}|| | t |jd }t t |jd | jd	 d	fj	}t
|j||  d S )
N)r   r   r   r   r   r   r   )r   r   r   )r   r   r   r   r   rb   r   rI   r   )r$   r7   r   r   r%   r   Ztiler   rC   Tr   r   )r   r]   r*   ZnumZdenomr.   r.   r/   test_bnb_feature_log_probn  s    (
$r   c                  C   s~  t g dg dg dg dg} t g d}t g dg dg}t |j}t |j}tdD ]0}t ||  ||< || ||   ||< qftd	d
}t	d}t
jt|d ||  | W d    n1 s0    Y  || | t g dg dg}t|j| t ddg}	t|j|	 t g d}
t|j|
 t|j| td	dd}|| | t|j| d S )Nr   r   r   r   r   )qq?qq?r   r   r   r   )UUUUUU?r   UUUUUU?r   r   r   r   rb   r   z8Negative values in data passed to ComplementNB (input X)r   )r   r   r   r   r   r   r   r   )r   rV   r   r   r   r   T)r   Znorm)r$   r7   ZzerosrC   r   r%   r;   r   reescaper&   r'   r(   r   r   r   r   Zfeature_all_r   r   )r   r]   ZthetaZweightsZnormed_weightsr   r*   rN   Zfeature_countZclass_countZfeature_allr.   r.   r/   test_cnb  s:    

,r   c                  C   s  t  } | ttt}t|t tddgddgg}tddg}t ddd} | || t| jtddg td	d
gg}tdg}t	
d}tjt|d | | W d    n1 s0    Y  tjt|d | || W d    n1 s0    Y  tddgg}tddgg}| }	t| |||	  t| j|jd ksdJ td	d	gd	dgd	d	gddgg}tg d}t ddd} | || t| td	d	ggtdg t| jtddg dD ]}
td	d	gd	dgd	d	gddgg}tg d}tg d|
 }t ddd} | j|||d t| td	d	ggtdg t| jtddg qd S )Nr   rV   r   r   F)r   r   r   r   r   r   z9Negative values in data passed to CategoricalNB (input X)r   r   qq?r   )rb   rP   r   g-C6?)r   r   r   皙?r=   )r   r   ry   rz   r!   r   r$   r7   n_categories_r   r   r&   r'   r(   r;   r   r"   r   r   rC   )r*   r+   ZX3Zy3r   r    	error_msgZX3_testZbayes_numeratorZbayes_denominatorZfactorr>   r.   r.   r/   test_categoricalnb  sH    

(,"$"$r   zDmin_categories, exp_X1_count, exp_X2_count, new_X, exp_n_categories_r   )r   r   r   r   rV   )r   r   r   r   c                 C   s   t ddgddgddgddgg}t g d}t dg}tdd| d}||| |j\}	}
t|	| t|
| ||}t|| t|j| d S )Nr   r   r   Fr   r   min_categories)r$   r7   r   r   r   r   r!   r   )r   Zexp_X1_countZexp_X2_countZnew_XZexp_n_categories_ZX_n_categoriesZy_n_categoriesZexpected_predictionr*   ZX1_countZX2_countZpredictionsr.   r.   r/   &test_categoricalnb_with_min_categories  s    ""




r   zmin_categories, error_msg)Zbad_arg%'min_categories' should have integralz"'min_categories' should have shape)rb   r   c                 C   s|   t ddgddgddgddgg}t g d}tdd| d}tjt|d ||| W d    n1 sn0    Y  d S )Nr   r   r   Fr   r   )r$   r7   r   r&   r'   r(   r   )r   r   r   r    r*   r.   r.   r/   (test_categoricalnb_min_categories_errors1  s
    
"r   c            	      C   st  t ddgddgg} t ddg}tdd}d}tjt|d$ |j| |ddgd W d    n1 sj0    Y  tjt|d || | W d    n1 s0    Y  t ddgddgg}t|	| | t
dd}tjt|d$ |j| |ddgd W d    n1 s0    Y  tjt|d || | W d    n1 sT0    Y  t dd	gddgg}t|	| | tdd}tjt|d || | W d    n1 s0    Y  t d
dgdd
gg}t|	| | tj| } tdd}tjt|d || | W d    n1 s80    Y  t ddgddgg}t|	| | t
dd}tjt|d || | W d    n1 s0    Y  t dd	gddgg}t|	| | t ddgddgg} t ddg}td}tdd}t
dd}tdd}tjt|d || | W d    n1 sR0    Y  tjt|d || | W d    n1 s0    Y  tjt|d || | W d    n1 s0    Y  tdd}t
dd}tjt|d$ |j| |ddgd W d    n1 s"0    Y  tjt|d$ |j| |ddgd W d    n1 sf0    Y  d S )Nr   r   rZ   r   zFalpha too small will result in numeric errors, setting alpha = 1.0e-10r   r   r   r   rb   :Smoothing parameter alpha = -1.0e-01. alpha should be > 0.rQ   )r$   r7   r   r&   r1   UserWarningr)   r   r   r"   r   r   r   r   r   r   r   r'   r(   )	r   r    nbrN   probexpected_msgZb_nbm_nbZc_nbr.   r.   r/   
test_alphaC  sj    
2*
4,
,
,
,


,,,

4r   c            	      C   s  t ddgddgg} t ddg}t ddg}t|d}|j| |ddgd t ddgddgg}t|jt | t d	d
gddgg}t|| | t ddg}t|d}d}tj	t
|d || | W d    n1 s0    Y  d}t |d dg}t|d}|j| |ddgd t| |dgdd t g d}t|d}td}tj	t
|d || | W d    n1 s0    Y  d S )Nr   r   r   r   r   r   r   g333333?grq?r   gS?gևX?rb   rQ   r   r   rp      )Zdecimal)rb   rI   g      @zAalpha should be a scalar or a numpy array with shape [n_features])r$   r7   r   r)   r   r   r%   r"   r&   r'   r(   r   Z_check_alphar   r   )	r   r    r   r   r   r   r   r   Z	ALPHA_MINr.   r.   r/   test_alpha_vector  s4    

*

r   c                  C   sF  t dd\} }t|dk|dk}| | ||  }}ttdd| |dd}| dksZJ ttdd||dd}| d	ksJ ttdd| d
k|dd}| dksJ ttdd|d
k|dd}| dksJ tt | |dd}| dksJ ttdd| |dd}| dksJ tt ||dd}| dksBJ d S )NT)Z
return_X_yr   r   r   r   )ZcvgQ?gGz?rV   g(\?gq=
ףp?gp=
ף?r   )Zvar_smoothingg{Gz?)r   r$   Z
logical_orr   r   rf   r   r
   )r   r    Z
binary_3v8ZX_3v8Zy_3v8Zscoresr.   r.   r/   test_check_accuracy_on_digits  s"    r   	Estimatorc                 C   sj   t ddgddgg}t ddg}|  ||}tjtdd |j W d    n1 s\0    Y  d S )Nr   r   r   rV   r   z`n_features_` was deprecatedr   )r$   r7   r   r&   r1   r2   Zn_features_)r   r   r    r|   r.   r.   r/   test_n_features_deprecation  s
    r   )Or   Znumpyr$   Zscipy.sparser   r&   Zsklearn.datasetsr   r   Zsklearn.model_selectionr   r   Zsklearn.utils._testingr   r   r   r	   Zsklearn.naive_bayesr
   r   r   r   r   ZDISCRETE_NAIVE_BAYES_CLASSESZALL_NAIVE_BAYES_CLASSESr7   r   r    ZrandomZRandomStaterB   Znormalr9   Zastypeintr:   rD   ry   rz   r0   r5   r<   rH   rO   rR   r^   ra   rc   rd   rh   rj   ru   ZmarkZparametrizer}   r   r   r   r2   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r.   r.   r.   r/   <module>   s   
.	 	
	



1

"




2
6
/G5	



A*"