a
    )a"                     @   s  d dl Zd dlZd dlZd dlZddlmZmZ ddlm	Z	 d dl
mZmZmZ d dlZd dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZmZm Z  ddl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z0 d.ddZ1dd Z2dd Z3d/ddZ4dd Z5dd Z6d0d d!Z7d1d"d#Z8d2d$d%Z9d&d' Z:d3d(d)Z;G d*d+ d+eZG d,d- d-eZdS )4    N   )	getFPTypeget_patch_message)support_usm_ndarray)daal_check_versionsklearn_check_versionPatchingConditionsChain)DecisionTreeClassifierDecisionTreeRegressor)Tree)RandomForestClassifier)RandomForestRegressor)check_random_statecheck_array
deprecated)check_is_fittedcheck_consistent_length_num_samples)_daal_num_features)clone)DataConversionWarning)__version__)LooseVersion)ceil)sparseFc                 C   s   | d u r|S t | trz| dkr:|r6tdtt|S |S | dkrVtdtt|S | dkrrtdtt|S tdt | tj	tj
fr| S | dkrtdt| | S dS )Nauto   sqrtlog2zSInvalid value for max_features. Allowed string values are "auto", "sqrt" or "log2".        r   )
isinstancestrmaxintnpr   r   
ValueErrornumbersIntegralZinteger)max_featuresZ
n_featuresis_classification r*   ?lib/python3.9/site-packages/daal4py/sklearn/ensemble/_forest.py_to_absolute_max_features1   s(    
r,   c                 C   s   |d u rdS t |tjrNd|  kr,| ksBn d}t|| |t||  S t |tjrtdrdt|  k rzdksn d}t||n,dt|  k rdk sn d}t||t|S d}t|t	|d S )	N      ?r   z7`max_samples` must be in range 1 to {} but got value {}1.0r   z:`max_samples` must be in range (0.0, 1.0] but got value {}z6`max_samples` must be in range (0, 1) but got value {}z7`max_samples` should be int or float, but got type '{}')
r    r&   r'   r%   formatfloatZRealr   	TypeErrortype)	n_samplesmax_samplesmsgr*   r*   r+   _get_n_samples_bootstrapJ   s$    r6   c                 C   s  t | jtjr(d| jksNtd| j n&d| j  k r>dksNn td| j t | jtjrvd| jkstd| j n&d| j  k rdksn td| j d	| j  krdksn td
| jd urt	dt
 | jdk rtd| jdk rtd| jd ur<t | jtjs td| j | jdk r<td| jt | jtjrhd| jksvtd| j ntd| j t | jtjrd| jkstd| j ntd| j d S )Nr   z:min_samples_leaf must be at least 1 or in (0, 0.5], got %sr   g      ?r   z`min_samples_split must be an integer greater than 1 or a float in (0.0, 1.0]; got the integer %sr-   z^min_samples_split must be an integer greater than 1 or a float in (0.0, 1.0]; got the float %sr   z)min_weight_fraction_leaf must in [0, 0.5]zThe min_impurity_split parameter is deprecated. Its default value has changed from 1e-7 to 0 in version 0.23, and it will be removed in 0.25. Use the min_impurity_decrease parameter instead.z5min_impurity_split must be greater than or equal to 0z8min_impurity_decrease must be greater than or equal to 0z1max_leaf_nodes must be integral number but was %rz7max_leaf_nodes {0} must be either None or larger than 1z"maxBins must be at least 2, got %sz*maxBins must be integral number but was %rz%minBinSize must be at least 1, got %sz-minBinSize must be integral number but was %r)r    min_samples_leafr&   r'   r%   min_samples_splitmin_weight_fraction_leafmin_impurity_splitwarningswarnFutureWarningmin_impurity_decreasemax_leaf_nodesr/   maxBins
minBinSizeselfr*   r*   r+   _check_parametersc   sj    




rD   c                 C   st  t |dd d}| |\}}| jd }|jd | _tdsB| j| _|d ur`|d ur\|| }n|}|d urn|g}t| j}|	dt
dj}|dk rtdt|}d	}	| j|	krtj||d
}
ntj||d
}
t| j|jd dd}t|jd | jd}| js| jrtdtjt||dt| j| jdu r8|ndt|t| jd u rRdn| jt| jtjrn| jntt | j|jd  |
t!| j"d u rdn| j"d| jrdnddt#| jt| j$tjr| j$ntt | j$|jd  | j%| j&| j'd u rdn| j'| j(| j)d}d | _*|+|||}|j,}|| _-| jrp|j.d d | _/|j0| _1| j1jd dkrp| j1j2dd| _1| S )NF	ensure_2ddtyper   r   r.   ir   z7Training data only contain information about one class.  ZseedfptypeTr)   r3   r4   6Out of bag estimation only available if bootstrap=Truehistr-   r   MDIzAcomputeOutOfBagErrorAccuracy|computeOutOfBagErrorDecisionFunction )nClassesrK   methodnTreesobservationsPerTreeFractionfeaturesPerNodemaxTreeDepthminObservationsInLeafNodeengineimpurityThresholdvarImportanceresultsToComputememorySavingMode	bootstrapminObservationsInSplitNodeminWeightFractionInLeafNodeminImpurityDecreaseInSplitNodemaxLeafNodesr@   rA   Zaxis)3r   Z_validate_y_class_weight
n_classes_shapen_features_in_r   n_features_r   random_staterandintr$   iinfor"   r%   r   n_estimatorsdaal4pyengines_mt2203engines_mt19937r,   r(   r6   r4   r^   	oob_scoreZ'decision_forest_classification_trainingr#   	max_depthr    r7   r&   r'   r   r0   r:   boolr8   r9   r>   r?   r@   rA   _cached_estimators_computemodeldaal_model_ZoutOfBagErrorAccuracy
oob_score_ZoutOfBagErrorDecisionFunctionZoob_decision_function_squeeze)rC   Xysample_weightZexpanded_class_weightre   rs_seed_X_fptypemax_stream_countdaal_engineZfeatures_per_node_Zn_samples_bootstrap_dfc_algorithmZdfc_trainingResultru   r*   r*   r+   _daal_fit_classifier   s    





#r   c                 C   s~   t |}tjt| j|dd}|jd | jkrLtd|jd  d| j d||| j	}|j
}t| j| jtjddS )	NZcomputeClassLabelsrR   rK   ZresultsToEvaluater   X has 3 features, but RandomForestClassifier is expecting  features as inputZunsafe)Zcasting)r   rm   )decision_forest_classification_predictionr#   re   rf   rg   r%   rt   rv   
predictionr$   Ztakeclasses_ravelZastypeZint64rC   ry   r~   r   Zdfc_predictionResultpredr*   r*   r+   _daal_predict_classifier  s"    r   c                 C   s6   t |}tjt| j|dd}||| j}|j}|S )NZcomputeClassProbabilitiesr   )r   rm   r   r#   re   rt   rv   Zprobabilitiesr   r*   r*   r+   _daal_predict_proba  s    r   c              	   C   s  t |rtdt|  |d ur,t||}td}|| jrFtdpL| j df| j	du df| j
dkd| j
 d	f| jd
kd| j dft | dfg}|rZtdr| j|dd t|tjtjgd}t|}t|}|jdkr|jd dkrtjdtdd t|| |jdkr.t|d}|jd | _|| jdkd| j dfg}|  |rt| |||d | j| _| j d | _ | j!d | _!| S t"t#| j$|||dS )N3sparse multilabel-indicator for y is not supported.z+sklearn.ensemble.RandomForestClassifier.fit  Pi  COOB score is only supported starting from 2021.5 version of oneDAL.FWarm start is not supported.gini'zA' criterion is not supported. Only 'gini' criterion is supported.r   Non-zero 'ccp_alpha' () is not supported.+X is sparse. Sparse input is not supported.r.   TresetrG   r   r   A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
stacklevelrc   r   Number of outputs () is not 1.r{   r   )%spissparser%   rD   check_sample_weightr   and_conditionsrp   r   
warm_start	criterion	ccp_alphar   _check_feature_namesr   r$   float32float64asarray
atleast_1dndimrf   r;   r<   r   r   reshape
n_outputs_	write_logr   _estimators_estimators_re   r   superr   fitrC   ry   rz   r{   _patching_status
_dal_readyr*   r*   r+   _fit_classifier%  sd    





r   c                 C   s  |j d | _tds| j| _t| j}| js:| jr:tdt	|}|
dtdj}d}| j|krttj||d}ntj||d}t| j|j d dd	}	t|j d | jd
}
|d ur|g}tjt	|dt| j| jdu r|
ndt|	t| jd u rdn| jt| jtjr| jntt| j|j d  |t| jd u r:dn| jd| jrNdnddt | jt| j!tjrp| j!ntt| j!|j d  | j"| j#| j$d u rdn| j$| j%| j&d}d | _'|(|||}|j)}|| _*| jr|j+d d | _,|j-j.dd| _/| j/j d dkr| j/j.dd| _/| S )Nr   r.   rN   r   rH   rI   rJ   FrL   rM   rO   Tr-   r   rP   z5computeOutOfBagErrorR2|computeOutOfBagErrorPredictionrQ   )rK   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   r@   rA   rd   rc   )0rf   rg   r   rh   r   ri   r^   rp   r%   r   rj   r$   rk   r"   rl   rm   rn   ro   r,   r(   r6   r4   Z#decision_forest_regression_trainingr#   rq   r    r7   r&   r'   r   r0   r:   rr   r8   r9   r>   r?   r@   rA   rs   rt   ru   rv   ZoutOfBagErrorR2rw   ZoutOfBagErrorPredictionrx   Zoob_prediction_)rC   ry   rz   r{   r|   r~   r}   r   r   Z_featuresPerNodeZn_samples_bootstrapZdfr_algorithmZdfr_trainingResultru   r*   r*   r+   _daal_fit_regressorb  s    



!r   c              	   C   s  t |rtdt|  |d ur,t||}tdrJ| jdkrJtdt	 t
d}|| jrdtdpj| j df| jdu d	f| jd
v d| j df| jdkd| j dft | dfg}|rtdr| j|dd t|tjtjgd}t|}t|}|jdkr*|jd dkr*tjdtdd t|d|jd}t|| |jdkr\t|d}|jd | _|| jdkd| j dfg}|  |rt| |||d | j | _!| S t"t#| j$|||dS )Nr   r.   msezCriterion 'mse' was deprecated in v1.0 and will be removed in version 1.2. Use `criterion='squared_error'` which is equivalent.z*sklearn.ensemble.RandomForestRegressor.fitr   r   Fr   )r   squared_errorr   zT' criterion is not supported. Only 'mse' and 'squared_error' criteria are supported.r   r   r   r   Tr   r   r   r   r   r   rE   r   r   r   r   )%r   r   r%   rD   r   r   r   r;   r<   r=   r   r   rp   r   r   r   r   r   r$   r   r   r   r   r   rf   r   rG   r   r   r   r   r   r   r   r   r   r   r   r*   r*   r+   _fit_regressor  sl    





r   c                 C   s^   |j d | jkr.td|j d  d| j dt|}tj|d}||| j}|j}|	 S )Nr   r   z2 features, but RandomForestRegressor is expecting r   )rK   )
rf   rg   r%   r   rm   Z%decision_forest_regression_predictionrt   rv   r   r   )rC   ry   r~   Zdfr_algZdfr_predictionResultr   r*   r*   r+   _daal_predict_regressor  s    r   c                 C   s   t |}|d ur&|tjtjfvr&tj}| d u r>tj||d} nvt| tjr\tj|| |d} nX|d u rptjtjg}t	| dd|dd} | j
dkrtd| j|fkrtd| j|f| S )Nr   FC)accept_sparserF   rG   orderr   z)Sample weights must be 1D array or scalarz'sample_weight.shape == {}, expected {}!)r   r$   r   r   Zonesr    r&   NumberZfullr   r   r%   rf   r/   )r{   ry   rG   r3   r*   r*   r+   r     s(    
r   c                       s   e Zd ZejZedr&d fdd	Znd fdd	Ze dddZe  fddZ	e  fddZ
edrededd Zedd Z  ZS )r   r.   d   r   Nr   r   r   r   TFr      c                    sR   t t| j|||||||||	|
||||||d || _|| _|| _|| _d | _d S )N)rl   r   rq   r8   r7   r9   r(   r?   r>   r^   rp   n_jobsri   verboser   class_weight)r   r   __init__r   r4   r@   rA   r:   )rC   rl   r   rq   r8   r7   r9   r(   r?   r>   r^   rp   r   ri   r   r   r   r   r4   r@   rA   	__class__r*   r+   r   &  s.    
zRandomForestClassifier.__init__c                    sF   t t| j|||||||||	|
|||||||||d || _|| _d S )N)rl   r   rq   r8   r7   r9   r(   r?   r>   r:   r^   rp   r   ri   r   r   r   r   r4   )r   r   r   r@   rA   )rC   rl   r   rq   r8   r7   r9   r(   r?   r>   r:   r^   rp   r   ri   r   r   r   r   r4   r@   rA   r   r*   r+   r   S  s.    
c                 C   s   t | |||dS a?  
        Build a forest of trees from the training set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Internally, its dtype will be converted
            to ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csc_matrix``.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted. Splits
            that would create child nodes with net zero or negative weight are
            ignored while searching for a split in each node. In the case of
            classification, splits are also ignored if they would result in any
            single class carrying a negative weight in either child node.

        Returns
        -------
        self : object
        r   )r   rC   ry   rz   r{   r*   r*   r+   r     s    zRandomForestClassifier.fitc                    s   t d}|t| ddft| dfg}t| drV|| jdkd| j dfg}|  |srtt| 	|S t
d	r| j|d
d t|g dtjtjgd}t| |S )  
        Predict class for X.

        The predicted class of an input sample is a vote by the trees in
        the forest, weighted by their probability estimates. That is,
        the predicted class is the one with highest mean probability
        estimate across the trees.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.

        Returns
        -------
        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
            The predicted classes.
        z/sklearn.ensemble.RandomForestClassifier.predictrv   oneDAL model was not trained.r   r   r   r   r   r.   Fr   ZcsrZcscZcoor   rG   )r   r   hasattrr   r   r   r   r   r   predictr   r   r   r$   r   r   r   rC   ry   r   r   r   r*   r+   r     s0    

zRandomForestClassifier.predictc                    s$  t dr| j|dd t| drjzt|}W n tyF   t|}Y n0 || jkrjtd| d| j dtd}|	t| d	d
ft
| dftddfg}t| dr|	| jdkd| j dfg}|  |stt| |S t|tjtjgd}t|  t dr| j|dd t| |S )a  
        Predict class probabilities for X.

        The predicted class probabilities of an input sample are computed as
        the mean predicted class probabilities of the trees in the forest.
        The class probability of a single tree is the fraction of samples of
        the same class in a leaf.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.

        Returns
        -------
        p : ndarray of shape (n_samples, n_classes), or a list of n_outputs
            such arrays if n_outputs > 1.
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        r.   Fr   rg   r   r   r   z5sklearn.ensemble.RandomForestClassifier.predict_probarv   r   r   )r   r   i  z$oneDAL version is lower than 2021.4.r   r   r   r   r   z0.23)r   r   r   r   r1   r   rg   r%   r   r   r   r   r   r   r   r   r   predict_probar   r$   r   r   r   Z_check_n_featuresr   )rC   ry   Znum_featuresr   r   r   r*   r+   r     sJ    



z$RandomForestClassifier.predict_probaoAttribute `n_features_` was deprecated in version 1.0 and will be removed in 1.2. Use `n_features_in_` instead.c                 C   s   | j S Nrg   rB   r*   r*   r+   rh     s    z"RandomForestClassifier.n_features_c              
   C   sd  t | dr| jr| jS tdr(t|  n
t| d | jd }| jd }| j| j| j| j	| j
| j| j| jd d	}tds| j|d< tf i |}g }t| j}t| jD ]}t|}|j|ttjjd tdr| j|_n| j|_| j|_||_||_t| j ||}	|	j|	j!|	j"|	j#d	}
t$| jtj%|gtj&d
| j|_'|j'(|
 |)| q|| _|S )Nrs   0.22rv   r   	r   rq   r8   r7   r9   r(   r?   r>   ri   r.   r:   ri   rq   
node_countZnodesvaluesr   )*r   rs   r   r   r   re   r   rq   r8   r7   r9   r(   r?   r>   r:   r	   r   ri   rangerl   r   
set_paramsrj   r$   rk   int32r"   rg   rh   r   rm   getTreeStaterv   r   node_arvalue_arr   arrayintptree___setstate__append)rC   r   re   paramsestr   random_state_checkedrH   est_itree_i_state_classtree_i_state_dictr*   r*   r+   r     sl    







z#RandomForestClassifier._estimators_)r   r   Nr   r   r   r   Nr   TFNNr   FNr   Nr   r   )r   r   Nr   r   r   r   Nr   NTFNNr   FNr   Nr   r   )N)__name__
__module____qualname__RandomForestClassifier_original__doc__r   r   r   r   r   r   r   propertyrh   r   __classcell__r*   r*   r   r+   r   "  sv                       .                     .,9r   c                       s   e Zd ZejZedrNdddddddddd	d
dddd
ddddd fddZn8ddddddddddd	d
dddd
ddddd fddZe dddZe  fddZ	edre
dedd Zedd Z  ZS )r   r.   r   r   Nr   r   r   r   TFr   r   )r   rq   r8   r7   r9   r(   r?   r>   r^   rp   r   ri   r   r   r   r4   r@   rA   c                   sP   t t| j|||||||||	|
|||||d || _|| _|| _|| _d | _d S )N)rl   r   rq   r8   r7   r9   r(   r?   r>   r^   rp   r   ri   r   r   )r   r   r   r   r4   r@   rA   r:   )rC   rl   r   rq   r8   r7   r9   r(   r?   r>   r^   rp   r   ri   r   r   r   r4   r@   rA   r   r*   r+   r   [  s,    
zRandomForestRegressor.__init__r   )r   rq   r8   r7   r9   r(   r?   r>   r:   r^   rp   r   ri   r   r   r   r4   r@   rA   c                   sD   t t| j|||||||||	|
||||||||d || _|| _d S )N)rl   r   rq   r8   r7   r9   r(   r?   r>   r:   r^   rp   r   ri   r   r   r   r4   )r   r   r   r@   rA   )rC   rl   r   rq   r8   r7   r9   r(   r?   r>   r:   r^   rp   r   ri   r   r   r   r4   r@   rA   r   r*   r+   r     s,    
c                 C   s   t | |||dS r   )r   r   r*   r*   r+   r     s    zRandomForestRegressor.fitc                    s   t d}|t| ddft| dfg}t| drV|| jdkd| j dfg}|  |srtt| 	|S t
d	r| j|d
d t|g dtjtjgd}t| |S )r   z.sklearn.ensemble.RandomForestRegressor.predictrv   r   r   r   r   r   r   r.   Fr   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r$   r   r   r   r   r   r*   r+   r     s0    

zRandomForestRegressor.predictr   c                 C   s   | j S r   r   rB   r*   r*   r+   rh     s    z!RandomForestRegressor.n_features_c           	   
   C   s<  t | dr| jr| jS tdr(t|  n
t| d | j| j| j| j| j| j	| j
| jd d	}tdsl| j|d< tf i |}g }t| j}t| jD ]}t|}|j|ttjjd tdr| j|_n| j|_| j|_t| j|}|j|j|j |j!d}t"| jtj#d	gtj$d
| j|_%|j%&| |'| q|S )Nrs   r   rv   r   r.   r:   r   r   r   r   )(r   rs   r   r   r   rq   r8   r7   r9   r(   r?   r>   r:   r	   r   ri   r   rl   r   r   rj   r$   rk   r   r"   rg   rh   r   rm   r   rv   r   r   r   r   r   r   r   r   r   )	rC   r   r   r   r   rH   r   r   r   r*   r*   r+   r     sZ    





z"RandomForestRegressor._estimators_)r   )r   )N)r   r   r   RandomForestRegressor_originalr   r   r   r   r   r   r   r   rh   r   r   r*   r*   r   r+   r   W  sr    , ,,r   )F)N)N)N)N)N)<Znumpyr$   r&   r;   rm   Z_utilsr   r   Z_device_offloadr   Zdaal4py.sklearn._utilsr   r   r   ZloggingZsklearn.treer	   r
   Zsklearn.tree._treer   Zsklearn.ensembler   r   r   r   Zsklearn.utilsr   r   r   Zsklearn.utils.validationr   r   r   Zutils.validationr   Zsklearn.baser   Zsklearn.exceptionsr   Zsklearnr   Zsklearn_versionZdistutils.versionr   Zmathr   Zscipyr   r   r,   r6   rD   r   r   r   r   r   r   r   r   r*   r*   r*   r+   <module>   sH    
=
c
=
U
B
  7