a
    ;ZanT                     @   s  d Z ddlmZ ddlZddlZddlmZ ddlm	Z	 ddl
mZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z! dgZ"dd Z#dd Z$dd Z%dddddd d!dZ&dS )"zBPartial dependence plots for regression and classification models.    )IterableN)sparse)
mquantiles   )is_classifieris_regressor)	cartesian)check_array)check_matplotlib_support)_safe_indexing)_determine_key_type)_get_column_indices)check_is_fitted)Bunch)DecisionTreeRegressor)RandomForestRegressor)NotFittedError)BaseGradientBoosting)BaseHistGradientBoostingpartial_dependencec                 C   s  t |trt|dkrtdtdd |D s8td|d |d krPtd|dkr`td	g }t| jd D ]}tt	| |dd
}|jd |k r|}nNt
t	| |dd
|dd}t|d |d rtdtj|d |d |dd}|| qrt||fS )a  Generate a grid of points based on the percentiles of X.

    The grid is a cartesian product between the columns of ``values``. The
    ith column of ``values`` consists in ``grid_resolution`` equally-spaced
    points between the percentiles of the jth column of X.
    If ``grid_resolution`` is bigger than the number of unique values in the
    jth column of X, then those unique values will be used instead.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_target_features)
        The data.

    percentiles : tuple of floats
        The percentiles which are used to construct the extreme values of
        the grid. Must be in [0, 1].

    grid_resolution : int
        The number of equally spaced points to be placed on the grid for each
        feature.

    Returns
    -------
    grid : ndarray, shape (n_points, n_target_features)
        A value for each feature at each point in the grid. ``n_points`` is
        always ``<= grid_resolution ** X.shape[1]``.

    values : list of 1d ndarrays
        The values with which the grid has been created. The size of each
        array ``values[j]`` is either ``grid_resolution``, or the number of
        unique values in ``X[:, j]``, whichever is smaller.
    r   z/'percentiles' must be a sequence of 2 elements.c                 s   s&   | ]}d |  kodkn  V  qdS )r      N ).0xr   r   Elib/python3.9/site-packages/sklearn/inspection/_partial_dependence.py	<genexpr>I       z_grid_from_X.<locals>.<genexpr>z''percentiles' values must be in [0, 1].r   r   z9percentiles[0] must be strictly less than percentiles[1].z2'grid_resolution' must be strictly greater than 1.axis)Zprobr   ztpercentiles are too close to each other, unable to build the grid. Please choose percentiles that are further apart.T)ZnumZendpoint)
isinstancer   len
ValueErrorallrangeshapenpuniquer   r   ZallcloseZlinspaceappendr   )Xpercentilesgrid_resolutionvaluesZfeatureZuniquesr   Zemp_percentilesr   r   r   _grid_from_X&   s8    !r,   c                 C   s&   |  ||}|jdkr"|dd}|S )Nr   )Z%_compute_partial_dependence_recursionndimreshape)estgridfeaturesaveraged_predictionsr   r   r   _partial_dependence_recursionm   s    
r4   c                 C   s
  g }g }t | r| j}nnt| dd }t| dd }	|dkrB|p>|	}n|dkrN|n|	}|d u r|dkrltdn|dkr~tdntd|D ]}
| }t|D ]>\}}t|dr|
| |jd d |f< q|
| |d d |f< qz*||}|| |t	j
|dd	 W q ty8 } ztd
|W Y d }~qd }~0 0 q|jd }t	|j}t | rv|jdkrv||d}n.t| r|jd dkr|d }||d}t	|j}t | r|jdkr|dd}n.t| r|jd dkr|d }|dd}||fS )Npredict_probadecision_functionautozCThe estimator has no predict_proba and no decision_function method.z*The estimator has no predict_proba method.z.The estimator has no decision_function method.ilocr   r   z0'estimator' parameter must be a fitted estimatorr   r-   r   )r   Zpredictgetattrr!   copy	enumeratehasattrr8   r'   r%   Zmeanr   r$   ZarrayTr.   r/   r   )r0   r1   r2   r(   response_methodpredictionsr3   Zprediction_methodr5   r6   Z
new_valuesZX_evaliZvariableZpredeZ	n_samplesr   r   r   _partial_dependence_brutew   s\    



"
rB   r7   )g?gffffff?d   legacy)r>   r)   r*   methodkindc                C   s  t |  t| s t| s tdt| rBt| jd tjrBtdt|dsdt	
|sdt|dtd}d}||vrtd|d	|t| r|d
krtdd}	||	vrtd|d	|	|dkr|dkr|dkrtdd}|d
kr(t| tr| jdu rd}nt| tttfr$d}nd}|dkrt| ttttfs^d}
tdd	|
|d
krld}|dkrtd|t|dddkrtt|drtd|jd d tjt||tjdd }tt||dd||\}}|dkr<t| ||||\}}|jd |jd gd!d" |D R  }nt | ||}|jd gd#d" |D R  }|dkrt!"d$t# ||fS |dkrt$||d%S |d&krt$||d'S t$|||d(S dS ))a  Partial dependence of ``features``.

    Partial dependence of a feature (or a set of features) corresponds to
    the average response of an estimator for each possible value of the
    feature.

    Read more in the :ref:`User Guide <partial_dependence>`.

    .. warning::

        For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, the
        `'recursion'` method (used by default) will not account for the `init`
        predictor of the boosting process. In practice, this will produce
        the same values as `'brute'` up to a constant offset in the target
        response, provided that `init` is a constant estimator (which is the
        default). However, if `init` is not a constant estimator, the
        partial dependence values are incorrect for `'recursion'` because the
        offset will be sample-dependent. It is preferable to use the `'brute'`
        method. Note that this only applies to
        :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
        :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.

    Parameters
    ----------
    estimator : BaseEstimator
        A fitted estimator object implementing :term:`predict`,
        :term:`predict_proba`, or :term:`decision_function`.
        Multioutput-multiclass classifiers are not supported.

    X : {array-like or dataframe} of shape (n_samples, n_features)
        ``X`` is used to generate a grid of values for the target
        ``features`` (where the partial dependence will be evaluated), and
        also to generate values for the complement features when the
        `method` is 'brute'.

    features : array-like of {int, str}
        The feature (e.g. `[0]`) or pair of interacting features
        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.

    response_method : {'auto', 'predict_proba', 'decision_function'},             default='auto'
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. For regressors
        this parameter is ignored and the response is always the output of
        :term:`predict`. By default, :term:`predict_proba` is tried first
        and we revert to :term:`decision_function` if it doesn't exist. If
        ``method`` is 'recursion', the response is always the output of
        :term:`decision_function`.

    percentiles : tuple of float, default=(0.05, 0.95)
        The lower and upper percentile used to create the extreme values
        for the grid. Must be in [0, 1].

    grid_resolution : int, default=100
        The number of equally spaced points on the grid, for each target
        feature.

    method : {'auto', 'recursion', 'brute'}, default='auto'
        The method used to calculate the averaged predictions:

        - `'recursion'` is only supported for some tree-based estimators
          (namely
          :class:`~sklearn.ensemble.GradientBoostingClassifier`,
          :class:`~sklearn.ensemble.GradientBoostingRegressor`,
          :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
          :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
          :class:`~sklearn.tree.DecisionTreeRegressor`,
          :class:`~sklearn.ensemble.RandomForestRegressor`,
          ) when `kind='average'`.
          This is more efficient in terms of speed.
          With this method, the target response of a
          classifier is always the decision function, not the predicted
          probabilities. Since the `'recursion'` method implicitly computes
          the average of the Individual Conditional Expectation (ICE) by
          design, it is not compatible with ICE and thus `kind` must be
          `'average'`.

        - `'brute'` is supported for any estimator, but is more
          computationally intensive.

        - `'auto'`: the `'recursion'` is used for estimators that support it,
          and `'brute'` is used otherwise.

        Please see :ref:`this note <pdp_method_differences>` for
        differences between the `'brute'` and `'recursion'` method.

    kind : {'legacy', 'average', 'individual', 'both'}, default='legacy'
        Whether to return the partial dependence averaged across all the
        samples in the dataset or one line per sample or both.
        See Returns below.

        Note that the fast `method='recursion'` option is only available for
        `kind='average'`. Plotting individual dependencies requires using the
        slower `method='brute'` option.

        .. versionadded:: 0.24
        .. deprecated:: 0.24
            `kind='legacy'` is deprecated and will be removed in version 1.1.
            `kind='average'` will be the new default. It is intended to migrate
            from the ndarray output to :class:`~sklearn.utils.Bunch` output.


    Returns
    -------
    predictions : ndarray or :class:`~sklearn.utils.Bunch`

        - if `kind='legacy'`, return value is ndarray of shape (n_outputs,                 len(values[0]), len(values[1]), ...)
            The predictions for all the points in the grid, averaged
            over all samples in X (or over the training data if ``method``
            is 'recursion').

        - if `kind='individual'`, `'average'` or `'both'`, return value is                 :class:`~sklearn.utils.Bunch`
            Dictionary-like object, with the following attributes.

            individual : ndarray of shape (n_outputs, n_instances,                     len(values[0]), len(values[1]), ...)
                The predictions for all the points in the grid for all
                samples in X. This is also known as Individual
                Conditional Expectation (ICE)

            average : ndarray of shape (n_outputs, len(values[0]),                     len(values[1]), ...)
                The predictions for all the points in the grid, averaged
                over all samples in X (or over the training data if
                ``method`` is 'recursion').
                Only available when kind='both'.

            values : seq of 1d ndarrays
                The values with which the grid has been created. The generated
                grid is a cartesian product of the arrays in ``values``.
                ``len(values) == len(features)``. The size of each array
                ``values[j]`` is either ``grid_resolution``, or the number of
                unique values in ``X[:, j]``, whichever is smaller.

        ``n_outputs`` corresponds to the number of classes in a multi-class
        setting, or to the number of tasks for multi-output regression.
        For classical regression and binary classification ``n_outputs==1``.
        ``n_values_feature_j`` corresponds to the size ``values[j]``.

    values : seq of 1d ndarrays
        The values with which the grid has been created. The generated grid
        is a cartesian product of the arrays in ``values``. ``len(values) ==
        len(features)``. The size of each array ``values[j]`` is either
        ``grid_resolution``, or the number of unique values in ``X[:, j]``,
        whichever is smaller. Only available when `kind="legacy"`.

    See Also
    --------
    PartialDependenceDisplay.from_estimator : Plot Partial Dependence.
    PartialDependenceDisplay : Partial Dependence visualization.

    Examples
    --------
    >>> X = [[0, 0, 2], [1, 0, 0]]
    >>> y = [0, 1]
    >>> from sklearn.ensemble import GradientBoostingClassifier
    >>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)
    >>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),
    ...                    grid_resolution=2) # doctest: +SKIP
    (array([[-4.52...,  4.52...]]), [array([ 0.,  1.])])
    z5'estimator' must be a fitted regressor or classifier.r   z3Multiclass-multioutput estimators are not supportedZ	__array__z	allow-nan)Zforce_all_finitedtype)r7   r5   r6   zEresponse_method {} is invalid. Accepted response_method names are {}.z, r7   zKThe response_method parameter is ignored for regressors and must be 'auto'.)brute	recursionr7   z3method {} is invalid. Accepted method names are {}.averagerD   rI   zCThe 'recursion' method only applies when 'kind' is set to 'average'rH   N)ZGradientBoostingClassifierZGradientBoostingRegressorZHistGradientBoostingClassifierHistGradientBoostingRegressorrK   r   r   z[Only the following estimators support the 'recursion' method: {}. Try using method='brute'.r6   zUWith the 'recursion' method, the response_method must be 'decision_function'. Got {}.F)Zaccept_sliceintzall features must be in [0, {}]r   C)rG   orderr   r-   c                 S   s   g | ]}|j d  qS r   r$   r   valr   r   r   
<listcomp>  r   z&partial_dependence.<locals>.<listcomp>c                 S   s   g | ]}|j d  qS rO   rP   rQ   r   r   r   rS     r   zA Bunch will be returned in place of 'predictions' from version 1.1 (renaming of 0.26) with partial dependence results accessible via the 'average' key. In the meantime, pass kind='average' to get the future behaviour.)rJ   r+   
individual)rT   r+   )rJ   rT   r+   )%r   r   r   r!   r   Zclasses_r%   Zndarrayr<   r   Zissparser	   objectformatjoinr   initr   r   r   r   anyZlessr$   Zasarrayr   Zint32Zravelr,   r   rB   r/   r4   warningswarnFutureWarningr   )Z	estimatorr(   r2   r>   r)   r*   rE   rF   Zaccepted_responsesZaccepted_methodsZsupported_classes_recursionZfeatures_indicesr1   r+   r3   r?   r   r   r   r      s     2



		









)'__doc__collections.abcr   rZ   Znumpyr%   Zscipyr   Zscipy.stats.mstatsr   baser   r   Zutils.extmathr   Zutilsr	   r
   r   r   r   Zutils.validationr   r   Ztreer   Zensembler   
exceptionsr   Zensemble._gbr   Z2ensemble._hist_gradient_boosting.gradient_boostingr   __all__r,   r4   rB   r   r   r   r   r   <module>   s<   G
^