
    h$,fV              	           d dl Z d dlmZmZ d dlZddlmZmZm	Z	m
Z
mZmZ ddlmZ ddlmZmZ ddlmZmZmZmZmZ ddlmZmZ dd	lmZ dd
lmZmZm Z  ddl!m"Z"  ejF                  d      Z$d Z% G d deee
e	e      Z&y)    N)IntegralReal   )BaseEstimatorMetaEstimatorMixinMultiOutputMixinRegressorMixin_fit_contextclone)ConvergenceWarning)check_consistent_lengthcheck_random_state)
HasMethodsIntervalOptions
RealNotInt
StrOptions)_raise_for_unsupported_routing_RoutingNotSupportedMixin)sample_without_replacement)_check_sample_weightcheck_is_fittedhas_fit_parameter   )LinearRegressionc           
      <   | t        |      z  }t        t        d|z
        }t        t        d||z  z
        }|dk(  ry|dk(  rt        d      S t        t        t	        j
                  t	        j                  |      t	        j                  |      z                    S )a  Determine number trials such that at least one outlier-free subset is
    sampled for the given inlier/outlier ratio.

    Parameters
    ----------
    n_inliers : int
        Number of inliers in the data.

    n_samples : int
        Total number of samples in the data.

    min_samples : int
        Minimum number of samples chosen randomly from original data.

    probability : float
        Probability (confidence) that one outlier-free sample is generated.

    Returns
    -------
    trials : int
        Number of trials.

    r   r   inf)floatmax_EPSILONabsnpceillog)	n_inliers	n_samplesmin_samplesprobabilityinlier_rationomdenoms          <lib/python3.12/site-packages/sklearn/linear_model/_ransac.py_dynamic_max_trialsr-   &   s    0 uY//L
hK
(C!lK778E
axzU|uRWWRVVC[266%=89:;;    c                   $   e Zd ZU dZ eg d      dg eeddd       eeddd      dg eeddd      dge	dge	dg eeddd       e
eej                  h      g eeddd       e
eej                  h      g eeddd       e
eej                  h      g eeddd      g eeddd      g ed	d
h      e	gdgdZeed<   	 ddddddej                  ej                  ej                  dd	dddZ ed      dd       Zd Zd Zd Zy)RANSACRegressora  RANSAC (RANdom SAmple Consensus) algorithm.

    RANSAC is an iterative algorithm for the robust estimation of parameters
    from a subset of inliers from the complete data set.

    Read more in the :ref:`User Guide <ransac_regression>`.

    Parameters
    ----------
    estimator : object, default=None
        Base estimator object which implements the following methods:

         * `fit(X, y)`: Fit model to given training data and target values.
         * `score(X, y)`: Returns the mean accuracy on the given test data,
           which is used for the stop criterion defined by `stop_score`.
           Additionally, the score is used to decide which of two equally
           large consensus sets is chosen as the better one.
         * `predict(X)`: Returns predicted values using the linear model,
           which is used to compute residual error using loss function.

        If `estimator` is None, then
        :class:`~sklearn.linear_model.LinearRegression` is used for
        target values of dtype float.

        Note that the current implementation only supports regression
        estimators.

    min_samples : int (>= 1) or float ([0, 1]), default=None
        Minimum number of samples chosen randomly from original data. Treated
        as an absolute number of samples for `min_samples >= 1`, treated as a
        relative number `ceil(min_samples * X.shape[0])` for
        `min_samples < 1`. This is typically chosen as the minimal number of
        samples necessary to estimate the given `estimator`. By default a
        :class:`~sklearn.linear_model.LinearRegression` estimator is assumed and
        `min_samples` is chosen as ``X.shape[1] + 1``. This parameter is highly
        dependent upon the model, so if a `estimator` other than
        :class:`~sklearn.linear_model.LinearRegression` is used, the user must
        provide a value.

    residual_threshold : float, default=None
        Maximum residual for a data sample to be classified as an inlier.
        By default the threshold is chosen as the MAD (median absolute
        deviation) of the target values `y`. Points whose residuals are
        strictly equal to the threshold are considered as inliers.

    is_data_valid : callable, default=None
        This function is called with the randomly selected data before the
        model is fitted to it: `is_data_valid(X, y)`. If its return value is
        False the current randomly chosen sub-sample is skipped.

    is_model_valid : callable, default=None
        This function is called with the estimated model and the randomly
        selected data: `is_model_valid(model, X, y)`. If its return value is
        False the current randomly chosen sub-sample is skipped.
        Rejecting samples with this function is computationally costlier than
        with `is_data_valid`. `is_model_valid` should therefore only be used if
        the estimated model is needed for making the rejection decision.

    max_trials : int, default=100
        Maximum number of iterations for random sample selection.

    max_skips : int, default=np.inf
        Maximum number of iterations that can be skipped due to finding zero
        inliers or invalid data defined by ``is_data_valid`` or invalid models
        defined by ``is_model_valid``.

        .. versionadded:: 0.19

    stop_n_inliers : int, default=np.inf
        Stop iteration if at least this number of inliers are found.

    stop_score : float, default=np.inf
        Stop iteration if score is greater equal than this threshold.

    stop_probability : float in range [0, 1], default=0.99
        RANSAC iteration stops if at least one outlier-free set of the training
        data is sampled in RANSAC. This requires to generate at least N
        samples (iterations)::

            N >= log(1 - probability) / log(1 - e**m)

        where the probability (confidence) is typically set to high value such
        as 0.99 (the default) and e is the current fraction of inliers w.r.t.
        the total number of samples.

    loss : str, callable, default='absolute_error'
        String inputs, 'absolute_error' and 'squared_error' are supported which
        find the absolute error and squared error per sample respectively.

        If ``loss`` is a callable, then it should be a function that takes
        two arrays as inputs, the true and predicted value and returns a 1-D
        array with the i-th value of the array corresponding to the loss
        on ``X[i]``.

        If the loss on a sample is greater than the ``residual_threshold``,
        then this sample is classified as an outlier.

        .. versionadded:: 0.18

    random_state : int, RandomState instance, default=None
        The generator used to initialize the centers.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    estimator_ : object
        Best fitted model (copy of the `estimator` object).

    n_trials_ : int
        Number of random selection trials until one of the stop criteria is
        met. It is always ``<= max_trials``.

    inlier_mask_ : bool array of shape [n_samples]
        Boolean mask of inliers classified as ``True``.

    n_skips_no_inliers_ : int
        Number of iterations skipped due to finding zero inliers.

        .. versionadded:: 0.19

    n_skips_invalid_data_ : int
        Number of iterations skipped due to invalid data defined by
        ``is_data_valid``.

        .. versionadded:: 0.19

    n_skips_invalid_model_ : int
        Number of iterations skipped due to an invalid model defined by
        ``is_model_valid``.

        .. versionadded:: 0.19

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    HuberRegressor : Linear regression model that is robust to outliers.
    TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.
    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/RANSAC
    .. [2] https://www.sri.com/wp-content/uploads/2021/12/ransac-publication.pdf
    .. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf

    Examples
    --------
    >>> from sklearn.linear_model import RANSACRegressor
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(
    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)
    >>> reg = RANSACRegressor(random_state=0).fit(X, y)
    >>> reg.score(X, y)
    0.9885...
    >>> reg.predict(X[:1,])
    array([-31.9417...])
    )fitscorepredictNr   left)closedr   bothabsolute_errorsquared_errorrandom_state)	estimatorr'   residual_thresholdis_data_validis_model_valid
max_trials	max_skipsstop_n_inliers
stop_scorestop_probabilitylossr9   _parameter_constraintsd   gGz?)r'   r;   r<   r=   r>   r?   r@   rA   rB   rC   r9   c                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        || _
        || _        y N)r:   r'   r;   r<   r=   r>   r?   r@   rA   rB   r9   rC   )selfr:   r'   r;   r<   r=   r>   r?   r@   rA   rB   rC   r9   s                r,   __init__zRANSACRegressor.__init__  s_      #&"4*,$",$ 0(	r.   F)prefer_skip_nested_validationc           	      <   t        | d|       t        dd      }t        d      }| j                  ||||f      \  }}t        ||       | j                  t        | j                        }n
t               }| j                  .t        |t              st        d	      |j                  d
   d
z   }ncd| j                  cxk  rd
k  r3n n0t        j                  | j                  |j                  d   z        }n| j                  d
k\  r| j                  }|j                  d   kD  rt        d|j                  d   z        | j                  ?t        j                  t        j                  |t        j                  |      z
              }n| j                  }| j                   dk(  r|j"                  d
k(  rd }	nKd }	nG| j                   dk(  r|j"                  d
k(  rd }	n%d }	n!t%        | j                         r| j                   }	t'        | j(                        }
	 |j+                  |
       t-        |d      }t/        |      j0                  }||st        d|z        |t3        ||      }d
}t        j4                   }d}d}d}d}d| _        d| _        d| _        |j                  d   }t        j<                  |      }d| _        | j@                  }| j>                  |k  r| xj>                  d
z  c_        | j6                  | j8                  z   | j:                  z   | jB                  kD  rntE        |||
      }||   }||   }| jF                  (| jG                  ||      s| xj8                  d
z  c_        ||jI                  ||       n|jI                  ||||          | jJ                  *| jK                  |||      s| xj:                  d
z  c_        |jM                  |      } 	||      }||k  }t        jN                  |      }||k  r| xj6                  d
z  c_        X||   }||   }||   }|jQ                  ||      } ||k(  r| |k  r|}| }|}|}|}|}tS        |tU        |||| jV                              }|| jX                  k\  s|| jZ                  k\  rn| j>                  |k  r|I| j6                  | j8                  z   | j:                  z   | jB                  kD  rt        d      t        d      | j6                  | j8                  z   | j:                  z   | jB                  kD  rt]        j^                  dt`               ||jI                  ||       n|jI                  ||||          || _1        || _2        | S # t        $ r Y kw xY w)a  Fit estimator using RANSAC algorithm.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample
            raises error if sample_weight is passed and estimator
            fit method does not support it.

            .. versionadded:: 0.18

        Returns
        -------
        self : object
            Fitted `RANSACRegressor` estimator.

        Raises
        ------
        ValueError
            If no valid consensus set could be found. This occurs if
            `is_data_valid` and `is_model_valid` return False for all
            `max_trials` randomly chosen sub-samples.
        r1   )sample_weightcsrF)accept_sparseforce_all_finite)	ensure_2d)validate_separatelyNzR`min_samples` needs to be explicitly set when estimator is not a LinearRegression.r   r   zG`min_samples` may not be larger than number of samples: n_samples = %d.r7   c                 2    t        j                  | |z
        S rG   )r"   r!   y_truey_preds     r,   <lambda>z%RANSACRegressor.fit.<locals>.<lambda>|  s    rvvfvo7N r.   c                 \    t        j                  t        j                  | |z
        d      S )Nr   axis)r"   sumr!   rS   s     r,   rV   z%RANSACRegressor.fit.<locals>.<lambda>~  s!    rvvFF6F?+!8 r.   r8   c                     | |z
  dz  S )Nr    rS   s     r,   rV   z%RANSACRegressor.fit.<locals>.<lambda>  s    A7M r.   c                 <    t        j                  | |z
  dz  d      S )Nr   r   rX   )r"   rZ   rS   s     r,   rV   z%RANSACRegressor.fit.<locals>.<lambda>  s    rvvf_*8 r.   )r9   rL   z\%s does not support sample_weight. Samples weights are only used for the calibration itself.zRANSAC skipped more iterations than `max_skips` without finding a valid consensus set. Iterations were skipped because each randomly chosen sub-sample failed the passing criteria. See estimator attributes for diagnostics (n_skips*).zRANSAC could not find a valid consensus set. All `max_trials` iterations were skipped because each randomly chosen sub-sample failed the passing criteria. See estimator attributes for diagnostics (n_skips*).zRANSAC found a valid consensus set but exited early due to skipping more iterations than `max_skips`. See estimator attributes for diagnostics (n_skips*).)3r   dict_validate_datar   r:   r   r   r'   
isinstance
ValueErrorshaper"   r#   r;   medianr!   rC   ndimcallabler   r9   
set_paramsr   type__name__r   r   n_skips_no_inliers_n_skips_invalid_data_n_skips_invalid_model_arange	n_trials_r>   r?   r   r<   r1   r=   r3   rZ   r2   minr-   rB   r@   rA   warningswarnr   
estimator_inlier_mask_)!rH   XyrL   check_X_paramscheck_y_paramsr:   r'   r;   loss_functionr9   estimator_fit_has_sample_weightestimator_namen_inliers_best
score_bestinlier_mask_bestX_inlier_besty_inlier_bestinlier_best_idxs_subsetr&   sample_idxsr>   subset_idxsX_subsety_subsetrU   residuals_subsetinlier_mask_subsetn_inliers_subsetinlier_idxs_subsetX_inlier_subsety_inlier_subsetscore_subsets!                                    r,   r1   zRANSACRegressor.fit1  s   D 	'tU-P EEJ.""q~~&F # 
1 	 1%>>%dnn-I(*I#i)9: 1  ''!*q.K!!%A%''$"2"2QWWQZ"?@K"**K#.12= 
 ""*!#266!biil2B+C!D!%!8!899((vv{ N! YY/)vv{ M! dii  IIM)$*;*;<	  l ; +<I*W'i11$-L+, 
 $0BMffW
"&#$ %&"&'# GGAJ	ii	*__
nnz)NNaN ((,,---. 	
  5;\K ~H~H !!-d6H6H(7 **a/* $h1hmK6P  
 "".t7J7J8X8 ++q0+ &&q)F,Q7 "25G!G!vv&89  .0((A-( "--?!@ 23O 23O %???OLL  >1lZ6O .N%J1+M+M&8##"I{D<Q<QJ !4!44
doo8Ue nnz)j #((,,---. 	
 !/  !L  ((,,---. 	
 3
 '  MM-7MM+,CD   $,G  		s   V 	VVc                 x    t        |        | j                  |ddd      }| j                  j                  |      S )au  Predict using the estimated model.

        This is a wrapper for `estimator_.predict(X)`.

        Parameters
        ----------
        X : {array-like or sparse matrix} of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        y : array, shape = [n_samples] or [n_samples, n_targets]
            Returns predicted values.
        FTrO   rN   reset)r   r_   rq   r3   )rH   rs   s     r,   r3   zRANSACRegressor.predict5  sD     	"	   
 &&q))r.   c                 z    t        |        | j                  |ddd      }| j                  j                  ||      S )a  Return the score of the prediction.

        This is a wrapper for `estimator_.score(X, y)`.

        Parameters
        ----------
        X : (array-like or sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.

        Returns
        -------
        z : float
            Score of the prediction.
        FTr   )r   r_   rq   r2   )rH   rs   rt   s      r,   r2   zRANSACRegressor.scoreM  sF    $ 	"	   
 $$Q**r.   c                     dddiiS )N_xfail_checkscheck_sample_weights_invariancez8zero sample_weight is not equivalent to removing samplesr\   )rH   s    r,   
_more_tagszRANSACRegressor._more_tagsh  s    1N
 	
r.   rG   )rh   
__module____qualname____doc__r   r   r   r   r   re   r   r"   r   r   rD   r^   __annotations__rI   r
   r1   r3   r2   r   r\   r.   r,   r0   r0   H   s   gT !!<=tDXq$v6ZAf5

  (afEtL"D)#T*Xq$v6D266(#

 Xq$v6D266(#

 Xq$v6D266(#
  dD@A%dAq@A-?@(K'(3$D <  &&vv66: &+~	~@*0+6
r.   r0   )'ro   numbersr   r   numpyr"   baser   r   r   r	   r
   r   
exceptionsr   utilsr   r   utils._param_validationr   r   r   r   r   utils.metadata_routingr   r   utils.randomr   utils.validationr   r   r   _baser   spacingr    r-   r0   r\   r.   r,   <module>r      so   
  "   , ?  6 W W #2::a=<Dg
g
r.   