a
    ߙfbM1                     @   sd   d Z ddlZddlmZ g dZdddZdd	d
ZdddZdddZ	dddZ
G dd dZdS )zn
Methods for selecting the bin width of histograms

Ported from the astroML project: https://www.astroml.org/
    N   )bayesian_blocks)	histogramscott_bin_widthfreedman_bin_widthknuth_bin_widthcalculate_bin_edges
   c                 C   s  |dur$| | |d k| |d k@  } t |trt|  } |durLtd|dkr^t| }nX|dkrvt| d\}}n@|dkrt| d\}}n(|d	krt	| d\}}nt
d
| d|r|d |d kr|d |d< |d |d kr|d |d< n"t|dkrtj| |||d}|S )aX  
    Calculate histogram bin edges like ``numpy.histogram_bin_edges``.

    Parameters
    ----------

    a : array-like
        Input data. The bin edges are calculated over the flattened array.

    bins : int, list, or str, optional
        If ``bins`` is an int, it is the number of bins. If it is a list
        it is taken to be the bin edges. If it is a string, it must be one
        of  'blocks', 'knuth', 'scott' or 'freedman'. See
        `~astropy.stats.histogram` for a description of each method.

    range : tuple or None, optional
        The minimum and maximum range for the histogram.  If not specified,
        it will be (a.min(), a.max()). However, if bins is a list it is
        returned unmodified regardless of the range argument.

    weights : array-like, optional
        An array the same shape as ``a``. If given, the histogram accumulates
        the value of the weight corresponding to ``a`` instead of returning the
        count of values. This argument does not affect determination of bin
        edges, though they may be used in the future as new methods are added.
    Nr   r   z8weights are not yet supported for the enhanced histogramZblocksZknuthTZscottZfreedmanzunrecognized bin code: '')rangeweights)
isinstancestrnpasarrayZravelNotImplementedErrorr   r   r   r   
ValueErrorndimZhistogram_bin_edges)abinsr   r   Zda r   6lib/python3.9/site-packages/astropy/stats/histogram.pyr      s.    

r   c                 K   s*   t | |||d}tj| f|||d|S )a  Enhanced histogram function, providing adaptive binnings

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the ``bins`` argument allowing
    a string specified how bins are computed, the parameters are the same
    as ``numpy.histogram()``.

    Parameters
    ----------
    a : array-like
        array of data to be histogrammed

    bins : int, list, or str, optional
        If bins is a string, then it must be one of:

        - 'blocks' : use bayesian blocks for dynamic bin widths

        - 'knuth' : use Knuth's rule to determine bins

        - 'scott' : use Scott's rule to determine bins

        - 'freedman' : use the Freedman-Diaconis rule to determine bins

    range : tuple or None, optional
        the minimum and maximum range for the histogram.  If not specified,
        it will be (x.min(), x.max())

    weights : array-like, optional
        An array the same shape as ``a``. If given, the histogram accumulates
        the value of the weight corresponding to ``a`` instead of returning the
        count of values. This argument does not affect determination of bin
        edges.

    other keyword arguments are described in numpy.histogram().

    Returns
    -------
    hist : array
        The values of the histogram. See ``density`` and ``weights`` for a
        description of the possible semantics.
    bin_edges : array of dtype float
        Return the bin edges ``(length(hist)+1)``.

    See Also
    --------
    numpy.histogram
    )r   r   r   )r   r   r   )r   r   r   r   kwargsr   r   r   r   V   s    1r   Fc                 C   s   t | } | jdkrtd| j}t | }d| |d  }|rt |  |   | }td|}|  |t 	|d   }||fS |S dS )a&  Return the optimal histogram bin width using Scott's rule

    Scott's rule is a normal reference rule: it minimizes the integrated
    mean squared error in the bin approximation under the assumption that the
    data is approximately Gaussian.

    Parameters
    ----------
    data : array-like, ndim=1
        observed (one-dimensional) data
    return_bins : bool, optional
        if True, then return the bin edges

    Returns
    -------
    width : float
        optimal bin width using Scott's rule
    bins : ndarray
        bin edges: returned if ``return_bins`` is True

    Notes
    -----
    The optimal bin width is

    .. math::
        \Delta_b = \frac{3.5\sigma}{n^{1/3}}

    where :math:`\sigma` is the standard deviation of the data, and
    :math:`n` is the number of data points [1]_.

    References
    ----------
    .. [1] Scott, David W. (1979). "On optimal and data-based histograms".
       Biometricka 66 (3): 605-610

    See Also
    --------
    knuth_bin_width
    freedman_bin_width
    bayesian_blocks
    histogram
    r   data should be one-dimensionalg      @UUUUUU?N)
r   r   r   r   sizeZstdceilmaxminarange)datareturn_binsnZsigmadxNbinsr   r   r   r   r      s    +



r   c              
   C   s   t | } | jdkrtd| j}|dk r2tdt | ddg\}}d||  |d  }|r|  |   }}tdt || | }z||t 	|d   }	W nF ty }
 z.d	t
|
v rtd
|d n W Y d}
~
n
d}
~
0 0 ||	fS |S dS )a  Return the optimal histogram bin width using the Freedman-Diaconis rule

    The Freedman-Diaconis rule is a normal reference rule like Scott's
    rule, but uses rank-based statistics for results which are more robust
    to deviations from a normal distribution.

    Parameters
    ----------
    data : array-like, ndim=1
        observed (one-dimensional) data
    return_bins : bool, optional
        if True, then return the bin edges

    Returns
    -------
    width : float
        optimal bin width using the Freedman-Diaconis rule
    bins : ndarray
        bin edges: returned if ``return_bins`` is True

    Notes
    -----
    The optimal bin width is

    .. math::
        \Delta_b = \frac{2(q_{75} - q_{25})}{n^{1/3}}

    where :math:`q_{N}` is the :math:`N` percent quartile of the data, and
    :math:`n` is the number of data points [1]_.

    References
    ----------
    .. [1] D. Freedman & P. Diaconis (1981)
       "On the histogram as a density estimator: L2 theory".
       Probability Theory and Related Fields 57 (4): 453-476

    See Also
    --------
    knuth_bin_width
    scott_bin_width
    bayesian_blocks
    histogram
    r   r      z(data should have more than three entries   K      r   zMaximum allowed size exceededzThe inter-quartile range of the data is too small: failed to construct histogram with {} bins. Please use another bin method, such as bins="scott"N)r   r   r   r   r   Z
percentiler   r   r   r    r   format)r!   r"   r#   Zv25Zv75r$   ZdminZdmaxr%   r   er   r   r   r      s.    ,

r   Tc           
      C   sj   ddl m} t| }t| d\}}|j|t|| dd }||}|d |d  }	|rb|	|fS |	S dS )a/  Return the optimal histogram bin width using Knuth's rule.

    Knuth's rule is a fixed-width, Bayesian approach to determining
    the optimal bin width of a histogram.

    Parameters
    ----------
    data : array-like, ndim=1
        observed (one-dimensional) data
    return_bins : bool, optional
        if True, then return the bin edges
    quiet : bool, optional
        if True (default) then suppress stdout output from scipy.optimize

    Returns
    -------
    dx : float
        optimal bin width. Bins are measured starting at the first data point.
    bins : ndarray
        bin edges: returned if ``return_bins`` is True

    Notes
    -----
    The optimal number of bins is the value M which maximizes the function

    .. math::
        F(M|x,I) = n\log(M) + \log\Gamma(\frac{M}{2})
        - M\log\Gamma(\frac{1}{2})
        - \log\Gamma(\frac{2n+M}{2})
        + \sum_{k=1}^M \log\Gamma(n_k + \frac{1}{2})

    where :math:`\Gamma` is the Gamma function, :math:`n` is the number of
    data points, :math:`n_k` is the number of measurements in bin :math:`k`
    [1]_.

    References
    ----------
    .. [1] Knuth, K.H. "Optimal Data-Based Binning for Histograms".
       arXiv:0605197, 2006

    See Also
    --------
    freedman_bin_width
    scott_bin_width
    bayesian_blocks
    histogram
    r   )optimizeT)Zdispr   N)scipyr,   _KnuthFr   Zfminlenr   )
r!   r"   quietr,   ZknuthFZdx0Zbins0Mr   r$   r   r   r   r     s    1
r   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )r.   ag  Class which implements the function minimized by knuth_bin_width

    Parameters
    ----------
    data : array-like, one dimension
        data to be histogrammed

    Notes
    -----
    the function F is given by

    .. math::
        F(M|x,I) = n\log(M) + \log\Gamma(\frac{M}{2})
        - M\log\Gamma(\frac{1}{2})
        - \log\Gamma(\frac{2n+M}{2})
        + \sum_{k=1}^M \log\Gamma(n_k + \frac{1}{2})

    where :math:`\Gamma` is the Gamma function, :math:`n` is the number of
    data points, :math:`n_k` is the number of measurements in bin :math:`k`.

    See Also
    --------
    knuth_bin_width
    c                 C   sP   t j|dd| _| jjdkr$td| j  | jj| _ddlm	} |j
| _
d S )NT)copyr   zdata should be 1-dimensionalr   )special)r   Zarrayr!   r   r   sortr   r#   r-   r3   gammaln)selfr!   r3   r   r   r   __init__k  s    

z_KnuthF.__init__c                 C   s"   t | jd | jd t|d S )z+Return the bin edges given M number of binsr   r   r   )r   Zlinspacer!   intr6   r1   r   r   r   r   z  s    z_KnuthF.binsc                 C   s
   |  |S )N)evalr9   r   r   r   __call__~  s    z_KnuthF.__call__c                 C   s   t |}|dkrtjS | |}t| j|\}}| jt| | d|  || d  | | jd|   t	| |d   S )a   Evaluate the Knuth function

        Parameters
        ----------
        M : int
            Number of bins

        Returns
        -------
        F : float
            evaluation of the negative Knuth loglikelihood function:
            smaller values indicate a better fit.
        r   g      ?)
r8   r   infr   r   r!   r#   logr5   sum)r6   r1   r   Znkr   r   r   r:     s    
z_KnuthF.evalN)__name__
__module____qualname____doc__r7   r   r;   r:   r   r   r   r   r.   R  s
   r.   )r	   NN)r	   NN)F)F)FT)rB   Znumpyr    r   __all__r   r   r   r   r   r.   r   r   r   r   <module>   s   
F
6
=
J
?