# Copyright (C) 2003-2005 Peter J. Verveer # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # 3. The name of the author may not be used to endorse or promote # products derived from this software without specific prior # written permission. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy import numpy as np from . import _ni_support from . import _ni_label from . import _nd_image from . import _morphology __all__ = ['label', 'find_objects', 'labeled_comprehension', 'sum', 'mean', 'variance', 'standard_deviation', 'minimum', 'maximum', 'median', 'minimum_position', 'maximum_position', 'extrema', 'center_of_mass', 'histogram', 'watershed_ift', 'sum_labels', 'value_indices'] def label(input, structure=None, output=None): """ Label features in an array. Parameters ---------- input : array_like An array-like object to be labeled. Any non-zero values in `input` are counted as features and zero values are considered the background. structure : array_like, optional A structuring element that defines feature connections. `structure` must be centrosymmetric (see Notes). If no structuring element is provided, one is automatically generated with a squared connectivity equal to one. That is, for a 2-D `input` array, the default structuring element is:: [[0,1,0], [1,1,1], [0,1,0]] output : (None, data-type, array_like), optional If `output` is a data type, it specifies the type of the resulting labeled feature array. If `output` is an array-like object, then `output` will be updated with the labeled features from this function. This function can operate in-place, by passing output=input. Note that the output must be able to store the largest label, or this function will raise an Exception. Returns ------- label : ndarray or int An integer ndarray where each unique feature in `input` has a unique label in the returned array. num_features : int How many objects were found. If `output` is None, this function returns a tuple of (`labeled_array`, `num_features`). If `output` is a ndarray, then it will be updated with values in `labeled_array` and only `num_features` will be returned by this function. See Also -------- find_objects : generate a list of slices for the labeled features (or objects); useful for finding features' position or dimensions Notes ----- A centrosymmetric matrix is a matrix that is symmetric about the center. See [1]_ for more information. 
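A simple way to verify that a candidate 2-D structuring element ``s`` is
centrosymmetric (an illustrative check only, not part of this function's
API) is to compare it with its point reflection::

    np.all(s == s[::-1, ::-1])   # True if the 2-D ``s`` is centrosymmetric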
The `structure` matrix must be centrosymmetric to ensure two-way connections. For instance, if the `structure` matrix is not centrosymmetric and is defined as:: [[0,1,0], [1,1,0], [0,0,0]] and the `input` is:: [[1,2], [0,3]] then the structure matrix would indicate the entry 2 in the input is connected to 1, but 1 is not connected to 2. References ---------- .. [1] James R. Weaver, "Centrosymmetric (cross-symmetric) matrices, their basic properties, eigenvalues, and eigenvectors." The American Mathematical Monthly 92.10 (1985): 711-717. Examples -------- Create an image with some features, then label it using the default (cross-shaped) structuring element: >>> from scipy.ndimage import label, generate_binary_structure >>> import numpy as np >>> a = np.array([[0,0,1,1,0,0], ... [0,0,0,1,0,0], ... [1,1,0,0,1,0], ... [0,0,0,1,0,0]]) >>> labeled_array, num_features = label(a) Each of the 4 features are labeled with a different integer: >>> num_features 4 >>> labeled_array array([[0, 0, 1, 1, 0, 0], [0, 0, 0, 1, 0, 0], [2, 2, 0, 0, 3, 0], [0, 0, 0, 4, 0, 0]]) Generate a structuring element that will consider features connected even if they touch diagonally: >>> s = generate_binary_structure(2,2) or, >>> s = [[1,1,1], ... [1,1,1], ... [1,1,1]] Label the image using the new structuring element: >>> labeled_array, num_features = label(a, structure=s) Show the 2 labeled features (note that features 1, 3, and 4 from above are now considered a single feature): >>> num_features 2 >>> labeled_array array([[0, 0, 1, 1, 0, 0], [0, 0, 0, 1, 0, 0], [2, 2, 0, 0, 1, 0], [0, 0, 0, 1, 0, 0]]) """ input = numpy.asarray(input) if numpy.iscomplexobj(input): raise TypeError('Complex type not supported') if structure is None: structure = _morphology.generate_binary_structure(input.ndim, 1) structure = numpy.asarray(structure, dtype=bool) if structure.ndim != input.ndim: raise RuntimeError('structure and input must have equal rank') for ii in structure.shape: if ii != 3: raise ValueError('structure dimensions must be equal to 3') # Use 32 bits if it's large enough for this image. # _ni_label.label() needs two entries for background and # foreground tracking need_64bits = input.size >= (2**31 - 2) if isinstance(output, numpy.ndarray): if output.shape != input.shape: raise ValueError("output shape not correct") caller_provided_output = True else: caller_provided_output = False if output is None: output = np.empty(input.shape, np.intp if need_64bits else np.int32) else: output = np.empty(input.shape, output) # handle scalars, 0-D arrays if input.ndim == 0 or input.size == 0: if input.ndim == 0: # scalar maxlabel = 1 if (input != 0) else 0 output[...] = maxlabel else: # 0-D maxlabel = 0 if caller_provided_output: return maxlabel else: return output, maxlabel try: max_label = _ni_label._label(input, structure, output) except _ni_label.NeedMoreBits as e: # Make another attempt with enough bits, then try to cast to the # new type. tmp_output = np.empty(input.shape, np.intp if need_64bits else np.int32) max_label = _ni_label._label(input, structure, tmp_output) output[...] = tmp_output[...] if not np.all(output == tmp_output): # refuse to return bad results raise RuntimeError( "insufficient bit-depth in requested output type" ) from e if caller_provided_output: # result was written in-place return max_label else: return output, max_label def find_objects(input, max_label=0): """ Find objects in a labeled array. Parameters ---------- input : ndarray of ints Array containing objects defined by different labels. 
Labels with value 0 are ignored. max_label : int, optional Maximum label to be searched for in `input`. If max_label is not given, the positions of all objects are returned. Returns ------- object_slices : list of tuples A list of tuples, with each tuple containing N slices (with N the dimension of the input array). Slices correspond to the minimal parallelepiped that contains the object. If a number is missing, None is returned instead of a slice. The label ``l`` corresponds to the index ``l-1`` in the returned list. See Also -------- label, center_of_mass Notes ----- This function is very useful for isolating a volume of interest inside a 3-D array, that cannot be "seen through". Examples -------- >>> from scipy import ndimage >>> import numpy as np >>> a = np.zeros((6,6), dtype=int) >>> a[2:4, 2:4] = 1 >>> a[4, 4] = 1 >>> a[:2, :3] = 2 >>> a[0, 5] = 3 >>> a array([[2, 2, 2, 0, 0, 3], [2, 2, 2, 0, 0, 0], [0, 0, 1, 1, 0, 0], [0, 0, 1, 1, 0, 0], [0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) >>> ndimage.find_objects(a) [(slice(2, 5, None), slice(2, 5, None)), (slice(0, 2, None), slice(0, 3, None)), (slice(0, 1, None), slice(5, 6, None))] >>> ndimage.find_objects(a, max_label=2) [(slice(2, 5, None), slice(2, 5, None)), (slice(0, 2, None), slice(0, 3, None))] >>> ndimage.find_objects(a == 1, max_label=2) [(slice(2, 5, None), slice(2, 5, None)), None] >>> loc = ndimage.find_objects(a)[0] >>> a[loc] array([[1, 1, 0], [1, 1, 0], [0, 0, 1]]) """ input = numpy.asarray(input) if numpy.iscomplexobj(input): raise TypeError('Complex type not supported') if max_label < 1: max_label = input.max() return _nd_image.find_objects(input, max_label) def value_indices(arr, *, ignore_value=None): """ Find indices of each distinct value in given array. Parameters ---------- arr : ndarray of ints Array containing integer values. ignore_value : int, optional This value will be ignored in searching the `arr` array. If not given, all values found will be included in output. Default is None. Returns ------- indices : dictionary A Python dictionary of array indices for each distinct value. The dictionary is keyed by the distinct values, the entries are array index tuples covering all occurrences of the value within the array. This dictionary can occupy significant memory, usually several times the size of the input array. See Also -------- label, maximum, median, minimum_position, extrema, sum, mean, variance, standard_deviation, numpy.where, numpy.unique Notes ----- For a small array with few distinct values, one might use `numpy.unique()` to find all possible values, and ``(arr == val)`` to locate each value within that array. However, for large arrays, with many distinct values, this can become extremely inefficient, as locating each value would require a new search through the entire array. Using this function, there is essentially one search, with the indices saved for all distinct values. This is useful when matching a categorical image (e.g. a segmentation or classification) to an associated image of other data, allowing any per-class statistic(s) to then be calculated. Provides a more flexible alternative to functions like ``scipy.ndimage.mean()`` and ``scipy.ndimage.variance()``. Some other closely related functionality, with different strengths and weaknesses, can also be found in ``scipy.stats.binned_statistic()`` and the `scikit-image `_ function ``skimage.measure.regionprops()``. 
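For instance, a per-class mean over a second array can be computed directly
from the returned indices (an illustrative sketch; ``data`` here is a
hypothetical array with the same shape as ``arr``)::

    val_indices = ndimage.value_indices(arr)
    class_means = {val: data[idx].mean() for val, idx in val_indices.items()}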
Note for IDL users: this provides functionality equivalent to IDL's REVERSE_INDICES option (as per the IDL documentation for the `HISTOGRAM `_ function). .. versionadded:: 1.10.0 Examples -------- >>> import numpy as np >>> from scipy import ndimage >>> a = np.zeros((6, 6), dtype=int) >>> a[2:4, 2:4] = 1 >>> a[4, 4] = 1 >>> a[:2, :3] = 2 >>> a[0, 5] = 3 >>> a array([[2, 2, 2, 0, 0, 3], [2, 2, 2, 0, 0, 0], [0, 0, 1, 1, 0, 0], [0, 0, 1, 1, 0, 0], [0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) >>> val_indices = ndimage.value_indices(a) The dictionary `val_indices` will have an entry for each distinct value in the input array. >>> val_indices.keys() dict_keys([0, 1, 2, 3]) The entry for each value is an index tuple, locating the elements with that value. >>> ndx1 = val_indices[1] >>> ndx1 (array([2, 2, 3, 3, 4]), array([2, 3, 2, 3, 4])) This can be used to index into the original array, or any other array with the same shape. >>> a[ndx1] array([1, 1, 1, 1, 1]) If the zeros were to be ignored, then the resulting dictionary would no longer have an entry for zero. >>> val_indices = ndimage.value_indices(a, ignore_value=0) >>> val_indices.keys() dict_keys([1, 2, 3]) """ # Cope with ignore_value being None, without too much extra complexity # in the C code. If not None, the value is passed in as a numpy array # with the same dtype as arr. ignore_value_arr = numpy.zeros((1,), dtype=arr.dtype) ignoreIsNone = (ignore_value is None) if not ignoreIsNone: ignore_value_arr[0] = ignore_value_arr.dtype.type(ignore_value) val_indices = _nd_image.value_indices(arr, ignoreIsNone, ignore_value_arr) return val_indices def labeled_comprehension(input, labels, index, func, out_dtype, default, pass_positions=False): """ Roughly equivalent to [func(input[labels == i]) for i in index]. Sequentially applies an arbitrary function (that works on array_like input) to subsets of an N-D image array specified by `labels` and `index`. The option exists to provide the function with positional parameters as the second argument. Parameters ---------- input : array_like Data from which to select `labels` to process. labels : array_like or None Labels to objects in `input`. If not None, array must be same shape as `input`. If None, `func` is applied to raveled `input`. index : int, sequence of ints or None Subset of `labels` to which to apply `func`. If a scalar, a single value is returned. If None, `func` is applied to all non-zero values of `labels`. func : callable Python function to apply to `labels` from `input`. out_dtype : dtype Dtype to use for `result`. default : int, float or None Default return value when a element of `index` does not exist in `labels`. pass_positions : bool, optional If True, pass linear indices to `func` as a second argument. Default is False. Returns ------- result : ndarray Result of applying `func` to each of `labels` to `input` in `index`. Examples -------- >>> import numpy as np >>> a = np.array([[1, 2, 0, 0], ... [5, 3, 0, 4], ... [0, 0, 0, 7], ... [9, 3, 0, 0]]) >>> from scipy import ndimage >>> lbl, nlbl = ndimage.label(a) >>> lbls = np.arange(1, nlbl+1) >>> ndimage.labeled_comprehension(a, lbl, lbls, np.mean, float, 0) array([ 2.75, 5.5 , 6. ]) Falling back to `default`: >>> lbls = np.arange(1, nlbl+2) >>> ndimage.labeled_comprehension(a, lbl, lbls, np.mean, float, -1) array([ 2.75, 5.5 , 6. , -1. ]) Passing positions: >>> def fn(val, pos): ... print("fn says: %s : %s" % (val, pos)) ... return (val.sum()) if (pos.sum() % 2 == 0) else (-val.sum()) ... 
>>> ndimage.labeled_comprehension(a, lbl, lbls, fn, float, 0, True) fn says: [1 2 5 3] : [0 1 4 5] fn says: [4 7] : [ 7 11] fn says: [9 3] : [12 13] array([ 11., 11., -12., 0.]) """ as_scalar = numpy.isscalar(index) input = numpy.asarray(input) if pass_positions: positions = numpy.arange(input.size).reshape(input.shape) if labels is None: if index is not None: raise ValueError("index without defined labels") if not pass_positions: return func(input.ravel()) else: return func(input.ravel(), positions.ravel()) try: input, labels = numpy.broadcast_arrays(input, labels) except ValueError as e: raise ValueError("input and labels must have the same shape " "(excepting dimensions with width 1)") from e if index is None: if not pass_positions: return func(input[labels > 0]) else: return func(input[labels > 0], positions[labels > 0]) index = numpy.atleast_1d(index) if np.any(index.astype(labels.dtype).astype(index.dtype) != index): raise ValueError(f"Cannot convert index values from <{index.dtype}> to " f"<{labels.dtype}> (labels' type) without loss of precision") index = index.astype(labels.dtype) # optimization: find min/max in index, # and select those parts of labels, input, and positions lo = index.min() hi = index.max() mask = (labels >= lo) & (labels <= hi) # this also ravels the arrays labels = labels[mask] input = input[mask] if pass_positions: positions = positions[mask] # sort everything by labels label_order = labels.argsort() labels = labels[label_order] input = input[label_order] if pass_positions: positions = positions[label_order] index_order = index.argsort() sorted_index = index[index_order] def do_map(inputs, output): """labels must be sorted""" nidx = sorted_index.size # Find boundaries for each stretch of constant labels # This could be faster, but we already paid N log N to sort labels. lo = numpy.searchsorted(labels, sorted_index, side='left') hi = numpy.searchsorted(labels, sorted_index, side='right') for i, l, h in zip(range(nidx), lo, hi): if l == h: continue output[i] = func(*[inp[l:h] for inp in inputs]) temp = numpy.empty(index.shape, out_dtype) temp[:] = default if not pass_positions: do_map([input], temp) else: do_map([input, positions], temp) output = numpy.zeros(index.shape, out_dtype) output[index_order] = temp if as_scalar: output = output[0] return output def _safely_castable_to_int(dt): """Test whether the NumPy data type `dt` can be safely cast to an int.""" int_size = np.dtype(int).itemsize safe = ((np.issubdtype(dt, np.signedinteger) and dt.itemsize <= int_size) or (np.issubdtype(dt, np.unsignedinteger) and dt.itemsize < int_size)) return safe def _stats(input, labels=None, index=None, centered=False): """Count, sum, and optionally compute (sum - centre)^2 of input by label Parameters ---------- input : array_like, N-D The input data to be analyzed. labels : array_like (N-D), optional The labels of the data in `input`. This array must be broadcast compatible with `input`; typically, it is the same shape as `input`. If `labels` is None, all nonzero values in `input` are treated as the single labeled group. index : label or sequence of labels, optional These are the labels of the groups for which the stats are computed. If `index` is None, the stats are computed for the single group where `labels` is greater than 0. centered : bool, optional If True, the centered sum of squares for each labeled group is also returned. Default is False. Returns ------- counts : int or ndarray of ints The number of elements in each labeled group. 
sums : scalar or ndarray of scalars The sums of the values in each labeled group. sums_c : scalar or ndarray of scalars, optional The sums of mean-centered squares of the values in each labeled group. This is only returned if `centered` is True. """ def single_group(vals): if centered: vals_c = vals - vals.mean() return vals.size, vals.sum(), (vals_c * vals_c.conjugate()).sum() else: return vals.size, vals.sum() if labels is None: return single_group(input) # ensure input and labels match sizes input, labels = numpy.broadcast_arrays(input, labels) if index is None: return single_group(input[labels > 0]) if numpy.isscalar(index): return single_group(input[labels == index]) def _sum_centered(labels): # `labels` is expected to be an ndarray with the same shape as `input`. # It must contain the label indices (which are not necessarily the labels # themselves). means = sums / counts centered_input = input - means[labels] # bincount expects 1-D inputs, so we ravel the arguments. bc = numpy.bincount(labels.ravel(), weights=(centered_input * centered_input.conjugate()).ravel()) return bc # Remap labels to unique integers if necessary, or if the largest # label is larger than the number of values. if (not _safely_castable_to_int(labels.dtype) or labels.min() < 0 or labels.max() > labels.size): # Use numpy.unique to generate the label indices. `new_labels` will # be 1-D, but it should be interpreted as the flattened N-D array of # label indices. unique_labels, new_labels = numpy.unique(labels, return_inverse=True) new_labels = np.reshape(new_labels, (-1,)) # flatten, since it may be >1-D counts = numpy.bincount(new_labels) sums = numpy.bincount(new_labels, weights=input.ravel()) if centered: # Compute the sum of the mean-centered squares. # We must reshape new_labels to the N-D shape of `input` before # passing it _sum_centered. sums_c = _sum_centered(new_labels.reshape(labels.shape)) idxs = numpy.searchsorted(unique_labels, index) # make all of idxs valid idxs[idxs >= unique_labels.size] = 0 found = (unique_labels[idxs] == index) else: # labels are an integer type allowed by bincount, and there aren't too # many, so call bincount directly. counts = numpy.bincount(labels.ravel()) sums = numpy.bincount(labels.ravel(), weights=input.ravel()) if centered: sums_c = _sum_centered(labels) # make sure all index values are valid idxs = numpy.asanyarray(index, numpy.int_).copy() found = (idxs >= 0) & (idxs < counts.size) idxs[~found] = 0 counts = counts[idxs] counts[~found] = 0 sums = sums[idxs] sums[~found] = 0 if not centered: return (counts, sums) else: sums_c = sums_c[idxs] sums_c[~found] = 0 return (counts, sums, sums_c) def sum(input, labels=None, index=None): """ Calculate the sum of the values of the array. Notes ----- This is an alias for `ndimage.sum_labels` kept for backwards compatibility reasons, for new code please prefer `sum_labels`. See the `sum_labels` docstring for more details. """ return sum_labels(input, labels, index) def sum_labels(input, labels=None, index=None): """ Calculate the sum of the values of the array. Parameters ---------- input : array_like Values of `input` inside the regions defined by `labels` are summed together. labels : array_like of ints, optional Assign labels to the values of the array. Has to have the same shape as `input`. index : array_like, optional A single label number or a sequence of label numbers of the objects to be measured. 
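For example, ``index=[1, 2]`` yields one sum per listed label, while a scalar ``index`` yields a single value.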
Returns ------- sum : ndarray or scalar An array of the sums of values of `input` inside the regions defined by `labels` with the same shape as `index`. If 'index' is None or scalar, a scalar is returned. See Also -------- mean, median Examples -------- >>> from scipy import ndimage >>> input = [0,1,2,3] >>> labels = [1,1,2,2] >>> ndimage.sum_labels(input, labels, index=[1,2]) [1.0, 5.0] >>> ndimage.sum_labels(input, labels, index=1) 1 >>> ndimage.sum_labels(input, labels) 6 """ count, sum = _stats(input, labels, index) return sum def mean(input, labels=None, index=None): """ Calculate the mean of the values of an array at labels. Parameters ---------- input : array_like Array on which to compute the mean of elements over distinct regions. labels : array_like, optional Array of labels of same shape, or broadcastable to the same shape as `input`. All elements sharing the same label form one region over which the mean of the elements is computed. index : int or sequence of ints, optional Labels of the objects over which the mean is to be computed. Default is None, in which case the mean for all values where label is greater than 0 is calculated. Returns ------- out : list Sequence of same length as `index`, with the mean of the different regions labeled by the labels in `index`. See Also -------- variance, standard_deviation, minimum, maximum, sum, label Examples -------- >>> from scipy import ndimage >>> import numpy as np >>> a = np.arange(25).reshape((5,5)) >>> labels = np.zeros_like(a) >>> labels[3:5,3:5] = 1 >>> index = np.unique(labels) >>> labels array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 1, 1], [0, 0, 0, 1, 1]]) >>> index array([0, 1]) >>> ndimage.mean(a, labels=labels, index=index) [10.285714285714286, 21.0] """ count, sum = _stats(input, labels, index) return sum / numpy.asanyarray(count).astype(numpy.float64) def variance(input, labels=None, index=None): """ Calculate the variance of the values of an N-D image array, optionally at specified sub-regions. Parameters ---------- input : array_like Nd-image data to process. labels : array_like, optional Labels defining sub-regions in `input`. If not None, must be same shape as `input`. index : int or sequence of ints, optional `labels` to include in output. If None (default), all values where `labels` is non-zero are used. Returns ------- variance : float or ndarray Values of variance, for each sub-region if `labels` and `index` are specified. See Also -------- label, standard_deviation, maximum, minimum, extrema Examples -------- >>> import numpy as np >>> a = np.array([[1, 2, 0, 0], ... [5, 3, 0, 4], ... [0, 0, 0, 7], ... [9, 3, 0, 0]]) >>> from scipy import ndimage >>> ndimage.variance(a) 7.609375 Features to process can be specified using `labels` and `index`: >>> lbl, nlbl = ndimage.label(a) >>> ndimage.variance(a, lbl, index=np.arange(1, nlbl+1)) array([ 2.1875, 2.25 , 9. ]) If no index is given, all non-zero `labels` are processed: >>> ndimage.variance(a, lbl) 6.1875 """ count, sum, sum_c_sq = _stats(input, labels, index, centered=True) return sum_c_sq / np.asanyarray(count).astype(float) def standard_deviation(input, labels=None, index=None): """ Calculate the standard deviation of the values of an N-D image array, optionally at specified sub-regions. Parameters ---------- input : array_like N-D image data to process. labels : array_like, optional Labels to identify sub-regions in `input`. If not None, must be same shape as `input`. 
index : int or sequence of ints, optional `labels` to include in output. If None (default), all values where `labels` is non-zero are used. Returns ------- standard_deviation : float or ndarray Values of standard deviation, for each sub-region if `labels` and `index` are specified. See Also -------- label, variance, maximum, minimum, extrema Examples -------- >>> import numpy as np >>> a = np.array([[1, 2, 0, 0], ... [5, 3, 0, 4], ... [0, 0, 0, 7], ... [9, 3, 0, 0]]) >>> from scipy import ndimage >>> ndimage.standard_deviation(a) 2.7585095613392387 Features to process can be specified using `labels` and `index`: >>> lbl, nlbl = ndimage.label(a) >>> ndimage.standard_deviation(a, lbl, index=np.arange(1, nlbl+1)) array([ 1.479, 1.5 , 3. ]) If no index is given, non-zero `labels` are processed: >>> ndimage.standard_deviation(a, lbl) 2.4874685927665499 """ return numpy.sqrt(variance(input, labels, index)) def _select(input, labels=None, index=None, find_min=False, find_max=False, find_min_positions=False, find_max_positions=False, find_median=False): """Returns min, max, or both, plus their positions (if requested), and median.""" input = numpy.asanyarray(input) find_positions = find_min_positions or find_max_positions positions = None if find_positions: positions = numpy.arange(input.size).reshape(input.shape) def single_group(vals, positions): result = [] if find_min: result += [vals.min()] if find_min_positions: result += [positions[vals == vals.min()][0]] if find_max: result += [vals.max()] if find_max_positions: result += [positions[vals == vals.max()][0]] if find_median: result += [numpy.median(vals)] return result if labels is None: return single_group(input, positions) # ensure input and labels match sizes input, labels = numpy.broadcast_arrays(input, labels) if index is None: mask = (labels > 0) masked_positions = None if find_positions: masked_positions = positions[mask] return single_group(input[mask], masked_positions) if numpy.isscalar(index): mask = (labels == index) masked_positions = None if find_positions: masked_positions = positions[mask] return single_group(input[mask], masked_positions) # remap labels to unique integers if necessary, or if the largest # label is larger than the number of values. 
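    # The fast path below uses label values directly as indices into
    # per-label work arrays of size ``labels.max() + 2``, which is only safe
    # when the labels are non-negative integers no larger than the array
    # size; otherwise numpy.unique remaps them to the contiguous range
    # 0..n-1 first.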
if (not _safely_castable_to_int(labels.dtype) or labels.min() < 0 or labels.max() > labels.size): # remap labels, and indexes unique_labels, labels = numpy.unique(labels, return_inverse=True) idxs = numpy.searchsorted(unique_labels, index) # make all of idxs valid idxs[idxs >= unique_labels.size] = 0 found = (unique_labels[idxs] == index) else: # labels are an integer type, and there aren't too many idxs = numpy.asanyarray(index, numpy.int_).copy() found = (idxs >= 0) & (idxs <= labels.max()) idxs[~ found] = labels.max() + 1 if find_median: order = numpy.lexsort((input.ravel(), labels.ravel())) else: order = input.ravel().argsort() input = input.ravel()[order] labels = labels.ravel()[order] if find_positions: positions = positions.ravel()[order] result = [] if find_min: mins = numpy.zeros(labels.max() + 2, input.dtype) mins[labels[::-1]] = input[::-1] result += [mins[idxs]] if find_min_positions: minpos = numpy.zeros(labels.max() + 2, int) minpos[labels[::-1]] = positions[::-1] result += [minpos[idxs]] if find_max: maxs = numpy.zeros(labels.max() + 2, input.dtype) maxs[labels] = input result += [maxs[idxs]] if find_max_positions: maxpos = numpy.zeros(labels.max() + 2, int) maxpos[labels] = positions result += [maxpos[idxs]] if find_median: locs = numpy.arange(len(labels)) lo = numpy.zeros(labels.max() + 2, numpy.int_) lo[labels[::-1]] = locs[::-1] hi = numpy.zeros(labels.max() + 2, numpy.int_) hi[labels] = locs lo = lo[idxs] hi = hi[idxs] # lo is an index to the lowest value in input for each label, # hi is an index to the largest value. # move them to be either the same ((hi - lo) % 2 == 0) or next # to each other ((hi - lo) % 2 == 1), then average. step = (hi - lo) // 2 lo += step hi -= step if (np.issubdtype(input.dtype, np.integer) or np.issubdtype(input.dtype, np.bool_)): # avoid integer overflow or boolean addition (gh-12836) result += [(input[lo].astype('d') + input[hi].astype('d')) / 2.0] else: result += [(input[lo] + input[hi]) / 2.0] return result def minimum(input, labels=None, index=None): """ Calculate the minimum of the values of an array over labeled regions. Parameters ---------- input : array_like Array_like of values. For each region specified by `labels`, the minimal values of `input` over the region is computed. labels : array_like, optional An array_like of integers marking different regions over which the minimum value of `input` is to be computed. `labels` must have the same shape as `input`. If `labels` is not specified, the minimum over the whole array is returned. index : array_like, optional A list of region labels that are taken into account for computing the minima. If index is None, the minimum over all elements where `labels` is non-zero is returned. Returns ------- minimum : float or list of floats List of minima of `input` over the regions determined by `labels` and whose index is in `index`. If `index` or `labels` are not specified, a float is returned: the minimal value of `input` if `labels` is None, and the minimal value of elements where `labels` is greater than zero if `index` is None. See Also -------- label, maximum, median, minimum_position, extrema, sum, mean, variance, standard_deviation Notes ----- The function returns a Python list and not a NumPy array, use `np.array` to convert the list to an array. Examples -------- >>> from scipy import ndimage >>> import numpy as np >>> a = np.array([[1, 2, 0, 0], ... [5, 3, 0, 4], ... [0, 0, 0, 7], ... 
[9, 3, 0, 0]]) >>> labels, labels_nb = ndimage.label(a) >>> labels array([[1, 1, 0, 0], [1, 1, 0, 2], [0, 0, 0, 2], [3, 3, 0, 0]]) >>> ndimage.minimum(a, labels=labels, index=np.arange(1, labels_nb + 1)) [1.0, 4.0, 3.0] >>> ndimage.minimum(a) 0.0 >>> ndimage.minimum(a, labels=labels) 1.0 """ return _select(input, labels, index, find_min=True)[0] def maximum(input, labels=None, index=None): """ Calculate the maximum of the values of an array over labeled regions. Parameters ---------- input : array_like Array_like of values. For each region specified by `labels`, the maximal values of `input` over the region is computed. labels : array_like, optional An array of integers marking different regions over which the maximum value of `input` is to be computed. `labels` must have the same shape as `input`. If `labels` is not specified, the maximum over the whole array is returned. index : array_like, optional A list of region labels that are taken into account for computing the maxima. If index is None, the maximum over all elements where `labels` is non-zero is returned. Returns ------- output : float or list of floats List of maxima of `input` over the regions determined by `labels` and whose index is in `index`. If `index` or `labels` are not specified, a float is returned: the maximal value of `input` if `labels` is None, and the maximal value of elements where `labels` is greater than zero if `index` is None. See Also -------- label, minimum, median, maximum_position, extrema, sum, mean, variance, standard_deviation Notes ----- The function returns a Python list and not a NumPy array, use `np.array` to convert the list to an array. Examples -------- >>> import numpy as np >>> a = np.arange(16).reshape((4,4)) >>> a array([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11], [12, 13, 14, 15]]) >>> labels = np.zeros_like(a) >>> labels[:2,:2] = 1 >>> labels[2:, 1:3] = 2 >>> labels array([[1, 1, 0, 0], [1, 1, 0, 0], [0, 2, 2, 0], [0, 2, 2, 0]]) >>> from scipy import ndimage >>> ndimage.maximum(a) 15.0 >>> ndimage.maximum(a, labels=labels, index=[1,2]) [5.0, 14.0] >>> ndimage.maximum(a, labels=labels) 14.0 >>> b = np.array([[1, 2, 0, 0], ... [5, 3, 0, 4], ... [0, 0, 0, 7], ... [9, 3, 0, 0]]) >>> labels, labels_nb = ndimage.label(b) >>> labels array([[1, 1, 0, 0], [1, 1, 0, 2], [0, 0, 0, 2], [3, 3, 0, 0]]) >>> ndimage.maximum(b, labels=labels, index=np.arange(1, labels_nb + 1)) [5.0, 7.0, 9.0] """ return _select(input, labels, index, find_max=True)[0] def median(input, labels=None, index=None): """ Calculate the median of the values of an array over labeled regions. Parameters ---------- input : array_like Array_like of values. For each region specified by `labels`, the median value of `input` over the region is computed. labels : array_like, optional An array_like of integers marking different regions over which the median value of `input` is to be computed. `labels` must have the same shape as `input`. If `labels` is not specified, the median over the whole array is returned. index : array_like, optional A list of region labels that are taken into account for computing the medians. If index is None, the median over all elements where `labels` is non-zero is returned. Returns ------- median : float or list of floats List of medians of `input` over the regions determined by `labels` and whose index is in `index`. 
If `index` or `labels` are not specified, a float is returned: the median value of `input` if `labels` is None, and the median value of elements where `labels` is greater than zero if `index` is None. See Also -------- label, minimum, maximum, extrema, sum, mean, variance, standard_deviation Notes ----- The function returns a Python list and not a NumPy array, use `np.array` to convert the list to an array. Examples -------- >>> from scipy import ndimage >>> import numpy as np >>> a = np.array([[1, 2, 0, 1], ... [5, 3, 0, 4], ... [0, 0, 0, 7], ... [9, 3, 0, 0]]) >>> labels, labels_nb = ndimage.label(a) >>> labels array([[1, 1, 0, 2], [1, 1, 0, 2], [0, 0, 0, 2], [3, 3, 0, 0]]) >>> ndimage.median(a, labels=labels, index=np.arange(1, labels_nb + 1)) [2.5, 4.0, 6.0] >>> ndimage.median(a) 1.0 >>> ndimage.median(a, labels=labels) 3.0 """ return _select(input, labels, index, find_median=True)[0] def minimum_position(input, labels=None, index=None): """ Find the positions of the minimums of the values of an array at labels. Parameters ---------- input : array_like Array_like of values. labels : array_like, optional An array of integers marking different regions over which the position of the minimum value of `input` is to be computed. `labels` must have the same shape as `input`. If `labels` is not specified, the location of the first minimum over the whole array is returned. The `labels` argument only works when `index` is specified. index : array_like, optional A list of region labels that are taken into account for finding the location of the minima. If `index` is None, the ``first`` minimum over all elements where `labels` is non-zero is returned. The `index` argument only works when `labels` is specified. Returns ------- output : list of tuples of ints Tuple of ints or list of tuples of ints that specify the location of minima of `input` over the regions determined by `labels` and whose index is in `index`. If `index` or `labels` are not specified, a tuple of ints is returned specifying the location of the first minimal value of `input`. See Also -------- label, minimum, median, maximum_position, extrema, sum, mean, variance, standard_deviation Examples -------- >>> import numpy as np >>> a = np.array([[10, 20, 30], ... [40, 80, 100], ... [1, 100, 200]]) >>> b = np.array([[1, 2, 0, 1], ... [5, 3, 0, 4], ... [0, 0, 0, 7], ... [9, 3, 0, 0]]) >>> from scipy import ndimage >>> ndimage.minimum_position(a) (2, 0) >>> ndimage.minimum_position(b) (0, 2) Features to process can be specified using `labels` and `index`: >>> label, pos = ndimage.label(a) >>> ndimage.minimum_position(a, label, index=np.arange(1, pos+1)) [(2, 0)] >>> label, pos = ndimage.label(b) >>> ndimage.minimum_position(b, label, index=np.arange(1, pos+1)) [(0, 0), (0, 3), (3, 1)] """ dims = numpy.array(numpy.asarray(input).shape) # see numpy.unravel_index to understand this line. dim_prod = numpy.cumprod([1] + list(dims[:0:-1]))[::-1] result = _select(input, labels, index, find_min_positions=True)[0] if numpy.isscalar(result): return tuple((result // dim_prod) % dims) return [tuple(v) for v in (result.reshape(-1, 1) // dim_prod) % dims] def maximum_position(input, labels=None, index=None): """ Find the positions of the maximums of the values of an array at labels. For each region specified by `labels`, the position of the maximum value of `input` within the region is returned. Parameters ---------- input : array_like Array_like of values. 
labels : array_like, optional An array of integers marking different regions over which the position of the maximum value of `input` is to be computed. `labels` must have the same shape as `input`. If `labels` is not specified, the location of the first maximum over the whole array is returned. The `labels` argument only works when `index` is specified. index : array_like, optional A list of region labels that are taken into account for finding the location of the maxima. If `index` is None, the first maximum over all elements where `labels` is non-zero is returned. The `index` argument only works when `labels` is specified. Returns ------- output : list of tuples of ints List of tuples of ints that specify the location of maxima of `input` over the regions determined by `labels` and whose index is in `index`. If `index` or `labels` are not specified, a tuple of ints is returned specifying the location of the ``first`` maximal value of `input`. See Also -------- label, minimum, median, maximum_position, extrema, sum, mean, variance, standard_deviation Examples -------- >>> from scipy import ndimage >>> import numpy as np >>> a = np.array([[1, 2, 0, 0], ... [5, 3, 0, 4], ... [0, 0, 0, 7], ... [9, 3, 0, 0]]) >>> ndimage.maximum_position(a) (3, 0) Features to process can be specified using `labels` and `index`: >>> lbl = np.array([[0, 1, 2, 3], ... [0, 1, 2, 3], ... [0, 1, 2, 3], ... [0, 1, 2, 3]]) >>> ndimage.maximum_position(a, lbl, 1) (1, 1) If no index is given, non-zero `labels` are processed: >>> ndimage.maximum_position(a, lbl) (2, 3) If there are no maxima, the position of the first element is returned: >>> ndimage.maximum_position(a, lbl, 2) (0, 2) """ dims = numpy.array(numpy.asarray(input).shape) # see numpy.unravel_index to understand this line. dim_prod = numpy.cumprod([1] + list(dims[:0:-1]))[::-1] result = _select(input, labels, index, find_max_positions=True)[0] if numpy.isscalar(result): return tuple((result // dim_prod) % dims) return [tuple(v) for v in (result.reshape(-1, 1) // dim_prod) % dims] def extrema(input, labels=None, index=None): """ Calculate the minimums and maximums of the values of an array at labels, along with their positions. Parameters ---------- input : ndarray N-D image data to process. labels : ndarray, optional Labels of features in input. If not None, must be same shape as `input`. index : int or sequence of ints, optional Labels to include in output. If None (default), all values where non-zero `labels` are used. Returns ------- minimums, maximums : int or ndarray Values of minimums and maximums in each feature. min_positions, max_positions : tuple or list of tuples Each tuple gives the N-D coordinates of the corresponding minimum or maximum. See Also -------- maximum, minimum, maximum_position, minimum_position, center_of_mass Examples -------- >>> import numpy as np >>> a = np.array([[1, 2, 0, 0], ... [5, 3, 0, 4], ... [0, 0, 0, 7], ... [9, 3, 0, 0]]) >>> from scipy import ndimage >>> ndimage.extrema(a) (0, 9, (0, 2), (3, 0)) Features to process can be specified using `labels` and `index`: >>> lbl, nlbl = ndimage.label(a) >>> ndimage.extrema(a, lbl, index=np.arange(1, nlbl+1)) (array([1, 4, 3]), array([5, 7, 9]), [(0, 0), (1, 3), (3, 1)], [(1, 0), (2, 3), (3, 0)]) If no index is given, non-zero `labels` are processed: >>> ndimage.extrema(a, lbl) (1, 9, (0, 0), (3, 0)) """ dims = numpy.array(numpy.asarray(input).shape) # see numpy.unravel_index to understand this line. 
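    # ``dim_prod`` holds the C-order stride, in elements, of each axis, so a
    # flat index ``i`` maps to the N-D coordinate ``(i // dim_prod) % dims``.
    # For example, with ``dims = (4, 4)`` we get ``dim_prod = [4, 1]`` and
    # flat index 13 maps to coordinate (3, 1).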
dim_prod = numpy.cumprod([1] + list(dims[:0:-1]))[::-1] minimums, min_positions, maximums, max_positions = _select(input, labels, index, find_min=True, find_max=True, find_min_positions=True, find_max_positions=True) if numpy.isscalar(minimums): return (minimums, maximums, tuple((min_positions // dim_prod) % dims), tuple((max_positions // dim_prod) % dims)) min_positions = [ tuple(v) for v in (min_positions.reshape(-1, 1) // dim_prod) % dims ] max_positions = [ tuple(v) for v in (max_positions.reshape(-1, 1) // dim_prod) % dims ] return minimums, maximums, min_positions, max_positions def center_of_mass(input, labels=None, index=None): """ Calculate the center of mass of the values of an array at labels. Parameters ---------- input : ndarray Data from which to calculate center-of-mass. The masses can either be positive or negative. labels : ndarray, optional Labels for objects in `input`, as generated by `ndimage.label`. Only used with `index`. Dimensions must be the same as `input`. index : int or sequence of ints, optional Labels for which to calculate centers-of-mass. If not specified, the combined center of mass of all labels greater than zero will be calculated. Only used with `labels`. Returns ------- center_of_mass : tuple, or list of tuples Coordinates of centers-of-mass. Examples -------- >>> import numpy as np >>> a = np.array(([0,0,0,0], ... [0,1,1,0], ... [0,1,1,0], ... [0,1,1,0])) >>> from scipy import ndimage >>> ndimage.center_of_mass(a) (2.0, 1.5) Calculation of multiple objects in an image >>> b = np.array(([0,1,1,0], ... [0,1,0,0], ... [0,0,0,0], ... [0,0,1,1], ... [0,0,1,1])) >>> lbl = ndimage.label(b)[0] >>> ndimage.center_of_mass(b, lbl, [1,2]) [(0.33333333333333331, 1.3333333333333333), (3.5, 2.5)] Negative masses are also accepted, which can occur for example when bias is removed from measured data due to random noise. >>> c = np.array(([-1,0,0,0], ... [0,-1,-1,0], ... [0,1,-1,0], ... [0,1,1,0])) >>> ndimage.center_of_mass(c) (-4.0, 1.0) If there are division by zero issues, the function does not raise an error but rather issues a RuntimeWarning before returning inf and/or NaN. >>> d = np.array([-1, 1]) >>> ndimage.center_of_mass(d) (inf,) """ normalizer = sum(input, labels, index) grids = numpy.ogrid[[slice(0, i) for i in input.shape]] results = [sum(input * grids[dir].astype(float), labels, index) / normalizer for dir in range(input.ndim)] if numpy.isscalar(results[0]): return tuple(results) return [tuple(v) for v in numpy.array(results).T] def histogram(input, min, max, bins, labels=None, index=None): """ Calculate the histogram of the values of an array, optionally at labels. Histogram calculates the frequency of values in an array within bins determined by `min`, `max`, and `bins`. The `labels` and `index` keywords can limit the scope of the histogram to specified sub-regions within the array. Parameters ---------- input : array_like Data for which to calculate histogram. min, max : int Minimum and maximum values of range of histogram bins. bins : int Number of bins. labels : array_like, optional Labels for objects in `input`. If not None, must be same shape as `input`. index : int or sequence of ints, optional Label or labels for which to calculate histogram. If None, all values where label is greater than zero are used Returns ------- hist : ndarray Histogram counts. Examples -------- >>> import numpy as np >>> a = np.array([[ 0. , 0.2146, 0.5962, 0. ], ... [ 0. , 0.7778, 0. , 0. ], ... [ 0. , 0. , 0. , 0. ], ... [ 0. , 0. , 0.7181, 0.2787], ... [ 0. , 0. 
, 0.6573, 0.3094]]) >>> from scipy import ndimage >>> ndimage.histogram(a, 0, 1, 10) array([13, 0, 2, 1, 0, 1, 1, 2, 0, 0]) With labels and no indices, non-zero elements are counted: >>> lbl, nlbl = ndimage.label(a) >>> ndimage.histogram(a, 0, 1, 10, lbl) array([0, 0, 2, 1, 0, 1, 1, 2, 0, 0]) Indices can be used to count only certain objects: >>> ndimage.histogram(a, 0, 1, 10, lbl, 2) array([0, 0, 1, 1, 0, 0, 1, 1, 0, 0]) """ _bins = numpy.linspace(min, max, bins + 1) def _hist(vals): return numpy.histogram(vals, _bins)[0] return labeled_comprehension(input, labels, index, _hist, object, None, pass_positions=False) def watershed_ift(input, markers, structure=None, output=None): """ Apply watershed from markers using image foresting transform algorithm. Parameters ---------- input : array_like Input. markers : array_like Markers are points within each watershed that form the beginning of the process. Negative markers are considered background markers which are processed after the other markers. structure : structure element, optional A structuring element defining the connectivity of the object can be provided. If None, an element is generated with a squared connectivity equal to one. output : ndarray, optional An output array can optionally be provided. The same shape as input. Returns ------- watershed_ift : ndarray Output. Same shape as `input`. References ---------- .. [1] A.X. Falcao, J. Stolfi and R. de Alencar Lotufo, "The image foresting transform: theory, algorithms, and applications", Pattern Analysis and Machine Intelligence, vol. 26, pp. 19-29, 2004. """ input = numpy.asarray(input) if input.dtype.type not in [numpy.uint8, numpy.uint16]: raise TypeError('only 8 and 16 unsigned inputs are supported') if structure is None: structure = _morphology.generate_binary_structure(input.ndim, 1) structure = numpy.asarray(structure, dtype=bool) if structure.ndim != input.ndim: raise RuntimeError('structure and input must have equal rank') for ii in structure.shape: if ii != 3: raise RuntimeError('structure dimensions must be equal to 3') if not structure.flags.contiguous: structure = structure.copy() markers = numpy.asarray(markers) if input.shape != markers.shape: raise RuntimeError('input and markers must have equal shape') integral_types = [numpy.int8, numpy.int16, numpy.int32, numpy.int64, numpy.intc, numpy.intp] if markers.dtype.type not in integral_types: raise RuntimeError('marker should be of integer type') if isinstance(output, numpy.ndarray): if output.dtype.type not in integral_types: raise RuntimeError('output should be of integer type') else: output = markers.dtype output = _ni_support._get_output(output, input) _nd_image.watershed_ift(input, markers, structure, output) return output
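# A minimal usage sketch for ``watershed_ift`` (illustrative only, since the
# docstring above has no Examples section; the image values, marker placement
# and dtypes below are assumptions chosen to satisfy the input checks):
#
#     import numpy as np
#     from scipy import ndimage
#
#     # a bright ring on a dark background, as an 8-bit image
#     img = np.zeros((7, 7), dtype=np.uint8)
#     img[2:5, 2:5] = 255
#     img[3, 3] = 0
#
#     markers = np.zeros((7, 7), dtype=np.int8)
#     markers[3, 3] = 1     # seed inside the ring
#     markers[0, 0] = -1    # negative marker: background seed
#
#     labels = ndimage.watershed_ift(img, markers)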