"""Sparse DIAgonal format""" __docformat__ = "restructuredtext en" __all__ = ['dia_array', 'dia_matrix', 'isspmatrix_dia'] import numpy as np from .._lib._util import copy_if_needed from ._matrix import spmatrix from ._base import issparse, _formats, _spbase, sparray from ._data import _data_matrix from ._sputils import ( isshape, upcast_char, getdtype, get_sum_dtype, validateaxis, check_shape ) from ._sparsetools import dia_matvec class _dia_base(_data_matrix): _format = 'dia' def __init__(self, arg1, shape=None, dtype=None, copy=False): _data_matrix.__init__(self) if issparse(arg1): if arg1.format == "dia": if copy: arg1 = arg1.copy() self.data = arg1.data self.offsets = arg1.offsets self._shape = check_shape(arg1.shape) else: if arg1.format == self.format and copy: A = arg1.copy() else: A = arg1.todia() self.data = A.data self.offsets = A.offsets self._shape = check_shape(A.shape) elif isinstance(arg1, tuple): if isshape(arg1): # It's a tuple of matrix dimensions (M, N) # create empty matrix self._shape = check_shape(arg1) self.data = np.zeros((0,0), getdtype(dtype, default=float)) idx_dtype = self._get_index_dtype(maxval=max(self.shape)) self.offsets = np.zeros((0), dtype=idx_dtype) else: try: # Try interpreting it as (data, offsets) data, offsets = arg1 except Exception as e: message = 'unrecognized form for dia_array constructor' raise ValueError(message) from e else: if shape is None: raise ValueError('expected a shape argument') if not copy: copy = copy_if_needed self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy)) offsets = np.array(arg1[1], dtype=self._get_index_dtype(maxval=max(shape)), copy=copy) self.offsets = np.atleast_1d(offsets) self._shape = check_shape(shape) else: #must be dense, convert to COO first, then to DIA try: arg1 = np.asarray(arg1) except Exception as e: raise ValueError("unrecognized form for" " %s_matrix constructor" % self.format) from e if isinstance(self, sparray) and arg1.ndim != 2: raise ValueError(f"DIA arrays don't support {arg1.ndim}D input. Use 2D") A = self._coo_container(arg1, dtype=dtype, shape=shape).todia() self.data = A.data self.offsets = A.offsets self._shape = check_shape(A.shape) if dtype is not None: self.data = self.data.astype(dtype) #check format if self.offsets.ndim != 1: raise ValueError('offsets array must have rank 1') if self.data.ndim != 2: raise ValueError('data array must have rank 2') if self.data.shape[0] != len(self.offsets): raise ValueError('number of diagonals (%d) ' 'does not match the number of offsets (%d)' % (self.data.shape[0], len(self.offsets))) if len(np.unique(self.offsets)) != len(self.offsets): raise ValueError('offset array contains duplicate values') def __repr__(self): _, fmt = _formats[self.format] sparse_cls = 'array' if isinstance(self, sparray) else 'matrix' shape_str = 'x'.join(str(x) for x in self.shape) ndiag = self.data.shape[0] return ( f"<{shape_str} sparse {sparse_cls} of type '{self.dtype.type}'\n" f"\twith {self.nnz} stored elements ({ndiag} diagonals) in {fmt} format>" ) def _data_mask(self): """Returns a mask of the same shape as self.data, where mask[i,j] is True when data[i,j] corresponds to a stored element.""" num_rows, num_cols = self.shape offset_inds = np.arange(self.data.shape[1]) row = offset_inds - self.offsets[:,None] mask = (row >= 0) mask &= (row < num_rows) mask &= (offset_inds < num_cols) return mask def count_nonzero(self): mask = self._data_mask() return np.count_nonzero(self.data[mask]) def _getnnz(self, axis=None): if axis is not None: raise NotImplementedError("_getnnz over an axis is not implemented " "for DIA format") M,N = self.shape nnz = 0 for k in self.offsets: if k > 0: nnz += min(M,N-k) else: nnz += min(M+k,N) return int(nnz) _getnnz.__doc__ = _spbase._getnnz.__doc__ count_nonzero.__doc__ = _spbase.count_nonzero.__doc__ def sum(self, axis=None, dtype=None, out=None): validateaxis(axis) if axis is not None and axis < 0: axis += 2 res_dtype = get_sum_dtype(self.dtype) num_rows, num_cols = self.shape ret = None if axis == 0: mask = self._data_mask() x = (self.data * mask).sum(axis=0) if x.shape[0] == num_cols: res = x else: res = np.zeros(num_cols, dtype=x.dtype) res[:x.shape[0]] = x ret = self._ascontainer(res, dtype=res_dtype) else: row_sums = np.zeros((num_rows, 1), dtype=res_dtype) one = np.ones(num_cols, dtype=res_dtype) dia_matvec(num_rows, num_cols, len(self.offsets), self.data.shape[1], self.offsets, self.data, one, row_sums) row_sums = self._ascontainer(row_sums) if axis is None: return row_sums.sum(dtype=dtype, out=out) ret = self._ascontainer(row_sums.sum(axis=axis)) if out is not None and out.shape != ret.shape: raise ValueError("dimensions do not match") return ret.sum(axis=(), dtype=dtype, out=out) sum.__doc__ = _spbase.sum.__doc__ def _add_sparse(self, other): # Check if other is also of type dia_array if not isinstance(other, type(self)): # If other is not of type dia_array, default to # converting to csr_matrix, as is done in the _add_sparse # method of parent class _spbase return self.tocsr()._add_sparse(other) # The task is to compute m = self + other # Start by making a copy of self, of the datatype # that should result from adding self and other dtype = np.promote_types(self.dtype, other.dtype) m = self.astype(dtype, copy=True) # Then, add all the stored diagonals of other. for d in other.offsets: # Check if the diagonal has already been added. if d in m.offsets: # If the diagonal is already there, we need to take # the sum of the existing and the new m.setdiag(m.diagonal(d) + other.diagonal(d), d) else: m.setdiag(other.diagonal(d), d) return m def _matmul_vector(self, other): x = other y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char, x.dtype.char)) L = self.data.shape[1] M,N = self.shape dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data, x.ravel(), y.ravel()) return y def _setdiag(self, values, k=0): M, N = self.shape if values.ndim == 0: # broadcast values_n = np.inf else: values_n = len(values) if k < 0: n = min(M + k, N, values_n) min_index = 0 max_index = n else: n = min(M, N - k, values_n) min_index = k max_index = k + n if values.ndim != 0: # allow also longer sequences values = values[:n] data_rows, data_cols = self.data.shape if k in self.offsets: if max_index > data_cols: data = np.zeros((data_rows, max_index), dtype=self.data.dtype) data[:, :data_cols] = self.data self.data = data self.data[self.offsets == k, min_index:max_index] = values else: self.offsets = np.append(self.offsets, self.offsets.dtype.type(k)) m = max(max_index, data_cols) data = np.zeros((data_rows + 1, m), dtype=self.data.dtype) data[:-1, :data_cols] = self.data data[-1, min_index:max_index] = values self.data = data def todia(self, copy=False): if copy: return self.copy() else: return self todia.__doc__ = _spbase.todia.__doc__ def transpose(self, axes=None, copy=False): if axes is not None and axes != (1, 0): raise ValueError("Sparse arrays/matrices do not support " "an 'axes' parameter because swapping " "dimensions is the only logical permutation.") num_rows, num_cols = self.shape max_dim = max(self.shape) # flip diagonal offsets offsets = -self.offsets # re-align the data matrix r = np.arange(len(offsets), dtype=np.intc)[:, None] c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None] pad_amount = max(0, max_dim-self.data.shape[1]) data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount), dtype=self.data.dtype))) data = data[r, c] return self._dia_container((data, offsets), shape=( num_cols, num_rows), copy=copy) transpose.__doc__ = _spbase.transpose.__doc__ def diagonal(self, k=0): rows, cols = self.shape if k <= -rows or k >= cols: return np.empty(0, dtype=self.data.dtype) idx, = np.nonzero(self.offsets == k) first_col = max(0, k) last_col = min(rows + k, cols) result_size = last_col - first_col if idx.size == 0: return np.zeros(result_size, dtype=self.data.dtype) result = self.data[idx[0], first_col:last_col] padding = result_size - len(result) if padding > 0: result = np.pad(result, (0, padding), mode='constant') return result diagonal.__doc__ = _spbase.diagonal.__doc__ def tocsc(self, copy=False): if self.nnz == 0: return self._csc_container(self.shape, dtype=self.dtype) num_rows, num_cols = self.shape num_offsets, offset_len = self.data.shape offset_inds = np.arange(offset_len) row = offset_inds - self.offsets[:,None] mask = (row >= 0) mask &= (row < num_rows) mask &= (offset_inds < num_cols) mask &= (self.data != 0) idx_dtype = self._get_index_dtype(maxval=max(self.shape)) indptr = np.zeros(num_cols + 1, dtype=idx_dtype) indptr[1:offset_len+1] = np.cumsum(mask.sum(axis=0)[:num_cols]) if offset_len < num_cols: indptr[offset_len+1:] = indptr[offset_len] indices = row.T[mask.T].astype(idx_dtype, copy=False) data = self.data.T[mask.T] return self._csc_container((data, indices, indptr), shape=self.shape, dtype=self.dtype) tocsc.__doc__ = _spbase.tocsc.__doc__ def tocoo(self, copy=False): num_rows, num_cols = self.shape num_offsets, offset_len = self.data.shape offset_inds = np.arange(offset_len) row = offset_inds - self.offsets[:,None] mask = (row >= 0) mask &= (row < num_rows) mask &= (offset_inds < num_cols) mask &= (self.data != 0) row = row[mask] col = np.tile(offset_inds, num_offsets)[mask.ravel()] idx_dtype = self._get_index_dtype( arrays=(self.offsets,), maxval=max(self.shape) ) row = row.astype(idx_dtype, copy=False) col = col.astype(idx_dtype, copy=False) data = self.data[mask] # Note: this cannot set has_canonical_format=True, because despite the # lack of duplicates, we do not generate sorted indices. return self._coo_container( (data, (row, col)), shape=self.shape, dtype=self.dtype, copy=False ) tocoo.__doc__ = _spbase.tocoo.__doc__ # needed by _data_matrix def _with_data(self, data, copy=True): """Returns a matrix with the same sparsity structure as self, but with different data. By default the structure arrays are copied. """ if copy: return self._dia_container( (data, self.offsets.copy()), shape=self.shape ) else: return self._dia_container( (data, self.offsets), shape=self.shape ) def resize(self, *shape): shape = check_shape(shape) M, N = shape # we do not need to handle the case of expanding N self.data = self.data[:, :N] if (M > self.shape[0] and np.any(self.offsets + self.shape[0] < self.data.shape[1])): # explicitly clear values that were previously hidden mask = (self.offsets[:, None] + self.shape[0] <= np.arange(self.data.shape[1])) self.data[mask] = 0 self._shape = shape resize.__doc__ = _spbase.resize.__doc__ def isspmatrix_dia(x): """Is `x` of dia_matrix type? Parameters ---------- x object to check for being a dia matrix Returns ------- bool True if `x` is a dia matrix, False otherwise Examples -------- >>> from scipy.sparse import dia_array, dia_matrix, coo_matrix, isspmatrix_dia >>> isspmatrix_dia(dia_matrix([[5]])) True >>> isspmatrix_dia(dia_array([[5]])) False >>> isspmatrix_dia(coo_matrix([[5]])) False """ return isinstance(x, dia_matrix) # This namespace class separates array from matrix with isinstance class dia_array(_dia_base, sparray): """ Sparse array with DIAgonal storage. This can be instantiated in several ways: dia_array(D) where D is a 2-D ndarray dia_array(S) with another sparse array or matrix S (equivalent to S.todia()) dia_array((M, N), [dtype]) to construct an empty array with shape (M, N), dtype is optional, defaulting to dtype='d'. dia_array((data, offsets), shape=(M, N)) where the ``data[k,:]`` stores the diagonal entries for diagonal ``offsets[k]`` (See example below) Attributes ---------- dtype : dtype Data type of the array shape : 2-tuple Shape of the array ndim : int Number of dimensions (this is always 2) nnz size data DIA format data array of the array offsets DIA format offset array of the array T Notes ----- Sparse arrays can be used in arithmetic operations: they support addition, subtraction, multiplication, division, and matrix power. Examples -------- >>> import numpy as np >>> from scipy.sparse import dia_array >>> dia_array((3, 4), dtype=np.int8).toarray() array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=int8) >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0) >>> offsets = np.array([0, -1, 2]) >>> dia_array((data, offsets), shape=(4, 4)).toarray() array([[1, 0, 3, 0], [1, 2, 0, 4], [0, 2, 3, 0], [0, 0, 3, 4]]) >>> from scipy.sparse import dia_array >>> n = 10 >>> ex = np.ones(n) >>> data = np.array([ex, 2 * ex, ex]) >>> offsets = np.array([-1, 0, 1]) >>> dia_array((data, offsets), shape=(n, n)).toarray() array([[2., 1., 0., ..., 0., 0., 0.], [1., 2., 1., ..., 0., 0., 0.], [0., 1., 2., ..., 0., 0., 0.], ..., [0., 0., 0., ..., 2., 1., 0.], [0., 0., 0., ..., 1., 2., 1.], [0., 0., 0., ..., 0., 1., 2.]]) """ class dia_matrix(spmatrix, _dia_base): """ Sparse matrix with DIAgonal storage. This can be instantiated in several ways: dia_matrix(D) where D is a 2-D ndarray dia_matrix(S) with another sparse array or matrix S (equivalent to S.todia()) dia_matrix((M, N), [dtype]) to construct an empty matrix with shape (M, N), dtype is optional, defaulting to dtype='d'. dia_matrix((data, offsets), shape=(M, N)) where the ``data[k,:]`` stores the diagonal entries for diagonal ``offsets[k]`` (See example below) Attributes ---------- dtype : dtype Data type of the matrix shape : 2-tuple Shape of the matrix ndim : int Number of dimensions (this is always 2) nnz size data DIA format data array of the matrix offsets DIA format offset array of the matrix T Notes ----- Sparse matrices can be used in arithmetic operations: they support addition, subtraction, multiplication, division, and matrix power. Examples -------- >>> import numpy as np >>> from scipy.sparse import dia_matrix >>> dia_matrix((3, 4), dtype=np.int8).toarray() array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=int8) >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0) >>> offsets = np.array([0, -1, 2]) >>> dia_matrix((data, offsets), shape=(4, 4)).toarray() array([[1, 0, 3, 0], [1, 2, 0, 4], [0, 2, 3, 0], [0, 0, 3, 4]]) >>> from scipy.sparse import dia_matrix >>> n = 10 >>> ex = np.ones(n) >>> data = np.array([ex, 2 * ex, ex]) >>> offsets = np.array([-1, 0, 1]) >>> dia_matrix((data, offsets), shape=(n, n)).toarray() array([[2., 1., 0., ..., 0., 0., 0.], [1., 2., 1., ..., 0., 0., 0.], [0., 1., 2., ..., 0., 0., 0.], ..., [0., 0., 0., ..., 2., 1., 0.], [0., 0., 0., ..., 1., 2., 1.], [0., 0., 0., ..., 0., 1., 2.]]) """