# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License:  Standard 3-clause BSD; see "license.txt" for full license terms
#           and contributor agreement.

"""
    Dataset testing operations.

    Tests all dataset operations, including creation, with the exception of:

    1. Slicing operations for read and write, handled by module test_slicing
    2. Type conversion for read and write (currently untested; a hedged
       sketch appears at the end of this module)
"""

import pathlib
import sys
import numpy as np
import platform
import pytest
import warnings

from .common import ut, TestCase
from .data_files import get_data_file_path
from h5py import File, Group, Dataset
from h5py._hl.base import is_empty_dataspace
from h5py import h5f, h5t
from h5py.h5py_warnings import H5pyDeprecationWarning
import h5py
import h5py._hl.selections as sel


class BaseDataset(TestCase):
    def setUp(self):
        self.f = File(self.mktemp(), 'w')

    def tearDown(self):
        if self.f:
            self.f.close()


class TestRepr(BaseDataset):
    """ Feature: repr(Dataset) behaves sensibly """

    def test_repr_open(self):
        """ repr() works on live and dead datasets """
        ds = self.f.create_dataset('foo', (4,))
        self.assertIsInstance(repr(ds), str)
        self.f.close()
        self.assertIsInstance(repr(ds), str)


class TestCreateShape(BaseDataset):
    """ Feature: Datasets can be created from a shape only """

    def test_create_scalar(self):
        """ Create a scalar dataset """
        dset = self.f.create_dataset('foo', ())
        self.assertEqual(dset.shape, ())

    def test_create_simple(self):
        """ Create a size-1 dataset """
        dset = self.f.create_dataset('foo', (1,))
        self.assertEqual(dset.shape, (1,))

    def test_create_integer(self):
        """ Create a size-1 dataset with integer shape """
        dset = self.f.create_dataset('foo', 1)
        self.assertEqual(dset.shape, (1,))

    def test_create_extended(self):
        """ Create an extended dataset """
        dset = self.f.create_dataset('foo', (63,))
        self.assertEqual(dset.shape, (63,))
        self.assertEqual(dset.size, 63)

        dset = self.f.create_dataset('bar', (6, 10))
        self.assertEqual(dset.shape, (6, 10))
        self.assertEqual(dset.size, 60)

    def test_create_integer_extended(self):
        """ Create an extended dataset from an integer shape """
        dset = self.f.create_dataset('foo', 63)
        self.assertEqual(dset.shape, (63,))
        self.assertEqual(dset.size, 63)

        dset = self.f.create_dataset('bar', (6, 10))
        self.assertEqual(dset.shape, (6, 10))
        self.assertEqual(dset.size, 60)

    def test_default_dtype(self):
        """ Confirm that the default dtype is single-precision float """
        dset = self.f.create_dataset('foo', (63,))
        self.assertEqual(dset.dtype, np.dtype('=f4'))

    def test_missing_shape(self):
        """ Missing shape raises TypeError """
        with self.assertRaises(TypeError):
            self.f.create_dataset('foo')

    def test_long_double(self):
        """ Confirm that a long-double dataset preserves its dtype """
        dset = self.f.create_dataset('foo', (63,), dtype=np.longdouble)
        if platform.machine() in ['ppc64le']:
            pytest.xfail("Storage of long double deactivated on %s"
                         % platform.machine())
        self.assertEqual(dset.dtype, np.longdouble)

    @ut.skipIf(not hasattr(np, "complex256"), "No support for complex256")
    def test_complex256(self):
        """ Confirm that a complex256 dataset preserves its dtype """
        dset = self.f.create_dataset('foo', (63,),
                                     dtype=np.dtype('complex256'))
        self.assertEqual(dset.dtype, np.dtype('complex256'))

    def test_name_bytes(self):
        dset = self.f.create_dataset(b'foo', (1,))
        self.assertEqual(dset.shape, (1,))

        dset2 = self.f.create_dataset(b'bar/baz', (2,))
        self.assertEqual(dset2.shape, (2,))


class TestCreateData(BaseDataset):
    """ Feature: Datasets can be created from existing data """
    def test_create_scalar(self):
        """ Create a scalar dataset from an existing array """
        data = np.ones((), 'f')
        dset = self.f.create_dataset('foo', data=data)
        self.assertEqual(dset.shape, data.shape)

    def test_create_extended(self):
        """ Create an extended dataset from existing data """
        data = np.ones((63,), 'f')
        dset = self.f.create_dataset('foo', data=data)
        self.assertEqual(dset.shape, data.shape)

    def test_dataset_intermediate_group(self):
        """ Create dataset with missing intermediate groups """
        ds = self.f.create_dataset("/foo/bar/baz", shape=(10, 10), dtype='<i4')
        self.assertIsInstance(ds, Dataset)


@ut.skipUnless(h5py.version.hdf5_version_tuple >= (1, 10, 5),
               "chunk info requires HDF5 >= 1.10.5")
def test_get_chunk_details():
    from io import BytesIO
    buf = BytesIO()
    with h5py.File(buf, 'w') as fout:
        fout.create_dataset('test', shape=(100, 100), chunks=(10, 10),
                            dtype='i4')
        fout['test'][:] = 1

    buf.seek(0)
    with h5py.File(buf, 'r') as fin:
        ds = fin['test'].id

        assert ds.get_num_chunks() == 100
        for j in range(100):
            # Chunk j sits at row j // 10, column j % 10 of the 10x10 grid
            # of 10x10-element chunks.
            offset = tuple(np.array(np.unravel_index(j, (10, 10))) * 10)

            si = ds.get_chunk_info(j)
            assert si.chunk_offset == offset
            assert si.filter_mask == 0
            assert si.byte_offset is not None
            assert si.size > 0

        si = ds.get_chunk_info_by_coord((0, 0))
        assert si.chunk_offset == (0, 0)
        assert si.filter_mask == 0
        assert si.byte_offset is not None
        assert si.size > 0


def test_empty_shape(writable_file):
    ds = writable_file.create_dataset('empty', dtype='int32')
    assert ds.shape is None
    assert ds.maxshape is None


def test_zero_storage_size():
    # https://github.com/h5py/h5py/issues/1475
    from io import BytesIO
    buf = BytesIO()
    with h5py.File(buf, 'w') as fout:
        fout.create_dataset('empty', dtype='uint8')

    buf.seek(0)
    with h5py.File(buf, 'r') as fin:
        assert fin['empty'].chunks is None
        assert fin['empty'].id.get_offset() is None
        assert fin['empty'].id.get_storage_size() == 0


def test_python_int_uint64(writable_file):
    # https://github.com/h5py/h5py/issues/1547
    data = [np.iinfo(np.int64).max, np.iinfo(np.int64).max + 1]

    # Check creating a new dataset
    ds = writable_file.create_dataset('x', data=data, dtype=np.uint64)
    assert ds.dtype == np.dtype(np.uint64)
    np.testing.assert_array_equal(ds[:], np.array(data, dtype=np.uint64))

    # Check writing to an existing dataset
    ds[:] = data
    np.testing.assert_array_equal(ds[:], np.array(data, dtype=np.uint64))


def test_setitem_fancy_indexing(writable_file):
    # https://github.com/h5py/h5py/issues/1593
    arr = writable_file.create_dataset('data', (5, 1000, 2), dtype=np.uint8)
    block = np.random.randint(255, size=(5, 3, 2))
    arr[:, [0, 2, 4], ...] = block


def test_vlen_spacepad():
    with File(get_data_file_path("vlen_string_dset.h5")) as f:
        assert f["DS1"][0] == b"Parting"


def test_vlen_nullterm():
    with File(get_data_file_path("vlen_string_dset_utc.h5")) as f:
        assert f["ds1"][0] == b"2009-12-20T10:16:18.662409Z"


@pytest.mark.skipif(
    h5py.version.hdf5_version_tuple < (1, 10, 3),
    reason="Appears you cannot pass an unknown filter id for HDF5 < 1.10.3"
)
def test_allow_unknown_filter(writable_file):
    # apparently 256-511 are reserved for testing purposes
    fake_filter_id = 256
    ds = writable_file.create_dataset(
        'data', shape=(10, 10), dtype=np.uint8,
        compression=fake_filter_id, allow_unknown_filter=True
    )
    assert str(fake_filter_id) in ds._filters


class TestCommutative(BaseDataset):
    """
    Test the symmetry of operators, at least with the numpy types.
    Issue: https://github.com/h5py/h5py/issues/1947
    """

    def test_numpy_commutative(self):
        """
        Create an h5py dataset, extract one element and convert it to numpy.
        Check that it responds symmetrically to == and !=.
        """
        shape = (100, 1)
        dset = self.f.create_dataset("test", shape, dtype=float,
                                     data=np.random.rand(*shape))

        # grab a value from the elements, i.e. dset[0];
        # check that mask arrays are commutative wrt ==, !=
        val = np.float64(dset[0])

        assert np.all((val == dset) == (dset == val))
        assert np.all((val != dset) == (dset != val))

        # generate a sample not in the dset, i.e. max(dset) + delta;
        # check that mask arrays are commutative wrt ==, !=
        delta = 0.001
        nval = np.nanmax(dset) + delta

        assert np.all((nval == dset) == (dset == nval))
        assert np.all((nval != dset) == (dset != nval))

    def test_basetype_commutative(self):
        """
        Create an h5py dataset and check basetype compatibility.
        Check that the operation is symmetric, even if it is potentially
        not meaningful.
        """
        shape = (100, 1)
        dset = self.f.create_dataset("test", shape, dtype=float,
                                     data=np.random.rand(*shape))

        # compare against a plain Python float, sample float(0.);
        # check that the operation is symmetric (but potentially meaningless)
        val = float(0.)
        assert (val == dset) == (dset == val)
        assert (val != dset) == (dset != val)
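

# The module docstring notes that type conversion on read and write is
# currently untested. What follows is a minimal, hedged sketch of what such
# coverage could look like; the class name, dtype pairs, and expectations
# below are illustrative assumptions, not part of the existing suite.
class TestTypeConversionSketch(BaseDataset):
    """ Sketch: numeric dtype conversion on write and on read """

    def test_write_converts_int_to_float(self):
        # Assumption: writing int64 data into a float64 dataset converts
        # on write, losslessly for these small values.
        dset = self.f.create_dataset('foo', (10,), dtype='f8')
        dset[...] = np.arange(10, dtype='i8')
        np.testing.assert_array_equal(dset[...], np.arange(10, dtype='f8'))

    def test_read_direct_converts_float_to_int(self):
        # Assumption: read_direct converts the stored float64 values into
        # the int64 destination array on read.
        dset = self.f.create_dataset('foo', data=np.arange(10, dtype='f8'))
        out = np.empty((10,), dtype='i8')
        dset.read_direct(out)
        np.testing.assert_array_equal(out, np.arange(10, dtype='i8'))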