# -*- coding: utf-8 -*-
# Licensed under a 3-clause BSD style license - see LICENSE.rst

from astropy.utils.tests.test_metadata import MetaBaseTest
import operator

import pytest
import numpy as np
from numpy.testing import assert_array_equal

from astropy.tests.helper import assert_follows_unicode_guidelines
from astropy import table
from astropy import time
from astropy import units as u


class TestColumn():
    """Tests of basic column behaviour.

    Most methods take a ``Column`` argument; presumably this is a fixture
    supplied by the test suite's conftest that yields either the plain or the
    masked column class (the tests branch on ``Column is table.Column``).
    """

    def test_subclass(self, Column):
        """A column, and the result of arithmetic on it, is an ndarray subclass."""
        c = Column(name='a')
        assert isinstance(c, np.ndarray)
        c2 = c * 2
        assert isinstance(c2, Column)
        assert isinstance(c2, np.ndarray)

    def test_numpy_ops(self, Column):
        """Show that basic numpy operations with Column behave sensibly"""

        arr = np.array([1, 2, 3])
        c = Column(arr, name='a')

        for op, test_equal in ((operator.eq, True),
                               (operator.ne, False),
                               (operator.ge, True),
                               (operator.gt, False),
                               (operator.le, True),
                               (operator.lt, False)):
            # Check both operand orders: column-vs-array and array-vs-column.
            for eq in (op(c, arr), op(arr, c)):

                assert np.all(eq) if test_equal else not np.any(eq)
                assert len(eq) == 3
                # Exact type (not isinstance) is intended: a comparison must
                # NOT come back as a Column subclass instance.
                if Column is table.Column:
                    assert type(eq) == np.ndarray
                else:
                    assert type(eq) == np.ma.core.MaskedArray
                    assert eq.dtype.str == '|b1'

        lt = c - 1 < arr
        assert np.all(lt)

    def test_numpy_boolean_ufuncs(self, Column):
        """Show that predicate-style numpy ufuncs (isfinite, isinf, isnan,
        signbit) and np.sign applied to a Column behave sensibly"""

        arr = np.array([1, 2, 3])
        c = Column(arr, name='a')

        for ufunc, test_true in ((np.isfinite, True),
                                 (np.isinf, False),
                                 (np.isnan, False),
                                 (np.sign, True),
                                 (np.signbit, False)):
            result = ufunc(c)
            assert len(result) == len(c)
            assert np.all(result) if test_true else not np.any(result)
            # Exact type (not isinstance) is intended here.
            if Column is table.Column:
                assert type(result) == np.ndarray
            else:
                assert type(result) == np.ma.core.MaskedArray
                # np.sign is the one ufunc in the list that is not
                # boolean-valued, so skip the dtype check for it.
                if ufunc is not np.sign:
                    assert result.dtype.str == '|b1'

    def test_view(self, Column):
        """A plain ndarray viewed as a column has the expected repr."""
        c = np.array([1, 2, 3], dtype=np.int64).view(Column)
        assert repr(c) == f"<{Column.__name__} dtype='int64' length=3>\n1\n2\n3"

    def test_format(self, Column):
        """Show that the formatted output from str() works"""
        from astropy import conf
        with conf.set_temp('max_lines', 8):
            c1 = Column(np.arange(2000), name='a', dtype=float,
                        format='%6.2f')
            # Every rendered row has the same width as the 7-character
            # separator line.
            assert str(c1).splitlines() == ['   a   ',
                                            '-------',
                                            '   0.00',
                                            '   1.00',
                                            '    ...',
                                            '1998.00',
                                            '1999.00',
                                            'Length = 2000 rows']

    def test_convert_numpy_array(self, Column):
        """Converting a column with np.array() keeps the values."""
        d = Column([1, 2, 3], name='a', dtype='i8')

        np_data = np.array(d)
        assert np.all(np_data == d)
        np_data = np.array(d, copy=False)
        assert np.all(np_data == d)
        np_data = np.array(d, dtype='i4')
        assert np.all(np_data == d)

    def test_convert_unit(self, Column):
        """convert_unit_to() rescales the column data in place."""
        d = Column([1, 2, 3], name='a', dtype="f8", unit="m")
        d.convert_unit_to("km")
        assert np.all(d.data == [0.001, 0.002, 0.003])

    def test_array_wrap(self):
        """Test that the __array_wrap__ method converts a reduction ufunc
        output that has a different shape into an ndarray view.  Without this a
        method call like c.mean() returns a Column array object with length=1."""
        # Mean and sum for a 1-d float column
        c = table.Column(name='a', data=[1., 2., 3.])
        assert np.allclose(c.mean(), 2.0)
        assert isinstance(c.mean(), (np.floating, float))
        assert np.allclose(c.sum(), 6.)
        assert isinstance(c.sum(), (np.floating, float))

        # Non-reduction ufunc preserves Column class
        assert isinstance(np.cos(c), table.Column)

        # Sum for a 1-d int column
        c = table.Column(name='a', data=[1, 2, 3])
        assert np.allclose(c.sum(), 6)
        assert isinstance(c.sum(), (np.integer, int))

        # Sum for a 2-d int column
        c = table.Column(name='a', data=[[1, 2, 3],
                                         [4, 5, 6]])
        assert c.sum() == 21
        assert isinstance(c.sum(), (np.integer, int))
        assert np.all(c.sum(axis=0) == [5, 7, 9])
        assert c.sum(axis=0).shape == (3,)
        assert isinstance(c.sum(axis=0), np.ndarray)

        # Sum and mean for a 1-d masked column
        c = table.MaskedColumn(name='a', data=[1., 2., 3.], mask=[0, 0, 1])
        assert np.allclose(c.mean(), 1.5)
        assert isinstance(c.mean(), (np.floating, float))
        assert np.allclose(c.sum(), 3.)
        assert isinstance(c.sum(), (np.floating, float))

    def test_name_none(self, Column):
        """Can create a column without supplying name, which defaults to None"""
        c = Column([1, 2])
        assert c.name is None
        assert np.all(c == np.array([1, 2]))

    def test_quantity_init(self, Column):
        """A column can be initialised from a Quantity, optionally converting
        to an explicitly requested unit."""
        c = Column(data=np.array([1, 2, 3]) * u.m)
        assert np.all(c.data == np.array([1, 2, 3]))
        assert np.all(c.unit == u.m)

        c = Column(data=np.array([1, 2, 3]) * u.m, unit=u.cm)
        assert np.all(c.data == np.array([100, 200, 300]))
        assert np.all(c.unit == u.cm)

    def test_quantity_comparison(self, Column):
        """Comparing a column that has a unit with a Quantity honours units."""
        # regression test for gh-6532
        c = Column([1, 2100, 3], unit='Hz')
        q = 2 * u.kHz
        check = c < q
        assert np.all(check == [True, False, True])
        # This already worked, but just in case.
        check = q >= c
        assert np.all(check == [True, False, True])

    def test_attrs_survive_getitem_after_change(self, Column):
        """
        Test for issue #3023: when calling getitem with a MaskedArray subclass
        the original object attributes are not copied.
        """
        c1 = Column([1, 2, 3], name='a', unit='m', format='%i',
                    description='aa', meta={'a': 1})
        c1.name = 'b'
        c1.unit = 'km'
        c1.format = '%d'
        c1.description = 'bb'
        c1.meta = {'bbb': 2}

        for item in (slice(None, None), slice(None, 1), np.array([0, 2]),
                     np.array([False, True, False])):
            c2 = c1[item]
            assert c2.name == 'b'
            assert c2.unit is u.km
            assert c2.format == '%d'
            assert c2.description == 'bb'
            assert c2.meta == {'bbb': 2}

        # Make sure that calling getitem resulting in a scalar does
        # not copy attributes.
        val = c1[1]
        for attr in ('name', 'unit', 'format', 'description', 'meta'):
            assert not hasattr(val, attr)

    def test_to_quantity(self, Column):
        """Check ``.quantity`` and ``.to()``, including copy/view semantics."""
        d = Column([1, 2, 3], name='a', dtype="f8", unit="m")

        assert np.all(d.quantity == ([1, 2, 3.] * u.m))
        assert np.all(d.quantity.value == ([1, 2, 3.] * u.m).value)
        assert np.all(d.quantity == d.to('m'))
        assert np.all(d.quantity.value == d.to('m').value)

        np.testing.assert_allclose(d.to(u.km).value, ([.001, .002, .003] * u.km).value)
        np.testing.assert_allclose(d.to('km').value, ([.001, .002, .003] * u.km).value)

        np.testing.assert_allclose(d.to(u.MHz, u.equivalencies.spectral()).value,
                                   [299.792458, 149.896229, 99.93081933])

        d_nounit = Column([1, 2, 3], name='a', dtype="f8", unit=None)
        with pytest.raises(u.UnitsError):
            d_nounit.to(u.km)
        assert np.all(d_nounit.to(u.dimensionless_unscaled) == np.array([1, 2, 3]))

        # make sure the correct copy/no copy behavior is happening
        q = [1, 3, 5] * u.km

        # to should always make a copy
        d.to(u.km)[:] = q
        np.testing.assert_allclose(d, [1, 2, 3])

        # explicit copying of the quantity should not change the column
        d.quantity.copy()[:] = q
        np.testing.assert_allclose(d, [1, 2, 3])

        # but quantity directly is a "view", accessing the underlying column
        d.quantity[:] = q
        np.testing.assert_allclose(d, [1000, 3000, 5000])

        # view should also work for integers
        d2 = Column([1, 2, 3], name='a', dtype=int, unit="m")
        d2.quantity[:] = q
        np.testing.assert_allclose(d2, [1000, 3000, 5000])

        # but it should fail for strings or other non-numeric tables
        d3 = Column(['arg', 'name', 'stuff'], name='a', unit="m")

        with pytest.raises(TypeError):
            d3.quantity

    def test_to_funcunit_quantity(self, Column):
        """
        Tests for #8424, check if function-unit can be retrieved from column.
        """
        d = Column([1, 2, 3], name='a', dtype="f8", unit="dex(AA)")

        assert np.all(d.quantity == ([1, 2, 3] * u.dex(u.AA)))
        assert np.all(d.quantity.value == ([1, 2, 3] * u.dex(u.AA)).value)
        assert np.all(d.quantity == d.to("dex(AA)"))
        assert np.all(d.quantity.value == d.to("dex(AA)").value)

        # make sure, casting to linear unit works
        q = [10, 100, 1000] * u.AA
        np.testing.assert_allclose(d.to(u.AA), q)

    def test_item_access_type(self, Column):
        """
        Tests for #3095, which forces integer item access to always return a plain
        ndarray or MaskedArray, even in the case of a multi-dim column.
        """
        integer_types = (int, np.int_)

        for int_type in integer_types:
            c = Column([[1, 2], [3, 4]])
            i0 = int_type(0)
            i1 = int_type(1)
            assert np.all(c[i0] == [1, 2])
            assert type(c[i0]) == (np.ma.MaskedArray if hasattr(Column, 'mask') else np.ndarray)
            assert c[i0].shape == (2,)

            c01 = c[i0:i1]
            assert np.all(c01 == [[1, 2]])
            assert isinstance(c01, Column)
            assert c01.shape == (1, 2)

            c = Column([1, 2])
            assert np.all(c[i0] == 1)
            assert isinstance(c[i0], np.integer)
            assert c[i0].shape == ()

            c01 = c[i0:i1]
            assert np.all(c01 == [1])
            assert isinstance(c01, Column)
            assert c01.shape == (1,)

    def test_insert_basic(self, Column):
        """insert() at positive/negative indices, plus out-of-bounds errors."""
        c = Column([0, 1, 2], name='a', dtype=int, unit='mJy', format='%i',
                   description='test column', meta={'c': 8, 'd': 12})

        # Basic insert
        c1 = c.insert(1, 100)
        assert np.all(c1 == [0, 100, 1, 2])
        assert c1.attrs_equal(c)
        assert type(c) is type(c1)
        if hasattr(c1, 'mask'):
            assert c1.data.shape == c1.mask.shape

        c1 = c.insert(-1, 100)
        assert np.all(c1 == [0, 1, 100, 2])

        c1 = c.insert(3, 100)
        assert np.all(c1 == [0, 1, 2, 100])

        c1 = c.insert(-3, 100)
        assert np.all(c1 == [100, 0, 1, 2])

        c1 = c.insert(1, [100, 200, 300])
        if hasattr(c1, 'mask'):
            assert c1.data.shape == c1.mask.shape

        # Out of bounds index
        with pytest.raises((ValueError, IndexError)):
            c1 = c.insert(-4, 100)
        with pytest.raises((ValueError, IndexError)):
            c1 = c.insert(4, 100)

    def test_insert_axis(self, Column):
        """Insert with non-default axis kwarg"""
        c = Column([[1, 2], [3, 4]])

        c1 = c.insert(1, [5, 6], axis=None)
        assert np.all(c1 == [1, 5, 6, 2, 3, 4])

        c1 = c.insert(1, [5, 6], axis=1)
        assert np.all(c1 == [[1, 5, 2], [3, 6, 4]])

    def test_insert_string_expand(self, Column):
        """Inserting a longer string does not truncate the inserted value."""
        c = Column(['a', 'b'])
        c1 = c.insert(0, 'abc')
        assert np.all(c1 == ['abc', 'a', 'b'])

        c = Column(['a', 'b'])
        c1 = c.insert(0, ['c', 'def'])
        assert np.all(c1 == ['c', 'def', 'a', 'b'])

    def test_insert_string_masked_values(self):
        """Inserting masked values into a string MaskedColumn."""
        c = table.MaskedColumn(['a', 'b'])
        c1 = c.insert(0, np.ma.masked)
        assert np.all(c1 == ['', 'a', 'b'])
        assert np.all(c1.mask == [True, False, False])
        assert c1.dtype == 'U1'
        c2 = c.insert(1, np.ma.MaskedArray(['ccc', 'dd'], mask=[True, False]))
        assert np.all(c2 == ['a', 'ccc', 'dd', 'b'])
        assert np.all(c2.mask == [False, True, False, False])
        assert c2.dtype == 'U3'

    def test_insert_string_type_error(self, Column):
        """Inserting a value of an incompatible type raises."""
        c = Column([1, 2])
        with pytest.raises(ValueError, match='invalid literal for int'):
            c.insert(0, 'string')

        c = Column(['a', 'b'])
        with pytest.raises(TypeError, match='string operation on non-string array'):
            c.insert(0, 1)

    def test_insert_multidim(self, Column):
        """insert() on a 2-d column: row insert, broadcast, and bad shape."""
        c = Column([[1, 2],
                    [3, 4]], name='a', dtype=int)

        # Basic insert
        c1 = c.insert(1, [100, 200])
        assert np.all(c1 == [[1, 2], [100, 200], [3, 4]])

        # Broadcast
        c1 = c.insert(1, 100)
        assert np.all(c1 == [[1, 2], [100, 100], [3, 4]])

        # Wrong shape
        with pytest.raises(ValueError):
            c1 = c.insert(1, [100, 200, 300])

    def test_insert_object(self, Column):
        """Inserting a list into an object column stores it as one element."""
        c = Column(['a', 1, None], name='a', dtype=object)

        # Basic insert
        c1 = c.insert(1, [100, 200])
        assert np.all(c1 == np.array(['a', [100, 200], 1, None], dtype=object))

    def test_insert_masked(self):
        """insert() on a MaskedColumn keeps fill_value and honours ``mask``."""
        c = table.MaskedColumn([0, 1, 2], name='a', fill_value=9999,
                               mask=[False, True, False])

        # Basic insert
        c1 = c.insert(1, 100)
        assert np.all(c1.data.data == [0, 100, 1, 2])
        assert c1.fill_value == 9999
        assert np.all(c1.data.mask == [False, False, True, False])
        assert type(c) is type(c1)

        for mask in (False, True):
            c1 = c.insert(1, 100, mask=mask)
            assert np.all(c1.data.data == [0, 100, 1, 2])
            assert np.all(c1.data.mask == [False, mask, True, False])

    def test_masked_multidim_as_list(self):
        """A MaskedColumn built from a list of masked arrays keeps the mask."""
        data = np.ma.MaskedArray([1, 2], mask=[True, False])
        c = table.MaskedColumn([data])
        assert c.shape == (1, 2)
        assert np.all(c[0].mask == [True, False])

    def test_insert_masked_multidim(self):
        """insert() on a 2-d MaskedColumn with scalar and per-element masks."""
        c = table.MaskedColumn([[1, 2],
                                [3, 4]], name='a', dtype=int)

        c1 = c.insert(1, [100, 200], mask=True)
        assert np.all(c1.data.data == [[1, 2], [100, 200], [3, 4]])
        assert np.all(c1.data.mask == [[False, False], [True, True], [False, False]])

        c1 = c.insert(1, [100, 200], mask=[True, False])
        assert np.all(c1.data.data == [[1, 2], [100, 200], [3, 4]])
        assert np.all(c1.data.mask == [[False, False], [True, False], [False, False]])

        with pytest.raises(ValueError):
            c1 = c.insert(1, [100, 200], mask=[True, False, True])

    def test_mask_on_non_masked_table(self):
        """
        When a table is not masked, trying to set the mask on one of its
        columns raises AttributeError.
        """

        t = table.Table([[1, 2], [3, 4]], names=('a', 'b'), dtype=('i4', 'f8'))

        with pytest.raises(AttributeError):
            t['a'].mask = [True, False]


class TestAttrEqual():
    """Bunch of tests originally from ATpy that test the attrs_equal method."""

    def test_5(self, Column):
        """Same name, dtype and unit compare equal."""
        c1 = Column(name='a', dtype=int, unit='mJy')
        c2 = Column(name='a', dtype=int, unit='mJy')
        assert c1.attrs_equal(c2)

    def test_6(self, Column):
        """All attributes identical compare equal."""
        c1 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        c2 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        assert c1.attrs_equal(c2)

    def test_7(self, Column):
        """Different name compares unequal."""
        c1 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        c2 = Column(name='b', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        assert not c1.attrs_equal(c2)

    def test_8(self, Column):
        """Different dtype compares unequal."""
        c1 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        c2 = Column(name='a', dtype=float, unit='mJy', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        assert not c1.attrs_equal(c2)

    def test_9(self, Column):
        """Different unit string compares unequal."""
        c1 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        c2 = Column(name='a', dtype=int, unit='erg.cm-2.s-1.Hz-1', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        assert not c1.attrs_equal(c2)

    def test_10(self, Column):
        """Different format compares unequal."""
        c1 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        c2 = Column(name='a', dtype=int, unit='mJy', format='%g',
                    description='test column', meta={'c': 8, 'd': 12})
        assert not c1.attrs_equal(c2)

    def test_11(self, Column):
        """Different description compares unequal."""
        c1 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        c2 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='another test column', meta={'c': 8, 'd': 12})
        assert not c1.attrs_equal(c2)

    def test_12(self, Column):
        """Different meta key compares unequal."""
        c1 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        c2 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'e': 8, 'd': 12})
        assert not c1.attrs_equal(c2)

    def test_13(self, Column):
        """Different meta value compares unequal."""
        c1 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'c': 8, 'd': 12})
        c2 = Column(name='a', dtype=int, unit='mJy', format='%i',
                    description='test column', meta={'c': 9, 'd': 12})
        assert not c1.attrs_equal(c2)

    def test_col_and_masked_col(self):
        """attrs_equal works in both directions between Column and MaskedColumn."""
        c1 = table.Column(name='a', dtype=int, unit='mJy', format='%i',
                          description='test column', meta={'c': 8, 'd': 12})
        c2 = table.MaskedColumn(name='a', dtype=int, unit='mJy', format='%i',
                                description='test column', meta={'c': 8, 'd': 12})
        assert c1.attrs_equal(c2)
        assert c2.attrs_equal(c1)


# Check that the meta descriptor is working as expected. The MetaBaseTest class
# takes care of defining all the tests, and we simply have to define the class
# and any minimal set of args to pass.


class TestMetaColumn(MetaBaseTest):
    test_class = table.Column
    args = ()


class TestMetaMaskedColumn(MetaBaseTest):
    test_class = table.MaskedColumn
    args = ()


def test_getitem_metadata_regression():
    """
    Regression test for #1471: MaskedArray does not call __array_finalize__ so
    the meta-data was not getting copied over. By overloading _update_from we
    are able to work around this bug.
    """

    # Make sure that meta-data gets propagated with __getitem__

    c = table.Column(data=[1, 2], name='a', description='b', unit='m', format="%i", meta={'c': 8})
    assert c[1:2].name == 'a'
    assert c[1:2].description == 'b'
    assert c[1:2].unit == 'm'
    assert c[1:2].format == '%i'
    assert c[1:2].meta['c'] == 8

    c = table.MaskedColumn(data=[1, 2], name='a', description='b',
                           unit='m', format="%i", meta={'c': 8})
    assert c[1:2].name == 'a'
    assert c[1:2].description == 'b'
    assert c[1:2].unit == 'm'
    assert c[1:2].format == '%i'
    assert c[1:2].meta['c'] == 8

    # As above, but with take() - check the method and the function

    c = table.Column(data=[1, 2, 3], name='a', description='b',
                     unit='m', format="%i", meta={'c': 8})
    for subset in [c.take([0, 1]), np.take(c, [0, 1])]:
        assert subset.name == 'a'
        assert subset.description == 'b'
        assert subset.unit == 'm'
        assert subset.format == '%i'
        assert subset.meta['c'] == 8

    # Metadata isn't copied for scalar values
    for subset in [c.take(0), np.take(c, 0)]:
        assert subset == 1
        assert subset.shape == ()
        assert not isinstance(subset, table.Column)

    c = table.MaskedColumn(data=[1, 2, 3], name='a', description='b',
                           unit='m', format="%i", meta={'c': 8})
    for subset in [c.take([0, 1]), np.take(c, [0, 1])]:
        assert subset.name == 'a'
        assert subset.description == 'b'
        assert subset.unit == 'm'
        assert subset.format == '%i'
        assert subset.meta['c'] == 8

    # Metadata isn't copied for scalar values
    for subset in [c.take(0), np.take(c, 0)]:
        assert subset == 1
        assert subset.shape == ()
        assert not isinstance(subset, table.MaskedColumn)


def test_unicode_guidelines():
    """Run the shared unicode-guidelines helper on a simple Column."""
    arr = np.array([1, 2, 3])
    c = table.Column(arr, name='a')

    assert_follows_unicode_guidelines(c)


def test_scalar_column():
    """
    Column is not designed to hold scalars, but for numpy 1.6 this can happen:

      >> type(np.std(table.Column([1, 2])))
      astropy.table.column.Column
    """
    c = table.Column(1.5)
    assert repr(c) == '1.5'
    assert str(c) == '1.5'


def test_qtable_column_conversion():
    """
    Ensures that a QTable that gets assigned a unit switches to be Quantity-y
    """
    qtab = table.QTable([[1, 2], [3, 4.2]], names=['i', 'f'])

    assert isinstance(qtab['i'], table.column.Column)
    assert isinstance(qtab['f'], table.column.Column)

    qtab['i'].unit = 'km/s'
    assert isinstance(qtab['i'], u.Quantity)
    assert isinstance(qtab['f'], table.column.Column)

    # should follow from the above, but good to make sure as a #4497 regression test
    assert isinstance(qtab['i'][0], u.Quantity)
    assert isinstance(qtab[0]['i'], u.Quantity)
    assert not isinstance(qtab['f'][0], u.Quantity)
    assert not isinstance(qtab[0]['f'], u.Quantity)

    # Regression test for #5342: if a function unit is assigned, the column
    # should become the appropriate FunctionQuantity subclass.
    qtab['f'].unit = u.dex(u.cm / u.s**2)
    assert isinstance(qtab['f'], u.Dex)


@pytest.mark.parametrize('masked', [True, False])
def test_string_truncation_warning(masked):
    """
    Test warnings associated with in-place assignment to a string
    column that results in truncation of the right hand side.
    """
    from inspect import currentframe, getframeinfo

    t = table.Table([['aa', 'bb']], names=['a'], masked=masked)
    t['a'][1] = 'cc'
    t['a'][:] = 'dd'

    with pytest.warns(table.StringTruncateWarning, match=r'truncated right side '
                      r'string\(s\) longer than 2 character\(s\)') as w:
        # NOTE: the truncating assignment must stay on the line directly
        # below ``frameinfo``; the ``lineno + 1`` check further down relies on it.
        frameinfo = getframeinfo(currentframe())
        t['a'][0] = 'eee'  # replace item with string that gets truncated
    assert t['a'][0] == 'ee'
    assert len(w) == 1

    # Make sure the warning points back to the user code line
    assert w[0].lineno == frameinfo.lineno + 1
    assert 'test_column' in w[0].filename

    with pytest.warns(table.StringTruncateWarning, match=r'truncated right side '
                      r'string\(s\) longer than 2 character\(s\)') as w:
        t['a'][:] = ['ff', 'ggg']  # replace item with string that gets truncated
    assert np.all(t['a'] == ['ff', 'gg'])
    assert len(w) == 1

    # Test the obscure case of assigning from an array that was originally
    # wider than any of the current elements (i.e. dtype is U4 but actual
    # elements are U1 at the time of assignment).
    val = np.array(['ffff', 'gggg'])
    val[:] = ['f', 'g']
    t['a'][:] = val
    assert np.all(t['a'] == ['f', 'g'])


def test_string_truncation_warning_masked():
    """
    Test warnings associated with in-place assignment to a string
    to a masked column, specifically where the right hand side
    contains np.ma.masked.
    """

    # Test for strings, but also cover assignment of np.ma.masked to
    # int and float masked column setting.  This was previously only
    # covered in an unrelated io.ascii test (test_line_endings) which
    # showed an unexpected difference between handling of str and numeric
    # masked arrays.
    for values in (['a', 'b'], [1, 2], [1.0, 2.0]):
        mc = table.MaskedColumn(values)

        mc[1] = np.ma.masked
        assert np.all(mc.mask == [False, True])

        mc[:] = np.ma.masked
        assert np.all(mc.mask == [True, True])

    mc = table.MaskedColumn(['aa', 'bb'])

    with pytest.warns(table.StringTruncateWarning, match=r'truncated right side '
                      r'string\(s\) longer than 2 character\(s\)') as w:
        mc[:] = [np.ma.masked, 'ggg']  # replace item with string that gets truncated
    assert mc[1] == 'gg'
    assert np.all(mc.mask == [True, False])
    assert len(w) == 1


@pytest.mark.parametrize('Column', (table.Column, table.MaskedColumn))
def test_col_unicode_sandwich_create_from_str(Column):
    """
    Create a bytestring Column from strings (including unicode) in Py3.
    """
    # a-umlaut is a 2-byte character in utf-8, test fails with ascii encoding.
    # Stress the system by injecting non-ASCII characters.
    uba = 'bä'
    c = Column([uba, 'def'], dtype='S')
    assert c.dtype.char == 'S'
    assert c[0] == uba
    assert isinstance(c[0], str)
    assert isinstance(c[:0], table.Column)
    assert np.all(c[:2] == np.array([uba, 'def']))


@pytest.mark.parametrize('Column', (table.Column, table.MaskedColumn))
def test_col_unicode_sandwich_bytes_obj(Column):
    """
    Create a Column of dtype object with bytestring in it and make sure
    it keeps the bytestring and does not convert to str when accessed.
    """
    c = Column([None, b'def'])
    assert c.dtype.char == 'O'
    assert not c[0]
    assert c[1] == b'def'
    assert isinstance(c[1], bytes)
    assert not isinstance(c[1], str)
    assert isinstance(c[:0], table.Column)
    assert np.all(c[:2] == np.array([None, b'def']))
    assert not np.all(c[:2] == np.array([None, 'def']))


@pytest.mark.parametrize('Column', (table.Column, table.MaskedColumn))
def test_col_unicode_sandwich_bytes(Column):
    """
    Create a bytestring Column from bytes and ensure that it works in Python 3 in
    a convenient way like in Python 2.
    """
    # a-umlaut is a 2-byte character in utf-8, test fails with ascii encoding.
    # Stress the system by injecting non-ASCII characters.
    uba = 'bä'
    uba8 = uba.encode('utf-8')
    c = Column([uba8, b'def'])
    assert c.dtype.char == 'S'
    assert c[0] == uba
    assert isinstance(c[0], str)
    assert isinstance(c[:0], table.Column)
    assert np.all(c[:2] == np.array([uba, 'def']))

    assert isinstance(c[:], table.Column)
    assert c[:].dtype.char == 'S'

    # Array / list comparisons
    assert np.all(c == [uba, 'def'])

    ok = c == [uba8, b'def']
    assert type(ok) is type(c.data)  # noqa
    assert ok.dtype.char == '?'
    assert np.all(ok)

    assert np.all(c == np.array([uba, 'def']))
    assert np.all(c == np.array([uba8, b'def']))

    # Scalar compare
    cmps = (uba, uba8)
    for cmp in cmps:
        ok = c == cmp
        assert type(ok) is type(c.data)  # noqa
        assert np.all(ok == [True, False])


def test_col_unicode_sandwich_unicode():
    """
    Sanity check that Unicode Column behaves normally.
    """
    # On Py2 the unicode must be ASCII-compatible, else the final test fails.
    uba = 'bä'
    uba8 = uba.encode('utf-8')

    c = table.Column([uba, 'def'], dtype='U')
    assert c[0] == uba
    assert isinstance(c[:0], table.Column)
    assert isinstance(c[0], str)
    assert np.all(c[:2] == np.array([uba, 'def']))

    assert isinstance(c[:], table.Column)
    assert c[:].dtype.char == 'U'

    ok = c == [uba, 'def']
    assert type(ok) == np.ndarray
    assert ok.dtype.char == '?'
    assert np.all(ok)

    assert np.all(c != [uba8, b'def'])


def test_masked_col_unicode_sandwich():
    """
    Create a bytestring MaskedColumn and ensure that it works in Python 3 in
    a convenient way like in Python 2.
    """
    c = table.MaskedColumn([b'abc', b'def'])
    c[1] = np.ma.masked
    assert isinstance(c[:0], table.MaskedColumn)
    assert isinstance(c[0], str)

    assert c[0] == 'abc'
    assert c[1] is np.ma.masked

    assert isinstance(c[:], table.MaskedColumn)
    assert c[:].dtype.char == 'S'

    ok = c == ['abc', 'def']
    assert ok[0] == True  # noqa
    assert ok[1] is np.ma.masked
    assert np.all(c == [b'abc', b'def'])
    assert np.all(c == np.array(['abc', 'def']))
    assert np.all(c == np.array([b'abc', b'def']))

    for cmp in ('abc', b'abc'):
        ok = c == cmp
        assert type(ok) is np.ma.MaskedArray
        assert ok[0] == True  # noqa
        assert ok[1] is np.ma.masked


@pytest.mark.parametrize('Column', (table.Column, table.MaskedColumn))
def test_unicode_sandwich_set(Column):
    """
    Test setting
    """
    uba = 'bä'

    c = Column([b'abc', b'def'])

    c[0] = b'aa'
    assert np.all(c == ['aa', 'def'])

    c[0] = uba  # a-umlaut is a 2-byte character in utf-8, test fails with ascii encoding
    assert np.all(c == [uba, 'def'])
    # Each row is padded to the 4-character width of the separator line.
    assert c.pformat() == ['None', '----', '  ' + uba, ' def']

    c[:] = b'cc'
    assert np.all(c == ['cc', 'cc'])

    c[:] = uba
    assert np.all(c == [uba, uba])

    c[:] = ''
    c[:] = [uba, b'def']
    assert np.all(c == [uba, b'def'])


@pytest.mark.parametrize('class1', [table.MaskedColumn, table.Column])
@pytest.mark.parametrize('class2', [table.MaskedColumn, table.Column, str, list])
def test_unicode_sandwich_compare(class1, class2):
    """Test that comparing a bytestring Column/MaskedColumn with various
    str (unicode) object types gives the expected result.  Tests #6838.
    """
    obj1 = class1([b'a', b'c'])
    if class2 is str:
        obj2 = 'a'
    elif class2 is list:
        obj2 = ['a', 'b']
    else:
        obj2 = class2(['a', 'b'])

    assert np.all((obj1 == obj2) == [True, False])
    assert np.all((obj2 == obj1) == [True, False])

    assert np.all((obj1 != obj2) == [False, True])
    assert np.all((obj2 != obj1) == [False, True])

    assert np.all((obj1 > obj2) == [False, True])
    assert np.all((obj2 > obj1) == [False, False])

    assert np.all((obj1 <= obj2) == [True, False])
    assert np.all((obj2 <= obj1) == [True, True])

    assert np.all((obj1 < obj2) == [False, False])
    assert np.all((obj2 < obj1) == [False, True])

    assert np.all((obj1 >= obj2) == [True, True])
    assert np.all((obj2 >= obj1) == [True, False])


def test_unicode_sandwich_masked_compare():
    """Test the fix for #6839 from #6899."""
    c1 = table.MaskedColumn(['a', 'b', 'c', 'd'],
                            mask=[True, False, True, False])
    c2 = table.MaskedColumn([b'a', b'b', b'c', b'd'],
                            mask=[True, True, False, False])

    for cmp in ((c1 == c2), (c2 == c1)):
        assert cmp[0] is np.ma.masked
        assert cmp[1] is np.ma.masked
        assert cmp[2] is np.ma.masked
        assert cmp[3]

    for cmp in ((c1 != c2), (c2 != c1)):
        assert cmp[0] is np.ma.masked
        assert cmp[1] is np.ma.masked
        assert cmp[2] is np.ma.masked
        assert not cmp[3]

    # Note: comparisons <, >, >=, <= fail to return a masked array entirely,
    # see https://github.com/numpy/numpy/issues/10092.
def test_structured_masked_column_roundtrip():
    """A structured MaskedColumn can be passed back into MaskedColumn()."""
    original = table.MaskedColumn(
        [(1., 2.), (3., 4.)],
        mask=[(False, False), (False, False)],
        dtype='f8,f8',
    )
    assert len(original.dtype.fields) == 2

    rebuilt = table.MaskedColumn(original)
    assert_array_equal(rebuilt, original)


@pytest.mark.parametrize('dtype', ['i4,f4', 'f4,(2,)f8'])
def test_structured_empty_column_init(dtype):
    """An empty Column created with a structured dtype, a length and a shape
    ends up with the requested shape and dtype."""
    expected_dtype = np.dtype(dtype)
    col = table.Column(length=5, shape=(2,), dtype=expected_dtype)
    assert col.shape == (5, 2)
    assert col.dtype == expected_dtype


def test_column_value_access():
    """Can a column's underlying data consistently be accessed via `.value`,
    whether it is a `Column`, `MaskedColumn`, `Quantity`, or `Time`?"""
    data = np.array([1, 2, 3])
    columns = {
        'a': table.Column(data),
        'b': table.MaskedColumn(data),
        'c': u.Quantity(data),
        'd': time.Time(data, format='mjd'),
    }
    tbl = table.QTable(columns)

    # Exact class expected from ``.value`` for each column, checked in order.
    expected_types = (
        ('a', np.ndarray),
        ('b', np.ma.MaskedArray),
        ('c', np.ndarray),
        ('d', np.ndarray),
    )
    for name, expected in expected_types:
        assert type(tbl[name].value) == expected


def test_masked_column_serialize_method_propagation():
    """A customised ``info.serialize_method`` entry is carried over to
    columns derived from the original by copy, re-init, view and slice."""
    source = table.MaskedColumn([1., 2., 3.], mask=[True, False, True])
    assert source.info.serialize_method['ecsv'] == 'null_value'

    source.info.serialize_method['ecsv'] = 'data_mask'
    assert source.info.serialize_method['ecsv'] == 'data_mask'

    # Each entry derives a new column from ``source``; they are evaluated
    # one at a time, in the same order as the individual checks they replace.
    derivations = (
        lambda col: col.copy(),
        lambda col: table.MaskedColumn(col),
        lambda col: col.view(table.MaskedColumn),
        lambda col: col[1:],
    )
    for derive in derivations:
        derived = derive(source)
        assert derived.info.serialize_method['ecsv'] == 'data_mask'