import numpy as np
import pytest

from pandas.compat import np_version_under1p20

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import FloatingArray
from pandas.core.arrays.floating import (
    Float32Dtype,
    Float64Dtype,
)


def test_uses_pandas_na():
    """A missing entry in a Float64 array is returned as the ``pd.NA`` singleton."""
    arr = pd.array([1, None], dtype=Float64Dtype())
    assert arr[1] is pd.NA


def test_floating_array_constructor():
    """FloatingArray accepts a float ndarray plus a bool ndarray mask, nothing else."""
    values = np.array([1, 2, 3, 4], dtype="float64")
    mask = np.array([False, False, False, True], dtype="bool")

    built = FloatingArray(values, mask)
    tm.assert_extension_array_equal(
        built, pd.array([1, 2, 3, np.nan], dtype="Float64")
    )
    tm.assert_numpy_array_equal(built._data, values)
    tm.assert_numpy_array_equal(built._mask, mask)

    # Lists (for either argument) and non-float data are all rejected with
    # the same message.
    bad_input_msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
    rejected_inputs = [
        (values.tolist(), mask),
        (values, mask.tolist()),
        (values.astype(int), mask),
    ]
    for bad_values, bad_mask in rejected_inputs:
        with pytest.raises(TypeError, match=bad_input_msg):
            FloatingArray(bad_values, bad_mask)

    # The mask has no default.
    missing_mask_msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
    with pytest.raises(TypeError, match=missing_mask_msg):
        FloatingArray(values)


def test_floating_array_disallows_float16():
    """Constructing a FloatingArray from float16 data raises TypeError."""
    # GH#44715
    half_precision = np.array([1, 2], dtype=np.float16)
    all_valid = np.array([False, False])

    with pytest.raises(
        TypeError, match="FloatingArray does not support np.float16 dtype"
    ):
        FloatingArray(half_precision, all_valid)


def test_floating_array_disallows_Float16_dtype(request):
    """``pd.array(..., dtype="Float16")`` raises TypeError."""
    # GH#44715
    if np_version_under1p20:
        # https://github.com/numpy/numpy/issues/20512
        xfail_marker = pytest.mark.xfail(
            reason="numpy does not raise on np.dtype('Float16')"
        )
        request.node.add_marker(xfail_marker)

    with pytest.raises(TypeError, match="data type 'Float16' not understood"):
        pd.array([1.0, 2.0], dtype="Float16")


def test_floating_array_constructor_copy():
    """The input arrays are reused by default and replaced when ``copy=True``."""
    values = np.array([1, 2, 3, 4], dtype="float64")
    mask = np.array([False, False, False, True], dtype="bool")

    # Default: the very same ndarray objects are stored.
    shared = FloatingArray(values, mask)
    assert shared._data is values
    assert shared._mask is mask

    # copy=True: neither stored array is the input object.
    copied = FloatingArray(values, mask, copy=True)
    assert copied._data is not values
    assert copied._mask is not mask


def test_to_array():
    """``pd.array`` on a list of Python floats gives a Float64 array."""
    floats = [0.1, 0.2, 0.3, 0.4]
    inferred = pd.array(floats)
    explicit = pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float64")
    tm.assert_extension_array_equal(inferred, explicit)


@pytest.mark.parametrize(
    "a, b",
    [
        ([1, None], [1, pd.NA]),
        ([None], [pd.NA]),
        ([None, np.nan], [pd.NA, pd.NA]),
        ([1, np.nan], [1, pd.NA]),
        ([np.nan], [pd.NA]),
    ],
)
def test_to_array_none_is_nan(a, b):
    """``None`` and ``np.nan`` inputs produce the same array as ``pd.NA`` inputs."""
    from_raw = pd.array(a, dtype="Float64")
    from_na = pd.array(b, dtype="Float64")
    tm.assert_extension_array_equal(from_raw, from_na)


def test_to_array_mixed_integer_float():
    """Mixed int/float lists (with or without None) are inferred as Float64."""
    cases = [
        ([1, 2.0], [1.0, 2.0]),
        ([1, None, 2.0], [1.0, None, 2.0]),
    ]
    for mixed, as_floats in cases:
        inferred = pd.array(mixed)
        explicit = pd.array(as_floats, dtype="Float64")
        tm.assert_extension_array_equal(inferred, explicit)


@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        ["1", "2"],
        "foo",
        1,
        1.0,
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [[1, 2], [3, 4]],
        [np.nan, {"a": 1}],
        # GH#44514 all-NA case used to get quietly swapped out before checking ndim
        np.array([pd.NA] * 6, dtype=object).reshape(3, 2),
    ],
)
def test_to_array_error(values):
    """Inputs that cannot become a FloatingArray raise TypeError or ValueError."""
    # Any one of these messages is acceptable for the parametrized inputs.
    acceptable_messages = (
        "cannot be converted to a FloatingDtype",
        "values must be a 1D list-like",
        "Cannot pass scalar",
    )
    pattern = "|".join(acceptable_messages)
    with pytest.raises((TypeError, ValueError), match=pattern):
        pd.array(values, dtype="Float64")


def test_to_array_inferred_dtype():
    """Without ``dtype``, an ndarray's float width is kept; a list gives Float64."""
    # ndarray input carries a dtype -> it is respected
    from_float32 = pd.array(np.array([1, 2], dtype="float32"))
    assert from_float32.dtype == Float32Dtype()

    # plain list input has no dtype -> always float64
    from_list = pd.array([1.0, 2.0])
    assert from_list.dtype == Float64Dtype()


def test_to_array_dtype_keyword():
    """An explicit ``dtype`` keyword decides the resulting dtype."""
    from_list = pd.array([1, 2], dtype="Float32")
    assert from_list.dtype == Float32Dtype()

    # the keyword overrides the dtype already carried by the input
    from_float32 = pd.array(np.array([1, 2], dtype="float32"), dtype="Float64")
    assert from_float32.dtype == Float64Dtype()


def test_to_array_integer():
    """Integer inputs convert to the requested floating dtype."""
    from_ints = pd.array([1, 2], dtype="Float64")
    from_floats = pd.array([1.0, 2.0], dtype="Float64")
    tm.assert_extension_array_equal(from_ints, from_floats)

    # for integer dtypes, the itemsize is not preserved
    # TODO can we specify "floating" in general?
    from_int32 = pd.array(np.array([1, 2], dtype="int32"), dtype="Float64")
    assert from_int32.dtype == Float64Dtype()


@pytest.mark.parametrize(
    "bool_values, values, target_dtype, expected_dtype",
    [
        ([False, True], [0, 1], Float64Dtype(), Float64Dtype()),
        ([False, True], [0, 1], "Float64", Float64Dtype()),
        ([False, True, np.nan], [0, 1, np.nan], Float64Dtype(), Float64Dtype()),
    ],
)
def test_to_array_bool(bool_values, values, target_dtype, expected_dtype):
    """Boolean inputs convert to the same array as their 0/1 equivalents."""
    from_bools = pd.array(bool_values, dtype=target_dtype)
    assert from_bools.dtype == expected_dtype

    from_numbers = pd.array(values, dtype=target_dtype)
    tm.assert_extension_array_equal(from_bools, from_numbers)


def test_series_from_float(data):
    """A Series rebuilt from floats or a list, with the dtype name, matches the original."""
    # construct from our dtype & string dtype
    dtype_name = str(data.dtype)

    # from float
    reference = pd.Series(data)
    as_float_ndarray = data.to_numpy(na_value=np.nan, dtype="float")
    rebuilt = pd.Series(as_float_ndarray, dtype=dtype_name)
    tm.assert_series_equal(rebuilt, reference)

    # from list
    reference = pd.Series(data)
    as_list = np.array(data).tolist()
    rebuilt = pd.Series(as_list, dtype=dtype_name)
    tm.assert_series_equal(rebuilt, reference)