import numpy as np from numba.core import types from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase from numba import cuda from numba.cuda import libdevice, compile_ptx from numba.cuda.libdevicefuncs import functions, create_signature def use_sincos(s, c, x): i = cuda.grid(1) if i < len(x): sr, cr = libdevice.sincos(x[i]) s[i] = sr c[i] = cr def use_frexp(frac, exp, x): i = cuda.grid(1) if i < len(x): fracr, expr = libdevice.frexp(x[i]) frac[i] = fracr exp[i] = expr def use_sad(r, x, y, z): i = cuda.grid(1) if i < len(x): r[i] = libdevice.sad(x[i], y[i], z[i]) @skip_on_cudasim('Libdevice functions are not supported on cudasim') class TestLibdevice(CUDATestCase): """ Some tests of libdevice function wrappers that check the returned values. These are mainly to check that the generation of the implementations results in correct typing and lowering for each type of function return (e.g. scalar return, UniTuple return, Tuple return, etc.). """ def test_sincos(self): # Tests return of a UniTuple from a libdevice function arr = np.arange(100, dtype=np.float64) sres = np.zeros_like(arr) cres = np.zeros_like(arr) cufunc = cuda.jit(use_sincos) cufunc[4, 32](sres, cres, arr) np.testing.assert_allclose(np.cos(arr), cres) np.testing.assert_allclose(np.sin(arr), sres) def test_frexp(self): # Test return of a Tuple from a libdevice function arr = np.linspace(start=1.0, stop=10.0, num=100, dtype=np.float64) fracres = np.zeros_like(arr) expres = np.zeros(shape=arr.shape, dtype=np.int32) cufunc = cuda.jit(use_frexp) cufunc[4, 32](fracres, expres, arr) frac_expect, exp_expect = np.frexp(arr) np.testing.assert_array_equal(frac_expect, fracres) np.testing.assert_array_equal(exp_expect, expres) def test_sad(self): # Test return of a scalar from a libdevice function x = np.arange(0, 200, 2) y = np.arange(50, 150) z = np.arange(15, 115) r = np.zeros_like(x) cufunc = cuda.jit(use_sad) cufunc[4, 32](r, x, y, z) np.testing.assert_array_equal(np.abs(x - y) + z, r) # A template for generating tests of compiling calls to libdevice functions. # The purpose of the call and assignment of the return variables is to ensure # the actual function implementations are not thrown away resulting in a PTX # implementation that only contains the ret instruction - this may hide certain # errors. function_template = """\ from numba.cuda import libdevice def pyfunc(%(pyargs)s): ret = libdevice.%(func)s(%(funcargs)s) %(retvars)s = ret """ def make_test_call(libname): """ Generates a test function for each libdevice function. """ def _test_call_functions(self): # Strip off '__nv_' from libdevice name to get Python name apiname = libname[5:] apifunc = getattr(libdevice, apiname) retty, args = functions[libname] sig = create_signature(retty, args) # Construct arguments to the libdevice function. These are all # non-pointer arguments to the underlying bitcode function. funcargs = ", ".join(['a%d' % i for i, arg in enumerate(args) if not arg.is_ptr]) # Arguments to the Python function (`pyfunc` in the template above) are # the arguments to the libdevice function, plus as many extra arguments # as there are in the return type of the libdevice function - one for # scalar-valued returns, or the length of the tuple for tuple-valued # returns. if isinstance(sig.return_type, (types.Tuple, types.UniTuple)): # Start with the parameters for the return values pyargs = ", ".join(['r%d' % i for i in range(len(sig.return_type))]) # Add the parameters for the argument values pyargs += ", " + funcargs # Generate the unpacking of the return value from the libdevice # function into the Python function return values (`r0`, `r1`, # etc.). retvars = ", ".join(['r%d[0]' % i for i in range(len(sig.return_type))]) else: # Scalar return is a more straightforward case pyargs = "r0, " + funcargs retvars = "r0[0]" # Create the string containing the function to compile d = { 'func': apiname, 'pyargs': pyargs, 'funcargs': funcargs, 'retvars': retvars } code = function_template % d # Convert the string to a Python function locals = {} exec(code, globals(), locals) pyfunc = locals['pyfunc'] # Compute the signature for compilation. This mirrors the creation of # arguments to the Python function above. pyargs = [ arg.ty for arg in args if not arg.is_ptr ] if isinstance(sig.return_type, (types.Tuple, types.UniTuple)): pyreturns = [ret[::1] for ret in sig.return_type] pyargs = pyreturns + pyargs else: pyargs.insert(0, sig.return_type[::1]) ptx, resty = compile_ptx(pyfunc, pyargs) # If the function body was discarded by optimization (therefore making # the test a bit weak), there won't be any loading of parameters - # ensure that a load from parameters occurs somewhere in the PTX self.assertIn('ld.param', ptx) # Returning the result (through a passed-in array) should also require # a store to global memory, so check for at least one of those too. self.assertIn('st.global', ptx) return _test_call_functions @skip_on_cudasim('Compilation to PTX is not supported on cudasim') class TestLibdeviceCompilation(unittest.TestCase): """ Class for holding all tests of compiling calls to libdevice functions. We generate the actual tests in this class (as opposed to using subTest and one test within this class) because there are a lot of tests, and it makes the test suite appear frozen to test them all as subTests in one test. """ for libname in functions: setattr(TestLibdeviceCompilation, 'test_%s' % libname, make_test_call(libname)) if __name__ == '__main__': unittest.main()