import numpy as np

from numba import cuda, int32, complex128, void
from numba.core.errors import TypingError
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim


def culocal(A, B):
    C = cuda.local.array(1000, dtype=int32)
    for i in range(C.shape[0]):
        C[i] = A[i]
    for i in range(C.shape[0]):
        B[i] = C[i]


def culocalcomplex(A, B):
    C = cuda.local.array(100, dtype=complex128)
    for i in range(C.shape[0]):
        C[i] = A[i]
    for i in range(C.shape[0]):
        B[i] = C[i]


def culocal1tuple(A, B):
    C = cuda.local.array((5,), dtype=int32)
    for i in range(C.shape[0]):
        C[i] = A[i]
    for i in range(C.shape[0]):
        B[i] = C[i]


@skip_on_cudasim('PTX inspection not available in cudasim')
class TestCudaLocalMem(CUDATestCase):
    def test_local_array(self):
        sig = (int32[:], int32[:])
        jculocal = cuda.jit(sig)(culocal)
        self.assertTrue('.local' in jculocal.ptx[sig])
        A = np.arange(1000, dtype='int32')
        B = np.zeros_like(A)
        jculocal[1, 1](A, B)
        self.assertTrue(np.all(A == B))

    def test_local_array_1_tuple(self):
        """Ensure that local arrays can be constructed with 1-tuple shape
        """
        jculocal = cuda.jit('void(int32[:], int32[:])')(culocal1tuple)
        # Don't check if .local is in the ptx because the optimizer
        # may reduce it to registers.
        A = np.arange(5, dtype='int32')
        B = np.zeros_like(A)
        jculocal[1, 1](A, B)
        self.assertTrue(np.all(A == B))

    def test_local_array_complex(self):
        sig = 'void(complex128[:], complex128[:])'
        jculocalcomplex = cuda.jit(sig)(culocalcomplex)
        # The local memory would be turned into register
        # self.assertTrue('.local' in jculocalcomplex.ptx)
        A = (np.arange(100, dtype='complex128') - 1) / 2j
        B = np.zeros_like(A)
        jculocalcomplex[1, 1](A, B)
        self.assertTrue(np.all(A == B))

    def check_dtype(self, f):
        # Find the typing of the dtype argument to cuda.local.array
        annotation = next(iter(f.overloads.values()))._type_annotation
        l_dtype = annotation.typemap['l'].dtype
        # Ensure that the typing is correct
        self.assertEqual(l_dtype, int32)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_numba_dtype(self):
        # Check that Numba types can be used as the dtype of a local array
        @cuda.jit(void(int32[::1]))
        def f(x):
            l = cuda.local.array(10, dtype=int32)
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_numpy_dtype(self):
        # Check that NumPy types can be used as the dtype of a local array
        @cuda.jit(void(int32[::1]))
        def f(x):
            l = cuda.local.array(10, dtype=np.int32)
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_string_dtype(self):
        # Check that strings can be used to specify the dtype of a local array
        @cuda.jit(void(int32[::1]))
        def f(x):
            l = cuda.local.array(10, dtype='int32')
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_invalid_string_dtype(self):
        # Check that strings of invalid dtypes cause a typing error
        re = ".*Invalid NumPy dtype specified: 'int33'.*"
        with self.assertRaisesRegex(TypingError, re):
            @cuda.jit(void(int32[::1]))
            def f(x):
                l = cuda.local.array(10, dtype='int33')
                l[0] = x[0]
                x[0] = l[0]


if __name__ == '__main__':
    unittest.main()