import re

# When sphinx (including the napoleon extension) parses the parameters
# section of a docstring, it converts the information into field lists.
# Some items in the list are for the parameter type. When the type fields
# are processed, the text is split and some tokens are turned into
# pending_xref nodes. These nodes are responsible for creating links.
#
# numpydoc does not create field lists, so the type information is
# not placed into fields that can be processed to make links. Instead,
# when parsing the type information, we identify tokens that are
# link-worthy and wrap them in an :obj: role.

# Note: we never split on commas that are not followed by a space,
# because doing so risks creating bad rst markup.

QUALIFIED_NAME_RE = re.compile(
    # Matches a string that consists entirely of one (possibly dotted)
    # name, optionally prefixed by '~' or '.',
    # e.g. int, numpy.array, ~numpy.array, .class_in_current_module
    r'^'
    r'[~\.]?'
    r'[a-zA-Z_]\w*'
    r'(?:\.[a-zA-Z_]\w*)*'
    r'$'
)

CONTAINER_SPLIT_RE = re.compile(
    # Splits on brackets (including any whitespace around them) and on
    # commas followed by whitespace. The capturing group makes re.split
    # keep the delimiters, e.g. dict(str, int) is split into
    #    ['dict', '(', 'str', ', ', 'int', ')', '']
    r'(\s*[\[\]\(\)\{\}]\s*|,\s+)'
)

CONTAINER_SPLIT_REJECT_RE = re.compile(
    # Detects a closing bracket immediately followed by a word character.
    # Splitting such text leads to bad markup, e.g.
    # {int}qualified_name
    r'[\]\)\}]\w'
)

DOUBLE_QUOTE_SPLIT_RE = re.compile(
    # Splits off double-backtick literals so they can be kept verbatim,
    # e.g. 'callable ``f(x0, *args)`` or ``f(x0, y0, *args)``' is split into
    #    ['callable ', '``f(x0, *args)``', ' or ', '``f(x0, y0, *args)``', '']
    r'(``.+?``)'
)

ROLE_SPLIT_RE = re.compile(
    # Splits to preserve reST roles of the form :name:`content`.
    # The closing backtick must not be preceded by a backslash.
    r'(:\w+:`.+?(?<!\\)`)'
)

SINGLE_QUOTE_SPLIT_RE = re.compile(
    # Splits to preserve expressions quoted in single backticks
    r'(`.+?`)'
)

TEXT_SPLIT_RE = re.compile(
    # Splits on ' or ', ' | ', ', ' and ' '; the capturing group makes
    # re.split keep these delimiters in its result.
    r'(\s+or\s+|\s+\|\s+|,\s+|\s+)'
)

# Bracket characters; make_xref treats any text containing one of them
# as a container expression.
CONTAINER_CHARS = set('[](){}')

# Save people some time and add some common standard aliases.
# Each key is a word as it may be written in a type description. Each
# value is either a complete reST role (e.g. ':class:`python:list`') or
# a bare fully qualified name (e.g. 'numpy.ndarray').
# NOTE(review): presumably this is the default content of the
# `xref_aliases` mapping handed to make_xref -- confirm against caller.
# NOTE(review): the 'python:' / 'numpy:' target prefixes look like
# intersphinx inventory names -- confirm against the sphinx configuration.
DEFAULT_LINKS = {
    # Python
    'None': ':data:`python:None`',
    'bool': ':ref:`bool <python:bltin-boolean-values>`',
    'boolean': ':ref:`bool <python:bltin-boolean-values>`',
    'True': ':data:`python:True`',
    'False': ':data:`python:False`',
    'list': ':class:`python:list`',
    'tuple': ':class:`python:tuple`',
    'str': ':class:`python:str`',
    'string': ':class:`python:str`',
    'dict': ':class:`python:dict`',
    'float': ':class:`python:float`',
    'int': ':class:`python:int`',
    'callable': ':func:`python:callable`',
    'iterable': ':term:`python:iterable`',
    'sequence': ':term:`python:sequence`',
    'contextmanager': ':func:`python:contextlib.contextmanager`',
    'namedtuple': ':func:`python:collections.namedtuple`',
    'generator': ':term:`python:generator`',
    # NumPy
    'array': 'numpy.ndarray',
    'ndarray': 'numpy.ndarray',
    'np.ndarray': 'numpy.ndarray',
    'array-like': ':term:`numpy:array_like`',
    'array_like': ':term:`numpy:array_like`',
    'scalar': ':ref:`scalar <numpy:arrays.scalars>`',
    'RandomState': 'numpy.random.RandomState',
    'np.random.RandomState': 'numpy.random.RandomState',
    'np.inf': ':data:`numpy.inf`',
    'np.nan': ':data:`numpy.nan`',
    'numpy': ':mod:`numpy`',
}


def make_xref(param_type, xref_aliases, xref_ignore):
    """Add sphinx cross-reference role(s) to the type text `param_type`.

    Names that do not resolve to an explicit role through `xref_aliases`
    are linked with the default ``:obj:`` role.

    Parameters
    ----------
    param_type : str
        Type description text to process.
    xref_aliases : dict
        Mapping from common abbreviations and aliases to the text that
        replaces them, typically a fully qualified name that can be
        cross-referenced.
    xref_ignore : set or "all"
        Words that must not be cross-referenced. The string ``'all'``
        (matched case-insensitively) may be given instead of a set; in
        that case a name that has no entry in `xref_aliases` and is not
        already wrapped in a reST role is returned without a role.

    Returns
    -------
    out : str
        The text in which fully-qualified names and known terms may have
        been wrapped in a ``:obj:`` role.

    Raises
    ------
    TypeError
        If `xref_ignore` is a string other than ``'all'``.
    """
    # Normalise `xref_ignore` into a set of words to skip plus a flag
    # saying whether names without an alias still get the default role.
    if isinstance(xref_ignore, str):
        if xref_ignore.lower() != "all":
            raise TypeError(
                f"xref_ignore must be a set or 'all', got {xref_ignore}"
            )
        skipped_words = set()
        link_unknown = False
    else:
        skipped_words = xref_ignore
        link_unknown = True

    # `title` is the text as written, `target` what it resolves to.
    title = param_type
    target = xref_aliases[title] if title in xref_aliases else title

    # A lone qualified name needs no further parsing.
    if QUALIFIED_NAME_RE.match(target) and target not in skipped_words:
        if target != title:
            return f':obj:`{title} <{target}>`'
        return f':obj:`{target}`' if link_unknown else target

    def _keep_matches_and_recurse(text, splitter):
        """
        Split `text` with the compiled regex `splitter`, keep the pieces
        that match `splitter` verbatim, run make_xref on every other
        piece and glue everything back together. Text that `splitter`
        does not split is returned unchanged.
        """
        pieces = splitter.split(text)
        if len(pieces) <= 1:
            return text

        last = len(pieces) - 1
        rendered = []
        for pos, piece in enumerate(pieces):
            if splitter.match(piece):
                rendered.append(piece)
                continue
            out = make_xref(piece, xref_aliases, xref_ignore)
            # An opening bracket directly after a role is bad markup,
            # so a backslash is inserted between the two:
            # :role:`type`( becomes :role:`type`\(
            if (out.endswith('`') and pos < last
                    and pieces[pos + 1][0] in '([{'):
                out += '\\'
            rendered.append(out)
        return ''.join(rendered)

    # From here on the alias replacement (if any) is the text to parse.
    text = target

    # The cases are handled in an order that prevents conflicts. For
    # each case: find a pattern that must be left untouched, split it
    # off, re-apply make_xref to the remaining parts and join the
    # results back together with the untouched pattern.
    backtick_cases = (
        ('``', DOUBLE_QUOTE_SPLIT_RE),   # unsplittable literals
        (':`', ROLE_SPLIT_RE),           # existing roles
        ('`', SINGLE_QUOTE_SPLIT_RE),    # other quoted expressions
    )
    for marker, splitter in backtick_cases:
        if marker in text:
            return _keep_matches_and_recurse(text, splitter)

    # Any sort of bracket '[](){}'
    if not CONTAINER_CHARS.isdisjoint(text):
        if CONTAINER_SPLIT_REJECT_RE.search(text):
            return text
        return _keep_matches_and_recurse(text, CONTAINER_SPLIT_RE)

    # Common splitter tokens
    return _keep_matches_and_recurse(text, TEXT_SPLIT_RE)