# -*- coding: utf-8 -*-

# -----------------------------------------------------------------------------
# Copyright (c) Spyder Project Contributors

# Licensed under the terms of the MIT License
# (see spyder/__init__.py for details)
# -----------------------------------------------------------------------------

"""
Text encoding utilities, text file I/O.

Functions 'get_coding', 'decode' and 'encode' come from Eric4 source code (Utilities/__init__.py)

Copyright © 2003-2009 Detlev Offenbach
"""

from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF32
import locale
import os
import re
import sys
from chardet.universaldetector import UniversalDetector
from anaconda_navigator.external.binaryornot.check import is_binary
from anaconda_navigator.utils.py3compat import is_binary_string, is_string, is_unicode, to_text_string


# Encoding reported by the current locale; getfilesystemencoding() falls back to it.
PREFERRED_ENCODING = locale.getpreferredencoding()

# -----------------------------------------------------------------------------
#  Functions for encoding and decoding bytes that come from
#  the *file system*.
# -----------------------------------------------------------------------------


# The default encoding for file paths and environment variables should be set to match the default encoding that the OS
# is using.
def getfilesystemencoding():
    """
    Return the encoding used for file names and environment variables.

    The value reported by ``sys.getfilesystemencoding()`` is used when available; if the interpreter reports
    ``None``, the locale's preferred encoding (``PREFERRED_ENCODING``) is returned instead.
    """
    detected = sys.getfilesystemencoding()
    # A missing value means the interpreter could not determine the encoding, so rely on the locale.
    return PREFERRED_ENCODING if detected is None else detected


# Resolved once at import time; used by to_unicode_from_fs() and to_fs_from_unicode().
FS_ENCODING = getfilesystemencoding()


def to_unicode_from_fs(string):
    """
    Convert a value coming from the file system into text.

    Byte strings are decoded with ``FS_ENCODING``; when that fails the byte string is handed back untouched.
    Text strings are returned as they are. Anything that is not a string is treated as a QString and converted
    through its ``toUtf8()`` method.
    """
    if not is_string(string):
        # Not a Python string: treated as a QString exposing toUtf8().
        return to_text_string(string.toUtf8(), 'utf-8')

    if not is_binary_string(string):
        return string

    try:
        return string.decode(FS_ENCODING)
    except (UnicodeError, TypeError):
        # Best effort only: keep the raw bytes when they cannot be decoded.
        return string


def to_fs_from_unicode(unic):
    """
    Convert text into a byte string using the file system encoding.

    Values that are not unicode text, and text that cannot be encoded with ``FS_ENCODING``, are returned
    unchanged.
    """
    if not is_unicode(unic):
        return unic

    try:
        return unic.encode(FS_ENCODING)
    except (UnicodeError, TypeError):
        # Best effort only: keep the original text when it cannot be encoded.
        return unic


# -----------------------------------------------------------------------------
#  Functions for encoding and decoding *text data* itself, usually originating
#  from or destined for the *contents* of a file.
# -----------------------------------------------------------------------------

# Codecs for working with files and text.
# CODING_RE captures the codec name that follows a ``coding:`` or ``coding=`` marker.
CODING_RE = re.compile(r'coding[:=]\s*([-\w_.]+)')
# Codec names that get_coding() accepts from such a marker; any other declared name is ignored there.
CODECS = [
    'utf-8', 'iso8859-1', 'iso8859-15', 'ascii', 'koi8-r', 'koi8-u', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
    'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10', 'iso8859-13', 'iso8859-14', 'latin-1', 'utf-16'
]


def get_coding(text):
    """
    Determine the coding of a text.

    The first two lines are searched for a coding declaration; a declared codec is only returned when it is listed
    in CODECS. When no usable declaration is found and the text is a byte string, the same two lines are passed to
    chardet and its result is returned.

    @param text text to inspect (string)
    @return coding string, or None when nothing could be determined
    """
    header_lines = text.splitlines()[:2]

    for candidate in header_lines:
        try:
            match = CODING_RE.search(to_text_string(candidate))
        except UnicodeDecodeError:
            # to_text_string may fail here because the real encoding of the line is not known yet.
            continue
        if match is None:
            continue
        declared = match.group(1)
        # A declaration can name something that is not a real codec, so only known names are trusted.
        if declared in CODECS:
            return declared

    if not is_binary_string(text):
        return None

    # Fallback using chardet
    detector = UniversalDetector()
    for candidate in header_lines:
        detector.feed(candidate)
        if detector.done:
            break
    detector.close()
    return detector.result['encoding']


def decode(text):
    """
    Function to decode a text.

    A leading byte-order mark is honoured first, then the coding reported by get_coding(); if neither yields a
    result the text is decoded as UTF-8 and, as a last resort, as Latin-1.

    @param text text to decode (byte string)
    @return decoded text and encoding
    """
    try:
        if text.startswith(BOM_UTF8):  # UTF-8 with BOM
            return to_text_string(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'

        # UTF-32 must be tested before UTF-16: the little-endian UTF-32 BOM (FF FE 00 00) begins with the
        # little-endian UTF-16 BOM (FF FE), so testing UTF-16 first decodes UTF-32 data as UTF-16.
        if text.startswith(BOM_UTF32):  # UTF-32 with BOM
            try:
                return to_text_string(text[len(BOM_UTF32):], 'utf-32'), 'utf-32'
            except UnicodeError:
                # Not valid UTF-32 after all (e.g. UTF-16 text whose first character is NUL);
                # let the UTF-16 check below have a go.
                pass

        if text.startswith(BOM_UTF16):  # UTF-16 with BOM
            return to_text_string(text[len(BOM_UTF16):], 'utf-16'), 'utf-16'

        coding = get_coding(text)
        if coding:
            return to_text_string(text, coding), coding

    except (UnicodeError, LookupError):
        # Wrong or unknown codec: fall back to the guesses below.
        pass

    # Assume UTF-8
    try:
        return to_text_string(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass

    # Assume Latin-1 (behaviour before 3.7.1)
    return to_text_string(text, 'latin-1'), 'latin-1-guessed'


def encode(text, orig_coding):
    """
    Function to encode a text.

    Candidates are tried in this order: the original coding, the coding declared inside the text, the original
    coding with its '-default' / '-guessed' suffix removed, ASCII, and finally UTF-8 without BOM.

    @param text text to encode (string).
    @param orig_coding type of the original coding (string).
    @return encoded text and encoding.
    @raise RuntimeError if the coding declared inside the text cannot encode it.
    """
    # A BOM-marked file keeps its BOM.
    if orig_coding == 'utf-8-bom':
        return BOM_UTF8 + text.encode('utf-8'), 'utf-8-bom'

    # Original encoding first.
    if orig_coding:
        try:
            payload = text.encode(orig_coding)
        except (UnicodeError, LookupError):
            pass
        else:
            return payload, orig_coding

    # Then the coding declared in the text itself; a declaration that does not work is an error.
    declared = get_coding(text)
    if declared:
        try:
            return text.encode(declared), declared
        except (UnicodeError, LookupError) as exception:
            raise RuntimeError(f'Incorrect encoding ({declared})') from exception

    # Then the original coding without its '-default' / '-guessed' marker.
    if orig_coding and orig_coding.endswith(('-default', '-guessed')):
        stripped = orig_coding.replace('-default', '').replace('-guessed', '')
        try:
            payload = text.encode(stripped)
        except (UnicodeError, LookupError):
            pass
        else:
            return payload, stripped

    # Then plain ASCII.
    try:
        payload = text.encode('ascii')
    except UnicodeError:
        pass
    else:
        return payload, 'ascii'

    # Last resort: UTF-8 without BOM.
    return text.encode('utf-8'), 'utf-8'


def write(text, filename, encoding='utf-8', mode='wb'):
    """
    Encode 'text' with encode() and store the result in 'filename'.

    'encoding' is the coding to try first and 'mode' is passed to open().

    Return the encoding that was actually used, which may differ from the requested one.
    """
    payload, used_encoding = encode(text, encoding)
    with open(filename, mode) as stream:  # pylint: disable=unspecified-encoding
        stream.write(payload)
    return used_encoding


def writelines(lines, filename, encoding='utf-8', mode='wb'):
    """
    Join 'lines' with the platform line separator and store them in 'filename' through write().

    Return the encoding that was actually used, which may differ from the requested one.
    """
    joined = os.linesep.join(lines)
    return write(joined, filename, encoding, mode)


def read(filename, encoding='utf-8'):
    """
    Load the raw bytes of 'filename' and decode them with decode().

    The 'encoding' argument is accepted but not consulted; the returned encoding is the one decode() reports.

    Return text and encoding.
    """
    with open(filename, 'rb') as stream:
        raw = stream.read()
        text, detected = decode(raw)
    return text, detected


def readlines(filename, encoding='utf-8'):
    """
    Read 'filename' through read() and split its text on the platform line separator.

    Return lines and encoding.
    """
    content, detected = read(filename, encoding)
    lines = content.split(os.linesep)
    return lines, detected


def is_text_file(filename):
    """
    Tell whether the given path is a text-like file.

    Return the negation of is_binary(); a path that cannot be inspected because of an OS error counts as not text.
    """
    try:
        binary = is_binary(filename)
    except OSError:  # IOError is an alias of OSError on Python 3
        return False
    return not binary


def ensure_binary(value):
    """
    Return 'value' encoded as UTF-8 when it can be encoded. From conda/common.

    Objects that raise AttributeError on ``.encode`` (bytes, for instance) are assumed to be binary already and
    are returned unchanged.
    """
    try:
        encoded = value.encode('utf-8')
    except AttributeError:
        # No usable 'encode': assume the value is already binary and leave it alone.
        return value
    return encoded