# -*- coding: utf-8 -*-
"""Additional utilities to work with URLs."""

__all__ = ['join', 'netloc']

import typing
from urllib import parse


def join(base: str, *args: str) -> str:
    """
    Join multiple URL parts into a single URL.

    Each part is resolved against the accumulated result with
    ``urllib.parse.urljoin``, so a part that starts with ``/`` or is itself
    an absolute URL replaces the path (or the whole URL) built so far.

    :param base: Base URL to join parts to.
    :param args: URL parts to join.
    :return: Joined URL. ``base`` is returned unchanged when no parts are given.
    """
    for arg in args:
        # A trailing slash prevents ``urljoin`` from dropping the last path
        # element of the base on relative joins:
        # parse.urljoin('https://example.org/A', 'B/C') == 'https://example.org/B/C'
        # parse.urljoin('https://example.org/A/', 'B/C') == 'https://example.org/A/B/C'
        if not base.endswith('/'):
            base += '/'
        base = parse.urljoin(base, arg)
    return base


def netloc(url: typing.Union[None, bytes, str]) -> str:
    """
    Retrieve the network location (domain name, with port if present) from `url`.

    This function is designed to work with both complete URLs like
    'https://example.org/page' and schema-less URLs like 'example.org/path'
    or 'example.org:8080/path'.

    :param url: URL to retrieve domain from. Bytes are decoded with the
        default codec (UTF-8); ``None`` yields an empty string.
    :return: Parsed network location, or '' when none can be determined.
    """
    if url is None:
        return ''
    if not isinstance(url, str):
        url = url.decode()

    result: parse.ParseResult = parse.urlparse(url)
    if result.netloc:
        return result.netloc

    first_segment = result.path.split('/')[0]

    # For schema-less input such as 'example.org:8080/path', ``urlparse``
    # takes 'example.org' for the scheme and '8080/path' for the path.
    # A scheme followed by an all-digit first path segment is therefore
    # treated as 'host:port'; re-parsing as a network-path reference
    # ('//host:port/...') recovers the location with its original case.
    if (
        result.scheme
        and first_segment
        and all(char in '0123456789' for char in first_segment)
    ):
        return parse.urlparse('//' + url).netloc

    return first_segment