a
    &b9                     @   s   d Z ddlZddlZddlmZmZ ddlmZmZm	Z	m
Z
 ddlmZmZ G dd deZG d	d
 d
eZG dd dejZeje ddee dddZdd ZdddZG dd deZG dd deZdS )z
XPath selectors based on lxml
    N)etreehtml   )flatteniflattenextract_regexshorten)HTMLTranslatorGenericTranslatorc                   @   s   e Zd ZdS )CannotRemoveElementWithoutRootN__name__
__module____qualname__ r   r   .lib/python3.9/site-packages/parsel/selector.pyr      s   r   c                   @   s   e Zd ZdS ) CannotRemoveElementWithoutParentNr   r   r   r   r   r      s   r   c                       s   e Zd Z fddZ  ZS )SafeXMLParserc                    s&   | dd tt| j|i | d S )NZresolve_entitiesF)
setdefaultsuperr   __init__)selfargskwargs	__class__r   r   r      s    zSafeXMLParser.__init__)r   r   r   r   __classcell__r   r   r   r   r      s   r   r   )_parser_csstranslator_tostring_methodxml)r   r    c                 C   s(   | d u rdS | t v r| S td|  d S )Nr   zInvalid type: %s)_ctgroup
ValueError)str   r   r   _st&   s
    r$   c                 C   sR   |   dddpd}|ddd}tj|||d}|du rNtjd||d}|S )	z8Create root node for text using given parser class.
      utf8s   <html/>T)Zrecoverencoding)parserbase_urlN)stripreplaceencoder   Z
fromstring)textZ
parser_clsr*   bodyr)   rootr   r   r   create_root_node/   s    r1   c                       s   e Zd ZdZ fddZ fddZdd Zdd	d
Zdd ZdddZ	dddZ
dd ZeZdddZeZedd Zdd Z  ZS )SelectorListz
    The :class:`SelectorList` class is a subclass of the builtin ``list``
    class, which provides a few additional methods.
    c                    s   t t| ||}| |S N)r   r2   __getslice__r   )r   ijor   r   r   r4   A   s    zSelectorList.__getslice__c                    s(   t t| |}t|tr$| |S |S r3   )r   r2   __getitem__
isinstanceslicer   )r   posr7   r   r   r   r8   E   s    zSelectorList.__getitem__c                 C   s   t dd S )Nz!can't pickle SelectorList objects	TypeErrorr   r   r   r   __getstate__I   s    zSelectorList.__getstate__Nc                    s    |  t fdd| D S )a  
        Call the ``.xpath()`` method for each element in this list and return
        their results flattened as another :class:`SelectorList`.

        ``query`` is the same argument as the one in :meth:`Selector.xpath`

        ``namespaces`` is an optional ``prefix: namespace-uri`` mapping (dict)
        for additional prefixes to those registered with ``register_namespace(prefix, uri)``.
        Contrary to ``register_namespace()``, these prefixes are not
        saved for future calls.

        Any additional named arguments can be used to pass values for XPath
        variables in the XPath expression, e.g.::

            selector.xpath('//a[href=$url]', url="http://www.example.com")
        c                    s"   g | ]}|j fd i qS 
namespaces)xpath.0xr   rA   rB   r   r   
<listcomp>]       z&SelectorList.xpath.<locals>.<listcomp>r   r   )r   rB   rA   r   r   rF   r   rB   L   s    zSelectorList.xpathc                    s   |  t fdd| D S )z
        Call the ``.css()`` method for each element in this list and return
        their results flattened as another :class:`SelectorList`.

        ``query`` is the same argument as the one in :meth:`Selector.css`
        c                    s   g | ]}|  qS r   )cssrC   queryr   r   rG   f   rH   z$SelectorList.css.<locals>.<listcomp>rI   r   rL   r   rK   r   rJ   _   s    zSelectorList.cssTc                    s   t  fdd| D S )ay  
        Call the ``.re()`` method for each element in this list and return
        their results flattened, as a list of unicode strings.

        By default, character entity references are replaced by their
        corresponding character (except for ``&amp;`` and ``&lt;``.
        Passing ``replace_entities`` as ``False`` switches off these
        replacements.
        c                    s   g | ]}|j  d qS )replace_entitiesrerC   regexrO   r   r   rG   r   rH   z#SelectorList.re.<locals>.<listcomp>)r   r   rS   rO   r   rR   r   rQ   h   s    
zSelectorList.rec                    s(   t  fdd| D D ]
}|  S |S )a  
        Call the ``.re()`` method for the first element in this list and
        return the result in an unicode string. If the list is empty or the
        regex doesn't match anything, return the default value (``None`` if
        the argument is not provided).

        By default, character entity references are replaced by their
        corresponding character (except for ``&amp;`` and ``&lt;``.
        Passing ``replace_entities`` as ``False`` switches off these
        replacements.
        c                 3   s   | ]}|j  d V  qdS )rN   NrP   rC   rR   r   r   	<genexpr>   rH   z(SelectorList.re_first.<locals>.<genexpr>)r   )r   rS   defaultrO   elr   rR   r   re_firstt   s    zSelectorList.re_firstc                 C   s   dd | D S )z
        Call the ``.get()`` method for each element is this list and return
        their results flattened, as a list of unicode strings.
        c                 S   s   g | ]}|  qS r   getrC   r   r   r   rG      rH   z'SelectorList.getall.<locals>.<listcomp>r   r>   r   r   r   getall   s    zSelectorList.getallc                 C   s   | D ]}|    S |S )z
        Return the result of ``.get()`` for the first element in this list.
        If the list is empty, return the default value.
        rY   )r   rV   rE   r   r   r   rZ      s    zSelectorList.getc                 C   s   | D ]}|j   S i S )ztReturn the attributes dictionary for the first element.
        If the list is empty, return an empty dict.
        )attribr   rE   r   r   r   r\      s    
zSelectorList.attribc                 C   s   | D ]}|   qdS )zU
        Remove matched nodes from the parent for each element in this list.
        N)remover]   r   r   r   r^      s    zSelectorList.remove)N)T)NT)N)r   r   r   __doc__r4   r8   r?   rB   rJ   rQ   rX   r[   extractrZ   Zextract_firstpropertyr\   r^   r   r   r   r   r   r2   :   s   
	



r2   c                   @   s   e Zd ZdZg dZdZdddZdZeZ	d)dd	Z
d
d Zd*ddZd+ddZdd Zdd Zd,ddZd-ddZdd ZeZdd Zdd Zdd  Zd!d" Zed#d$ Zd%d& ZeZd'd( ZeZdS ).Selectoral  
    :class:`Selector` allows you to select parts of an XML or HTML text using CSS
    or XPath expressions and extract data from it.

    ``text`` is a ``unicode`` object in Python 2 or a ``str`` object in Python 3

    ``type`` defines the selector type, it can be ``"html"``, ``"xml"`` or ``None`` (default).
    If ``type`` is ``None``, the selector defaults to ``"html"``.

    ``base_url`` allows setting a URL for the document. This is needed when looking up external entities with relative paths.
    See [`lxml` documentation](https://lxml.de/api/index.html) ``lxml.etree.fromstring`` for more information.
    )	r.   rA   type_exprr0   __weakref__r   r   r   Nz$http://exslt.org/regular-expressionszhttp://exslt.org/sets)rQ   setFc           	      C   s   t |p
| j | _}t| d | _t| d | _t| d | _|d urxt|tj	sjdtj	|j
f }t|| ||}n|d u rtdt| j| _|d ur| j| || _|| _d S )Nr   r   r   z*text argument should be of type %s, got %sz+Selector needs either text or root argument)r$   _default_typerc   r!   r   r   r   r9   six	text_typer   r=   	_get_rootr"   dict_default_namespacesrA   updater0   rd   )	r   r.   rc   rA   r0   r*   rd   r#   msgr   r   r   r      s$    zSelector.__init__c                 C   s   t dd S )Nzcan't pickle Selector objectsr<   r>   r   r   r   r?      s    zSelector.__getstate__c                 C   s   t || j|dS )N)r*   )r1   r   )r   r.   r*   r   r   r   rj      s    zSelector._get_rootc           	   
      s   zj j}W n ty(   g  Y S 0 tj}|durF|| z| f|jd|}W n` tj	y } zFd| f }t
jr|n|d}t
tt|t d  W Y d}~n
d}~0 0 t|tur|g} fdd|D }|S )a  
        Find nodes matching the xpath ``query`` and return the result as a
        :class:`SelectorList` instance with all elements flattened. List
        elements implement :class:`Selector` interface too.

        ``query`` is a string containing the XPATH query to apply.

        ``namespaces`` is an optional ``prefix: namespace-uri`` mapping (dict)
        for additional prefixes to those registered with ``register_namespace(prefix, uri)``.
        Contrary to ``register_namespace()``, these prefixes are not
        saved for future calls.

        Any additional named arguments can be used to pass values for XPath
        variables in the XPath expression, e.g.::

            selector.xpath('//a[href=$url]', url="http://www.example.com")
        N)rA   Zsmart_stringszXPath error: %s in %sZunicode_escape   c                    s"   g | ]}j | jjd qS ))r0   rd   rA   rc   )r   rA   rc   rC   rL   r   r   r   rG   	  s
   z"Selector.xpath.<locals>.<listcomp>)r0   rB   AttributeErrorselectorlist_clsrk   rA   rm   _lxml_smart_stringsr   Z
XPathErrorrh   ZPY3r-   Zreraiser"   sysexc_inforc   list)	r   rL   rA   r   ZxpathevZnspresultexcrn   r   rp   r   rB      s.    


0zSelector.xpathc                 C   s   |  | |S )aj  
        Apply the given CSS selector and return a :class:`SelectorList` instance.

        ``query`` is a string containing the CSS selector to apply.

        In the background, CSS queries are translated into XPath queries using
        `cssselect`_ library and run ``.xpath()`` method.

        .. _cssselect: https://pypi.python.org/pypi/cssselect/
        )rB   
_css2xpathrM   r   r   r   rJ     s    zSelector.cssc                 C   s   | j |S r3   )r   Zcss_to_xpathrM   r   r   r   ry     s    zSelector._css2xpathTc                 C   s   t ||  |dS )a  
        Apply the given regex and return a list of unicode strings with the
        matches.

        ``regex`` can be either a compiled regular expression or a string which
        will be compiled to a regular expression using ``re.compile(regex)``.

        By default, character entity references are replaced by their
        corresponding character (except for ``&amp;`` and ``&lt;``).
        Passing ``replace_entities`` as ``False`` switches off these
        replacements.
        rN   )r   rZ   rT   r   r   r   rQ     s    zSelector.rec                 C   s   t t| j||d|S )a  
        Apply the given regex and return the first unicode string which
        matches. If there is no match, return the default value (``None`` if
        the argument is not provided).

        By default, character entity references are replaced by their
        corresponding character (except for ``&amp;`` and ``&lt;``).
        Passing ``replace_entities`` as ``False`` switches off these
        replacements.
        rN   )nextr   rQ   )r   rS   rV   rO   r   r   r   rX   .  s    zSelector.re_firstc              	   C   sd   zt j| j| jdddW S  ttfy^   | jdu r:Y dS | jdu rJY dS t| j Y S Y n0 dS )z
        Serialize and return the matched nodes in a single unicode string.
        Percent encoded content is unquoted.
        ZunicodeF)methodr(   Z	with_tailT10N)r   Ztostringr0   r   rq   r=   rh   ri   r>   r   r   r   rZ   ;  s    

zSelector.getc                 C   s
   |   gS )z_
        Serialize and return the matched node in a 1-element list of unicode strings.
        rY   r>   r   r   r   r[   N  s    zSelector.getallc                 C   s   || j |< dS )z
        Register the given namespace to be used in this :class:`Selector`.
        Without registering namespaces you can't select or extract data from
        non-standard namespaces. See :ref:`selector-examples-xml`.
        Nr@   )r   prefixZurir   r   r   register_namespaceT  s    zSelector.register_namespacec                 C   sz   | j dD ]\}|jdr0|jddd |_|j D ],}|dr:|j||j|ddd < q:qt	| j  dS )z
        Remove all namespaces, allowing to traverse the document using
        namespace-less xpaths. See :ref:`removing-namespaces`.
        *{}r   N)
r0   itertag
startswithsplitr\   keyspopr   Zcleanup_namespaces)r   rW   Zanr   r   r   remove_namespaces\  s    
"zSelector.remove_namespacesc                 C   sZ   z| j  }W n ty(   tdY n0 z|| j  W n tyT   tdY n0 dS )z?
        Remove matched nodes from the parent element.
        zThe node you're trying to remove has no root, are you trying to remove a pseudo-element? Try to use 'li' as a selector instead of 'li::text' or '//li' instead of '//li/text()', for example.zXThe node you're trying to remove has no parent, are you trying to remove a root element?N)r0   Z	getparentrq   r   r^   r   )r   parentr   r   r   r^   k  s    
zSelector.removec                 C   s   t | jjS )zAReturn the attributes dictionary for underlying element.
        )rk   r0   r\   r>   r   r   r   r\     s    zSelector.attribc                 C   s   t |  S )z
        Return ``True`` if there is any real content selected or ``False``
        otherwise.  In other words, the boolean value of a :class:`Selector` is
        given by the contents it selects.
        )boolrZ   r>   r   r   r   __bool__  s    zSelector.__bool__c                 C   s*   t t|  dd}dt| j| j|f S )N(   )widthz<%s xpath=%r data=%s>)reprr   rZ   rc   r   rd   )r   datar   r   r   __str__  s    zSelector.__str__)NNNNNN)N)N)T)NT)r   r   r   r_   	__slots__rg   rl   rs   r2   rr   r   r?   rj   rB   rJ   ry   rQ   rX   rZ   r`   r[   r   r   r^   ra   r\   r   Z__nonzero__r   __repr__r   r   r   r   rb      s<     


,


rb   )N)r_   rt   rh   Zlxmlr   r   Zutilsr   r   r   r   Zcsstranslatorr	   r
   	Exceptionr   r   Z	XMLParserr   Z
HTMLParserr!   r$   r1   rv   r2   objectrb   r   r   r   r   <module>   s*   
	
m