a
    |%b'                     @   sL  d Z g dZddlZddlmZmZ z0ddlmZmZm	Z	m
Z
mZmZmZ eefZW n6 ey   ddlmZmZm	Z	m
Z
mZmZ eZY n0 dddZdd	d
ZdddZdd ZedejjZG dd dZdd Zdd ZzddlmZ W n  ey   ddlmZ Y n0 edjZze  W n e!y>   e"Z Y n0 dd Z#dS )z5External interface to the BeautifulSoup HTML parser.
)
fromstringparseconvert_tree    N)etreehtml)BeautifulSoupTagCommentProcessingInstructionNavigableStringDeclarationDoctype)r   r   r	   r
   r   r   c                 K   s   t | ||fi |S )a  Parse a string of HTML data into an Element tree using the
    BeautifulSoup parser.

    Returns the root ``<html>`` Element of the tree.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a diffent Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.
    )_parse)databeautifulsoupmakeelementbsargs r   3lib/python3.9/site-packages/lxml/html/soupparser.pyr      s    r   c                 K   s0   t | dst| } t| ||fi |}t|S )aY  Parse a file into an ElemenTree using the BeautifulSoup parser.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a diffent Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.
    read)hasattropenr   r   ZElementTree)filer   r   r   rootr   r   r   r   $   s    	
r   c                 C   s*   t | |}| }|D ]}|| q|S )a  Convert a BeautifulSoup tree to a list of Element trees.

    Returns a list instead of a single root Element to support
    HTML-like soup with more than one root element.

    You can pass a different Element factory through the `makeelement`
    keyword.
    )_convert_treeZgetchildrenremove)beautiful_soup_treer   r   Zchildrenchildr   r   r   r   3   s
    	
r   c                 K   s   |d u rt }t|dr&d|vr&d|d< t|dr@d|vr@d|d< || fi |}t||}t|dkr||d jdkr||d S d|_|S )	NZHTML_ENTITIESZconvertEntitiesr   ZDEFAULT_BUILDER_FEATURESZfeatureszhtml.parser   r   )r   r   r   lentag)sourcer   r   r   Ztreer   r   r   r   r   E   s    


r   z`(?:\s|[<!])*DOCTYPE\s*HTML(?:\s+PUBLIC)?(?:\s+(\'[^\']*\'|"[^"]*"))?(?:\s+(\'[^\']*\'|"[^"]*"))?c                   @   s   e Zd Zdd Zdd ZdS )
_PseudoTagc                 C   s   d| _ g | _|| _d S )Nr   )nameattrscontents)selfr%   r   r   r   __init__`   s    z_PseudoTag.__init__c                 C   s
   | j  S N)r%   __iter__)r&   r   r   r   r)   e   s    z_PseudoTag.__iter__N)__name__
__module____qualname__r'   r)   r   r   r   r   r"   ^   s   r"   c                 C   s  |d u rt jj}d  }}d  }}t| D ]Z\}}t|trl|d u rF|}|}|d u r|jr|j dkr|}q(|d u r(t|tr(|}q(|d u rg  }}	| j	}
n2| j	d | }| j	||d  }
| j	|d d  }	|d ur|

|}|
d | |j	 |
|d d   |_	nt|
}t|}||}|}t|D ]&}||}|d ur,|| |}q,|}|	D ]&}||}|d ur\|| |}q\|d urz| }W n ty   |j}Y n0 t|}|sn>| \}}| j}|o|dd |_|o |dd |_|S )Nr   r   )r   Zhtml_parserr   	enumerate
isinstancer   r#   lower_DECLARATION_OR_DOCTYPEr%   indexr"   _init_node_convertersreversedZaddpreviousZaddnextZoutput_readyAttributeErrorstring_parse_doctype_declarationgroupsZgetroottreedocinfoZ	public_idZ
system_url)r   r   Zfirst_element_idxZlast_element_idxZ	html_rootZdeclarationieZpre_rootZ	post_rootrootsconvert_nodeZres_rootprevZ	convertedZdoctype_stringmatchZexternal_idZsys_urir9   r   r   r   r   i   sd    


&





r   c                    s   i g fdd}fdddfdd	dd d	d
  |t tfdd}|tdd }|tdd }|t fdd}S )Nc                     s    fdd}|S )Nc                    s    D ]}|  |<  | q| S r(   )append)handlert)
convertersordered_node_typestypesr   r   add   s    z5_init_node_converters.<locals>.converter.<locals>.addr   )rE   rF   rC   rD   )rE   r   	converter   s    z(_init_node_converters.<locals>.converterc                    s$   D ]}t | |r |   S qd S r(   )r/   )ZnoderB   rG   r   r   find_best_converter   s    
z2_init_node_converters.<locals>.find_best_converterc                    sN   z t |  }W n& ty6   |  } t | < Y n0 |d u rDd S || |S r(   )typeKeyError)bs_nodeparentrA   )rC   rI   r   r   r=      s    z+_init_node_converters.<locals>.convert_nodec                 S   sX   t | trBi }|  D ](\}}t |tr2d|}t|||< qntdd | D }|S )N c                 s   s   | ]\}}|t |fV  qd S r(   unescape).0kvr   r   r   	<genexpr>       z;_init_node_converters.<locals>.map_attrs.<locals>.<genexpr>)r/   dictitemslistjoinrP   )Zbs_attrsattribsrR   rS   r   r   r   	map_attrs   s    


z(_init_node_converters.<locals>.map_attrsc                 S   s:   t | dkr| jpd| | _n| d jp*d| | d _d S )Nr    r-   )r   texttail)rM   r]   r   r   r   append_text   s    z*_init_node_converters.<locals>.append_textc              	      s   | j }|d ur2|r|nd }tj|| j|d}n|r>|ni }| j|d}| D ]F}zt| }W n tyz   Y n0 |d urT||| qT || qT|S )N)Zattrib)r$   r   Z
SubElementr#   rJ   rK   )rL   rM   r$   rZ   resr   rA   )r=   rC   r   r[   r   r   convert_tag   s     
z*_init_node_converters.<locals>.convert_tagc                 S   s    t | }|d ur|| |S r(   )r   ZHtmlCommentr@   rL   rM   r`   r   r   r   convert_comment  s    

z._init_node_converters.<locals>.convert_commentc                 S   s>   |  dr| d d } tj| dd }|d ur:|| |S )N?r-   rN   r   )endswithr   r
   splitr@   rb   r   r   r   
convert_pi  s    

z)_init_node_converters.<locals>.convert_pic                    s   |d ur |t |  d S r(   rO   )rL   rM   )r_   r   r   convert_text  s    z+_init_node_converters.<locals>.convert_text)N)r   r"   r	   r
   r   )r   rH   ra   rc   rg   rh   r   )r_   r=   rC   rI   r   r[   rD   r   r3      s     



r3   )name2codepointz&(\w+);c                 C   s   | sdS dd }t || S )Nr\   c                 S   s6   zt t| d W S  ty0   | d Y S 0 d S )Nr   r   )unichrri   grouprK   )mr   r   r   unescape_entity5  s    z!unescape.<locals>.unescape_entity)handle_entities)r6   rm   r   r   r   rP   1  s    rP   )NN)NN)N)$__doc____all__reZlxmlr   r   Zbs4r   r   r	   r
   r   r   r   r1   ImportErrorr   r   r   r   compile
IGNORECASEr?   r7   r"   r   r3   Zhtml.entitiesri   Zhtmlentitydefssubrn   rj   	NameErrorchrrP   r   r   r   r   <module>   s<   $ 



Uc
