a
    'À(bÝ  ã                   @   s4   d Z ddlmZ ddlZG dd„ dƒZddd„ZdS )	z¢
Module for processing Sitemaps.

Note: The main purpose of this module is to provide support for the
SitemapSpider, its API is subject to change without notice.
é    )ÚurljoinNc                   @   s    e Zd ZdZdd„ Zdd„ ZdS )ÚSitemapzTClass to parse Sitemap (type=urlset) and Sitemap Index
    (type=sitemapindex) filesc                 C   sR   t jjdddd}t jj||d| _| jj}d|v rH| jj dd¡d n|| _d S )NTF)ZrecoverZremove_commentsZresolve_entities)ÚparserÚ}é   )ÚlxmlZetreeZ	XMLParserZ
fromstringÚ_rootÚtagÚsplitÚtype)ÚselfZxmltextZxmlpZrt© r   ú3lib/python3.9/site-packages/scrapy/utils/sitemap.pyÚ__init__   s    zSitemap.__init__c                 c   sš   | j  ¡ D ]Š}i }| ¡ D ]j}|j}d|v r<| dd¡d n|}|dkrld|jv r„| dg ¡ | d¡¡ q|jr||j 	¡ nd||< qd|v r
|V  q
d S )Nr   r   ÚlinkZhrefZ	alternateÚ Zloc)
r   Zgetchildrenr	   r
   ZattribÚ
setdefaultÚappendÚgetÚtextÚstrip)r   ÚelemÚdZelr	   Únamer   r   r   Ú__iter__   s    
zSitemap.__iter__N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   r   r      s   r   c                 c   sD   |   ¡ D ]6}| ¡  ¡  d¡r| dd¡d  ¡ }t||ƒV  qdS )zXReturn an iterator over all sitemap urls contained in the given
    robots.txt file
    zsitemap:ú:r   N)Ú
splitlinesÚlstripÚlowerÚ
startswithr
   r   r   )Zrobots_textZbase_urlÚlineZurlr   r   r   Úsitemap_urls_from_robots(   s    r%   )N)r   Zurllib.parser   Z
lxml.etreer   r   r%   r   r   r   r   Ú<module>   s   