B
    ufx@                 @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z
ddlZ
ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ ddl m!Z!m"Z" dd	l#m$Z$ dd
l%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5m6Z6 erHddlm7Z7 ne8Z7e9e:Z;ee<e<f Z=e<ee< dddZ>G dd de?Z@eddddZAG dd de?ZBe<e*dddd ZCe<e*edd!d"ZDe=ee< d#d$d%ZEG d&d' d'ZFG d(d) d)e7ZGeGeGd*d+d,ZHeHd-ee& d.d/d0ZIG d1d- d-ZJG d2d3 d3eZKdCe&ee<e?f eed4  dd5d6d7ZLdDeeMeJd9d:d;ZNe&e*ed- d<d=d>ZOG d?d@ d@eZPG dAdB dBZQdS )EzO
The main purpose of this module is to expose LinkCollector.collect_sources().
    N)
HTMLParser)Values)TYPE_CHECKINGCallableDictIterableListMutableMapping
NamedTupleOptionalSequenceTupleUnion)requests)Response)
RetryErrorSSLError)NetworkConnectionError)Link)SearchScope)
PipSession)raise_for_status)is_archive_file)redact_auth_from_url)vcs   )CandidatesFromPage
LinkSourcebuild_source)Protocol)urlreturnc             C   s6   x0t jD ]&}|  |r| t| dkr|S qW dS )zgLook for VCS schemes in the URL.

    Returns the matched VCS scheme, or None if there's no match.
    z+:N)r   Zschemeslower
startswithlen)r    scheme r&   _/home/ankuromar296_gmail_com/myenv/lib/python3.7/site-packages/pip/_internal/index/collector.py_match_vcs_scheme7   s    r(   c                   s&   e Zd Zeedd fddZ  ZS )_NotAPIContentN)content_typerequest_descr!   c                s   t  || || _|| _d S )N)super__init__r*   r+   )selfr*   r+   )	__class__r&   r'   r-   C   s    z_NotAPIContent.__init__)__name__
__module____qualname__strr-   __classcell__r&   r&   )r/   r'   r)   B   s   r)   )responser!   c             C   s6   | j dd}| }|dr$dS t|| jjdS )z
    Check the Content-Type header to ensure the response contains a Simple
    API Response.

    Raises `_NotAPIContent` if the content type is not a valid content-type.
    zContent-TypeUnknown)z	text/htmlz#application/vnd.pypi.simple.v1+htmlz#application/vnd.pypi.simple.v1+jsonN)headersgetr"   r#   r)   requestmethod)r5   r*   content_type_lr&   r&   r'   _ensure_api_headerI   s    r<   c               @   s   e Zd ZdS )_NotHTTPN)r0   r1   r2   r&   r&   r&   r'   r=   _   s   r=   )r    sessionr!   c             C   sF   t j| \}}}}}|dkr$t |j| dd}t| t| dS )z
    Send a HEAD request to the URL, and ensure the response contains a simple
    API Response.

    Raises `_NotHTTP` if the URL is not available for a HEAD request, or
    `_NotAPIContent` if the content type is not a valid content type.
    >   httphttpsT)allow_redirectsN)urllibparseurlsplitr=   headr   r<   )r    r>   r%   netlocpathqueryfragmentrespr&   r&   r'   _ensure_api_responsec   s    rK   c             C   sz   t t| jrt| |d tdt|  |j| ddddgddd	}t	| t
| td
t| |jdd |S )aY  Access an Simple API response with GET, and return the response.

    This consists of three parts:

    1. If the URL looks suspiciously like an archive, send a HEAD first to
       check the Content-Type is HTML or Simple API, to avoid downloading a
       large file. Raise `_NotHTTP` if the content type cannot be determined, or
       `_NotAPIContent` if it is not HTML or a Simple API.
    2. Actually perform the request. Raise HTTP exceptions on network failures.
    3. Check the Content-Type header to make sure we got a Simple API response,
       and raise `_NotAPIContent` otherwise.
    )r>   zGetting page %sz, z#application/vnd.pypi.simple.v1+jsonz*application/vnd.pypi.simple.v1+html; q=0.1ztext/html; q=0.01z	max-age=0)AcceptzCache-Control)r7   zFetched page %s as %szContent-Typer6   )r   r   filenamerK   loggerdebugr   r8   joinr   r<   r7   )r    r>   rJ   r&   r&   r'   _get_simple_responseu   s"    rQ   )r7   r!   c             C   s<   | r8d| kr8t j }| d |d< |d}|r8t|S dS )z=Determine if we have any encoding information in our headers.zContent-Typezcontent-typecharsetN)emailmessageMessage	get_paramr3   )r7   mrR   r&   r&   r'   _get_encoding_from_headers   s    

rX   c               @   s:   e Zd ZdddddZeedddZed	d
dZdS )CacheablePageContentIndexContentN)pager!   c             C   s   |j s
t|| _d S )N)cache_link_parsingAssertionErrorr[   )r.   r[   r&   r&   r'   r-      s    
zCacheablePageContent.__init__)otherr!   c             C   s   t |t| o| jj|jjkS )N)
isinstancetyper[   r    )r.   r^   r&   r&   r'   __eq__   s    zCacheablePageContent.__eq__)r!   c             C   s   t | jjS )N)hashr[   r    )r.   r&   r&   r'   __hash__   s    zCacheablePageContent.__hash__)	r0   r1   r2   r-   objectboolra   intrc   r&   r&   r&   r'   rY      s   rY   c               @   s    e Zd Zdee dddZdS )
ParseLinksrZ   )r[   r!   c             C   s   d S )Nr&   )r.   r[   r&   r&   r'   __call__   s    zParseLinks.__call__N)r0   r1   r2   r   r   rh   r&   r&   r&   r'   rg      s   rg   )fnr!   c                sL   t jddttt d fddt  dtt d fdd	}|S )
z
    Given a function that parses an Iterable[Link] from an IndexContent, cache the
    function's result (keyed by CacheablePageContent), unless the IndexContent
    `page` has `page.cache_link_parsing == False`.
    N)maxsize)cacheable_pager!   c                s   t  | jS )N)listr[   )rk   )ri   r&   r'   wrapper   s    z*with_cached_index_content.<locals>.wrapperrZ   )r[   r!   c                s   | j rt| S t | S )N)r\   rY   rl   )r[   )ri   rm   r&   r'   wrapper_wrapper   s    z2with_cached_index_content.<locals>.wrapper_wrapper)	functools	lru_cacherY   r   r   wraps)ri   rn   r&   )ri   rm   r'   with_cached_index_content   s
    
rr   rZ   )r[   r!   c       
      c   s   | j  }|drXt| j}x2|dg D ]"}t|| j	}|dkrJq.|V  q.W dS t
| j	}| jpjd}|| j| | j	}|jp|}x.|jD ]$}	tj|	||d}|dkrq|V  qW dS )z\
    Parse a Simple API's Index Content, and yield its anchor elements as Link objects.
    z#application/vnd.pypi.simple.v1+jsonfilesNzutf-8)page_urlbase_url)r*   r"   r#   jsonloadscontentr8   r   	from_jsonr    HTMLLinkParserencodingfeeddecoderu   anchorsfrom_element)
r[   r;   datafilelinkparserr{   r    ru   anchorr&   r&   r'   parse_links   s&    





r   c               @   s<   e Zd ZdZd
eeee eeddddZeddd	Z	dS )rZ   z5Represents one response (or page), along with its URLTN)rx   r*   r{   r    r\   r!   c             C   s"   || _ || _|| _|| _|| _dS )am  
        :param encoding: the encoding to decode the given content.
        :param url: the URL from which the HTML was downloaded.
        :param cache_link_parsing: whether links parsed from this page's url
                                   should be cached. PyPI index urls should
                                   have this set to False, for example.
        N)rx   r*   r{   r    r\   )r.   rx   r*   r{   r    r\   r&   r&   r'   r-     s
    zIndexContent.__init__)r!   c             C   s
   t | jS )N)r   r    )r.   r&   r&   r'   __str__  s    zIndexContent.__str__)T)
r0   r1   r2   __doc__bytesr3   r   re   r-   r   r&   r&   r&   r'   rZ     s   c                   sn   e Zd ZdZedd fddZeeeeee f  ddddZ	eeeee f  ee d	d
dZ
  ZS )rz   zf
    HTMLParser that keeps the first base HREF and a list of all anchor
    elements' attributes.
    N)r    r!   c                s$   t  jdd || _d | _g | _d S )NT)Zconvert_charrefs)r,   r-   r    ru   r~   )r.   r    )r/   r&   r'   r-   #  s    zHTMLLinkParser.__init__)tagattrsr!   c             C   sH   |dkr,| j d kr,| |}|d k	rD|| _ n|dkrD| jt| d S )Nbasea)ru   get_hrefr~   appenddict)r.   r   r   hrefr&   r&   r'   handle_starttag*  s    
zHTMLLinkParser.handle_starttag)r   r!   c             C   s"   x|D ]\}}|dkr|S qW d S )Nr   r&   )r.   r   namevaluer&   r&   r'   r   2  s    zHTMLLinkParser.get_href)r0   r1   r2   r   r3   r-   r   r   r   r   r   r4   r&   r&   )r/   r'   rz     s   "rz   ).N)r   reasonmethr!   c             C   s   |d krt j}|d| | d S )Nz%Could not fetch URL %s: %s - skipping)rN   rO   )r   r   r   r&   r&   r'   _handle_get_simple_fail9  s    r   T)r5   r\   r!   c             C   s&   t | j}t| j| jd || j|dS )NzContent-Type)r{   r    r\   )rX   r7   rZ   rx   r    )r5   r\   r{   r&   r&   r'   _make_index_contentC  s    
r   )r   r>   r!   c      
   
   C   s  | j ddd }t|}|r0td||  d S tj|\}}}}}}|dkrtj	
tj|r|dsv|d7 }tj|d}td| yt||d	}W nD tk
r   td
|  Y n2 tk
r } ztd| |j|j W d d }~X Y n tk
r$ } zt| | W d d }~X Y n tk
rP } zt| | W d d }~X Y n tk
r } z$d}	|	t|7 }	t| |	tjd W d d }~X Y nd tjk
r } zt| d|  W d d }~X Y n0 tjk
r   t| d Y nX t|| jdS d S )N#r   r   zICannot look at %s URL %s because it does not support lookup as web pages.r   /z
index.htmlz# file: URL is directory, getting %s)r>   z`Skipping page %s because it looks like an archive, and cannot be checked by a HTTP HEAD request.zSkipping page %s because the %s request got Content-Type: %s. The only supported Content-Types are application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html, and text/htmlz4There was a problem confirming the ssl certificate: )r   zconnection error: z	timed out)r\   ) r    splitr(   rN   warningrB   rC   urlparseosrG   isdirr9   url2pathnameendswithurljoinrO   rQ   r=   r)   r+   r*   r   r   r   r   r3   infor   ConnectionErrorTimeoutr   r\   )
r   r>   r    Z
vcs_schemer%   _rG   rJ   excr   r&   r&   r'   _get_index_contentP  sP    
  r   c               @   s.   e Zd ZU eee  ed< eee  ed< dS )CollectedSources
find_links
index_urlsN)r0   r1   r2   r   r   r   __annotations__r&   r&   r&   r'   r     s   
r   c               @   sx   e Zd ZdZeeddddZedeee	d ddd	Z
eee d
ddZeee dddZeeedddZdS )LinkCollectorz
    Responsible for collecting Link objects from all configured locations,
    making network requests as needed.

    The class's main method is its collect_sources() method.
    N)r>   search_scoper!   c             C   s   || _ || _d S )N)r   r>   )r.   r>   r   r&   r&   r'   r-     s    zLinkCollector.__init__F)r>   optionssuppress_no_indexr!   c             C   sd   |j g|j }|jr8|s8tdddd |D  g }|jp@g }tj|||jd}t	||d}|S )z
        :param session: The Session to use to make requests.
        :param suppress_no_index: Whether to ignore the --no-index option
            when constructing the SearchScope object.
        zIgnoring indexes: %s,c             s   s   | ]}t |V  qd S )N)r   ).0r    r&   r&   r'   	<genexpr>  s    z'LinkCollector.create.<locals>.<genexpr>)r   r   no_index)r>   r   )
	index_urlextra_index_urlsr   rN   rO   rP   r   r   creater   )clsr>   r   r   r   r   r   link_collectorr&   r&   r'   r     s    


zLinkCollector.create)r!   c             C   s   | j jS )N)r   r   )r.   r&   r&   r'   r     s    zLinkCollector.find_links)locationr!   c             C   s   t || jdS )z>
        Fetch an HTML page containing package links.
        )r>   )r   r>   )r.   r   r&   r&   r'   fetch_response  s    zLinkCollector.fetch_response)project_namecandidates_from_pager!   c                s   t  fddj|D  }t  fddjD  }ttj	rdd t
||D }t| d| dg| }td| tt|t|d	S )
Nc             3   s$   | ]}t | jjd d dV  qdS )F)r   page_validator
expand_dirr\   N)r   r>   is_secure_origin)r   loc)r   r.   r&   r'   r     s   z0LinkCollector.collect_sources.<locals>.<genexpr>c             3   s$   | ]}t | jjd d dV  qdS )T)r   r   r   r\   N)r   r>   r   )r   r   )r   r.   r&   r'   r     s   c             S   s*   g | ]"}|d k	r|j d k	rd|j  qS )Nz* )r   )r   sr&   r&   r'   
<listcomp>  s   z1LinkCollector.collect_sources.<locals>.<listcomp>z' location(s) to search for versions of :
)r   r   )collectionsOrderedDictr   Zget_index_urls_locationsvaluesr   rN   isEnabledForloggingDEBUG	itertoolschainr$   rO   rP   r   rl   )r.   r   r   Zindex_url_sourcesZfind_links_sourceslinesr&   )r   r.   r'   collect_sources  s    zLinkCollector.collect_sources)F)r0   r1   r2   r   r   r   r-   classmethodr   re   r   propertyr   r3   r   r   r   rZ   r   r   r   r   r&   r&   r&   r'   r     s   r   )N)T)Rr   r   email.messagerS   ro   r   rv   r   r   urllib.parserB   urllib.requestZhtml.parserr   optparser   typingr   r   r   r   r   r	   r
   r   r   r   r   pip._vendorr   Zpip._vendor.requestsr   Zpip._vendor.requests.exceptionsr   r   pip._internal.exceptionsr   pip._internal.models.linkr   Z!pip._internal.models.search_scoper   pip._internal.network.sessionr   Zpip._internal.network.utilsr   pip._internal.utils.filetypesr   pip._internal.utils.miscr   pip._internal.vcsr   sourcesr   r   r   r   rd   	getLoggerr0   rN   r3   ZResponseHeadersr(   	Exceptionr)   r<   r=   rK   rQ   rX   rY   rg   rr   r   rZ   rz   r   re   r   r   r   r   r&   r&   r&   r'   <module>   sd   4
?=