B
    uf9!  ã               @   s<   d dl mZmZ ddlmZ ddlmZ G dd„ deƒZdS )é    )ÚListÚUnioné   )ÚCharSetProber)ÚProbingStatec                   s  e Zd ZdZdZdZddœ‡ fdd„Zddœ‡ fdd	„Zee	dœd
d„ƒZ
ee	dœdd„ƒZedœdd„Zedœdd„Zedœdd„Zedœdd„Zedœdd„Zedœdd„Zee ddœdd„Zee ddœdd„Zeeef ed œd!d"„Zeedœd#d$„ƒZedœd%d&„Z‡  ZS )'ÚUTF1632Proberad  
    This class simply looks for occurrences of zero bytes, and infers
    whether the file is UTF16 or UTF32 (low-endian or big-endian)
    For instance, files looking like (       [nonzero] )+
    have a good probability to be UTF32BE.  Files looking like (   [nonzero] )+
    may be guessed to be UTF16BE, and inversely for little-endian varieties.
    é   g®Gázî?N)Úreturnc                sn   t ƒ  ¡  d| _dgd | _dgd | _tj| _ddddg| _d| _	d| _
d| _d| _d| _d| _|  ¡  d S )Nr   é   F)ÚsuperÚ__init__ÚpositionÚzeros_at_modÚnonzeros_at_modr   Ú	DETECTINGÚ_stateÚquadÚinvalid_utf16beÚinvalid_utf16leÚinvalid_utf32beÚinvalid_utf32leÚ'first_half_surrogate_pair_detected_16beÚ'first_half_surrogate_pair_detected_16leÚreset)Úself)Ú	__class__© úc/home/ankuromar296_gmail_com/myenv/lib/python3.7/site-packages/pip/_vendor/chardet/utf1632prober.pyr   )   s    
zUTF1632Prober.__init__c                sf   t ƒ  ¡  d| _dgd | _dgd | _tj| _d| _d| _	d| _
d| _d| _d| _ddddg| _d S )Nr   r
   F)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   )r   r   r   r   8   s    
zUTF1632Prober.resetc             C   s4   |   ¡ rdS |  ¡ rdS |  ¡ r$dS |  ¡ r0dS dS )Nzutf-32bezutf-32lezutf-16bezutf-16lezutf-16)Úis_likely_utf32beÚis_likely_utf32leÚis_likely_utf16beÚis_likely_utf16le)r   r   r   r   Úcharset_nameF   s    zUTF1632Prober.charset_namec             C   s   dS )NÚ r   )r   r   r   r   ÚlanguageS   s    zUTF1632Prober.languagec             C   s   t d| jd ƒS )Ng      ð?g      @)Úmaxr   )r   r   r   r   Úapprox_32bit_charsW   s    z UTF1632Prober.approx_32bit_charsc             C   s   t d| jd ƒS )Ng      ð?g       @)r%   r   )r   r   r   r   Úapprox_16bit_charsZ   s    z UTF1632Prober.approx_16bit_charsc             C   sj   |   ¡ }|| jkoh| jd | | jkoh| jd | | jkoh| jd | | jkoh| jd | | jkoh| j S )Nr   r   é   é   )r&   ÚMIN_CHARS_FOR_DETECTIONr   ÚEXPECTED_RATIOr   r   )r   Úapprox_charsr   r   r   r   ]   s    
zUTF1632Prober.is_likely_utf32bec             C   sj   |   ¡ }|| jkoh| jd | | jkoh| jd | | jkoh| jd | | jkoh| jd | | jkoh| j S )Nr   r   r(   r)   )r&   r*   r   r+   r   r   )r   r,   r   r   r   r   g   s    
zUTF1632Prober.is_likely_utf32lec             C   sV   |   ¡ }|| jkoT| jd | jd  | | jkoT| jd | jd  | | jkoT| j S )Nr   r)   r   r(   )r'   r*   r   r+   r   r   )r   r,   r   r   r   r    q   s    
zUTF1632Prober.is_likely_utf16bec             C   sV   |   ¡ }|| jkoT| jd | jd  | | jkoT| jd | jd  | | jkoT| j S )Nr   r(   r   r)   )r'   r*   r   r+   r   r   )r   r,   r   r   r   r!   {   s    
zUTF1632Prober.is_likely_utf16le)r   r	   c             C   s¨   |d dksL|d dksL|d dkrR|d dkrRd|d   krHdkrRn nd| _ |d dksž|d dksž|d dkr¤|d dkr¤d|d   kršdkr¤n nd| _d	S )
zÖ
        Validate if the quad of bytes is valid UTF-32.

        UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
        excluding 0x0000D800 - 0x0000DFFF

        https://en.wikipedia.org/wiki/UTF-32
        r   r   é   éØ   r(   éß   Tr)   N)r   r   )r   r   r   r   r   Úvalidate_utf32_characters…   s    
44z'UTF1632Prober.validate_utf32_characters)Úpairr	   c             C   sô   | j sNd|d   krdkr*n nd| _ qxd|d   krBdkrxn qxd| _n*d|d   krfdkrrn nd| _ nd| _| jsÆd|d   kr–dkr¢n nd| _qðd|d   krºdkrðn qðd| _n*d|d   krÞdkrên nd| _nd| _d	S )
a9  
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        r.   r   éÛ   TéÜ   r/   Fr   N)r   r   r   r   )r   r1   r   r   r   Úvalidate_utf16_characters›   s     
z'UTF1632Prober.validate_utf16_characters)Úbyte_strr	   c             C   s    x˜|D ]}| j d }|| j|< |dkrZ|  | j¡ |  | jdd… ¡ |  | jdd… ¡ |dkrv| j|  d7  < n| j|  d7  < |  j d7  _ qW | jS )Nr
   r)   r   r(   r   )r   r   r0   r4   r   r   Ústate)r   r5   ÚcZmod4r   r   r   Úfeed»   s    


zUTF1632Prober.feedc             C   sF   | j tjtjhkr| j S |  ¡ dkr.tj| _ n| jdkr@tj| _ | j S )Ngš™™™™™é?i   )r   r   ÚNOT_MEÚFOUND_ITÚget_confidencer   )r   r   r   r   r6   Ê   s    

zUTF1632Prober.statec             C   s(   |   ¡ s |  ¡ s |  ¡ s |  ¡ r$dS dS )Ng333333ë?g        )r!   r    r   r   )r   r   r   r   r;   ×   s
    zUTF1632Prober.get_confidence) Ú__name__Ú
__module__Ú__qualname__Ú__doc__r*   r+   r   r   ÚpropertyÚstrr"   r$   Úfloatr&   r'   Úboolr   r   r    r!   r   Úintr0   r4   r   ÚbytesÚ	bytearrayr   r8   r6   r;   Ú__classcell__r   r   )r   r   r      s*   



 r   N)Útypingr   r   Úcharsetproberr   Úenumsr   r   r   r   r   r   Ú<module>   s   