
    :'a!                        	 d dl Zd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ  ee	      d
edefd       Z ee	      d
edefd       Z ee	      d
edee   fd       Z ee	      d
edefd       Z d
edefdZ! ee	      d
edefd       Z" ee	      d
edefd       Z# ee	      d
edefd       Z$ ee	      d
edefd       Z%d
edefdZ& ee	      d
edefd       Z' ee	      d
edefd       Z( ee	      d
edefd       Z) ee	      d
edefd       Z* ee	      d
edefd       Z+ e e,e      	      dedefd       Z-d/de.de/dee   fdZ0 ed 	      d!edefd"       Z1de.de	ee   e.f   fd#Z2d$edefd%Z3d0d&ed'edefd(Z4d)edee   fd*Z5d+ed,ede6fd-Z7d+ed,edefd.Z8y# e$ r d dlZY w xY w)1    N)IncrementalDecoder)findall)OptionalTupleUnionListSet)MultibyteIncrementalDecoder)aliases)	lru_cache)UNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDRE_POSSIBLE_ENCODING_INDICATIONENCODING_MARKSUTF8_MAXIMAL_ALLOCATIONIANA_SUPPORTED_SIMILAR)maxsize	characterreturnc                     	 t        j                  |       }d|v xs d|v xs d|v xs
 d|v xs d|v S # t        $ r Y yw xY w)NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXunicodedataname
ValueErrorr   descriptions     8lib/python3.12/site-packages/charset_normalizer/utils.pyis_accentuatedr      s    !&&y1 ;&  n,+*E  n[fIf  njz  J  kJ  n  N_  cn  Nn  n  s   3 	??c                     t        j                  |       }|s| S |j                  d      }t        t	        |d   d            S )N r      )r   decompositionsplitchrint)r   
decomposedcodess      r   remove_accentr(      sJ    **95JS!E!H	
     c                 b    t        |       }t        j                         D ]  \  }}||v s|c S  y)zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   character_ord
range_name	ord_ranges       r   unicode_ranger0   ,   s=    
 	NM!8!>!>!@ 
II% r)   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFLATINr   r   s     r   is_latinr3   :   s8    !&&y1 k!!      	''c                 F    	 | j                  d       y# t        $ r Y yw xY w)NasciiFT)encodeUnicodeEncodeErrorr   s    r   is_asciir:   C   s.    !   s    	  c                 Z    t        j                  |       }d|v ryt        |       }|yd|v S )NPTFPunctuationr   categoryr0   r   character_categorycharacter_ranges      r   is_punctuationrC   J   s=    $--i8
  #I.OO++r)   c                 b    t        j                  |       }d|v sd|v ryt        |       }|yd|v S )NSNTFFormsr>   r@   s      r   	is_symbolrH   Y   sE    $--i8
  C+=$=#I.Oo%%r)   c                 ^    | j                         s| dv ryt        j                  |       }d|v S )N)u   ｜+,;<>TZ)isspacer   r?   r   rA   s     r   is_separatorrR   h   s6    i+KK$--i8$$$r)   c                 D    | j                         | j                         k7  S N)islowerisupperr9   s    r   is_case_variablerW   r   s    )"3"3"555r)   c                 6    t        j                  |       }d|k(  S )NCo)r   r?   rQ   s     r   is_private_use_onlyrZ   w   s    $--i8%%%r)   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFCJKr   r   character_names     r   is_cjkr_   }   s8    $)))4 N""  r4   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFHIRAGANAr   r]   s     r   is_hiraganarb      8    $)))4 ''  r4   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFKATAKANAr   r]   s     r   is_katakanarf      rc   r4   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFHANGULr   r]   s     r   	is_hangulri      s8    $)))4 ~%%  r4   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFTHAIr   r]   s     r   is_thairl      s8    $)))4 ^##  r4   r.   c                 &    t         D ]  }|| v s y y)NTF)r   )r.   keywords     r   is_unicode_range_secondaryro      s!    2 j  r)   sequencesearch_zonec                 l   t        | t              st        t        |       }t	        t
        | d||k  r|n| j                  dd            }t        |      dk(  ry|D ]T  }|j                         j                  dd      }t        j                         D ]  \  }}||k(  r|c c S ||k(  s|c c S  V y)zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nr6   ignoreerrorsr   -_)
isinstancebytes	TypeErrorlenr   r   decodelowerreplacer   r,   )rp   rq   seq_lenresultsspecified_encodingencoding_aliasencoding_ianas          r   any_specified_encodingr      s     h&(mG'DW3'ELLW]eLfG
 7|q% %/557??SI-4]]_ 	%)NM!33$$ 22$$		%% r)      r   c                     | dv xs< t        t        j                  dj                  |             j                  t
              S )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_7utf_8utf_16utf_32	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sigencodings.{})
issubclass	importlibimport_moduleformatr   r
   )r   s    r   is_multi_byte_encodingr      sF    
 zz   I 5 5d ;<OO# r)   c                     t         D ]>  }t         |   }t        |t              r|g}|D ]  }| j                  |      s||fc c S  @ y)z9
    Identify and extract SIG/BOM in given sequence.
    )Nr)   )r   rx   ry   
startswith)rp   iana_encodingmarksmarks       r   identify_sig_or_bomr      s[    
 ( +}-eU#GE 	+D""4($d**	++ r)   r   c                 
    | dvS )N>   r   r    )r   s    r   should_strip_sig_or_bomr      s     444r)   cp_namestrictc                     | j                         j                  dd      } t        j                         D ]  \  }}| |k(  s| |k(  s|c S  |rt	        dj                  |             | S )Nrv   rw   z Unable to retrieve IANA for '{}')r}   r~   r   r,   r   r   )r   r   r   r   s       r   	iana_namer      sj    mmo%%c3/G)0 !%n$=(@  ! ;BB7KLLNr)   decoded_sequencec                 x    t               }| D ]!  }t        |      }||j                  |       # t        |      S rT   )setr0   addlist)r   rangesr   rB   s       r   
range_scanr     sF    UF% 
	'	2"

	

 <r)   iana_name_aiana_name_bc                    t        |       st        |      ryt        j                  dj                  |             j                  }t        j                  dj                  |            j                  } |d      } |d      }d}t        dd      D ]7  }t        |g      }|j                  |      |j                  |      k(  s3|dz  }9 |dz  S )	Ng        r   rs   rt   r            )r   r   r   r   r   rangery   r|   )	r   r   	decoder_a	decoder_bid_aid_bcharacter_match_countito_be_decodeds	            r   cp_similarityr     s    k*.D[.Q''(=(=k(JK^^I''(=(=k(JK^^IH%DH%D1c] 'qc
;;}%])CC!Q&!'
 !3&&r)   c                 ,    | t         v xr |t         |    v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   s     r   is_cp_similarr   (  s!    
 00g[DZ[fDg5ggr)   )i   )T)9unicodedata2r   ImportErrorcodecsr   rer   typingr   r   r   r   r	   r   _multibytecodecr
   encodings.aliasesr   	functoolsr   charset_normalizer.constantr   r   r   r   r   r   strboolr   r(   r0   r3   r:   rC   rH   rR   rW   rZ   r_   rb   rf   ri   rl   r{   ro   ry   r%   r   r   r   r   r   r   floatr   r   r   r)   r   <module>r      s  & &  4 4  7 % e e *+nc nd n ,n *+S S  , *+
S 
Xc] 
 ,
 *+" " " ,"   *+,c ,d , ,, *+& & & ,& *+%C %D % ,% *+6 6 6 ,6&3 &4 & *+#c #d # ,# *+(3 (4 ( ,( *+(3 (4 ( ,( *+& & & ,& *+$s $t $ ,$ 3./03 4  1U  QT : 3   % E(3-2F,G $53 54 5
s 
D 
C 
 c  's ' ' '*hs h h hK	  s   G 	G)(G)