
    AeP?                     f   d dl mZ d dlZ	 d dlmZ ddlmZ dZ	dZ
d	Z ed
      Zd Zd Zd Zd Zd Z G d de      Zd Z G d de      Z G d de      Z e       Z G d de      Z G d de      Z G d de      Z G d de      Z G d d e      Z e       Zd!e_        d"e_         d# Z!d$ Z"d% Z#d& Z$ e$d'      Z%d(e%_        d)e%_         d5d*Z&d+ Z'd, Z(d- Z)d. Z* ee	      Z+d/e+_        d0e+_          ee
      Z,d1e,_        d2e,_          ee      Z-d3e-_        d4e-_         y# e$ r
 d dlmZ Y w xY w)6    )absolute_importN)maxsize)maxint   )Errorsboleoleof
c                 R   t        |       }|j                          d}t        |      }g }||k  rwt        ||         }|dz   }|dz  }||k  r2|t        ||         k\  r!|dz  }|dz  }||k  r|t        ||         k\  r!|j	                  |       |j	                  |       ||k  rw|S )z
    Return a list of character codes consisting of pairs
    [code1a, code1b, code2a, code2b,...] which cover all
    the characters in |s|.
    r   r   )listsortlenordappend)s	char_listinresultcode1code2s          3lib/python3.12/site-packages/Cython/Plex/Regexps.pychars_to_rangesr   "   s     QINN	AIAF
a%IaL!		Q!eYq\!22QJEFA !eYq\!22 	ee a% M    c                     t        | t        d            }t        |t        d      dz         }||k  r!t        d      t        d      z
  }||z   ||z   fS y)z
    If the range of characters from code1 to code2-1 includes any
    lower case letters, return the corresponding upper case range.
    azr   ANmaxr   minr   r   code3code4ds        r   uppercase_ranger'   9   W    
 s3x Es3x!|$Eu}Hs3x	519%%r   c                     t        | t        d            }t        |t        d      dz         }||k  r!t        d      t        d      z
  }||z   ||z   fS y)z
    If the range of characters from code1 to code2-1 includes any
    upper case letters, return the corresponding lower case range.
    r   Zr   r   Nr    r#   s        r   lowercase_ranger+   G   r(   r   c           	          t        dt        |       d      D cg c]  }t        | |   | |dz             }}t        | S c c}w )z
    Given a list of codes as returned by chars_to_ranges, return
    an RE which will match a character in any of the ranges.
    r      r   )ranger   	CodeRangeAlt)	code_listr   re_lists      r   
CodeRangesr3   U   sK    
 CH3y>[\B]^Qy1yQ'78^G^= _s   ?c                     | t         cxk  r|k  r3n n0t        t        | t               t        t        t         dz   |            S t        | |      S )z
    CodeRange(code1, code2) is an RE which matches any character
    with a code |c| in the range |code1| <= |c| < |code2|.
    r   )nl_coder0   RawCodeRange
RawNewliner   r   s     r   r/   r/   ^   sG    
 %<w/!U35 	5 E5))r   c                   R    e Zd ZdZdZdZdZd Zd Zd Z	d Z
d Zd	 Zd
 Zd Zd Zy)REa  RE is the base class for regular expression constructors.
    The following operators are defined on REs:

         re1 + re2         is an RE which matches |re1| followed by |re2|
         re1 | re2         is an RE which matches either |re1| or |re2|
    r   Nc                 F    t        d| j                  j                  z        )aM  
        This method should add states to |machine| to implement this
        RE, starting at |initial_state| and ending at |final_state|.
        If |match_bol| is true, the RE must be able to match at the
        beginning of a line. If nocase is true, upper and lower case
        letters should be treated as equivalent.
        z %s.build_machine not implemented)NotImplementedError	__class____name__)selfmachineinitial_statefinal_state	match_bolnocases         r   build_machinezRE.build_machine{   s&     ""D"&.."9"9#: ; 	;r   c                 l    |j                         }|j                  |       |j                  ||       |S )z~
        Given a state |s| of machine |m|, return a new state
        reachable from |s| on character |c| or epsilon.
        )	new_statelink_toadd_transition)r?   mrA   cr   s        r   	build_optzRE.build_opt   s2    
 KKMa $$Q*r   c                     t        | |      S N)Seqr?   others     r   __add__z
RE.__add__       4r   c                     t        | |      S rN   )r0   rP   s     r   __or__z	RE.__or__   rS   r   c                 R    | j                   r| j                   S | j                         S rN   )strcalc_strr?   s    r   __str__z
RE.__str__   s    8888O==?"r   c                 L    t        |t              s| j                  ||d       y y )NzPlex.RE instance)
isinstancer:   
wrong_typer?   numvalues      r   check_rezRE.check_re   s"    %$OOC(:; %r   c                 Z    t        |      t        d      k7  r| j                  ||d       y y )N string)typer]   r^   s      r   check_stringzRE.check_string   s'    ;$r("OOC1 #r   c                     | j                  ||       t        |      dk7  r8t        j                  d|| j                  j
                  t        |      fz        y )Nr   zOInvalid value for argument %d of Plex.%s.Expected a string of length 1, got: %s)rf   r   r   PlexValueErrorr=   r>   reprr^   s      r   
check_charzRE.check_char   s_    #u%u:?'' )Q,/1H1H$u+TW)W X X r   c                 &   t        |      t        j                  k(  r1|j                  j                  d|j                  j
                  d}nt        |      j
                  }t        j                  d|| j                  j
                  ||fz        )N.z	 instancez<Invalid type for argument %d of Plex.%s (expected %s, got %s)re   typesInstanceTyper=   
__module__r>   r   PlexTypeError)r?   r_   r`   expectedgots        r   r]   zRE.wrong_type   s    ;%,,,**EOO,D,DFC u+&&C"" $:'*DNN,C,CXs=T$T U 	Ur   )r>   ro   __qualname____doc__nullablematch_nlrW   rE   rL   rR   rU   rZ   ra   rf   rj   r]    r   r   r:   r:   o   sG     HH
C
;  #<2XUr   r:   c                     t        |       dk(  r"t        t        |       t        |       dz         }nt        |       }dt	        |       z  |_        |S )z;
    Char(c) is an RE which matches the character |c|.
    r   zChar(%s))r   r/   r   SpecialSymbolri   rW   )rK   r   s     r   Charrz      sF     1v{3q63q6A:.q!d1g%FJMr   c                   6    e Zd ZdZdZdZdZdZdZd Z	d Z
d Zy)r6   z
    RawCodeRange(code1, code2) is a low-level RE which matches any character
    with a code |c| in the range |code1| <= |c| < |code2|, where the range
    does not include newline. For internal use only.
    r   Nc                 Z    ||f| _         t        ||      | _        t        ||      | _        y rN   )r.   r'   r+   )r?   r   r   s      r   __init__zRawCodeRange.__init__   s+    U^
.ue<.ue<r   c                    |r| j                  ||t              }|j                  | j                  |       |rR| j                  r|j                  | j                  |       | j
                  r|j                  | j
                  |       y y y rN   )rL   BOLrI   r.   r'   r+   r?   rJ   rA   rB   rC   rD   s         r   rE   zRawCodeRange.build_machine   sv     NN1mSAM$$TZZ=##,,T-A-A;O##,,T-A-A;O $ r   c                 8    d| j                   | j                  fz  S )NzCodeRange(%d,%d)r8   rY   s    r   rX   zRawCodeRange.calc_str  s    !TZZ$<<<r   )r>   ro   rs   rt   ru   rv   r.   r'   r+   r}   rE   rX   rw   r   r   r6   r6      s2    
 HHEOO=
P=r   r6   c                       e Zd ZdZdZdZd Zy)_RawNewlinezd
    RawNewline is a low-level RE which matches a newline character.
    For internal use only.
    r   r   c                     |r| j                  ||t              }| j                  ||t              }|j                  t        t        dz   f|       y Nr   )rL   r   EOLrI   r5   )r?   rJ   rA   rB   rC   rD   r   s          r   rE   z_RawNewline.build_machine  sB     NN1mSAMNN1mS1	'7Q;/=r   N)r>   ro   rs   rt   ru   rv   rE   rw   r   r   r   r     s     HH>r   r   c                   (    e Zd ZdZdZdZdZd Zd Zy)ry   zx
    SpecialSymbol(sym) is an RE which matches the special input
    symbol |sym|, which is one of BOL, EOL or EOF.
    r   Nc                     || _         y rN   )sym)r?   r   s     r   r}   zSpecialSymbol.__init__#  s	    r   c                     |r*| j                   t        k(  r| j                  ||t              }|j	                  | j                   |       y rN   )r   r   rL   r   rI   r   s         r   rE   zSpecialSymbol.build_machine&  s7     S NN1mSAM$$TXX{;r   )	r>   ro   rs   rt   ru   rv   r   r}   rE   rw   r   r   ry   ry     s"     HH
C<r   ry   c                   "    e Zd ZdZd Zd Zd Zy)rO   z]Seq(re1, re2, re3...) is an RE which matches |re1| followed by
    |re2| followed by |re3|...c                 ,   d}t        |      D ]'  \  }}| j                  ||       |xr |j                  }) || _        || _        t	        |      }d}|r9|dz  }||   }|j
                  rd}	 || _        y |j                  s		 || _        y |r9|| _        y )Nr   r   )	enumeratera   ru   r2   r   rv   )r?   r2   ru   r   rerv   s         r   r}   zSeq.__init__2  s    w' 	0EArMM!R /BKKH	0  LFAB{{ ! ;;   !r   c                 <   | j                   }t        |      dk(  r|j                  |       y |}t        |      }t        |      D ]U  \  }	}
|	|dz
  k  r|j	                         }n|}|
j                  |||||       |}|
j                  xs |xr |
j                  }W y Nr   r   )r2   r   rH   r   rG   rE   rv   ru   )r?   rJ   rA   rB   rC   rD   r2   s1r   r   r   s2s               r   rE   zSeq.build_machineE  s    ,,w<1!!+.BGA"7+ G2q1u9B$B  BIv>KKFI,E"++	Gr   c                 Z    ddj                  t        t        | j                              z  S )NzSeq(%s),joinmaprW   r2   rY   s    r   rX   zSeq.calc_strU  !    388CT\\$:;;;r   Nr>   ro   rs   rt   r}   rE   rX   rw   r   r   rO   rO   .  s    "!&G <r   rO   c                   "    e Zd ZdZd Zd Zd Zy)r0   zRAlt(re1, re2, re3...) is an RE which matches either |re1| or
    |re2| or |re3|...c                    || _         d}d}g }g }d}|D ]X  }| j                  ||       |j                  r|j                  |       d}n|j                  |       |j                  rd}|dz  }Z || _        || _        || _        || _        y r   )r2   ra   ru   r   rv   nullable_resnon_nullable_res)r?   r2   ru   rv   r   r   r   r   s           r   r}   zAlt.__init__]  s     		BMM!R {{##B' ''+{{FA		 ) 0  r   c                     | j                   D ]  }|j                  |||||        | j                  r@|r| j                  ||t              }| j                  D ]  }|j                  |||d|        y y )Nr   )r   rE   r   rL   r   )r?   rJ   rA   rB   rC   rD   r   s          r   rE   zAlt.build_machines  s    ## 	OBQ{IvN	O   $q- E++ K  M;6JK !r   c                 Z    ddj                  t        t        | j                              z  S )NzAlt(%s)r   r   rY   s    r   rX   zAlt.calc_str|  r   r   Nr   rw   r   r   r0   r0   Y  s    !,K<r   r0   c                   "    e Zd ZdZd Zd Zd Zy)Rep1z@Rep1(re) is an RE which matches one or more repetitions of |re|.c                 z    | j                  d|       || _        |j                  | _        |j                  | _        y r   )ra   r   ru   rv   )r?   r   s     r   r}   zRep1.__init__  s-    ar   c                    |j                         }|j                         }|j                  |       | j                  j                  ||||xs | j                  j                  |       |j                  |       |j                  |       y rN   )rG   rH   r   rE   rv   )r?   rJ   rA   rB   rC   rD   r   r   s           r   rE   zRep1.build_machine  sf    [[][[]b!aR)Fdgg6F6FO


2


;r   c                      d| j                   z  S )NzRep1(%s)r   rY   s    r   rX   zRep1.calc_str  s    DGG##r   Nr   rw   r   r   r   r     s    J$ $r   r   c                   *    e Zd ZdZdZdZd Zd Zd Zy)
SwitchCasez
    SwitchCase(re, nocase) is an RE which matches the same strings as RE,
    but treating upper and lower case letters according to |nocase|. If
    |nocase| is true, case is ignored, otherwise it is not.
    Nc                 d    || _         || _        |j                  | _        |j                  | _        y rN   )r   rD   ru   rv   )r?   r   rD   s      r   r}   zSwitchCase.__init__  s&    r   c                 V    | j                   j                  ||||| j                         y rN   )r   rE   rD   r   s         r   rE   zSwitchCase.build_machine  s#    aY"kk	+r   c                 H    | j                   rd}nd}|d| j                  dS )NNoCaseCase())rD   r   )r?   names     r   rX   zSwitchCase.calc_str  s"    ;;DD))r   )	r>   ro   rs   rt   r   rD   r}   rE   rX   rw   r   r   r   r     s"    
 
BF$+*r   r   z8
    Empty is an RE which matches the empty string.
    Emptyc                 j    t        t        t        t        |              }dt	        |       z  |_        |S )z@
    Str1(s) is an RE which matches the literal string |s|.
    Str(%s))rO   tupler   rz   ri   rW   r   r   s     r   Str1r     s.     %D!%&FT!W$FJMr   c                      t        |       dk(  rt        | d         S t        t        t	        t        |              }ddj                  t	        t        |             z  |_        |S )z
    Str(s) is an RE which matches the literal string |s|.
    Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|...
    r   r   r   r   )r   r   r0   r   r   r   ri   rW   )strsr   s     r   Strr     sR    
 4yA~DG}eCdO,-#dD/!::
r   c                 T    t        t        |             }dt        |       z  |_        |S )zH
    Any(s) is an RE which matches any character in the string |s|.
    zAny(%s))r3   r   ri   rW   r   s     r   Anyr     s(    
 *+FT!W$FJMr   c                     t        |       }|j                  dt                |j                  t               t	        |      }dt        |       z  |_        |S )zp
    AnyBut(s) is an RE which matches any character (including
    newline) which is not in the string |s|.
    r   z
AnyBut(%s))r   insertr   r   r3   ri   rW   )r   rangesr   s      r   AnyButr     sI    
 QF
MM!fW
MM&FQ'FJMr   rc   zT
    AnyChar is an RE which matches any single character (including a newline).
    AnyCharc                 N   |r1t        t        |       t        |      dz         }d| d|d|_        |S g }t        dt	        |       d      D ];  }|j                  t        t        | |         t        | |dz            dz                = t        | }dt        |       z  |_        |S )a  
    Range(c1, c2) is an RE which matches any single character in the range
    |c1| to |c2| inclusive.
    Range(s) where |s| is a string of even length is an RE which matches
    any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,...
    r   zRange(r   r   r   r-   z	Range(%s))r/   r   rW   r.   r   r   r0   ri   )r   r   r   r   r   s        r   Ranger     s     
3r7CGaK0')2.
 M q#b'1% 	EAMM)C1JBq1uI0BCD	Ef 48+
Mr   c                 :    t        | t              }d| z  |_        |S )zI
    Opt(re) is an RE which matches either |re| or the empty string.
    zOpt(%s))r0   r   rW   r   r   s     r   Optr     s     U^FRFJMr   c                 B    t        t        |             }d| z  |_        |S )zJ
    Rep(re) is an RE which matches zero or more repetitions of |re|.
    zRep(%s))r   r   rW   r   s     r   Repr     s!     b]FRFJMr   c                     t        | d      S )z
    NoCase(re) is an RE which matches the same strings as RE, but treating
    upper and lower case letters as equivalent.
    r   rD   r   r   s    r   r   r     s    
 b##r   c                     t        | d      S )z
    Case(re) is an RE which matches the same strings as RE, but treating
    upper and lower case letters as distinct, i.e. it cancels the effect
    of any enclosing NoCase().
    r   r   r   r   s    r   r   r      s     b##r   z=
    Bol is an RE which matches the beginning of a line.
    Bolz7
    Eol is an RE which matches the end of a line.
    Eolz9
    Eof is an RE which matches the end of the file.
    EofrN   ).
__future__r   rm   sysr   r   ImportErrorrc   r   r   r   EOFr   r5   r   r'   r+   r3   r/   objectr:   rz   r6   r   r7   ry   rO   r0   r   r   r   rt   rW   r   r   r   r   r   r   r   r   r   r   r   r   r   rw   r   r   <module>r      s   ' %  
d).
*"EU EU\	=2 =>>" > ]
<B <((<" (<V$<" $<N$2 $** *B 	  	

 *  &$$ 3i  
3i  
3i  c  s   D! !D0/D0