a
    Kb                     @   s`   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ G dd de j	Z
G dd	 d	e j	ZdS )
    N)closing)data)PorterStemmer)SnowballStemmerc                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )SnowballTestc                 C   s\  t dd}|ddksJ |ddks.J |ddks@J |dd	ksRJ |d
d	ksdJ |dd	ksvJ |dd	ksJ |ddksJ |ddksJ t dd}|ddksJ |dd	ksJ |ddksJ t d}|ddksJ |ddksJ |ddks0J |dd	ksDJ |ddksXJ dS )z
        this unit testing for test the snowball arabic light stemmer
        this stemmer deals with prefixes and suffixes
        arabicTu&   الْعَرَبِــــــيَّةu   عربu   العربيةu   فقالواu   قالu   الطالباتu   طالبu   فالطالباتu   والطالباتu   الطالبونu   اللذانu   منFu   اللذu   الكلماتu   كلمNr   stem)selfZ
ar_stemmer r   7lib/python3.9/site-packages/nltk/test/unit/test_stem.pytest_arabic
   s(    

zSnowballTest.test_arabicc                 C   s   t d}|ddksJ d S )Nrussianu   авантненькаяu   авантненькr   )r
   Zstemmer_russianr   r   r   test_russian'   s    zSnowballTest.test_russianc                 C   s`   t d}t ddd}|ddks&J |ddks8J |ddksJJ |ddks\J d S )NgermanT)Zignore_stopwordsu	   SchränkeZschrankZkeinenZkeinr   )r
   Zstemmer_germanZstemmer_german2r   r   r   test_german+   s    zSnowballTest.test_germanc                 C   s0   t d}|ddksJ |ddks,J d S )NspanishZ	VisionadoZvisionZalgueZalgur   r
   stemmerr   r   r   test_spanish5   s    zSnowballTest.test_spanishc                 C   s   t d}|ddksJ d S )Nenglishzy'syr   r   r   r   r   test_short_strings_bug=   s    z#SnowballTest.test_short_strings_bugN)__name__
__module____qualname__r   r   r   r   r   r   r   r   r   r   	   s
   
r   c                   @   sD   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dS )
PorterTestc                 C   sF   t tdjdd}|  W  d    S 1 s80    Y  d S )Nz*stemmers/porter_test/porter_vocabulary.txtutf-8encoding)r   r   findopenread
splitlinesr
   fpr   r   r   _vocabularyC   s    
zPorterTest._vocabularyc                 C   sL   t |d}t|  |D ].\}}||}||ksJ d||||qd S )N)modez*{} should stem to {} in {} mode but got {})r   zipr&   r	   format)r
   Zstemmer_modeZexpected_stemsr   ZwordZ	true_stemZour_stemr   r   r   _test_against_expected_outputK   s    

z(PorterTest._test_against_expected_outputc                 C   sP   t tdjdd&}| tj|   W d   n1 sB0    Y  dS )az  Tests all words from the test vocabulary provided by M Porter

        The sample vocabulary and output were sourced from
        https://tartarus.org/martin/PorterStemmer/voc.txt and
        https://tartarus.org/martin/PorterStemmer/output.txt
        and are linked to from the Porter Stemmer algorithm's homepage
        at https://tartarus.org/martin/PorterStemmer/
        z-stemmers/porter_test/porter_martin_output.txtr   r   N)	r   r   r    r!   r*   r   ZMARTIN_EXTENSIONSr"   r#   r$   r   r   r   test_vocabulary_martin_modeX   s    	
z&PorterTest.test_vocabulary_martin_modec                 C   sP   t tdjdd&}| tj|   W d    n1 sB0    Y  d S )Nz+stemmers/porter_test/porter_nltk_output.txtr   r   )	r   r   r    r!   r*   r   ZNLTK_EXTENSIONSr"   r#   r$   r   r   r   test_vocabulary_nltk_modej   s    
z$PorterTest.test_vocabulary_nltk_modec                 C   st   t tdjdd&}| tj|   W d    n1 sB0    Y  | tjtdjdd   d S )Nz/stemmers/porter_test/porter_original_output.txtr   r   )	r   r   r    r!   r*   r   ZORIGINAL_ALGORITHMr"   r#   r$   r   r   r   test_vocabulary_original_modet   s    
"
z(PorterTest.test_vocabulary_original_modec                 C   s   t  ddksJ dS )zTest for bug https://github.com/nltk/nltk/issues/1581

        Ensures that 'oed' can be stemmed without throwing an error.
        ZoedoNr   r	   )r
   r   r   r   test_oed_bug   s    zPorterTest.test_oed_bugc                 C   sl   t  }|ddksJ |ddks*J |jddddks@J |ddksRJ |jddddkshJ d	S )
zTest for improvement on https://github.com/nltk/nltk/issues/2507

        Ensures that stems are lowercased when `to_lowercase=True`
        ZOnZonIiF)Zto_lowercaseZGithubZgithubNr/   )r
   Zporterr   r   r   test_lowercase_option   s    z PorterTest.test_lowercase_optionN)
r   r   r   r&   r*   r+   r,   r-   r0   r3   r   r   r   r   r   B   s   
r   )Zunittest
contextlibr   Znltkr   Znltk.stem.porterr   Znltk.stem.snowballr   ZTestCaser   r   r   r   r   r   <module>   s   9