a
    Kbe+                     @   sj   d dl Z d dlmZ G dd dZG dd deZdd Zd	d
 Zdd ZedddgZ	G dd dZ
dS )    N)
namedtuplec                   @   st   e Zd ZdZdddZedd Zedd Zd	d
 Zdd Z	eee	Z
dd Zdd Zdd Zdd Zdd ZdS )AlignedSenta#  
    Return an aligned sentence object, which encapsulates two sentences
    along with an ``Alignment`` between them.

    Typically used in machine translation to represent a sentence and
    its translation.

        >>> from nltk.translate import AlignedSent, Alignment
        >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
        ...     ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1'))
        >>> algnsent.words
        ['klein', 'ist', 'das', 'Haus']
        >>> algnsent.mots
        ['the', 'house', 'is', 'small']
        >>> algnsent.alignment
        Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])
        >>> from nltk.corpus import comtrans
        >>> print(comtrans.aligned_sents()[54])
        <AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
        >>> print(comtrans.aligned_sents()[54].alignment)
        0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13

    :param words: Words in the target language sentence
    :type words: list(str)
    :param mots: Words in the source language sentence
    :type mots: list(str)
    :param alignment: Word-level alignments between ``words`` and ``mots``.
        Each alignment is represented as a 2-tuple (words_index, mots_index).
    :type alignment: Alignment
    Nc                 C   s:   || _ || _|d u r tg | _nt|tu s0J || _d S N)_words_mots	Alignment	alignmenttype)selfwordsmotsr    r   1lib/python3.9/site-packages/nltk/translate/api.py__init__/   s    zAlignedSent.__init__c                 C   s   | j S r   )r   r
   r   r   r   r   8   s    zAlignedSent.wordsc                 C   s   | j S r   )r   r   r   r   r   r   <   s    zAlignedSent.motsc                 C   s   | j S r   )
_alignmentr   r   r   r   _get_alignment@   s    zAlignedSent._get_alignmentc                 C   s"   t t| jt| j| || _d S r   )_check_alignmentlenr   r   r   )r
   r   r   r   r   _set_alignmentC   s    zAlignedSent._set_alignmentc                 C   sN   dd dd | jD  }dd dd | jD  }d| d| d| jdS )z_
        Return a string representation for this ``AlignedSent``.

        :rtype: str
        z[%s]z, c                 s   s   | ]}d | V  qdS z'%s'Nr   .0wr   r   r   	<genexpr>O       z'AlignedSent.__repr__.<locals>.<genexpr>c                 s   s   | ]}d | V  qdS r   r   r   r   r   r   r   P   r   zAlignedSent())joinr   r   r   )r
   r   r   r   r   r   __repr__I   s    zAlignedSent.__repr__c                 C   s:  d}|d7 }| j D ]}|d| d| d7 }q| jD ]}|d| d| d7 }q4| jD ]*\}}|d| j |  d| j|  d7 }qVtt| j d	 D ]$}|d
| j | | j |d	  7 }qtt| jd	 D ]$}|d| j| | j|d	  7 }q|dddd | j D  7 }|dddd | jD  7 }|d7 }|S )z<
        Dot representation of the aligned sentence
        zgraph align {
znode[shape=plaintext]
"z_source" [label="z"] 
z_target" [label="z_source" -- "z
_target" 
   z)"{}_source" -- "{}_source" [style=invis]
z)"{}_target" -- "{}_target" [style=invis]
z{rank = same; %s}
 c                 s   s   | ]}d | V  qdS )z"%s_source"Nr   r   r   r   r   r   u   r   z&AlignedSent._to_dot.<locals>.<genexpr>c                 s   s   | ]}d | V  qdS )z"%s_target"Nr   r   r   r   r   r   v   r   })r   r   r   ranger   formatr   )r
   sr   uvir   r   r   _to_dotT   s,    

$zAlignedSent._to_dotc              
   C   s~   |   d}d}z$tjdd| gtjtjtjd}W n. tyd } ztd|W Y d}~n
d}~0 0 ||\}}|dS )zR
        Ipython magic : show SVG representation of this ``AlignedSent``.
        utf8Zsvgdotz-T%s)stdinstdoutstderrz0Cannot find the dot binary from Graphviz packageN)	r)   encode
subprocessPopenPIPEOSError	ExceptionZcommunicatedecode)r
   Z
dot_stringZoutput_formatZprocesseouterrr   r   r   
_repr_svg_|   s    

 zAlignedSent._repr_svg_c                 C   sB   d | jdd d }d | jdd d }d| d| dS )zn
        Return a human-readable string representation for this ``AlignedSent``.

        :rtype: str
        r!   N   z...z<AlignedSent: 'z' -> 'z'>)r   r   r   )r
   sourcetargetr   r   r   __str__   s    zAlignedSent.__str__c                 C   s   t | j| j| j S )zm
        Return the aligned sentence pair, reversing the directionality

        :rtype: AlignedSent
        )r   r   r   r   invertr   r   r   r   r>      s    zAlignedSent.invert)N)__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r   r)   r9   r=   r>   r   r   r   r   r      s   
	


(
r   c                   @   sV   e Zd ZdZdd Zedd Zdd Zdd	 ZdddZ	dd Z
dd Zdd Zd
S )r   ac  
    A storage class for representing alignment between two sequences, s1, s2.
    In general, an alignment is a set of tuples of the form (i, j, ...)
    representing an alignment between the i-th element of s1 and the
    j-th element of s2.  Tuples are extensible (they might contain
    additional data, such as a boolean to indicate sure vs possible alignments).

        >>> from nltk.translate import Alignment
        >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)])
        >>> a.invert()
        Alignment([(0, 0), (1, 0), (2, 1), (2, 2)])
        >>> print(a.invert())
        0-0 1-0 2-1 2-2
        >>> a[0]
        [(0, 1), (0, 0)]
        >>> a.invert()[2]
        [(2, 1), (2, 2)]
        >>> b = Alignment([(0, 0), (0, 1)])
        >>> b.issubset(a)
        True
        >>> c = Alignment.fromstring('0-0 0-1')
        >>> b == c
        True
    c                 C   s:   t | |}|t g kr*tdd |D nd|_d |_|S )Nc                 s   s   | ]}|d  V  qdS r   Nr   r   pr   r   r   r      r   z$Alignment.__new__.<locals>.<genexpr>r   )	frozenset__new__max_len_index)clspairsr
   r   r   r   rH      s    $zAlignment.__new__c                 C   s   t dd | D S )a  
        Read a giza-formatted string and return an Alignment object.

            >>> Alignment.fromstring('0-0 2-1 9-2 21-3 10-4 7-5')
            Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)])

        :type s: str
        :param s: the positional alignments in giza format
        :rtype: Alignment
        :return: An Alignment object corresponding to the string representation ``s``.
        c                 S   s   g | ]}t |qS r   )
_giza2pair)r   ar   r   r   
<listcomp>   r   z(Alignment.fromstring.<locals>.<listcomp>)r   split)rL   r%   r   r   r   
fromstring   s    zAlignment.fromstringc                 C   s   | j s|   | j |S )zN
        Look up the alignments that map from a given index or slice.
        )rK   _build_index__getitem__)r
   keyr   r   r   rT      s    zAlignment.__getitem__c                 C   s   t dd | D S )zI
        Return an Alignment object, being the inverted mapping.
        c                 s   s*   | ]"}|d  |d f|dd  V  qdS )r    r      Nr   rE   r   r   r   r      r   z#Alignment.invert.<locals>.<genexpr>)r   r   r   r   r   r>      s    zAlignment.invertNc                 C   sV   t  }| js|   |s*ttt| j}|D ]}|dd | j| D  q.t|S )z
        Work out the range of the mapping from the given positions.
        If no positions are specified, compute the range of the entire mapping.
        c                 s   s   | ]\}}|V  qd S r   r   )r   _fr   r   r   r      r   z"Alignment.range.<locals>.<genexpr>)setrK   rS   listr#   r   updatesorted)r
   Z	positionsZimagerF   r   r   r   r#      s    zAlignment.rangec                 C   s   dt |  S )M
        Produce a Giza-formatted string representing the alignment.
        zAlignment(%r))r\   r   r   r   r   r      s    zAlignment.__repr__c                 C   s   d dd t| D S )r]   r!   c                 s   s   | ]}d |dd  V  qdS )z%d-%dNrV   r   rE   r   r   r   r      r   z$Alignment.__str__.<locals>.<genexpr>)r   r\   r   r   r   r   r=      s    zAlignment.__str__c                 C   s<   dd t | jd D | _| D ]}| j|d  | qdS )z
        Build a list self._index such that self._index[i] is a list
        of the alignments originating from word i.
        c                 S   s   g | ]}g qS r   r   )r   rW   r   r   r   rP      r   z*Alignment._build_index.<locals>.<listcomp>r    r   N)r#   rJ   rK   append)r
   rF   r   r   r   rS      s    zAlignment._build_index)N)r?   r@   rA   rB   rH   classmethodrR   rT   r>   r#   r   r=   rS   r   r   r   r   r      s   

r   c                 C   s   |  d\}}t|t|fS N-rQ   int)pair_stringr(   jr   r   r   rN     s    rN   c                 C   s    |  d\}}}t|t|fS r`   rb   )rd   r(   re   rF   r   r   r   _naacl2pair	  s    rf   c                    sP   t |tu sJ tfdd|D s.tdt fdd|D sLtddS )ab  
    Check whether the alignments are legal.

    :param num_words: the number of source language words
    :type num_words: int
    :param num_mots: the number of target language words
    :type num_mots: int
    :param alignment: alignment to be checked
    :type alignment: Alignment
    :raise IndexError: if alignment falls outside the sentence
    c                 3   s*   | ]"}d |d    ko k n  V  qdS rD   r   r   Zpair)	num_wordsr   r   r     r   z#_check_alignment.<locals>.<genexpr>z&Alignment is outside boundary of wordsc                 3   s6   | ].}|d  du p,d|d    ko( k n  V  qdS )r    Nr   r   rg   )num_motsr   r   r     r   z%Alignment is outside boundary of motsN)r	   r   all
IndexError)rh   ri   r   r   )ri   rh   r   r     s
    r   PhraseTableEntry
trg_phraselog_probc                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )PhraseTablezs
    In-memory store of translations for a given phrase, and the log
    probability of the those translations
    c                 C   s   t  | _d S r   )dictsrc_phrasesr   r   r   r   r   ,  s    zPhraseTable.__init__c                 C   s
   | j | S )a  
        Get the translations for a source language phrase

        :param src_phrase: Source language phrase of interest
        :type src_phrase: tuple(str)

        :return: A list of target language phrases that are translations
            of ``src_phrase``, ordered in decreasing order of
            likelihood. Each list element is a tuple of the target
            phrase and its log probability.
        :rtype: list(PhraseTableEntry)
        rq   r
   
src_phraser   r   r   translations_for/  s    zPhraseTable.translations_forc                 C   sL   t ||d}|| jvr g | j|< | j| | | j| jdd dd dS )z
        :type src_phrase: tuple(str)
        :type trg_phrase: tuple(str)

        :param log_prob: Log probability that given ``src_phrase``,
            ``trg_phrase`` is its translation
        :type log_prob: float
        )rm   rn   c                 S   s   | j S r   )rn   )r6   r   r   r   <lambda>K  r   z!PhraseTable.add.<locals>.<lambda>T)rU   reverseN)rl   rq   r^   sort)r
   rt   rm   rn   entryr   r   r   add>  s
    	

zPhraseTable.addc                 C   s
   || j v S r   rr   rs   r   r   r   __contains__M  s    zPhraseTable.__contains__N)r?   r@   rA   rB   r   ru   rz   r{   r   r   r   r   ro   &  s
   ro   )r0   collectionsr   r   rG   r   rN   rf   r   rl   ro   r   r   r   r   <module>   s    b