a
    Kb7'                     @   s   d dl Z d dlmZ d dlmZ d dlmZ zd dlZW n eyJ   Y n0 d dl	m
Z
 G dd de
Zdd	 Zd
d ZG dd dZG dd dZdS )    N)abstractmethod)sqrt)stdout)ClusterIc                   @   sd   e Zd ZdZdddZdddZedd	 Zd
d Zedd Z	dd Z
dd Zdd Zdd ZdS )VectorSpaceClustererz
    Abstract clusterer which takes tokens and maps them into a vector space.
    Optionally performs singular value decomposition to reduce the
    dimensionality.
    FNc                 C   s   d| _ || _|| _dS )a)  
        :param normalise:       should vectors be normalised to length 1
        :type normalise:        boolean
        :param svd_dimensions:  number of dimensions to use in reducing vector
                                dimensionsionality with SVD
        :type svd_dimensions:   int
        N)_Tt_should_normalise_svd_dimensions)selfZ	normaliseZsvd_dimensions r   0lib/python3.9/site-packages/nltk/cluster/util.py__init__   s    zVectorSpaceClusterer.__init__c           
         s   t |dksJ  jr&tt j|} jrĈ jt |d k rtjt	t
|\}}}|d  j t jtj }|d d d  jf }|d  jd d f }	t	t||	}t	| _ || |r fdd|D S d S )Nr   c                    s   g | ]}  |qS r   )classify).0vectorr
   r   r   
<listcomp>?       z0VectorSpaceClusterer.cluster.<locals>.<listcomp>)lenr   listmap
_normaliser	   numpyZlinalgZsvdZ	transposeZarrayidentityZfloat64dotr   cluster_vectorspace)
r
   vectorsZassign_clusterstraceudZvtSTZDtr   r   r   cluster(   s    zVectorSpaceClusterer.clusterc                 C   s   dS )zD
        Finds the clusters using the given set of vectors.
        Nr   )r
   r   r   r   r   r   r   A   s    z(VectorSpaceClusterer.cluster_vectorspacec                 C   s<   | j r| |}| jd ur(t| j|}| |}| |S N)r   r   r   r   r   classify_vectorspaceZcluster_name)r
   r   r"   r   r   r   r   G   s    


zVectorSpaceClusterer.classifyc                 C   s   dS )zN
        Returns the index of the appropriate cluster for the vector.
        Nr   r
   r   r   r   r   r$   O   s    z)VectorSpaceClusterer.classify_vectorspacec                 C   s4   | j r| |}| jd ur(t| j|}| ||S r#   )r   r   r   r   r   likelihood_vectorspace)r
   r   Zlabelr   r   r   
likelihoodU   s
    

zVectorSpaceClusterer.likelihoodc                 C   s   |  |}||krdS dS )zP
        Returns the likelihood of the vector belonging to the cluster.
        g      ?g        )r$   )r
   r   r"   Z	predictedr   r   r   r&   \   s    
z+VectorSpaceClusterer.likelihood_vectorspacec                 C   s,   | j r| |}| jdur(t| j|}|S )zU
        Returns the vector after normalisation and dimensionality reduction
        N)r   r   r   r   r   r%   r   r   r   r   c   s
    

zVectorSpaceClusterer.vectorc                 C   s   |t t|| S )z7
        Normalises the vector to unit length.
        r   r   r   r%   r   r   r   r   m   s    zVectorSpaceClusterer._normalise)FN)FF)__name__
__module____qualname____doc__r   r"   r   r   r   r$   r'   r&   r   r   r   r   r   r   r      s   




r   c                 C   s   | | }t t||S )z}
    Returns the euclidean distance between vectors u and v. This is equivalent
    to the length of the vector (u - v).
    r(   )r   vdiffr   r   r   euclidean_distancet   s    r/   c                 C   s0   dt | |tt | | tt ||   S )zw
    Returns 1 minus the cosine of the angle between vectors v and u. This is
    equal to ``1 - (u.v / |u||v|)``.
       )r   r   r   )r   r-   r   r   r   cosine_distance}   s    r1   c                   @   s2   e Zd ZdZdd ZdddZdd Zd	d
 ZdS )_DendrogramNodezTree node of a dendrogram.c                 G   s   || _ || _d S r#   )_value	_children)r
   valueZchildrenr   r   r   r      s    z_DendrogramNode.__init__Tc                 C   s@   | j r*g }| j D ]}||| q|S |r6| jgS | gS d S r#   )r4   extendleavesr3   )r
   valuesr7   childr   r   r   r7      s    
z_DendrogramNode.leavesc                 C   s   | j | fg}t||k rv| \}}|js:|||f qv|jD ]*}|jr\||j |f q@|d|f q@|  qg }|D ]\}}||  q~|S Nr   )r3   r   popr4   pushappendsortr7   )r
   nqueueprioritynoder9   groupsr   r   r   rC      s    

z_DendrogramNode.groupsc                 C   s   t | j|jdk S r:   )r1   r3   )r
   Z
comparatorr   r   r   __lt__   s    z_DendrogramNode.__lt__N)T)r)   r*   r+   r,   r   r7   rC   rD   r   r   r   r   r2      s
   
r2   c                   @   s@   e Zd ZdZg fddZdd Zdd Zg fdd	Zd
d ZdS )
Dendrograma  
    Represents a dendrogram, a tree with a specified branching order.  This
    must be initialised with the leaf items, then iteratively call merge for
    each branch. This class constructs a tree representing the order of calls
    to the merge function.
    c                 C   s(   dd |D | _ t| j | _d| _dS )zs
        :param  items: the items at the leaves of the dendrogram
        :type   items: sequence of (any)
        c                 S   s   g | ]}t |qS r   )r2   r   itemr   r   r   r      r   z'Dendrogram.__init__.<locals>.<listcomp>r0   N)_itemscopy_original_items_merge)r
   itemsr   r   r   r      s    zDendrogram.__init__c                    sj   t |dksJ t jg fdd|D R  }  jd7  _| j|d < |dd D ]} j|= qXdS )a=  
        Merges nodes at given indices in the dendrogram. The nodes will be
        combined which then replaces the first node specified. All other nodes
        involved in the merge will be removed.

        :param  indices: indices of the items to merge (at least two)
        :type   indices: seq of int
           c                 3   s   | ]} j | V  qd S r#   )rH   )r   ir   r   r   	<genexpr>   r   z#Dendrogram.merge.<locals>.<genexpr>r0   r   N)r   r2   rK   rH   )r
   indicesrB   rN   r   r   r   merge   s    	 zDendrogram.mergec                 C   s8   t | jdkr$t| jg| jR  }n
| jd }||S )z
        Finds the n-groups of items (leaves) reachable from a cut at depth n.
        :param  n: number of groups
        :type   n: int
        r0   r   )r   rH   r2   rK   rC   )r
   r?   rootr   r   r   rC      s    
zDendrogram.groupsc                    s0  d\}}}t | jdkr.t| jg| jR  }n
| jd }| j}|rH|}ndd |D }ttt |d d t d dfdd		 d
d }|j|fg}	 fdd|D }
|	r|		 \}}t
tdd |j}t
t|j|}|rt|}t|}tt |D ]}|| |v rv||kr<| |d| n,||krX| ||d n| |||  ||
|< n:||  kr|krn n| ||| n||
|  q|d |jD ]}|jr|	|j|f q|	  |
D ]}|| q|d q|dfdd|D  |d dS )z
        Print the dendrogram in ASCII art to standard out.

        :param leaf_labels: an optional list of strings to use for labeling the
                            leaves
        :type leaf_labels: list
        )+-|r0   r   c                 S   s   g | ]}d |j  qS )z%s)r3   r   Zleafr   r   r   r      r   z#Dendrogram.show.<locals>.<listcomp>rM    c                    s    |  |  |  S r#   r   )Zcentreleftright)lhalfrhalfr   r   format   s    zDendrogram.show.<locals>.formatc                 S   s   t |  d S r#   )r   write)strr   r   r   display   s    z Dendrogram.show.<locals>.displayc                    s   g | ]} d qS )rW   r   rV   )r\   r   r   r     r   c                 S   s   |  dd S )NFr   )r7   )cr   r   r   <lambda>  r   z!Dendrogram.show.<locals>.<lambda>
 c                 3   s   | ]}|  V  qd S r#   )centerrF   )widthr   r   rO   #  r   z"Dendrogram.show.<locals>.<genexpr>N)rW   rW   )r   rH   r2   rK   rJ   maxr   intr3   r;   r   r4   indexminranger=   r>   join)r
   Zleaf_labelsZJOINZHLINKZVLINKrR   r7   Zlast_rowr_   r@   Z	verticalsrA   rB   Zchild_left_leafrP   Zmin_idxZmax_idxrN   r9   Zverticalr   )r\   rZ   r[   re   r   show   sV    






zDendrogram.showc                 C   sD   t | jdkr$t| jg| jR  }n
| jd }|d}dt | S )Nr0   r   Fz<Dendrogram with %d leaves>)r   rH   r2   rK   r7   )r
   rR   r7   r   r   r   __repr__&  s
    

zDendrogram.__repr__N)	r)   r*   r+   r,   r   rQ   rC   rl   rm   r   r   r   r   rE      s   	IrE   )rI   abcr   Zmathr   sysr   r   ImportErrorZnltk.cluster.apir   r   r/   r1   r2   rE   r   r   r   r   <module>   s   _	+