a
    v5`!                     @   s  d dl mZ d dlmZmZmZ d dlmZ ddlm	Z
mZ ddlmZ g dZG dd	 d	eZG d
d deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd de
Ze Ze Ze Ze Ze Ze Ze Ze Ze Z e Z!dS )    )reduce)islicepermutationsrepeat)log   )BaseBaseSimilarity)DamerauLevenshtein)JaccardSorensenTverskyOverlapCosineTanimoto
MongeElkanBagjaccardsorensentverskysorensen_diceoverlapcosinetanimotomonge_elkanbagc                   @   s*   e Zd ZdZdddZdd Zd	d
 ZdS )r   aY  
    Compute the Jaccard similarity between the two sequences.
    They should contain hashable items.
    The return value is a float between 0 and 1, where 1 means equal,
    and 0 totally different.

    https://en.wikipedia.org/wiki/Jaccard_index
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/jaccard.js
    r   FTc                 C   s   || _ || _|| _d S Nqvalas_setexternalselfr   r   r     r#   Blib/python3.9/site-packages/textdistance/algorithms/token_based.py__init__   s    zJaccard.__init__c                 G   s   dS Nr   r#   r"   	sequencesr#   r#   r$   maximum#   s    zJaccard.maximumc                 G   sP   | j | }|d ur|S | j| }| j| }| |}| j| }| |}|| S r   )quick_answer_get_counters_intersect_counters_count_countersZ_union_counters)r"   r(   resultintersectionunionr#   r#   r$   __call__&   s    





zJaccard.__call__N)r   FT__name__
__module____qualname____doc__r%   r)   r1   r#   r#   r#   r$   r      s   	
r   c                   @   s*   e Zd ZdZdddZdd Zd	d
 ZdS )r   an  
    Compute the Sorensen distance between the two sequences.
    They should contain hashable items.
    The return value is a float between 0 and 1, where 0 means equal,
    and 1 totally different.

    https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/dice.js
    r   FTc                 C   s   || _ || _|| _d S r   r   r!   r#   r#   r$   r%   =   s    zSorensen.__init__c                 G   s   dS r&   r#   r'   r#   r#   r$   r)   B   s    zSorensen.maximumc                    sV    j | }|d ur|S  j| }t fdd|D } j| } |}d| | S )Nc                 3   s   | ]}  |V  qd S r   r-   .0sr"   r#   r$   	<genexpr>K       z$Sorensen.__call__.<locals>.<genexpr>g       @)r*   r+   sumr,   r-   )r"   r(   r.   countr/   r#   r;   r$   r1   E   s    



zSorensen.__call__N)r   FTr2   r#   r#   r#   r$   r   3   s   	
r   c                   @   s*   e Zd ZdZdddZdd	 Zd
d ZdS )r   zTversky index

    https://en.wikipedia.org/wiki/Tversky_index
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/tversky.js
    r   NFTc                 C   s*   || _ |ptd| _|| _|| _|| _d S r&   )r   r   ksbiasr   r    )r"   r   r@   rA   r   r    r#   r#   r$   r%   W   s
    zTversky.__init__c                 G   s   dS r&   r#   r'   r#   r#   r$   r)   ^   s    zTversky.maximumc                    s    j | }|d ur|S  j| } j| } |} fdd|D }tt jt|}t|dksp jd u r|}t	||D ]\}}||||  7 }q~|| S |\}}|\}	}
t
||g}t||g}| j }|	|
 ||  ||
  }|||  S )Nc                    s   g | ]}  |qS r#   r7   r8   r;   r#   r$   
<listcomp>i   r=   z$Tversky.__call__.<locals>.<listcomp>   )r*   r+   r,   r-   listr   r@   lenrA   zipminmax)r"   r(   r.   r/   r@   kr:   s1s2ZalphaZbetaZa_valZb_valZc_valr#   r;   r$   r1   a   s(    




zTversky.__call__)r   NNFTr2   r#   r#   r#   r$   r   Q   s   
r   c                   @   s*   e Zd ZdZdddZdd Zd	d
 ZdS )r   zoverlap coefficient

    https://en.wikipedia.org/wiki/Overlap_coefficient
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/overlap.js
    r   FTc                 C   s   || _ || _|| _d S r   r   r!   r#   r#   r$   r%      s    zOverlap.__init__c                 G   s   dS r&   r#   r'   r#   r#   r$   r)      s    zOverlap.maximumc                    sR    j | }|d ur|S  j| } j| } |} fdd|D }|t| S )Nc                    s   g | ]}  |qS r#   r7   r8   r;   r#   r$   rB      r=   z$Overlap.__call__.<locals>.<listcomp>)r*   r+   r,   r-   rG   )r"   r(   r.   r/   r#   r;   r$   r1      s    



zOverlap.__call__N)r   FTr2   r#   r#   r#   r$   r   {   s   
r   c                   @   s*   e Zd ZdZdddZdd Zd	d
 ZdS )r   zcosine similarity (Ochiai coefficient)

    https://en.wikipedia.org/wiki/Cosine_similarity
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/cosine.js
    r   FTc                 C   s   || _ || _|| _d S r   r   r!   r#   r#   r$   r%      s    zCosine.__init__c                 G   s   dS r&   r#   r'   r#   r#   r$   r)      s    zCosine.maximumc                    sj    j | }|d ur|S  j| } j| } |} fdd|D }tdd |}|t|dt|  S )Nc                    s   g | ]}  |qS r#   r7   r8   r;   r#   r$   rB      r=   z#Cosine.__call__.<locals>.<listcomp>c                 S   s   | | S r   r#   )xyr#   r#   r$   <lambda>   r=   z!Cosine.__call__.<locals>.<lambda>g      ?)r*   r+   r,   r-   r   powrE   )r"   r(   r.   r/   Zprodr#   r;   r$   r1      s    



zCosine.__call__N)r   FTr2   r#   r#   r#   r$   r      s   
r   c                       s    e Zd ZdZ fddZ  ZS )r   zTanimoto distance
    This is identical to the Jaccard similarity coefficient
    and the Tversky index for alpha=1 and beta=1.
    c                    s*   t  j| }|dkrtdS t|dS d S )Nr   -infrC   )superr1   floatr   )r"   r(   r.   	__class__r#   r$   r1      s    zTanimoto.__call__)r3   r4   r5   r6   r1   __classcell__r#   r#   rS   r$   r      s   r   c                   @   s@   e Zd ZdZe ZedddfddZdd Zd	d
 Zdd Z	dS )r   a  
    https://www.academia.edu/200314/Generalized_Monge-Elkan_Method_for_Approximate_Text_String_Comparison
    http://www.cs.cmu.edu/~wcohen/postscript/kdd-2003-match-ws.pdf
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/monge-elkan.js
    Fr   Tc                 C   s   || _ || _|| _|| _d S r   )	algorithm	symmetricr   r    )r"   rV   rW   r   r    r#   r#   r$   r%      s    zMongeElkan.__init__c                 G   s0   | j |}|D ]}|rt|| j j| }q|S r   )rV   r)   rH   )r"   r(   r.   seqr#   r#   r$   r)      s
    zMongeElkan.maximumc              	   G   sh   |sdS g }|D ]>}|D ]4}t d}|D ]}t|| j||}q(|| qqt|t| t| S )Nr   rP   )rR   rH   rV   Z
similarityappendr>   rE   )r"   rX   r(   ZmaxesZc1r:   Zmax_simZc2r#   r#   r$   _calc   s    zMongeElkan._calcc                 G   sf   | j | }|d ur|S | j| }| jrXg }t|D ]}|| j|  q2t|t| S | j| S d S r   )r*   Z_get_sequencesrW   r   rY   rZ   r>   rE   )r"   r(   r.   Zseqsr#   r#   r$   r1      s    

zMongeElkan.__call__N)
r3   r4   r5   r6   r
   Z_damerau_levenshteinr%   r)   rZ   r1   r#   r#   r#   r$   r      s   r   c                   @   s   e Zd ZdZdd ZdS )r   zgBag distance
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/bag.js
    c                    s0   j | }j|   fdd|D }t|S )Nc                 3   s   | ]} |  V  qd S r   r7   )r9   Zsequencer/   r"   r#   r$   r<      r=   zBag.__call__.<locals>.<genexpr>)r+   r,   rH   r'   r#   r[   r$   r1      s    

zBag.__call__N)r3   r4   r5   r6   r1   r#   r#   r#   r$   r      s   r   N)"	functoolsr   	itertoolsr   r   r   Zmathr   baser   Z_Baser	   Z_BaseSimilarityZ
edit_basedr
   __all__r   r   r   r   r   r   r   r   r   r   Zdicer   r   r   r   r   r   r   r#   r#   r#   r$   <module>   s.   	*0