a
    v5`                     @   s   d dl mZ ddlmZ ddlmZ zd dlZW n" e	yR   d dl
m
Z
 dZY n0 g dZG d	d
 d
eZG dd deZG dd deZe Ze Ze ZdS )    )SequenceMatcher   )find_ngrams   )BaseSimilarityN)array)lcsseqlcsstrratcliff_obershelpLCSSeqLCSStrRatcliffObershelpc                   @   s:   e Zd ZdZdddZdd Zd	d
 Zdd Zdd ZdS )r   zplongest common subsequence similarity

    https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
    r   NTc                 C   s   || _ |p| j| _|| _d S N)qval_ident	test_funcexternal)selfr   r   r    r   Elib/python3.9/site-packages/textdistance/algorithms/sequence_based.py__init__   s    zLCSSeq.__init__c           	         s  t r*t jt|d t d ft jd}n fddtt|d D }t|D ]v\}}t D ]d\}}||kr|| | d ||d  |d < q`t||d  | || |d  ||d  |d < q`qPd}t|t  }}|dkr|dkr|| | ||d  | kr|d8 }q|| | || |d  krB|d8 }q||d   |d  ks`J ||d  | }|d8 }|d8 }q|S )z
        https://github.com/chrislit/abydos/blob/master/abydos/distance/_lcsseq.py
        http://www.dis.uniroma1.it/~bonifaci/algo/LCSSEQ.py
        http://rosettacode.org/wiki/Longest_common_subsequence#Dynamic_Programming_8
        r   )Zdtypec                    s$   g | ]}t d dgt d  qS )Lr   r   )r   len).0_seq2r   r   
<listcomp>)       z#LCSSeq._dynamic.<locals>.<listcomp> r   )numpyZzerosr   intrange	enumeratemax)	r   Zseq1r   ZlengthsiZchar1jZchar2resultr   r   r   _dynamic    s(    &"6


zLCSSeq._dynamicc                 G   s   t |st|d  S | jdd |D  rP|d d }dd |D }| | | S t|d  }t|D ]F\}}|d | |d d f ||d d   }t| | |gtd}qf|S )Nr   c                 S   s   g | ]}|d  qS )r   r   sr   r   r   r   E   r   z%LCSSeq._recursive.<locals>.<listcomp>r)   c                 S   s   g | ]}|d d qS )Nr)   r   r*   r   r   r   r   G   r   r   key)alltyper   r#   r$   r   )r   	sequencescmr%   r+   ssr   r   r   
_recursiveB   s    *zLCSSeq._recursivec                 G   s6   |sdS | j | }t|dkr(| j| S | j| S d S )Nr   r   )_get_sequencesr   r(   r4   r   r0   r   r   r   __call__O   s    

zLCSSeq.__call__c                 G   s   t | | S r   r   r6   r   r   r   
similarityX   s    zLCSSeq.similarity)r   NT)	__name__
__module____qualname____doc__r   r(   r4   r7   r9   r   r   r   r   r      s   
"	r   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )r   z(longest common substring similarity
    c                 C   s:   t ||d}|dt|dt|}||j|j|j  S )N)abr   )_SequenceMatcherZfind_longest_matchr   r>   size)r   s1s2Zmatchermatchr   r   r   	_standart_   s    zLCSStr._standartc                 G   sl   t |td}t|}t|ddD ]@}t||D ]0}d|}|D ]}||vr@ q.q@|    S q.q t| S )Nr,   r   r)   r   )minr   r"   r   joinr/   )r   r0   Zshortlengthnsubseqseqr   r   r   _customd   s    
zLCSStr._customc                 G   sh   t |sdS t|}|dkr dS |dkr0|d S | j| }|dkr^ttt|dk r^| j| S | j| S )Nr   r   r   r      )r.   r   r5   r$   maprE   rL   )r   r0   rH   r   r   r   r7   q   s    

zLCSStr.__call__c                 G   s   t | | S r   r8   r6   r   r   r   r9      s    zLCSStr.similarityN)r:   r;   r<   r=   rE   rL   r7   r9   r   r   r   r   r   \   s
   r   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	r   a+  Ratcliff-Obershelp similarity
    This follows the Ratcliff-Obershelp algorithm to derive a similarity
    measure:
        1. Find the length of the longest common substring in sequences.
        2. Recurse on the strings to the left & right of each this substring
           in sequences. The base case is a 0 length common substring, in which
           case, return 0. Otherwise, return the sum of the current longest
           common substring and the left & right recursed sums.
        3. Multiply this length by 2 and divide by the sum of the lengths of
           sequences.

    https://en.wikipedia.org/wiki/Gestalt_Pattern_Matching
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/ratcliff-obershelp.js
    https://xlinux.nist.gov/dads/HTML/ratcliffObershelp.html
    c                 G   s   dS )Nr   r   r6   r   r   r   maximum   s    zRatcliffObershelp.maximumc                    s\   t  | t  dkrdS fdd|D } fdd|D }| j|   | j|  S )Nr   c                    s   g | ]}|d |   qS r   findr*   )rJ   r   r   r      r   z+RatcliffObershelp._find.<locals>.<listcomp>c                    s"   g | ]}||   d  qS r   rP   r*   rH   rJ   r   r   r      r   )r   r   _find)r   r0   beforeZafterr   rR   r   rS      s    
zRatcliffObershelp._findc                 G   sH   | j | }|d ur|S t|}ttt|}| j| }|| j|  | S r   )Zquick_answerr   sumrN   r5   rS   )r   r0   r'   ZscountZecountr   r   r   r7      s    

zRatcliffObershelp.__call__N)r:   r;   r<   r=   rO   rS   r7   r   r   r   r   r      s   	r   )Zdifflibr   r@   Zutilsr   baser   Z_BaseSimilarityr    ImportErrorr   __all__r   r   r   r   r	   r
   r   r   r   r   <module>   s   
F''