a
    GGb'                     @  sN  d Z ddlmZ ddlZddlmZmZmZmZm	Z	 ddl
ZddlmZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZmZmZ erdd
lmZmZmZmZmZ dZ ddddddZ!dde dfdddddddddZ"de fdddddddZ#d dddd!d"d#Z$de dfd$ddddd%d&d'Z%de dfdddddd%d(d)Z&dS )*z"
data hash pandas / numpy objects
    )annotationsN)TYPE_CHECKINGHashableIterableIteratorcast)lib)hash_object_array)	ArrayLike)is_categorical_dtypeis_list_like)ABCDataFrameABCIndexABCMultiIndex	ABCSeries)Categorical	DataFrameIndex
MultiIndexSeriesZ0123456789123456zIterator[np.ndarray]intz
np.ndarray)arrays	num_itemsreturnc                 C  s   zt | }W n" ty.   tjg tjd Y S 0 t|g| } td}t|td }t| D ]6\}}|| }||N }||9 }|td| | 7 }qd|d |ksJ d|td7 }|S )z
    Parameters
    ----------
    arrays : Iterator[np.ndarray]
    num_items : int

    Returns
    -------
    np.ndarray[uint64]

    Should be the same as CPython's tupleobject.c
    dtypeiCB ixV4 iXB    zFed in wrong num_itemsi| )	nextStopIterationnpZarrayuint64	itertoolschainZ
zeros_like	enumerate)r   r   firstZmultoutiaZ	inverse_i r(   7lib/python3.9/site-packages/pandas/core/util/hashing.pycombine_hash_arrays.   s    
r*   Tutf8zIndex | DataFrame | Seriesboolstrz
str | Noner   )objindexencodinghash_key
categorizer   c                   s  ddl m} du rtttr8|tdddS ttrptj j	ddd}||ddd}ntt
rtj j	ddd}|rȇ fd	d
dD }t|g|}	t|	d}||jddd}nttrj fdd
 D }
tj}|rL fdd
dD }|d7 }t|
|}dd
 |D }
t|
|}||jddd}ntdt |S )a~  
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object
    r   )r   Nr    F)r   copyr3   )r/   r   r3   c                 3  s$   | ]}t jd  djV  qdS F)r/   r0   r1   r2   Nhash_pandas_objectr/   _values.0_r2   r0   r1   r.   r(   r)   	<genexpr>|   s   z%hash_pandas_object.<locals>.<genexpr>N   c                 3  s"   | ]\}}t |j V  qd S r>   )
hash_arrayr8   )r:   r;   Zseries)r2   r0   r1   r(   r)   r=      s   c                 3  s$   | ]}t jd  djV  qdS r5   r6   r9   r<   r(   r)   r=      s   r   c                 s  s   | ]
}|V  qd S r>   r(   )r:   xr(   r(   r)   r=          zUnexpected type for hashing )pandasr   _default_hash_key
isinstancer   hash_tuplesr   r@   r8   astyper   r!   r"   r*   r/   r   itemslencolumns	TypeErrortype)r.   r/   r0   r1   r2   r   hZserZ
index_iterr   hashesr   Zindex_hash_generatorZ_hashesr(   r<   r)   r7   N   sJ    







r7   z+MultiIndex | Iterable[tuple[Hashable, ...]])valsr0   r1   r   c                   sz   t | stdddlm m} t| ts6|| n|  fddtj	D }fdd|D }t
|t|}|S )a  
    Hash an MultiIndex / listlike-of-tuples efficiently.

    Parameters
    ----------
    vals : MultiIndex or listlike-of-tuples
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    ndarray[np.uint64] of hashed values
    z'must be convertible to a list-of-tuplesr   )r   r   c                   s(   g | ] } j | j| d ddqS )FTZorderedZfastpath)codesZlevels)r:   level)r   mir(   r)   
<listcomp>   s   zhash_tuples.<locals>.<listcomp>c                 3  s   | ]}t | d V  qdS )r0   r1   N)_hash_categorical)r:   catrU   r(   r)   r=      s   zhash_tuples.<locals>.<genexpr>)r   rK   rC   r   r   rE   r   Zfrom_tuplesrangeZnlevelsr*   rI   )rO   r0   r1   r   Zcat_valsrN   rM   r(   )r   r0   r1   rS   r)   rF      s    
rF   r   )rW   r0   r1   r   c                 C  sd   t | jj}t|||dd}|  }t|r<|| j}nt j	t|dd}|
 r`tj||< |S )a  
    Hash a Categorical by hashing its categories, and then mapping the codes
    to the hashes

    Parameters
    ----------
    cat : Categorical
    encoding : str
    hash_key : str

    Returns
    -------
    ndarray[np.uint64] of hashed values, same size as len(c)
    F)r2   r    r   )r   Zasarray
categoriesr8   r@   ZisnarI   ZtakerQ   Zzerosanyr   Zu8max)rW   r0   r1   valuesZhashedmaskresultr(   r(   r)   rV      s    	
rV   r
   )rO   r0   r1   r2   r   c                 C  s\   t | dstd| j}t|r6td| } t| ||S t| tjsN| 	 \} }t
| |||S )aK  
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.
    r   zmust pass a ndarray-liker   )hasattrrK   r   r   r   rV   rE   r   ZndarrayZ_values_for_factorize_hash_ndarray)rO   r0   r1   r2   r   r;   r(   r(   r)   r@      s    

r@   c                 C  st  | j }t|tjr4tt| dtt|   S t|trJ| 	d} nt
|jtjtjfrt| dj	ddd} nt
|jtjr|jdkr| d| j j 	d} n|rdd	lm}m}m} || dd
\}}	||||	ddd}
t|
||S zt| ||} W n, ty.   t| 	t	t||} Y n0 | | d? N } | td9 } | | d? N } | td9 } | | d? N } | S )z!
    See hash_array.__doc__.
       u8Zi8Fr4      ur   )r   r   	factorize)sortTrP      l   e9z    l   b&&&	    )r   r   Z
issubdtypeZ
complex128r@   realimagrE   r,   rG   
issubclassrL   Z
datetime64Ztimedelta64ZviewZnumberitemsizerC   r   r   rd   Z_with_inferrV   r	   rK   r-   objectr    )rO   r0   r1   r2   r   r   r   rd   rQ   rY   rW   r(   r(   r)   r_   (  s8    	 

r_   )'__doc__Z
__future__r   r!   typingr   r   r   r   r   Znumpyr   Zpandas._libsr   Zpandas._libs.hashingr	   Zpandas._typingr
   Zpandas.core.dtypes.commonr   r   Zpandas.core.dtypes.genericr   r   r   r   rC   r   r   r   r   r   rD   r*   r7   rF   rV   r@   r_   r(   r(   r(   r)   <module>   s<   
"^.(,