a
    ~b-                     @   s   d dl mZ d dlZzd dlmZ W n ey6   Y n0 d dlZd dl	Z
ddlmZ ddlmZ ddlmZ ddlmZmZmZmZ dd	lmZ dd
l	mZ G dd deZee dS )    )absolute_importN   )util)	Dimension)Element)	NdMapping
item_checkOrderedDictsorted_context   )	Interface)PandasInterfacec                   @   s  e Zd ZdZdZdZdZedd Zedd Z	ed	d
 Z
edd Zedd Zedd Zedd Zeg dfddZed0ddZedd Zed1ddZedd Zed d! Zed"d# Zeg fd$d%Zed&d' Zed(d) Zed*d+ Zed,d- Zed.d/ ZdS )2DaskInterfacea  
    The DaskInterface allows a Dataset objects to wrap a dask
    DataFrame object. Using dask allows loading data lazily
    and performing out-of-core operations on the data, making
    it possible to work on datasets larger than memory.

    The DaskInterface covers almost the complete API exposed
    by the PandasInterface with two notable exceptions:

    1) Sorting is not supported and any attempt at sorting will
       be ignored with an warning.
    2) Dask does not easily support adding a new column to an existing
       dataframe unless it is a scalar, add_dimension will therefore
       error when supplied a non-scalar value.
    4) Not all functions can be easily applied to a dask dataframe so
       some functions applied with aggregate and reduce will not work.
     Zdaskd   c                 C   s   dt jv odt jv S )Nzdask.dataframepandas)sysmodules)clsr   r   7lib/python3.9/site-packages/holoviews/core/data/dask.pyloaded-   s    zDaskInterface.loadedc                 C   s*   |   sdS dd lm} t||j|jfS )NFr   )r   dask.dataframe	dataframe
isinstance	DataFrameZSeries)r   objddr   r   r   applies1   s    zDaskInterface.appliesc                    s   dd l m} t| ||\ }}t |js@|j | jdd dd |d D }t fdd|D r 	 t
fd	d|D r  ||fS )
Nr   F)Znpartitionssortc                 S   s    g | ]}t |tr|jn|qS r   )r   r   name.0dr   r   r   
<listcomp>?       z&DaskInterface.init.<locals>.<listcomp>kdimsc                 3   s   | ]}| j vr|V  qd S Ncolumnsr    )datar   r   	<genexpr>C   r$   z%DaskInterface.init.<locals>.<genexpr>c                 3   s   | ]}| j v r|V  qd S r&   r'   r    )resetr   r   r*   E   r$   )r   r   r   initr   r   Zfrom_pandasdefault_partitionsanyreset_indexall)r   Zeltyper)   r%   vdimsr   dimsZextrar   )r)   r+   r   r,   8   s    zDaskInterface.initc                 C   s   | |j S r&   )cloner)   computer   datasetr   r   r   r4   I   s    zDaskInterface.computec                 C   s   | |j S r&   )r3   r)   persistr5   r   r   r   r7   M   s    zDaskInterface.persistc                 C   s   t |jt |jjfS r&   )lenr)   r(   r5   r   r   r   shapeQ   s    zDaskInterface.shapec                 C   s   dd l m} |j|dd}|j|j }|jjdkrdt||	  
 }t|r`|d |d fS dS |jd ur|| ||j}|
| | S d S )Nr   T)strictO)NN)r   r   get_dimensionr)   r   dtypekindnpr   Znotnullr4   r8   ZnodataZreplace_valueminmax)r   r6   	dimensionr   columnr   r   r   rangeU   s    
zDaskInterface.rangeFc                 C   s   |j d |jS )Nz&Dask dataframes do not support sorting)ZparamZwarningr)   )r   r6   Zbyreverser   r   r   r   b   s    zDaskInterface.sortTc                 C   sN   | |}|j|j }|s"| }|r6|r2| S |S |rD| jS |jS d S r&   )r=   r)   r   uniquer4   values)r   r6   dimZexpandedZflatr4   Z
keep_indexr)   r   r   r   rH   g   s    
zDaskInterface.valuesc                 C   s,  d}|  D ]\}}t|tr(t| }g }||j}|j| }t|tr|jdurpt	|j}	|
|	|k |jdurt	|j}	|
||	k  nlt|ttfrd}
|D ]"}||k}|
du r|}
q|
|O }
q|
|
 n&t|r|
|| n|
||k |D ]}|dur||M }n|}qq|S )a  
        Given a Dataset object and a dictionary with dimension keys and
        selection keys (i.e. tuple ranges, slices, sets, lists. or literals)
        return a boolean mask over the rows in the Dataset object that
        have been selected.
        N)itemsr   tupleslicer=   r   r)   startr   Znumpy_scalar_to_pythonappendstopsetlistcallable)r   r6   	selectionselect_maskrI   kZmasksaliasZseriesZkvalZiter_slcZikmaskr   r   r   rT   r   s<    








zDaskInterface.select_maskNc                 K   s~   |j }|d ur|| S | ||}| ||}|d u r:|n|| }|rzt|dkrzt|jdkrz||jd j  jd S |S )Nr   r   )r)   rT   indexedr8   r1   r   r4   iloc)r   r6   Zselection_maskrS   dfrX   r   r   r   select   s    zDaskInterface.selectc              	      s
   fdd|D fdd j D }i }|dkrNt|trNtt |d}||  j|d< g }dd D }	 j	|	}
t
|	dkr j|	d	  }|jjd
krzdd |jjD }W n( ty   dd |  D }Y n0 ndd |  D }n" j|	  }tdd |D }|D ]X}tdd |D rDq(t
|dkrZ|d	 }||
|fi |}|||f q(t|trtdT td* ||dW  d    W  d    S 1 s0    Y  W d    n1 s0    Y  n||S d S )Nc                    s   g | ]}  |qS r   )r=   r    r6   r   r   r#      r$   z)DaskInterface.groupby.<locals>.<listcomp>c                    s   g | ]}| vr|qS r   r   )r!   Zkdim)
index_dimsr   r   r#      s   raw)r%   r6   c                 S   s   g | ]
}|j qS r   r   r    r   r   r   r#      r$   r   r   categoryc                 s   s   | ]}|fV  qd S r&   r   r!   Zindr   r   r   r*      r$   z(DaskInterface.groupby.<locals>.<genexpr>c                 s   s   | ]}|fV  qd S r&   r   ra   r   r   r   r*      r$   c                 s   s   | ]}|fV  qd S r&   r   ra   r   r   r   r*      r$   c                 s   s   | ]}|d d V  qdS )r   Nr   ra   r   r   r   r*      r$   c                 s   s"   | ]}t |tot|V  qd S r&   )r   floatr@   Zisnan)r!   cr   r   r   r*      r$   F)r%   
issubclassr   dictr   Zget_param_valuesupdater6   r)   groupbyr8   r>   r   cat
categoriesNotImplementedErrorrG   r4   Z
itertuplesZunique_iteratorr.   Z	get_grouprN   r   r   r
   )r   r6   
dimensionsZcontainer_typeZ
group_typekwargsZelement_dimsZgroup_kwargsr)   Zgroup_byrg   rD   indicesZgroup_tuplesZcoordgroupr   )r6   r]   r   rg      sB    


\zDaskInterface.groupbyc                    s  |j } fdd|jD }|jddd|j}fddt|j|jD }|||  }	ddd	d
ddd}
t r|	|}|j	|
v rt
||
|j	  }n
||}| }n2|j	|
v rt
|	|
|j	  }ntt| j}g }D ]}||jvr|| q||fS )Nc                    s   g | ]}| v r|j qS r   r_   r    )rk   r   r   r#      r$   z+DaskInterface.aggregate.<locals>.<listcomp>valuer   Zlabelc                    s&   g | ]\}}|j d v r| v r|qS )Ziufc)r?   )r!   rc   r>   )r1   r   r   r#      s   rA   rB   meanstdsumvar)ZaminZamaxrq   rr   rs   rt   )r)   r%   rk   dtypeszipindexrH   r8   rg   __name__getattrZapplyr/   rj   pdr   r4   Tr(   rN   )r   r6   rk   functionrl   r)   colsru   numericZ	reindexedZinbuiltsgroupsZaggrZ   ZdroppedZvdr   )rk   r1   r   	aggregate   s0    





zDaskInterface.aggregatec                 C   sH   ddl m} t|jdks&t|dkr*|S t||jr>| }|jd S )z~
        Given a dataset object and data in the appropriate format for
        the interface, return a simple scalar.
        r   Nr   )r   r   )r   r   r8   r(   r   r   r4   Ziat)r   r6   r)   r   r   r   r   unpack_scalar   s    zDaskInterface.unpack_scalarc                 C   sx   |j }|jddd}d }|D ]R}t|r0|g}tt||D ].\}\}}	|| |	k}
|d u rd|
}q>||
O }q>q|| S )Nkeyr   rp   )r)   rk   r@   isscalar	enumeraterv   )r   r6   Zsamplesr)   r2   rW   sampleirc   vZdim_maskr   r   r   r     s    zDaskInterface.samplec                 C   sN   |j }|j|jvrJt|s4t|r0d}t|d }|jf i |j|i}|S )Nz;Dask dataframe does not support assigning non-scalar value.)r)   r   r(   r@   r   r8   rj   Zassign)r   r6   rC   Zdim_posrH   Zvdimr)   errr   r   r   add_dimension  s    
zDaskInterface.add_dimensionc                 K   s   dd l m} |j|fi |S )Nr   )r   r   concat)r   Z
dataframesrl   r   r   r   r   	concat_fn  s    zDaskInterface.concat_fnc                 C   s    |r|j |  S |j  S d S r&   )r)   r4   )r   r6   rk   r   r   r   dframe"  s    zDaskInterface.dframec                 C   s   dS )NTr   r5   r   r   r   nonzero)  s    zDaskInterface.nonzeroc                    s   |\}}d}t |tr.dd   D | }n:t|rRt|} |jg}n fdd|d D }t|rx|g}t }|D ]} j| 	 j
| j||< q|r||d  d S t| S )z
        Dask does not support iloc, therefore iloc will execute
        the call graph and lose the laziness of the operation.
        Fc                 S   s   g | ]
}|j qS r   r_   r    r   r   r   r#   6  r$   z&DaskInterface.iloc.<locals>.<listcomp>c                    s   g | ]}  |jqS r   )r=   r   r    r\   r   r   r#   ;  r$   r   r   )r   rL   rk   r@   r   r=   r   r	   r)   r4   rY   rH   rK   )r   r6   rw   Zrowsr}   Zscalarr)   rc   r   r\   r   rY   -  s     



zDaskInterface.iloc)TTTF)N)rx   
__module____qualname____doc__typesZdatatyper-   classmethodr   r   r,   r4   r7   r9   rE   r   rH   rT   r[   rg   r   r   r   r   r   r   r   rY   r   r   r   r   r      sX   








*
*





r   )Z
__future__r   r   Zitertools.izipZiziprv   ImportErrorZnumpyr@   r   rz    r   rC   r   elementr   Z	ndmappingr   r   r	   r
   Z	interfacer   r   r   registerr   r   r   r   <module>   s"     5