a
    GGb9                     @  s   d dl mZ d dlmZmZmZ d dlZd dlZd dl	m
  mZ d dlmZmZmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlm Z  d dl!m"Z"m#Z# G dd de"Z$dddddZ%dddddZ&dS )    )annotations)HashableMappingSequenceN)	ArrayLikeDtypeArgDtypeObjReadCsvBuffer)DtypeWarning)find_stack_level)is_categorical_dtypepandas_dtype)union_categoricals)ExtensionDtype)Index
MultiIndex)ensure_index_from_sequences)
ParserBaseis_index_colc                      s   e Zd ZU ded< ded< dd fddZd	d
ddZd	d
ddZd dddddZdddddZdd Z	d!dddddZ
  ZS )"CParserWrapperbool
low_memoryzparsers.TextReader_readerzReadCsvBuffer[str])srcc                   s  t  | | _| }|dd _ jdu|d<  j|d<  jj	|d< dD ]}||d  qPt
|dd |d< tj|fi | _ jj _ jd u } jjd u rd  _n"  jj j|\ _ _ _} jd u r jr fdd	t jjD  _ntt jj _ jd d   _ jr̈  j j jd usPJ  jd
kr|t js|  j t jtkrfdd	t  jD  _t jtk r̈  j  ! j  "   j _ j#sv jj$dkr@t% jr@d _& ' j j j\} _ _ jd u r@| _ jjd u rv|sv jd usdJ d gt j  _ jj$dk _(d S )Nr   FZallow_leading_colsusecolson_bad_lines)Zstorage_optionsencodingZ
memory_mapZcompressionZerror_bad_linesZwarn_bad_linesdtypec                   s   g | ]} j  | qS  )prefix).0iselfr   Alib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py
<listcomp>f   s   z+CParserWrapper.__init__.<locals>.<listcomp>stringc                   s$   g | ]\}}| v s| v r|qS r   r   )r    r!   nr   r   r$   r%      s   r   T))super__init__kwdscopypopr   	index_colr   r   valueensure_dtype_objsgetparsersZ
TextReaderr   unnamed_colsnamesheaderZ_extract_multi_indexer_columnsindex_names	col_namesr   rangeZtable_widthlist
orig_names_evaluate_usecolsZusecols_dtypesetissubsetZ_validate_usecols_nameslen	enumerateZ_validate_parse_dates_presence_set_noconvert_columns_has_complex_date_colleading_colsr   Z_name_processed_clean_index_namesZ_implicit_index)r#   r   r+   keyZpassed_namesr6   	__class__)r#   r   r$   r*   -   s    


	




	zCParserWrapper.__init__None)returnc                 C  s&   z| j   W n ty    Y n0 d S N)r   close
ValueErrorr"   r   r   r$   rJ      s    zCParserWrapper.closec                   s^   | j dusJ dd t| j D   fdd| jD }| || j}|D ]}| j| qHdS )z
        Set the columns that should not undergo dtype conversions.

        Currently, any column that is involved with date parsing will not
        undergo such conversions.
        Nc                 S  s   i | ]\}}||qS r   r   )r    r!   xr   r   r$   
<dictcomp>       z9CParserWrapper._set_noconvert_columns.<locals>.<dictcomp>c                   s   g | ]} | qS r   r   r    rL   Z
names_dictr   r$   r%      rN   z9CParserWrapper._set_noconvert_columns.<locals>.<listcomp>)r:   r?   r4   Z_set_noconvert_dtype_columnsr   Zset_noconvert)r#   Zcol_indicesZnoconvert_columnscolr   rP   r$   r@      s    z%CParserWrapper._set_noconvert_columnsNz
int | Nonez_tuple[Index | MultiIndex | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike]])nrowsrH   c              
     s^  z,| j r| j|}t|}n| j|}W n ty   | jrd| _| | j}| j	|| j
| j| jdd\} }|  | j | jd ur|    fdd| D }| |f Y S |    Y n0 d| _| j}| jjr| jrtdg }t| jjD ]H}| j
d u r||}	n|| j
| }	| j|	|dd}	||	 q t|}| jd urh| |}| |}t| }
d	d t||
D }| ||\}}nt| }
| jd usJ t | j}| |}| jd ur| |}d
d |
D }| jd u r| !|| dd t||
D }| ||\}}| "|||\}}| || j}|||fS )NFr   r   c                   s   i | ]\}}| v r||qS r   r   )r    kvcolumnsr   r$   rM      rN   z'CParserWrapper.read.<locals>.<dictcomp>z file structure not yet supportedT)try_parse_datesc                 S  s   i | ]\}\}}||qS r   r   r    rT   r!   rU   r   r   r$   rM     rN   c                 S  s   g | ]}|d  qS )   r   rO   r   r   r$   r%   0  rN   z'CParserWrapper.read.<locals>.<listcomp>c                 S  s   i | ]\}\}}||qS r   r   rY   r   r   r$   rM   4  rN   )#r   r   Zread_low_memory_concatenate_chunksreadStopIterationZ_first_chunkZ_maybe_dedup_namesr:   Z_get_empty_metar.   r6   r+   r1   Z_maybe_make_multi_index_columnsr7   r   _filter_usecolsitemsrJ   r4   rB   rA   NotImplementedErrorr8   r-   _maybe_parse_datesappendr   sortedzipZ_do_date_conversionsr9   Z_check_data_lengthZ_make_index)r#   rR   chunksdatar4   indexZcol_dictZarraysr!   valuesZ	data_tupsZ	date_dataZalldataZ
conv_namesr   rV   r$   r\      sn    









zCParserWrapper.readzSequence[Hashable])r4   rH   c                   s@   |  | j|  d ur<t|t kr< fddt|D }|S )Nc                   s$   g | ]\}}| v s| v r|qS r   r   )r    r!   namer(   r   r$   r%   B  s   z2CParserWrapper._filter_usecols.<locals>.<listcomp>)r;   r   r>   r?   )r#   r4   r   r(   r$   r^   >  s    
zCParserWrapper._filter_usecolsc                 C  sL   t | jjd }d }| jjdkrD| jd urD| || j| j\}}| _||fS )Nr   )r9   r   r5   rB   r.   rC   r3   )r#   r4   Z	idx_namesr   r   r$   _get_index_namesG  s    
zCParserWrapper._get_index_namesTint)rg   rX   c                 C  s   |r|  |r| |}|S rI   )Z_should_parse_datesZ
_date_conv)r#   rh   rg   rX   r   r   r$   ra   R  s    
z!CParserWrapper._maybe_parse_dates)N)T)__name__
__module____qualname____annotations__r*   rJ   r@   r\   r^   rj   ra   __classcell__r   r   rE   r$   r   )   s   
  g	r   zlist[dict[int, ArrayLike]]dict)re   rH   c                   s
  t | d  }g }i }|D ]  fdd| D }dd |D }dd |D }t|dkr|t|g }|tkr||t  | }t	|rt
|dd	| < qt|tr| }	|	|| < qt|| < q|rd
|}
dd|
 dg}tj|tt d |S )z
    Concatenate chunks of data read with low_memory=True.

    The tricky part is handling Categoricals, where different chunks
    may have different inferred categories.
    r   c                   s   g | ]}|  qS r   )r-   )r    chunkri   r   r$   r%   d  rN   z'_concatenate_chunks.<locals>.<listcomp>c                 S  s   h | ]
}|j qS r   rS   )r    ar   r   r$   	<setcomp>f  rN   z&_concatenate_chunks.<locals>.<setcomp>c                 S  s   h | ]}t |s|qS r   )r   rO   r   r   r$   ru   h  rN   rZ   F)Zsort_categories, z	Columns (zK) have mixed types. Specify dtype option on import or set low_memory=False.)
stacklevel)r9   keysr>   npZfind_common_typeobjectrb   strr-   r   r   
isinstancer   Zconstruct_array_typeZ_concat_same_typeZconcatenatejoinwarningswarnr
   r   )re   r4   Zwarning_columnsresultZarrsZdtypesZnumpy_dtypesZcommon_typer   Z
array_typeZwarning_namesZwarning_messager   rs   r$   r[   X  s>    



r[   z*DtypeArg | dict[Hashable, DtypeArg] | Nonez*DtypeObj | dict[Hashable, DtypeObj] | None)r   rH   c                   s0   t  tr fdd D S  dur,t S  S )zc
    Ensure we have either None, a dtype object, or a dictionary mapping to
    dtype objects.
    c                   s   i | ]}|t  | qS r   )r   )r    rT   rS   r   r$   rM     rN   z%ensure_dtype_objs.<locals>.<dictcomp>N)r}   rq   r   rS   r   rS   r$   r0     s
    
r0   )'Z
__future__r   typingr   r   r   r   Znumpyrz   Zpandas._libs.parsersZ_libsr2   Zpandas._typingr   r   r   r	   Zpandas.errorsr
   Zpandas.util._exceptionsr   Zpandas.core.dtypes.commonr   r   Zpandas.core.dtypes.concatr   Zpandas.core.dtypes.dtypesr   Zpandasr   r   Zpandas.core.indexes.apir   Zpandas.io.parsers.base_parserr   r   r   r[   r0   r   r   r   r$   <module>   s$     1@