
    d,&                        d Z ddlmZmZ ddlZddlZddlZddlZddl	Z	ddl
mZmZ ddlmZ e	j                            d          Ze	j                            d          Zd Zed             Zee	j                            d	d
dg          e	j                            dg d          d                                     Zd Zd Ze	j                            ddi  eddgi          fdddi eddgi          fdddgi edddgi          fddgdd eddgi          fddgdd edej        dgi          fg          d              Zd! Ze	j                            d"g d#          d$             Zee	j                            d%ddg          d&                         Zed'             Ze	j                            dg d(          d)             Z ee	j                            dg d*          d+                         Z!ed,             Z"ed-             Z#e	j                            d          e	j                            d.d/d0g          d1                         Z$dS )2zZ
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
    )BytesIOTextIOWrapperN)	DataFrameread_csvpyarrow_skippyarrow_xfailc                     d}| }t          d                    |                    }|                    |d|          }t          ddggddg	          }t	          j        ||           d S )
Ncp1255u   שלום:1234
562:123:)sepencodingi2  {   u   שלום1234columnsr   encoder   r   tmassert_frame_equal)all_parsersr   parserdataresultexpecteds         `/var/www/html/t/fyr/venv311/lib/python3.11/site-packages/pandas/tests/io/parser/test_encoding.pytest_bytes_io_inputr      su    HF+228<<==D__TsX_>>F3*
F/CDDDH&(+++++    c                     | }t          d                                          }|                    |ddd           }t          ddgg          }t	          j        ||           d S )Nu   Łaski, Jan;1;utf-8)r   r   headeru   Łaski, Jan   r   )r   r   r   r   r   s        r   test_read_csv_unicoder#   %   si    F&--//00D__TsWT_JJF,a0122H&(+++++r   r   ,	r   )utf-16zutf-16lezutf-16bec                 l   | }d                     d|          }dt          j                     d}|dd}d}t          j        |          5 }|                    |          }t          |d          5 }	|	                    |           d d d            n# 1 swxY w Y   t          t          |                    |                    |	          5 }
 |j
        |fd
|i|} |j
        |
fd
|i|}d d d            n# 1 swxY w Y   t          j        ||           d d d            d S # 1 swxY w Y   d S )Nz)skip this
skip this too
A,B,C
1,2,3
4,5,6r$   __z__.csv   )r   skiprowsr    wbr   r   )replaceuuiduuid4r   ensure_cleanr   openwriter   r   r   r   )r   r   r   r   r   pathkwargsutf8
bytes_datafbytes_bufferr   r   s                r   test_utf16_bom_skiprowsr9   /   s%   
 F	 
S
 
	 	 %
$$$Da((FD			 	0$[[**
$ 	 GGJ	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  74;;t#4#455EEE 	N$V_TGGHGGGF&v|MMdMfMMH	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	fh///	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0sZ   &D)1BD)B	D)B	4D)!C<0D)<D 	 D)D 	D))D-0D-c                     t           j                            |d          }| }|                    |dd          }t	          |          dk    sJ d S )Nzutf16_ex.txtr&   r%   )r   r   2   )osr3   joinr   len)r   csv_dir_pathr3   r   r   s        r   test_utf16_exampler@   L   sP    7<<n55DF__TH$_??Fv;;"r   c                     t           j                            |d          }| }|                    |d d          }|                    d          }|d         d         }d}||k    sJ d S )Nunicode_series.csvlatin-1)r!   r   r   r"   i`  u$   Á köldum klaka (Cold Fever) (1994))r<   r3   r=   r   	set_index)r   r?   r3   r   r   gotr   s          r   test_unicode_encodingrF   S   sj    7<<&:;;DF__T$_CCFa  F
)D/C9H(??????r   zdata,kwargs,expectedza
1ar"   z"a"
1	quotechar"zb
1namesb1
1T)rJ   skip_blank_linesFc                 <  	 | }dd		fd}|j         dk    rT|dk    rN|                    dd          r8|j                            t          j                            d	                      |j         ||          fd
	i|}t          j	        ||           d S )Nu   ﻿r    c                 R    | z                                  }t          |          S )N)r   r   )_databom_databomr5   s     r   _encode_data_with_bomz,test_utf8_bom.<locals>._encode_data_with_bomx   s(    %K''--x   r   pyarrowrM   rN   TzPyarrow can't read blank lines)reasonr   )
enginegetnode
add_markerpytestmarkxfailr   r   r   )
r   r   r4   r   requestr   rT   r   rS   r5   s
           @@r   test_utf8_bomr_   _   s    * F
CD! ! ! ! ! !
 	""EMMJJ)400  	K%EFF	
 	
 	
 V_22488RR4R6RRF&(+++++r   c                     t          dgdgd          }| }|                    |          }d                    |          }|                    t	          |          |          }t          j        ||           d S )Ng333333@test)mb_num	multibytezmb_num,multibyte
4.8,testr,   )r   formatr   r   r   r   r   )r   	utf_valueencoding_fmtr   r   r   r   r   s           r   test_read_csv_utf_aliasesrg      sz    SEAABBHF""9--H'..x88D__WT]]X_>>F&(+++++r   zfile_path,encoding)))ior   csvz	test1.csvr    ))rh   r   r   rB   rC   ))rh   r   r   zsauron.SHIFT_JIS.csvshiftjisc                 z   | } || }|                     ||          }t          ||          5 }|                     |          }|j        rJ 	 d d d            n# 1 swxY w Y   t          j        ||           t          |d          5 }	|                     |	|          }|	j        rJ 	 d d d            n# 1 swxY w Y   t          j        ||           t          |dd          5 }	|                     |	|          }|	j        rJ 	 d d d            n# 1 swxY w Y   t          j        ||           d S )Nr,   rbmoder   )rn   	buffering)r   r1   closedr   r   )
r   	file_pathr   datapathr   fpathr   far   fbs
             r   test_binary_mode_file_buffersrv      s    FHi Eux88H	eh	'	'	' 2$$9               (F+++	e$			 2h779               (F+++	e$!	,	,	, h779               (F+++++s5   AA #A !B;;B?B?-!DD"Dpass_encodingc                 l   | }|                     |          }t          ddgi          }t          j        d|d          5 }|                    d           |                    d           |                    ||r|nd           }t          j        ||           d d d            d S # 1 swxY w Y   d S )	Nfoobarzw+T)rn   r   return_filelikezfoo
barr   r,   )rd   r   r   r0   r2   seekr   r   )	r   re   rf   rw   r   r   r   r7   r   s	            r   test_encoding_temp_filer}      s     F""9--H%%)**H	dXt	L	L	L 0PQ	
	q			-PXXDQQ
fh///0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0s   AB))B-0B-c                    | }d}d}d}t          ||gi          }t          j                    5 }|                    | d|                     |                     |                    d           |                    ||          }t          j        ||           |j	        rJ 	 d d d            d S # 1 swxY w Y   d S )Nz	shift-jisu	   てすとu   こむ
r   r,   )
r   tempfileNamedTemporaryFiler2   r   r|   r   r   r   rp   )r   r   r   titler   r   r7   r   s           r   test_encoding_named_temp_filer      s    FHED%$))H		$	&	& !	5""D""))(33444	q			X66
fh///8                 s   A8B44B8;B8)r    r&   z	utf-16-bez	utf-16-lezutf-32c                     d}t          |                    |                     }t          |d|           }t          ddgddgdd	ggd
dg          }t	          j        ||           d S )Nu   a	b
：foo	0
bar	1
baz	2r%   )	delimiterr   u   ：foor   rz   r"   bazr)   rG   rK   )r   r   r   )r   r   encoded_datar   r   s        r   %test_parse_encoded_special_charactersr      sz     -D4;;x0011LldXFFFF!}uaj5!*EPSUXzZZZH&(+++++r   )r    Nr&   r
   rC   c                    | }t          g dg dg dd          }t          j                    5 }|                    |d|           |                    ||d          }d d d            n# 1 swxY w Y   t          j        ||           d S )	N)Raphael	DonatellozMiguel AngelLeonardo)redpurpleorangeblue)saizbo staffnunchunkkatana)namemaskweaponF)indexr   T)r   
memory_map)r   r   r0   to_csvr   r   )r   r   r   r   filedfs         r   test_encoding_memory_mapr      s     FHHH777???	
 	
 H 
		 GdEH===__TH_FFG G G G G G G G G G G G G G G "h'''''s   1A,,A03A0c                 ,   | }t          dgdz            }d|j        d<   t          j        d          5 }|                    |ddd	           |                    |d
dd          }d
d
d
           n# 1 swxY w Y   t          j        ||           d
S )zO
    Chunk splits a multibyte character with memory_map=True

    GH 43540
    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaai   )r   u   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaąi  zbug-gh43540.csvFr    r   r!   r   NTc)r!   r   rW   )r   ilocr   r0   r   r   r   )r   r   r   fnamedfrs        r    test_chunk_splits_multibyte_charr      s     F	d*	+	+	+B %BGDM	*	+	+ Ou
		%uUW	EEEooeDT#oNNO O O O O O O O O O O O O O O #r"""""s   3A44A8;A8c           	      Z   g }d}d}d}t          t          |          t          |          |          D ]q}d                    d t          ||dz             D                       dz   }	 |                    d           n# t          $ r Y Xw xY w|                    |           r| }t          |          }t          j        d          5 }	|	                    |	d	d	d
           |
                    |	dddd          }
ddd           n# 1 swxY w Y   t          j        ||
           dS )zg
    GH 43787

    Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
        u   𐂀 c                 ,    g | ]}t          |          S  )chr).0r   s     r   
<listcomp>z,test_readcsv_memmap_utf8.<locals>.<listcomp>  s    AAA1AAAAr   r   r    zutf8test.csvFr   NTr   )r!   r   rW   r   )rangeordr=   r   UnicodeEncodeErrorappendr   r   r0   r   r   r   )r   linesline_length
start_charend_charlnumliner   r   r   r   s              r   test_readcsv_memmap_utf8r     s    EKJH c*oos8}}kBB  wwAAdD4K(@(@AAABBTI	KK    ! 	 	 	H	TF	5		B		(	( 
E
		%uUW	EEEoo$4g  
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 

 "c"""""s$   *B  
BB4DDDrn   zw+bzw+tc                 @   | }d}d|v rd}t          j        |          5 }|                    |           |                    d           |                    |          }d d d            n# 1 swxY w Y   t          g dg          }t          j        ||           d S )Ns   abcdtabcdrm   r   r   )r   SpooledTemporaryFiler2   r|   r   r   r   r   )r   rn   r   contenthandler   r   s          r   test_not_readabler   .  s     FG
d{{		&D	1	1	1 %VWA__V$$% % % % % % % % % % % % % % % fX...H"h'''''s   A A,,A03A0)%__doc__rh   r   r   r<   r   r.   numpynpr[   pandasr   r   pandas._testing_testingr   r\   usefixturesskip_pyarrowxfail_pyarrowr   r#   parametrizer9   r@   rF   nanr_   rg   rv   r}   r   r   r   r   r   r   r   r   r   <module>r      s           
			                    {&&~66''88, , , , , , d,,%G%G%GHH0 0 IH -, 04  	 	 	  
YYaSz**+	K%yy#s'<'<=	'C5!99cC:->#?#?@	3%T::IIsQCj<Q<QR e77IsRVQK())	
 &, ,' &,0	, 	, 	,    , , ,0 4-880 0 98 0   ( GGG , , , %S%S%STT( ( UT (  # # #& # # #: ))%00( ( 10 *)( ( (r   