
    h$,f-O                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZ d dlZd dlmZmZmZ d dlmZmZmZmZmZ d dlmZ d	Zd
ZdZ dZ!dZ"eZ#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/d Z0d Z1ejd                  jg                  d      d        Z4d Z5d Z6d  Z7ejd                  jq                  d!e      d"        Z9ejd                  jq                  d!e      d#        Z:d$ Z;d% Z<d& Z=d' Z>d( Z?ejd                  jq                  d!e      d)        Z@ejd                  jq                  d*g d+      ejd                  jq                  d,d-d.g      ejd                  jq                  d/g d0      ejd                  jq                  d!e      d1                             ZAejd                  jq                  d!e      d2        ZBd3 ZCejd                  jq                  d!e      d4        ZDd5 ZEy)6    N)BZ2File)	resources)BytesIO)NamedTemporaryFile)dump_svmlight_fileload_svmlight_fileload_svmlight_files)assert_allcloseassert_array_almost_equalassert_array_equalcreate_memmap_backed_datafails_if_pypy)CSR_CONTAINERSzsklearn.datasets.tests.datazsvmlight_classification.txtzsvmlight_multilabel.txtzsvmlight_invalid.txtzsvmlight_invalid_order.txtc                 :    t        j                  t              | z  S N)r   filesTEST_DATA_MODULE)filenames    Klib/python3.12/site-packages/sklearn/datasets/tests/test_svmlight_format.py_svmlight_local_test_file_pathr   !   s    ??+,x77    c                 ~    t        |       }|j                  d      5 }t        |fi |cddd       S # 1 sw Y   yxY w)zG
    Helper to load resource `filename` with `importlib.resources`
    rbN)r   openr   )r   kwargs	data_pathfs       r   _load_svmlight_local_test_filer   %   s>     /x8I		 /!!.v./ / /s   3<c                     t        t              \  } }| j                  j                  d   dk(  sJ | j                  d   dk(  sJ | j                  d   dk(  sJ |j                  d   dk(  sJ dD ]  \  }}}| ||f   |k(  rJ  | d   dk(  sJ | d   dk(  sJ | d	   dk(  sJ | d
   dk(  sJ | d   dk(  sJ | dxx   dz  cc<   | d   dk(  sJ t	        |g d       y )Nr               )r      g      @r   
   g)r      g      ?r"            ?r"      )r%         )r      )r   r*   )r"      )r"      )r%      )r   r%   r%   r*   )r"   r%   r1      r"   r%   )r   datafileindptrshaper   Xyijvals        r   test_load_svmlight_filer?   .   s   )(3DAq 88>>!!!!771:??771:771:?? 	1c Aw#~~ T7a<<T7a<<T7a<<U8q==U8q== dGqLGT7a<< q,-r   c                     t        j                  t              t        z  } t	        |       } t        |       \  }}t        j                  | t        j                        }	 t        |      \  }}t        |j                  |j                         t        ||       t        j                  |       y # t        j                  |       w xY wr   )r   r   r   r6   strr   osr   O_RDONLYr   dataclose)r   X1y1fdX2y2s         r   test_load_svmlight_file_fdrK   Q   s      01H<III	*FB	BKK	(B#B'B!"''2773!"b)
s   :B/ /Cc                      t        t              } t        t        |             \  }}t        |       \  }}t	        |j
                  |j
                         t	        ||       y r   )r   r6   r   rA   r
   rD   )r   rF   rG   rI   rJ   s        r   test_load_svmlight_pathlibrM   d   sH    .x8II/FB	*FBBGGRWW%Br   c                  >    t        t        d      \  } }|g dk(  sJ y )NT
multilabel))r   r"   )r%    )r"   r%   )r   	multifile)r:   r;   s     r   "test_load_svmlight_file_multilabelrS   n   s!    ))EDAq****r   c                  h   t        t              } t        t        |       gdz  t        j
                        \  }}}}t        |j                         |j                                t        ||       |j                  t        j
                  k(  sJ |j                  t        j
                  k(  sJ t        t        |       gdz  t        j                        \  }}}}}	}
|j                  |j                  k(  sJ |j                  |	j                  k(  sJ |	j                  t        j                  k(  sJ y )Nr%   )dtyper1   )r   r6   r	   rA   npfloat32r   toarrayr   rU   float64)r   X_trainy_trainX_testy_testrF   rG   rI   rJ   X3y3s              r   test_load_svmlight_filesr`   s   s    .x8I':	Y1BJJ($GWff w(&..*:;gv.==BJJ&&&<<2::%%%0#i.1AA1ERZZXBBB88rxx88rxx88rzz!!!r   c                  d   t        t        d      \  } }| j                  j                  d   dk(  sJ | j                  d   dk(  sJ | j                  d   dk(  sJ dD ]  \  }}}| ||f   |k(  rJ  t	        j
                  t              5  t        t        d       d d d        y # 1 sw Y   y xY w)	N   )
n_featuresr   r    r!   r"   )r$   r&   r)   r,   r/   )r   r6   r7   r8   pytestraises
ValueErrorr9   s        r   "test_load_svmlight_file_n_featuresrg      s    )(rBDAq 88>>!!!!771:??771: L 	1cAw#~~ 
z	" @&xB?@ @ @s   B&&B/c                     t        t              \  } }t        dd      5 }|j                          t	        t              j                  d      5 }t        j
                  |j                  d      5 }t        j                  ||       d d d        d d d        t        |j                        \  }}t        j                  |j                         d d d        t        | j                         j                                t        |       t        dd      5 }|j                          t	        t              j                  d      5 }t        |j                  d      5 }t        j                  ||       d d d        d d d        t        |j                        \  }}t        j                  |j                         d d d        t        | j                         j                                t        |       y # 1 sw Y   sxY w# 1 sw Y   xxY w# 1 sw Y   FxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   }xY w)Nzsklearn-testz.gz)prefixsuffixr   wbz.bz2)r   r6   r   rE   r   r   gzipnameshutilcopyfileobjr   rB   remover   rX   r   )	r:   r;   tmpr   fh_outXgzygzXbzybzs	            r   test_load_compressedrw      s   )(3DAq	>%	@ C		+H5::4@ 	.A388T* .f""1f-.	. &chh/S 			#(( aiik3;;=9a%	>&	A S		+H5::4@ 	.A4( .F""1f-.	. &chh/S 			#(( aiik3;;=9a%'. .	. 	. . .	. 	. sw   /H!H0G8H?H/H7H+H4H+<?H78H=HH	
HHH($H++H4	0H77I c                  ~    t        j                  t              5  t        t               d d d        y # 1 sw Y   y xY wr   )rd   re   rf   r   invalidfilerQ   r   r   test_load_invalid_filerz      s+    	z	" 4&{34 4 4   3<c                  ~    t        j                  t              5  t        t               d d d        y # 1 sw Y   y xY wr   )rd   re   rf   r   invalidfile2rQ   r   r   test_load_invalid_order_filer~      s+    	z	" 5&|45 5 5r{   c                      t        d      } t        j                  t              5  t	        | d       d d d        y # 1 sw Y   y xY w)Ns   -1 4:1.
1 0:1
F
zero_based)r   rd   re   rf   r   )r   s    r   test_load_zero_basedr      s8    #$A	z	" 01/0 0 0s	   <Ac                      d} d}t        |       }t        |d      \  }}|j                  dk(  sJ t        |       }t        |      }t        ||gd      \  }}}}	|j                  dk(  sJ |j                  dk(  sJ y )Ns   -1 1:1 2:2 3:3
s   -1 0:0 1:1
autor   )r"   r1   )r"   r5   )r   r   r8   r	   )
data1data2f1r:   r;   f2rF   rG   rI   rJ   s
             r   test_load_zero_based_autor      s    EE	BbV4DAq77f	B	B("bfENBB88v88vr   c                     d} t        t        |       d      \  }}t        |g d       t        |j                         ddgddgd	dgg       t	        t        |       gd
      }t        t        |       d
      }||fD ]E  \  }}}t        |g d       t        |g d       t        |j                         ddgddgd	dgg       G y )NsM   
    3 qid:1 1:0.53 2:0.12
    2 qid:1 1:0.13 2:0.1
    7 qid:2 1:0.87 2:0.12Fquery_id)r1   r%   r    g(\?gQ?gp=
ף?皙?gףp=
?T)r"   r"   r%   )r   r   r   rX   r	   )rD   r:   r;   res1res2qids         r   test_load_with_qidr      s    D gdme<DAqq)$qyy{dD\D#;t$MN>Dgdmd;DD\ S	1c1i(3	*199;$c{T4L(QRSr   zPtesting the overflow of 32 bit sparse indexing requires a large amount of memoryc                     dj                  d t        dd      D              } t        t        |       d      \  }}}t	        |dd g d	       t	        t        j                  |      t        j                  dd             y)
zU
    load large libsvm / svmlight file with qid attribute. Tests 64-bit query ID
       
c              3   Z   K   | ]#  }d j                  |      j                          % yw)z.3 qid:{0} 1:0.53 2:0.12
2 qid:{0} 1:0.13 2:0.1N)formatencode).0r<   s     r   	<genexpr>z&test_load_large_qid.<locals>.<genexpr>   s,      	
 >DDQGNNP	
s   )+r"   i ZbTr   N)r1   r%   r1   r%   )joinranger   r   r   rV   uniquearange)rD   r:   r;   r   s       r   test_load_large_qidr      sm     ::	
1./	
D #74=4@IAq#qv|,ryy~ryy4D'EFr   c                      t        j                  t              5  t        t              } t        t
              }t        t        |       t        |      t        |       g       d d d        y # 1 sw Y   y xY wr   )rd   re   rf   r   r6   ry   r	   rA   )r   invalid_paths     r   test_load_invalid_file2r      sW    	z	" Q28<	5kBS^S->IOPQ Q Qs   AA++A4c                  v    t        j                  t              5  t        d       d d d        y # 1 sw Y   y xY w)NgzG?)rd   re   	TypeErrorr   rQ   r   r   test_not_a_filenamer      s-     
y	! !4 ! ! !   /8c                  v    t        j                  t              5  t        d       d d d        y # 1 sw Y   y xY w)Nztrou pic nic douille)rd   re   OSErrorr   rQ   r   r   test_invalid_filenamer      s,    	w	 3123 3 3r   csr_containerc                 \   t        t              \  }}|j                         } | t        j                  |            }|t        j
                  |j                  d            }|t        j
                  |j                  d            }|||fD ]  }|||fD ]  }dD ]  }	t        j                  t        j                  t        j                  t        j                  fD ]  }
t               }t        j                  |      r|j                  d   dk(  r|j                  }|j                  |
      }t!        |||d|	       |j#                  d       |j%                         }t'        |d      }dt(        j*                  z  |v sJ |j%                         }t'        |d      }dd	g|	   d
z   |v sJ t-        ||
|	      \  }}|j.                  |
k(  sJ t1        |j3                         j4                  |j4                         |j                         }t        j                  |      r|j                         }n|}|
t        j                  k(  r-t7        ||d       t7        |j                  |
d      |d       t7        ||d       t7        |j                  |
d      |d        
   y )Nr   )TFr"   testcommentr   utf-8zscikit-learn %sonezeroz-based)rU   r   r5   F)copyr(   )r   r6   rX   rV   
atleast_2dr   r8   rW   rY   int32int64r   spissparseTastyper   seekreadlinerA   sklearn__version__r   rU   r   sorted_indicesindicesr   )r   X_sparsey_denseX_densey_sparseX_slicedy_slicedr:   r;   r   rU   r   X_inputr   rI   rJ   X2_denseX_input_denses                     r   	test_dumpr     sV   6x@Hg GR]]734H 		(.."345H		(.."345H* 8GX. 7	A+ 6
 jj"**bhhI 5E	A
 {{1~!''!*/ CC  hhuoG&Av* FF1IjjlG!'73G,w/B/BBgMMMjjlG!'73G!6?:6AWLLL/:VFB88u,,,&r'8'8':'B'BBJJO!zz|H{{7+(/(9(/

*1-1M1#NN5uN=r1
 2-2N1#NN5uN=r2g567	8r   c                 $   g dg dg dg}g dg dg dg} | |      }||fD ]k  }t               }t        |||d       |j                  d	       |j                         d
k(  sJ |j                         dk(  sJ |j                         dk(  rkJ  y )N)r"   r   r1   r   r*   r   r   r   r   r   )r   r*   r   r"   r   r   r"   r   )r"   r   r"   )r"   r"   r   TrO   r   s   1 0:1 2:3 4:5
s   0,2 
s   0,1 1:5 3:1
)r   r   r   r   )r   r:   r   r   r;   r   s         r   test_dump_multilabelr   K  s    	/?;A)Y/GW%Hx  0I1at4	q	zz|1111zz|y(((zz|////0r   c                     d} d}d}d}d}| ||||gg dg dg dg dg}| ||||g}t               }t        |||       |j                  d       |j                         d	k(  sJ |j                         d
k(  sJ |j                         dk(  sJ |j                         dk(  sJ |j                         dk(  sJ |j                  d       t	        |      \  }}	t        ||j                                t        ||	       y )Nr"   g @gGz@g     ?r+   )g    eAg NgmCgkcEr   r   r   r   s+   1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1
s!   2.1 0:1000000000 1:2e+18 2:3e+27
s   3.01 
s   1.000000000000001 
s   1 
)r   r   r   r   r   r   rX   )
r   twothreeexactalmostr:   r;   r   rI   rJ   s
             r   test_dump_conciser   Z  s   
C
CEEF	c5%(	A 
c5%(A	Aq!QFF1I::<JJJJ::<@@@@::<:%%%::<2222::<7"""FF1I"FBa.a$r   c                     t        t              \  } }| j                         } t               }d}t	        | |||d       |j                  d       t        |d      \  }}t        | |j                                t        ||       d}t               }t        j                  t              5  t	        | |||       d d d        |j                  d      }t               }t	        | |||d       |j                  d       t        |d      \  }}t        | |j                                t        ||       t               }t        j                  t              5  t	        | ||d	       d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)
Nz*This is a comment
spanning multiple lines.Fr   r   r   s   It is true that
½² = ¼)r   r   zI've got a  .)r   r6   rX   r   r   r   r   r   rd   re   UnicodeDecodeErrordecoderf   )r:   r;   r   ascii_commentrI   rJ   utf8_commentunicode_comments           r   test_dump_commentr   y  sE   )(3DAq			A	AAMq!Q%HFF1Ie4FBa.a$ CL	A	)	* :1aL9: #))'2O	Aq!QEJFF1Ie4FBa.a$	A	z	" >1a,<=> >: :> >s   +E1E=1E:=Fc                  H   t        t              \  } }t               }|g}t        j                  t
              5  t        | ||       d d d        t               }t        j                  t
              5  t        | |d d |       d d d        y # 1 sw Y   FxY w# 1 sw Y   y xY w)N)r   r6   r   rd   re   rf   r   )r:   r;   r   y2ds       r   test_dump_invalidr     s    )(3DAq	A#C	z	" &1c1%& 		A	z	" )1afa() )	& &) )s   B2BBB!c                  p   t        t              \  } }| j                         } t        j                  | j
                  d         dz  }t               }t        | |||d       |j                  d       t        |dd      \  }}}t        | |j                                t        ||       t        ||       y )Nr   r%   Tr   r   )r   r6   rX   rV   r   r8   r   r   r   r   r   )r:   r;   r   r   rF   rG   	query_id1s          r   test_dump_query_idr     s    )(3DAq			Ayy$)H	Aq!QdCFF1I*1tMBIa.a$h	2r   c                  :   d} t        t        |       d      \  }}}g dg dg dg dg}g d}g d}t        ||       t        |j                         |       t        ||       t               }t	        ||||d       |j                  d	       t        |dd      \  }}}t        ||       t        |j                         |       t        ||       |j                  d	       t        |d
d      \  }}t        ||       t        |j                         |       y )Ns   
    1 qid:0 0:1 1:2 2:3
    0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
    0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
    3 qid:9223372036854775807  0:1440446648 1:72048431380967004 2:236784985Tr   )r"   r%   r1   )ixU   \.,N^iY)r"   r   r   r1   )r   r   l l    r   r   F)r   r   r   rX   r   r   )rD   r:   r;   r   true_Xtrue_ytrueQIDr   s           r   test_load_with_long_qidr     s    OD
 #74=4@IAq# 	222	F FOGq&!qyy{F+sG$	Aq!Q>FF1I"1tEIAq#q&!qyy{F+sG$FF1Ia%DADAqq&!qyy{F+r   c                 R   t               } | t        j                  d            }t        j                  g d      }t	        |||       dD ]X  }|j                  d       t        |d|      \  }}t        ||       t        |j                         |j                                Z y )N)r1   r5   r8   r   )r   TFr   r5   )rc   r   )	r   rV   zerosarrayr   r   r   r   rX   )r   r   r   r   r   r:   r;   s          r   test_load_zerosr     s    	A288&12FXXi Fvvq)+ A
	q	!!jI1!!V,!!))+v~~/?@	Ar   sparsity)r   r   g      ?gGz?r"   	n_samples   e   rc   )r%   r    )   c                    t         j                  j                  d      }|j                  dd||f      }| rd||| k  <    ||      }|j	                  dd|      }t               }t        |||       |j                  d       t        |j                               }d}	|dz  }
|
|	z
  }d|z  dz  }||
z
  }t        |||	|	      \  }}t        |||
|	      \  }}t        |||
      \  }}t        j                  |||g      }t        j                  |||g      }t        ||       t        |j                         |j                                y )Nr           r+   lowhighsizer%   r1   r5   r*   )rc   offsetlength)rc   r   )rV   randomRandomStateuniformrandintr   r   r   lengetvaluer   concatenater   vstackr   rX   )r   r   rc   r   rngr:   r;   r   r   mark_0mark_1length_0mark_2length_1X_0y_0X_1y_1X_2y_2y_concatX_concats                         r   test_load_with_offsetsr    sM   
 ))


"C#Y
,CDA!h,aA	2A	Aq!QFF1Iqzz|D FQYFHX]FH "	jHC "	jHC "!
6JHC~~sCo.Hyy#sC)Ha*aiik8+;+;+=>r   c           
      >   t         j                  j                  d      }t        j                  g dg dg dg dg dg dg dg      } | |      }|j                  \  }}|j                  dd|      }t        j                  |      dz  }t               }t        ||||	       |j                  d       t        |j                               }t        |      D ]  }	|j                  d       t        ||d
d|	      \  }
}}t        ||d
|	d      \  }}}t        j                  ||g      }t        j                  ||g      }t        j                   |
|g      }t#        ||       t%        ||       t#        |j'                         |j'                                 y )Nr   )r   r   r   r   r   r   )r"   r%   r1   r5   r   r!   )r"   r   r1   r   r   r   )r   r   r   r   r   r"   )r"   r   r   r   r   r   r%   r   r   T)rc   r   r   r   r   )rV   r   r   r   r8   r   r   r   r   r   r   r   r   r   r   r   r  r   r   rX   )r   r  r:   r   rc   r;   r   r   r   markr  r	  q_0r
  r  q_1q_concatr  r  s                      r   "test_load_offset_exhaustive_splitsr    sn   
))


"C
	

	A 	aAGGIz	2Ayy#q(H	Aq!Q2FF1Iqzz|D d C	q	**tAd
S# +*tD
S# >>3*->>3*-99c3Z(!!X.8X.!!))+x/?/?/ABCr   c                      t        j                  t        d      5  t        t        dd       d d d        y # 1 sw Y   y xY w)Nzn_features is required)matchr1   )r   r   )rd   re   rf   r   r6   rQ   r   r   test_load_with_offsets_errorr  ;  s6    	z)A	B E&x!DE E Es	   8Ac                    t        | dz        }t        j                  j                  d      }|j	                  dd      j                  t        j                        }t        j                  g d      }t        j                  g d      }t        j                  g d      } ||||fd	      }t        |||d
       t        |d
      \  }	}
g d}|
|k(  sJ y)z
    Ensure that if y contains explicit zeros (i.e. elements of y.data equal to
    0) then those explicit zeros are not encoded.
    svm_explicit_zero*   r1   r*   )r   r%   r1   r!   )r   r%   r%   r   r"   r%   )r   r"   r"   r"   r"   r   )r1   r1   r   TrO   )g       @r  )r   r+   N)
rA   rV   r   r   randnr   rY   r   r   r   )tmp_pathr   	save_pathr  r:   r7   r   rD   r;   _y_loady_trues               r    test_multilabel_y_explicit_zerosr$  @  s     H223I
))


#C		!Qrzz*AXXl#Fhh)*G88&'DtWf-V<A q!Y48"9>IAv)FVr   c                     t         j                  j                  d      }|j                  dd      }|j                  d      }t	        ||g      \  }}t        | dz        }t        |||       y)zEnsure that there is no ValueError when dumping a read-only `X`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28026
    r  r*   r%   svm_read_onlyN)rV   r   r   r  r   rA   r   )r  r  r:   r;   r   s        r   test_dump_read_onlyr'  Z  sc     ))


#C		!QA		!A %aV,DAqH./Iq!Y'r   )Frl   rB   rn   bz2r   	importlibr   ior   tempfiler   numpyrV   rd   scipy.sparsesparser   r   sklearn.datasetsr   r   r	   sklearn.utils._testingr
   r   r   r   r   sklearn.utils.fixesr   r   r6   rR   ry   r}   
pytestmarkr   r   r?   rK   rM   rS   r`   rg   rw   rz   r~   r   r   r   r  skipr   r   r   r   parametrizer   r   r   r   r   r   r   r   r  r  r  r$  r'  rQ   r   r   <module>r5     s8    	     '     X X  /0 (%	$+
8/ .F&+
" @"&84
5
0S" VGGQ!3
 .9B :BJ .90 :0%>>B
)3!,H .9
A :
A %;<r3i0z2.9!? : 3 1 =!?H .9'C :'CTE
 .9 :2(r   