
    e!hK                     <   d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlZddlZddlZddlZddlmZ ddlmZ dd	lmZmZmZmZ dd
lmZ ddlmZ ddl m!Z!m"Z"m#Z# dZ$dZ% G d d      Z& eed      Zd Z'ejP                  jS                  ddddidddfdddddddfdddidddfdddddddfdddid d!dfdd"ddd d!dfd#dd#id$d%d&fd'dd'id(d)dfd'd*d+id(d)dfd,dd,id!d-dfd,d*d.id!d-dfd/dd/id0d$dfg      ejP                  jS                  d1d2d3g      ejP                  jS                  d4dd5g      d6                      Z*ejP                  jS                  ddddidddfdddddddfdddidddfdddddddfdddid d!dfdd"ddd d!dfd#dd#id$d%d&fd'dd'id(d)dfd'd*d+id(d)dfd,dd,id!d-dfd,d*d.id!d-dfg      ejP                  jS                  d1d2d3g      d7               Z+ejP                  jS                  dg d8      d9        Z,ejP                  jS                  d1d2d3g      d:        Z-ejP                  jS                  d1d2d3g      d;        Z.ejP                  jS                  d1d2d3g      ejP                  jS                  d<d=d=d>gg      d?               Z/ejP                  jS                  dg d@      ejP                  jS                  d1d2d3g      dA               Z0ejP                  jS                  dg dB      ejP                  jS                  d1d2d3g      dC               Z1dD Z2 ejf                  dEF      dG        Z4 ejf                  dEF      dH        Z5ejP                  jS                  dIg dJ      ejP                  jS                  d4dd5g      dK               Z6ejP                  jS                  dLd1dMidNfdOdMidPfg      dQ        Z7ejP                  jS                  dRddSdTdSdSdTd5d3dTd5dSdTg      dU        Z8ejP                  js                  dV      ejP                  jS                  dLd1d3idWfdOdidXfd3ddYdXfg      dZ               Z:ejP                  js                  dV      ejP                  jS                  d[d\d]g      d^               Z;d_ Z<ejP                  jS                  d4dd5g      d`        Z=ejP                  jS                  d4dd5g      da        Z>ejP                  jS                  d4dd5g      ejP                  jS                  d1d2d3g      db               Z?ejP                  jS                  d4dd5g      ejP                  jS                  dcdddiddeddfg      dg               Z@ejP                  jS                  d4dd5g      ejP                  jS                  dhddd*deieAdifdddjdkgdleAdmfd/d/d5dneAdofdddpddqeAdrfdddpd5dqeAdrfdddsdleBdtfdddsdkgdleBdtfg      ejP                  jS                  d1d2d3g      du                      ZCejP                  jS                  dvdwddxdfeAdyfdwdzd{eAd|fdwdzdxdfeAd}fi eAd~fg      d        ZDejP                  jS                  d4dd5g      d        ZEejP                  jS                  d4dd5g      d        ZFejP                  jS                  d4dd5g      d        ZGd ZHejP                  jS                  d4dd5g      d        ZIejP                  jS                  ddd5g      d        ZJd ZKd ZLejP                  jS                  d4dd5g      d        ZMejP                  jS                  dg d      d        ZNd ZOejP                  jS                  d4dd5g      ejP                  jS                  d1d      d               ZPd ZQd ZRd ZSy)zTest the openml loader.    N)partial)	resources)BytesIO)	HTTPError)config_context)fetch_openml)_OPENML_PREFIX_get_local_path_open_openml_url_retry_with_clean_cache)Bunch)check_pandas_support)SkipTestassert_allcloseassert_array_equalz"sklearn.datasets.tests.data.openmlTc                   8    e Zd Zd Zd	dZd Zd Zd Zd Zd Z	y)
_MockHTTPResponsec                      || _         || _        y N)datais_gzip)selfr   r   s      j/var/www/html/diagnosisapp-backend/venv/lib/python3.12/site-packages/sklearn/datasets/tests/test_openml.py__init__z_MockHTTPResponse.__init__'   s    	    c                 8    | j                   j                  |      S r   )r   read)r   amts     r   r   z_MockHTTPResponse.read+   s    yy~~c""r   c                 8    | j                   j                          y r   )r   closer   s    r   r    z_MockHTTPResponse.close.   s    		r   c                 &    | j                   rddiS i S )NzContent-Encodinggzipr   r!   s    r   infoz_MockHTTPResponse.info1   s    <<&//	r   c                 ,    t        | j                        S r   )iterr   r!   s    r   __iter__z_MockHTTPResponse.__iter__6   s    DIIr   c                     | S r    r!   s    r   	__enter__z_MockHTTPResponse.__enter__9   s    r   c                      y)NFr*   )r   exc_typeexc_valexc_tbs       r   __exit__z_MockHTTPResponse.__exit__<   s    r   N))
__name__
__module____qualname__r   r   r    r%   r(   r+   r0   r*   r   r   r   r   &   s%    #
r   r   )	data_homec                 <  	
 dddddt         j                  t        dz   d| z   
fd
fd			fd
	fd	fd
fdfd}t        r,| j	                  t
        j                  j                  d|       y y )Nz(https://api.openml.org/api/v1/json/data/z1https://api.openml.org/api/v1/json/data/features/zhttps://api.openml.org/data/v1/z-https://api.openml.org/api/v1/json/data/list/z.gz.id_c                    t        j                  dd| t        d      d        |z   z   }|j                  dd      j                  dd      j                  dd	      j                  d
d      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      S )Nz\W-zhttps://api.openml.org/z-json-data-listz-jdlz-json-data-featuresz-jdfz-json-data-qualitiesz-jdqz
-json-dataz-jdz
-data_namez-dnz	-downloadz-dlz-limitz-lz-data_versionz-dvz-statusz-sz-deactivatedz-dactz-activez-act)resublenreplace)urlsuffixoutputpath_suffixs      r   
_file_namez4_monkey_patch_webbased_functions.<locals>._file_nameU   s    FF5#s3'@#A#CDE 	 NN,f5W*F3W+V4W\5)W\5)W[%(WXt$W_e,WY%W^W-WY'	
r   c                 z   | j                  |      sJ  	| |      }t        j                  
      |z  }|j                  d      5 }|r0r.t	        |j                               }t        |d      cd d d        S  |d      }t	        |j                               }t        |d      cd d d        S # 1 sw Y   y xY w)NrbTF)
startswithr   filesopenr   r   r   )r?   has_gzip_headerexpected_prefixr@   data_file_namedata_file_pathffpdecompressed_frC   data_modulegzip_responseread_fns            r   _mock_urlopen_sharedz>_monkey_patch_webbased_functions.<locals>._mock_urlopen_sharedk   s    ~~o...#C0"5F  & 	4!=QVVX&(T2	4 	4
 ")D!1^0023(U3	4 	4 	4s   )B1:-B11B:c                      | |d      S N.jsonr?   rI   rJ   r@   r*   )r?   rI   rS   url_prefix_data_descriptions     r   _mock_urlopen_data_descriptionzH_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_descriptionz   s    #+7	
 	
r   c                      | |d      S rU   r*   )r?   rI   rS   url_prefix_data_featuress     r   _mock_urlopen_data_featureszE_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_features       #+4	
 	
r   c                      | |d      S )Nz.arffrW   r*   )r?   rI   rS   url_prefix_download_datas     r   _mock_urlopen_download_datazE_monkey_patch_webbased_functions.<locals>._mock_urlopen_download_data   r]   r   c                 t   | j                        sJ  	| d      }t        j                  
      |z  }|j                  d      5 } |d      }|j	                         j                  d      }t        j                  |      }d d d        dv rt        d ddd t                     |j                  d      5 }|r.t        |j	                               }t        |d      cd d d        S  |d      }t        |j	                               }t        |d	      cd d d        S # 1 sw Y   xY w# 1 sw Y   y xY w)
NrV   rE   zutf-8error  Simulated mock errorr?   codemsghdrsrN   TF)rF   r   rG   rH   r   decodejsonloadsr   r   r   )r?   rI   rK   rL   rM   rO   	decoded_s	json_datarN   rC   rP   rR   url_prefix_data_lists            r   _mock_urlopen_data_listzA_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_list   s2   ~~2333#C1"5F   & 	.!$Q-N&++-44W=I

9-I	. is(>Tgi    & 	4!QVVX&(T2	4 	4
 ")D!1^0023(U3	4 	4	. 	.	4 	4s   >D":'D.+-D."D+.D7c                 8   | j                         }| j                  d      dk(  }|j                        r	 ||      S |j                  
      r	 ||      S |j                        r	 ||      S |j                  	      r	 ||      S t        d|z        )NzAccept-encodingr#   zUnknown mocking URL pattern: %s)get_full_url
get_headerrF   
ValueError)requestargskwargsr?   rI   rY   r\   ro   r`   rX   r[   rn   r_   s        r   _mock_urlopenz7_monkey_patch_webbased_functions.<locals>._mock_urlopen   s    ""$!,,->?6I>>./*3@@^^45.sODD^^45.sODD^^781#GG>DEEr   urlopen)r#   rH   OPENML_TEST_DATA_MODULEtest_offlinesetattrsklearndatasets_openml)contextdata_idrQ   rw   rC   rY   r\   ro   r`   rS   rP   rB   rR   rX   r[   rn   r_   s     ` @@@@@@@@@@@@@r    _monkey_patch_webbased_functionsr   G   s     #MR@JKiiG)C/Cy/AK
,4


42F F ((00)]K r   z9data_id, dataset_params, n_samples, n_features, n_targets=   r            iris)nameversion      &   anneal1        cpu鍞     H      _  
      r   zadult-census  M   MiceProtein  i  parser	liac-arffpandasrQ   Fc                    t        j                  d      }t        | ||       t        d	dd|d|}	t	        |	j
                  d         |k(  sJ t        |	t              sJ t        |	j                  |j                        sJ |	j                  j                  |||z   fk(  sJ t        |	j                  |j                        sJ |	j                  j                  ||fk(  sJ |dk(  r>t        |	j                  |j                        sJ |	j                  j                  |fk(  sAJ t        |	j                  |j                        sJ |	j                  j                  ||fk(  sJ |	j                  J y)
zCheck the behaviour of `fetch_openml` with `as_frame=True`.

    Fetch by ID and/or name (depending if the file was previously cached).
    r   rQ   TFas_framecacher   idr   Nr*   )pytestimportorskipr   r   intdetails
isinstancer   frame	DataFrameshaper   targetSeries
categories)
monkeypatchr   dataset_params	n_samples
n_features	n_targetsr   rQ   pdbunchs
             r   test_fetch_openml_as_frame_truer      s`   P 
		X	&B$['W  	E u}}T"#w...eU###ekk2<<000;;J,B CCCCejj",,///::	:6666A~%,,		222||!!i\111%,,555||!!i%;;;;###r   c                 h   t        j                  d       t        | |d       t        d	dd|d|}t	        |j
                  d         |k(  sJ t        |t              sJ |j                  J t        |j                  t        j                        sJ |j                  j                  ||fk(  sJ t        |j                  t        j                        sJ |dk(  r|j                  j                  |fk(  sJ |j                  j                  ||fk(  sJ t        |j                  t              sJ y)
znCheck the behaviour of `fetch_openml` with `as_frame=False`.

    Fetch both by ID and/or name + version.
    r   Tr   Fr   r   Nr   r*   )r   r   r   r   r   r   r   r   r   r   npndarrayr   r   r   dict)r   r   r   r   r   r   r   r   s           r    test_fetch_openml_as_frame_falser     s    H !$['N  	E u}}T"#w...eU###;;ejj"**---::	:6666ellBJJ///A~||!!i\111||!!i%;;;;e&&---r   )r   r   r   c                   
 t        j                  d      t        | |d       t        |ddd      }t        |ddd      }|j                  |j                  c}

fd}|j                  |      }j                  j                  |
       |j                  |j                  c}j                  j                  |j                     
       fd}|j                  |      }	j                  j                  |	       y	)
z:Check the consistency of the LIAC-ARFF and pandas parsers.r   Tr   Fr   r   r   r   r   c                     | j                      }j                  j                  j                  |      r| j	                  |j
                        S | S r   )r   apitypesis_numeric_dtypeastypedtype)seriespandas_seriesdata_pandasr   s     r   convert_numerical_dtypeszFtest_fetch_openml_consistency_parser.<locals>.convert_numerical_dtypes]  sA    #FKK066<<((7==!4!455Mr   c                 D   | j                      }j                  j                  j                  |      r| j	                  |j
                        S t        |j
                  j                        r/| j                  j                  |j                  j                        S | S r   )r   r   r   r   r   r   r   CategoricalDtypecatrename_categoriesr   )r   r   frame_pandasr   s     r   (convert_numerical_and_categorical_dtypeszVtest_fetch_openml_consistency_parser.<locals>.convert_numerical_and_categorical_dtypesq  sz    $V[[166<<((7==!4!455++R-@-@A :://0A0A0L0LMMMr   N)
r   r   r   r   r   applytestingassert_frame_equalr   feature_names)r   r   
bunch_liacbunch_pandas	data_liacr   data_liac_with_fixed_dtypes
frame_liacr   frame_liac_with_fixed_dtypesr   r   r   s             @@@r   $test_fetch_openml_consistency_parserr   E  s     
		X	&B$['N	J  	L (__l.?.?I{ #,//2J"KJJ!!"={K  *//1C1CJ JJ!!,|/I/I"JKX $.#3#30$  JJ!!">Mr   c                 
   t        j                  d       d}t        | |d       t        |dd|      }t        |dd|      }t	        |j
                  |j
                         t        |j                  |j                         y)z^Check the equivalence of the dataset when using `as_frame=False` and
    `as_frame=True`.
    r   r   Tr   Fr   N)r   r   r   r   r   r   r   r   )r   r   r   bunch_as_frame_truebunch_as_frame_falses        r   -test_fetch_openml_equivalence_array_dataframer     s    
 !G$['N&	 (	 (--/B/G/GH+224G4N4NOr   c                 ,   t        j                  d      }|j                  j                  j                  }d}d}d}d} |g d      }t
        j                  gdz  }	g d}
d	}t        | |d
       t        |d
d|      }|j                  }|j                  }|j                  }t        ||j                        sJ t        j                  |j                  |	k(        sJ |j                   |k(  sJ t        j                  |j"                  |
k(        sJ t        j                  |j$                  |
k(        sJ |j&                  |gk(  sJ t        ||j(                        sJ |j*                  |k(  sJ |j                   |k(  sJ |j,                  |k(  sJ |j.                  j0                  sJ t        ||j                        sJ |j                   |k(  sJ t        j                  |j                  |	|gz   k(        sJ |j.                  j0                  sJ y)z>Check fetching on a numerical only dataset with string labels.r   r   r   r   )r   )r      )zIris-setosazIris-versicolorzIris-virginicar   )sepallength
sepalwidthpetallength
petalwidthclassTFr   N)r   r   r   r   r   r   float64r   r   r   r   r   r   r   alldtypesr   columnsr   target_namesr   r   r   index	is_unique)r   r   r   r   r   
data_shapetarget_shapeframe_shapetarget_dtypedata_dtypes
data_namestarget_namer   r   r   r   s                   r   test_fetch_openml_iris_pandasr     s    
		X	&Bvv||44GJLK#<L ::,"KKJK$['4@	E ::D\\FKKEdBLL)))66$++,---::###66$,,*,---66%%%3444+...fbii(((<<<'''<<<''';;+%%%<<!!!!eR\\***;;+%%%66%,,+">>???;;    r   target_columnr   r   c                 ,   t        j                  d      }d}t        | |d       t        |dd||      }t        |dd|      }|j                  j                  |j                  |j                         t        |t              r[|j                  j                  |j                  j                  |j                  |             |j                  j                  dk(  sJ y	|j                  j                  |k(  sJ |j                  j                  dk(  sJ y	)
z@Check that we can force the target to not be the default target.r   r   TF)r   r   r   r   r   r   )r      r   N)r   r   r   r   r   r   r   r   listassert_index_equalr   r   Indexr   r   r   )r   r   r   r   r   bunch_forcing_targetbunch_defaults          r   !test_fetch_openml_forcing_targetsr     s     
		X	&BG$['4@'# !	M JJ!!"6"<"<m>Q>QR-&


%% ''//-1H	
 $((..(:::#**//=@@@#((..(:::r   )r   r   r   r   r   c                    t        j                  d      }t        | |d       t        |ddd|      }t        |ddd|      \  }}|j                  j                  |j                  |       t        ||j                        r'|j                  j                  |j                  |       y|j                  j                  |j                  |       y)z>Check the behaviour of `return_X_y=True` when `as_frame=True`.r   Tr   Fr   r   r   
return_X_yr   N)r   r   r   r   r   r   r   r   r   assert_series_equalr   )r   r   r   r   r   Xys          r   .test_fetch_openml_equivalence_frame_return_X_yr    s     
		X	&B$['NE DAq JJ!!%**a0!RYY


&&u||Q7


%%ellA6r   )r   r   r   r   c                     t        j                  d       t        | |d       t        |ddd|      }t        |ddd|      \  }}t	        |j
                  |       t	        |j                  |       y)z?Check the behaviour of `return_X_y=True` when `as_frame=False`.r   Tr   Fr   N)r   r   r   r   r   r   r   )r   r   r   r   r  r  s         r   .test_fetch_openml_equivalence_array_return_X_yr    st     !$['NE DAq uzz1%u||Q'r   c                    t        j                  d       d}t        | |d       d}t        ||dd      }t        ||dd      }|j                  j
                  j                  dk(  sJ |j                  j
                  d	k(  sJ y
)z9Check the difference between liac-arff and pandas parser.r   r   Tr   Fr   r   rM   ON)r   r   r   r   r   r   kind)r   r   r   bunch_liac_arffr   s        r   $test_fetch_openml_difference_parsersr  (  s    
!G$['N H"	O  	L %%**c111""c)))r   module)scopec                  2    g dg dg dg dg dg dg ddS )	z+Returns the columns names for each dataset.)r   r   r   r   r   )'familyzproduct-typesteelcarbonhardnesstemper_rolling	conditionformabilitystrength
non-ageingsurface-finishzsurface-qualityenamelabilitybcbfbtbw%2Fmeblmchromphoscbondmarviexptlferrocorrblue%2Fbright%2Fvarn%2Fcleanlustrejurofmspr   thickwidthr=   oilborepackingr   )vendorMYCTMMINMMAXCACHCHMINCHMAXr   )N Mean_Acc1298_Mean_Mem40_CentroidMean_Acc1298_Mean_Mem40_RolloffMean_Acc1298_Mean_Mem40_FluxMean_Acc1298_Mean_Mem40_MFCC_0Mean_Acc1298_Mean_Mem40_MFCC_1Mean_Acc1298_Mean_Mem40_MFCC_2Mean_Acc1298_Mean_Mem40_MFCC_3Mean_Acc1298_Mean_Mem40_MFCC_4Mean_Acc1298_Mean_Mem40_MFCC_5Mean_Acc1298_Mean_Mem40_MFCC_6Mean_Acc1298_Mean_Mem40_MFCC_7Mean_Acc1298_Mean_Mem40_MFCC_8Mean_Acc1298_Mean_Mem40_MFCC_9Mean_Acc1298_Mean_Mem40_MFCC_10Mean_Acc1298_Mean_Mem40_MFCC_11Mean_Acc1298_Mean_Mem40_MFCC_12Mean_Acc1298_Std_Mem40_CentroidMean_Acc1298_Std_Mem40_RolloffMean_Acc1298_Std_Mem40_FluxMean_Acc1298_Std_Mem40_MFCC_0Mean_Acc1298_Std_Mem40_MFCC_1Mean_Acc1298_Std_Mem40_MFCC_2Mean_Acc1298_Std_Mem40_MFCC_3Mean_Acc1298_Std_Mem40_MFCC_4Mean_Acc1298_Std_Mem40_MFCC_5Mean_Acc1298_Std_Mem40_MFCC_6Mean_Acc1298_Std_Mem40_MFCC_7Mean_Acc1298_Std_Mem40_MFCC_8Mean_Acc1298_Std_Mem40_MFCC_9Mean_Acc1298_Std_Mem40_MFCC_10Mean_Acc1298_Std_Mem40_MFCC_11Mean_Acc1298_Std_Mem40_MFCC_12Std_Acc1298_Mean_Mem40_CentroidStd_Acc1298_Mean_Mem40_RolloffStd_Acc1298_Mean_Mem40_FluxStd_Acc1298_Mean_Mem40_MFCC_0Std_Acc1298_Mean_Mem40_MFCC_1Std_Acc1298_Mean_Mem40_MFCC_2Std_Acc1298_Mean_Mem40_MFCC_3Std_Acc1298_Mean_Mem40_MFCC_4Std_Acc1298_Mean_Mem40_MFCC_5Std_Acc1298_Mean_Mem40_MFCC_6Std_Acc1298_Mean_Mem40_MFCC_7Std_Acc1298_Mean_Mem40_MFCC_8Std_Acc1298_Mean_Mem40_MFCC_9Std_Acc1298_Mean_Mem40_MFCC_10Std_Acc1298_Mean_Mem40_MFCC_11Std_Acc1298_Mean_Mem40_MFCC_12Std_Acc1298_Std_Mem40_CentroidStd_Acc1298_Std_Mem40_RolloffStd_Acc1298_Std_Mem40_FluxStd_Acc1298_Std_Mem40_MFCC_0Std_Acc1298_Std_Mem40_MFCC_1Std_Acc1298_Std_Mem40_MFCC_2Std_Acc1298_Std_Mem40_MFCC_3Std_Acc1298_Std_Mem40_MFCC_4Std_Acc1298_Std_Mem40_MFCC_5Std_Acc1298_Std_Mem40_MFCC_6Std_Acc1298_Std_Mem40_MFCC_7Std_Acc1298_Std_Mem40_MFCC_8Std_Acc1298_Std_Mem40_MFCC_9Std_Acc1298_Std_Mem40_MFCC_10Std_Acc1298_Std_Mem40_MFCC_11Std_Acc1298_Std_Mem40_MFCC_12BH_LowPeakAmpBH_LowPeakBPMBH_HighPeakAmpBH_HighPeakBPMBH_HighLowRatioBHSUM1BHSUM2BHSUM3zamazed.suprisedzhappy.pleasedzrelaxing.calmzquiet.stillz
sad.lonelyzangry.aggresive)age	workclasszfnlwgt:z
education:zeducation-num:zmarital-status:zoccupation:zrelationship:zrace:zsex:zcapital-gain:zcapital-loss:zhours-per-week:znative-country:r   )NDYRK1A_NITSN1_NBDNF_NNR1_NNR2A_NpAKT_NpBRAF_N	pCAMKII_NpCREB_NpELK_NpERK_NpJNK_NPKCA_NpMEK_NpNR1_NpNR2A_NpNR2B_NpPKCAB_NpRSK_NAKT_NBRAF_NCAMKII_NCREB_NELK_NERK_NGSK3B_NJNK_NMEK_NTRKA_NRSK_NAPP_N
Bcatenin_NSOD1_NMTOR_NP38_NpMTOR_NDSCR1_NAMPKA_NNR2B_NpNUMB_NRAPTOR_NTIAM1_NpP70S6_NNUMB_NP70S6_NpGSK3B_NpPKCG_NCDK5_NS6_NADARB1_NAcetylH3K9_NRRP1_NBAX_NARC_NERBB4_NnNOS_NTau_NGFAP_NGluR3_NGluR4_NIL1B_NP3525_NpCASP9_NPSD95_NSNCA_NUbiquitin_NpGSK3B_Tyr216_NSHH_NBAD_NBCL2_NpS6_NpCFOS_NSYP_N	H3AcK18_NEGR1_NH3MeK4_NCaNA_Nr   )pclasssurvivedr   sexr  sibspparchticketfarecabinembarkedboatbody	home.destr   r   r   r   r   r   r   r*   r*   r   r   datasets_column_namesr  G  s9     P(
R SO
`
"O
`
{m mr   c                      i i ddddddddd	d
dddddddd
dddddddddddddd
ddddddddddd
dd
i i i ddiddddddd d!d"S )#Nr  r   r  	   r  r   r  r   r  r   r  r  r  r  r  r     r  r  r   r!  r"  r#  )
r$  r%  r&  r'  r(  r)  r*  r+  r.  r0  r  r   i  r   i  i7  i  i4  )r  r  r  r  r  r  r  r  r*   r*   r   r   datasets_missing_valuesr  :  s0    
b
a
 
 1	

 "
 b
 R
 "
 "
 "
 q
 !
 
 R
 B
  R!
" R#
$ ,.7
: !}
G, ,r   zJdata_id, parser, expected_n_categories, expected_n_floats, expected_n_ints))r   r   r   r   r   )r   r   r   r   r   )r   r   !   r   r   )r   r   r  r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   E   r   )r   r   r  r   r   )r   r   r  r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   c	           	      8   t        j                  d      }	|	j                  j                  j                  }
t        | ||       t        |dd|      }|j                  }t        |j                  D cg c]  }t        ||
      s| c}      }t        |j                  D cg c]  }|j                  dk(  s| c}      }t        |j                  D cg c]  }|j                  dk(  s| c}      }||k(  sJ ||k(  sJ ||k(  sJ |j                  j                         ||   k(  sJ |j                         j                         j!                         }|j#                         D ]!  \  }}||   j%                  |d      }||k(  r!J  y	c c}w c c}w c c}w )
zYCheck that `fetch_openml` infer the right number of categories, integers, and
    floats.r   r   TFr   rM   ir   N)r   r   r   r   r   r   r   r   r=   r   r   r	  r   tolistisnasumto_dictitemsget)r   r   r   expected_n_categoriesexpected_n_floatsexpected_n_intsrQ   r  r  r   r   r   r   r   n_categoriesn_floatsn_intsframe_feature_to_n_nanr   	n_missingexpected_missings                        r   !test_fetch_openml_types_inferencer  k  s   P 
		X	&Bvv||44$['W	E KKE!LLP5Ju>N,OPL u||IeuzzS7HEIJHU\\GEUZZ35F%GHF0000((((_$$$==!%:7%CCCC"ZZ\--/7791779 -i27;??aH,,,,- 	QIGs$   2FF"F7FF*Fzparams, err_msgunknownz:The 'parser' parameter of fetch_openml must be a str amongr   z<The 'as_frame' parameter of fetch_openml must be an instancec                     d}t        | |d       t        j                  t        |      5  t	        dd|i| d d d        y # 1 sw Y   y xY w)Nr   Tmatchr   r*   )r   r   raisesrs   r   r   paramserr_msgr   s       r   &test_fetch_openml_validation_parameterr    sH     G$['4@	z	1 0/W//0 0 0s   AAr  auto)r   r   c                     d}	 t        d       t        d      # t        $ rP t        | |d       d}t	        j
                  t        |      5  t        d	d|i| ddd       Y y# 1 sw Y   Y yxY ww xY w)
z=Check that we raise the proper errors when we require pandas.r   !test_fetch_openml_requires_pandasz.This test requires pandas to not be installed.Tz:requires pandas to be installed. Alternatively, explicitlyr  r   Nr*   )r   r   ImportErrorr   r   r  r   )r   r  r   r  s       r   'test_fetch_openml_requires_pandas_errorr    s{     GI@A GHH  4(gtDN]];g6 	433F3	4 	4 	44s'    3A3A%A3%A/	*A3/A3z2ignore:Version 1 of dataset Australian is inactivez:Sparse ARFF datasets cannot be loaded with parser='pandas'z9Sparse ARFF datasets cannot be loaded with as_frame=True.)r   r   c                     t        j                  d       d}t        | |d       t        j                  t        |      5  t        d|dd| ddd       y# 1 sw Y   yxY w)	ztCheck that we raise the expected error for sparse ARFF datasets and
    a wrong set of incompatible parameters.
    r   $  Tr  F)r   r   Nr*   )r   r   r   r  rs   r   r  s       r   #test_fetch_openml_sparse_arff_errorr    sc    , !G$['4@	z	1 
 	
	
 	

 
 
s    AA!zdata_id, data_type)r   	dataframe)r  sparsec                     t        j                  d      }t        | |d       t        |dd      }|dk(  r|j                  nt
        j                  j                  }t        |j                  |      sJ y)z&Check the auto mode of `fetch_openml`.r   Tr  F)r   r   r   r  N)
r   r   r   r   r   scipyr  
csr_matrixr   r   )r   r   	data_typer   r   klasss         r   test_fetch_openml_auto_moder    s^     
		X	&B$['4@&FD%4BLL%,,:Q:QEdii'''r   c                    t        j                  d       d}t        | |d       d}t        j                  t        |      5  t        d      5  t        |ddd	
       ddd       ddd       y# 1 sw Y   xY w# 1 sw Y   yxY w)z[Check that we raise a warning regarding the working memory when using
    LIAC-ARFF parser.r   r   Tz*Could not adhere to working_memory config.r  gư>)working_memoryFr   r   N)r   r   r   warnsUserWarningr   r   )r   r   rg   s      r   :test_convert_arff_data_dataframe_warning_low_memory_pandasr    s     !G$['4@
6C	k	- 40 	"		 	 	 s$   A<A0A<0A9	5A<<Bc                     d}d}t        | ||       t        j                  d      }t        j                  t
        |      5  t        |ddd       ddd       y# 1 sw Y   yxY w)	z\Check that a warning is raised when multiple versions exist and no version is
    requested.r   r   a;  Multiple active versions of the dataset matching the name iris exist. Versions may be fundamentally different, returning version 1. Available versions:
- version 1, status: active
  url: https://www.openml.org/search?type=data&id=61
- version 3, status: active
  url: https://www.openml.org/search?type=data&id=969
r  Fr   )r   r   r   r   N)r   r;   escaper   r  r  r   )r   rQ   r   	data_namerg   s        r   ,test_fetch_openml_iris_warn_multiple_versionr  (  sg     GI$['=I
))	BC 
k	- 
		

 
 
s   AA$c                     d}d}d}d}t        | ||       t        ||ddd      }|j                  j                  ||fk(  sJ |j                  J y)z/Check that we can get a dataset without target.r   Nr   r   Fr   r   r   r   r   r   )r   r   r   r   r   )r   rQ   r   r   expected_observationsexpected_featuresr   s          r   test_fetch_openml_no_targetr  C  sn     GM$['=I#D 99??46GHHHH;;r   c                 (   t        j                  d       d}t        | ||       t        |dd|      }|j                  j
                  d   }|j                  d   j                         j                         sJ t        |j                  g d       y	)
zRcheck that missing values in categories are compatible with pandas
    categoricalr   iY  r   FTr   r   r   r   r  )FEMALEMALE_N)
r   r   r   r   r   r   r  anyr   r   )r   rQ   r   r   penguins	cat_dtypes         r   test_missing_values_pandasr$  W  s    
 !G$['W	H $$U+I==$$&**,,,y++-DEr   r     glass2)r   r   r   c                    d}t        | ||       d}t        j                  t        |      5  t	        ddddd|}ddd       j
                  j                  dk(  sJ |j                  d	   d
k(  sJ y# 1 sw Y   9xY w)z;Check that we raise a warning when the dataset is inactive.r%  z(Version 1 of dataset glass2 is inactive,r  Fr   )r   r   r   N)   r  r   40675r*   )r   r   r  r  r   r   r   r   )r   rQ   r   r   rg   r&  s         r   test_fetch_openml_inactiver*  m  s     G$['=I
4C	k	- 
 
%
?M

 ;;(((>>$7***
 
s   A55A>z"data_id, params, err_type, err_msgzNo active dataset glass2 foundr   r   )r   r   z1Can only handle homogeneous multi-target datasets)r   r   zOSTRING attributes are not supported for array representation. Try as_frame=Truer  )r   r   r   zTarget column 'family'	undefinedz(Could not find target_column='undefined'c                     t        | ||       |j                  dd      s|dk(  rt        j                  d       t        j                  ||      5  t        dd|d| d d d        y # 1 sw Y   y xY w)Nr   Tr   r  F)r   r   r*   )r   r  r   r   r  r   )r   rQ   r   r  err_typer  r   s          r   test_fetch_openml_errorr.    si    d %['=Izz*d#v'9H%	xw	/ ;:5:6:; ; ;s   A))A2zparams, err_type, err_msgr1   r   zCThe 'version' parameter of fetch_openml must be an int in the rangenAmE)r   r   zCThe 'data_id' parameter of fetch_openml must be an int in the rangez6The 'version' parameter of fetch_openml must be an intzFNeither name nor data_id are provided. Please provide name or data_id.c                 r    t        j                  ||      5  t        di |  d d d        y # 1 sw Y   y xY w)Nr  r*   )r   r  r   )r  r-  r  s      r   )test_fetch_openml_raises_illegal_argumentr1    s2    4 
xw	/ v  s   -6c                    d}d}d}t        | ||       d}|j                  |      }t        j                  t        |      5  t        ||ddd       d d d        d	}|j                  |      }t        j                  t        |      5  t        ||ddd       d d d        d}|j                  |      }t        j                  t        |      5  t        ||d
gddd       d d d        d	}|j                  |      }t        j                  t        |      5  t        ||d
gddd       d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   kxY w# 1 sw Y   y xY w)Nr   z.target_column='{}' has flag is_row_identifier.z&target_column='{}' has flag is_ignore.MouseIDr  Fr   r  Genotyper   )r   formatr   r  r  r   )r   rQ   r   expected_row_id_msgexpected_ignore_msg
target_colrg   s          r   test_warn_ignore_attributer9    sp   GJB$['=IJ

$
$Z
0C	k	- 
$	

 J

$
$Z
0C	k	- 
$	

 J

$
$Z
0C	k	- 
%w/	

 J

$
$Z
0C	k	- 
%w/	

 
?
 

 

 

 
s0   D5	EEE5D>E
EE"c                     d}t        | ||       d}t        j                  t        |      5  t	        |ddd       d d d        y # 1 sw Y   y xY w)Nr   zJOpenML registered a problem with the dataset. It might be unusable. Error:r  Fr   r  r   r   r  r  r   r   rQ   r   rg   s       r   test_dataset_with_openml_errorr=    sM    G$['=I
VC	k	- WWEE+VW W W   AAc                     d}t        | ||       d}t        j                  t        |      5  t	        |ddd       d d d        y # 1 sw Y   y xY w)Nr   zFOpenML raised a warning on the dataset. It might be unusable. Warning:r  Fr   r  r;  r<  s       r    test_dataset_with_openml_warningr@    sM    G$['=I
RC	k	- WWEE+VW W Wr>  c                 h   t        j                  d       d}t        | |d       |dddd}t        di |}t        di |dddii}t	        d	 |j
                  d
   j                  j                  D              sJ t        d |j
                  d
   j                  j                  D              rJ y)zACheck that we can overwrite the default parameters of `read_csv`.r   6  Fr   rQ   Tr   read_csv_kwargsskipinitialspacec              3   >   K   | ]  }|j                  d         yw NrF   .0r   s     r   	<genexpr>zFtest_fetch_openml_overwrite_default_params_read_csv.<locals>.<genexpr>0  s       #s   r   c              3   >   K   | ]  }|j                  d         ywrG  rI  rJ  s     r   rL  zFtest_fetch_openml_overwrite_default_params_read_csv.<locals>.<genexpr>3  s!       	srM  Nr*   )	r   r   r   r   r   r   r   r   r!  )r   r   common_paramsadult_without_spacesadult_with_spacess        r   3test_fetch_openml_overwrite_default_params_read_csvrR    s    
!G$['QVW 	M (8-8$ 
*<e)D  '8'>'>w'G'K'K'V'V     '--g6::EE    r   c                    d}t        | ||       t        j                  j                  j                  j                  |      }t        |j                  d            }t        ||      }t        ||      }t        j                  j                  |      sJ t        ||      }|j                         |j                         k(  sJ y )Nr   scikit_learn_data)r   r|   r}   r~   
_DATA_FILEr5  strmkdirr   r
   ospathisfiler   )	r   rQ   tmpdirr   openml_pathcache_directory	response1location	response2s	            r   test_open_openml_url_cachera  =  s    G$['=I""**55<<WEK&,,':;<O o>I{O<H77>>(### o>I>>y~~////r   write_to_diskc                    d}t         j                  j                  j                  j	                  |      }t        |j                  d            }t        ||      fd}| j                  t         j                  j                  d|       t        j                  t        d      5  t        ||       d d d        t        j                  j                        rJ y # 1 sw Y   +xY w)Nr   rT  c                     r1t        d      5 }|j                  d       d d d        t        d      t        d      # 1 sw Y   t        d      xY w)Nw Invalid request)rH   writers   )rt   ru   rv   rM   r_  rb  s       r   rw   z>test_open_openml_url_unlinks_local_path.<locals>._mock_urlopenU  sP    h$ *++j*++*++s   A  Arx   rg  r  )r|   r}   r~   rU  r5  rV  rW  r
   r{   r   r  rs   r   rX  rY  exists)r   r[  rb  r   r\  r]  rw   r_  s     `    @r   'test_open_openml_url_unlinks_local_pathrj  N  s    G""**55<<WEK&,,':;<O{O<H, ((00)]K	z):	; 7o67 ww~~h''''7 7s   *C!!C*c                 ,   d}t         j                  j                  j                  j	                  |      }t        | j                  d            }t        ||      t        j                  t        j                  j                               t        d      5 }|j                  d       d d d        t        ||      fd       }d}t        j                   t"        |      5   |       }d d d        dk(  sJ y # 1 sw Y   RxY w# 1 sw Y   xY w)	Nr   rT  re  rf  c                  Z    t         j                  j                         rt        d      y)NzFile exist!r   )rX  rY  ri  	Exception)r_  s   r   
_load_dataz/test_retry_with_clean_cache.<locals>._load_datam  s#     77>>(#M**r   z!Invalid cache, redownloading filer  r   )r|   r}   r~   rU  r5  rV  rW  r
   rX  makedirsrY  dirnamerH   rh  r   r   r  RuntimeWarning)	r[  r   r\  r]  rM   rn  warn_msgresultr_  s	           @r   test_retry_with_clean_cachert  c  s    G""**55<<WEK&,,':;<O{O<HKK)*	h	 	 [/: ; 3H	nH	5 Q;;  s   C>&D
>D
Dc                 8   d}t         j                  j                  j                  j	                  |      }t        | j                  d            }t        ||      d        }d}t        j                  t        |      5   |        d d d        y # 1 sw Y   y xY w)Nr   rT  c                  2    t        d ddd t                     )Nrc   rd   re   r   r   r*   r   r   rn  z:test_retry_with_clean_cache_http_error.<locals>._load_data  s    3$:')
 	
r   rd   r  )r|   r}   r~   rU  r5  rV  rW  r   r   r  r   )r[  r   r\  r]  rn  	error_msgs         r   &test_retry_with_clean_cache_http_errorry  z  s    G""**55<<WEK&,,':;<O[/:
 ;

 'I	y		2   s   ?BBc                    d }d}t        |j                  d            }t        | ||       t        |d|ddd      \  }}| j	                  t
        j                  j                  d|       t        |d|ddd      \  }}	t        j                  j                  ||       t        j                  j                  ||	       y )	Nc                 :    t        d| j                         z        )NzhThis mechanism intends to test correct cachehandling. As such, urlopen should never be accessed. URL: %s)rs   rq   rt   ru   rv   s      r   _mock_urlopen_raisez4test_fetch_openml_cache.<locals>._mock_urlopen_raise  s%     ")"6"6"89
 	
r   r   rT  TFr   )r   r   r5   r   r   r   rx   )rV  rW  r   r   r{   r|   r}   r~   r   r   r   )
r   rQ   r[  r}  r   r]  	X_fetched	y_fetchedX_cachedy_cacheds
             r   test_fetch_openml_cacher    s    
 G&,,':;<O$['=I'!Iy ((00)=PQ%!Hh JJ!!)X6JJ!!)X6r   zas_frame, parser))Tr   )Fr   )Tr   )Fr   c                 p   |s|dk(  rt        j                  d       d}t        | |d       t        dz   d| z   }d}t	        j
                  |      |z  }|dz  |j                  d      5 }	t        j                  |	d      }
t        |
j                               }d	|t        |      d
z
  <   ddd       t        j                  d      5 }|j                         ddd       t        j                  j                  j                   fd}| j#                  t        j                  j                  d|       t        j$                  t&              5 }t        j                  j)                  |d||       ddd       j+                  d      sJ y# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   5xY w)z/Check that the checksum is working as expected.r   r   Tr7   r8   zdata-v1-dl-1666876.arff.gzztest_invalid_checksum.arffrE   %   r   Nwbc                     | j                         }|j                  d      r;t        d      5 }|j                         }d d d        t	        t              d      S  |       S # 1 sw Y   'xY w)Nzdata/v1/download/1666876rE   Tr$   )rq   endswithrH   r   r   r   )rt   ru   rv   r?   rM   corrupted_datacorrupt_copy_pathmocked_openml_urls         r   swap_file_mockz9test_fetch_openml_verify_checksum.<locals>.swap_file_mock  sh    ""$<<23'. *!!"*$W^%<dKK$W--	* *s   A&&A/rx   Fr  1666876)r   r   r   ry   r   rG   rH   r#   	bytearrayr   r=   GzipFilerh  r|   r}   r~   rx   r{   r  rs   r   r  )r   r   r   r[  r   r   original_data_moduleoriginal_data_file_nameoriginal_data_path	orig_file	orig_gzipr   modified_gzipr  excr  r  s                  @@r   !test_fetch_openml_verify_checksumr    s    6X%H%G$['4@ 3S8S	?J:")=>AXX!==		 	 	& !)IIi.	)* SY]!
 
($	/ "=D!"  ((0088. ((00)^L 
z	" 
c%%58F 	& 	


 99Y=! !
" "(
 
s%   -AFF $F,F F),F5c                    d }| j                  t        j                  j                  d|       d}t	        j
                  t        t        j                  dt        |z    d            5 }t	        j                  t        d      5  t        |d d	       d d d        t        |      d
k(  sJ 	 d d d        y # 1 sw Y   #xY w# 1 sw Y   y xY w)Nc                 2    t        d ddd t                     )Ni  Simulated network errorre   rw  r|  s      r   _mock_urlopen_network_errorzPtest_open_openml_url_retry_on_network_error.<locals>._mock_urlopen_network_error  s    3$=DWY
 	
r   rx   zinvalid-urlz+A network error occurred while downloading z. Retrying...r  r  r   )delayr   )r{   r|   r}   r~   r   r  r  r;   r  r	   r  r   r   r=   )r   r  invalid_openml_urlrecords       r   +test_open_openml_url_retry_on_network_errorr    s    

   )-H '	ii!334MC

 	  
]]9,EF 	@/Q?	@6{a	  	 	@ 	@	  	 s$   *CB7C7C 	<CC)r   r   c                     |dk(  rt        j                  d       d}t        | ||       t        j                  j                  |dd|      }|J |d   j                  dk(  sJ d|d	   vsJ y)
zCheck that we can load the "zoo" dataset.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/14340
    r   >   Fr  Nr   )e      animalr   )r   r   r   r|   r}   r   r   )r   rQ   r   r   datasets        r   &test_fetch_openml_with_ignored_featurer     s     H%G$['=I++uuV , G  6?  I---7?3333r   c                 F   t        j                  d      }d}t        | |d       dd|d}t        dddi|}t        dddi|}|j                  j                  |j                  |j                         |j                  j                  j                  d	      j                         rJ |j                  j                  j                  d	      j                         rJ t        ddd
d|}t        ddd
d|}|j                  j                  |j                  d   |j                  d          |j                  d   j                  j                  d	      j                         rJ |j                  d   j                  j                  d	      j                         rJ y)zCheck that we strip the single quotes when used as a string delimiter.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/23381
    r   r   FrC  Tr   r   r   r   r   'r  )r   r   r   Nr*   )r   r   r   r   r   r  r   rV  rF   r!  r  r   )r   r   r   rO  mice_pandasmice_liac_arffs         r   test_fetch_openml_strip_quotesr    s    
		X	&BG$['QVW!%'JM@h@-@K!FFFNJJ"";#5#5~7L7LM!!%%00599;;;!!%%..s377999 XhhX-XK! (6CN JJ""'"N$8$8$A   )--88=AACCC  )--66s;??AAAAr   c                     t        j                  d      }d}t        | |d       dd|d}t        dddi|}t        dddi|}|j                  j                  |j                  d	   |j                  d	          y
)zCheck that we can strip leading whitespace in pandas parser.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25311
    r   rB  FrC  Tr  r   r   r   Nr*   )r   r   r   r   r   r  r   r   r   r   rO  adult_pandasadult_liac_arffs         r   $test_fetch_openml_leading_whitespacer  3  s     
		X	&BG$['QVW!%'JMAxA=AL"G+GGOJJ""7#_%:%:7%Cr   c                     t        j                  d      }d}t        | |d       dd|d}t        d
ddi|}t        d
ddi|}|j                  j                  |j                  |j                         y	)zCheck that we can handle escapechar and single/double quotechar.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25478
    r   iZ  FrC  Tr  r   r   Nr*   )r   r   r   r   r   r   r   r  s         r   &test_fetch_openml_quotechar_escapecharr  E  sv     
		X	&BG$['QVW!%'JMAxA=AL"G+GGOJJ!!,"4"4o6K6KLr   )T__doc__r#   rj   rX  r;   	functoolsr   	importlibr   ior   urllib.errorr   numpyr   r   scipy.sparser	  r|   r   sklearn.datasetsr   fetch_openml_origsklearn.datasets._openmlr	   r
   r   r   sklearn.utilsr   $sklearn.utils._optional_dependenciesr   sklearn.utils._testingr   r   r   ry   rz   r   r   markparametrizer   r   r   r   r   r   r  r  r  fixturer  r  r  r  r  filterwarningsr  r  r  r  r  r$  r*  rs   KeyErrorr.  r1  r9  r=  r@  rR  ra  rj  rt  ry  r  r  r  r  r  r  r  r*   r   r   <module>r     sS
      	 	    "     " >    E  ?  < (D9tLv ? 
i_c1a(	f+S!Q7	
YNBA&	
X!,b"a8	y#Q*	u+S!Q7	E"BA.		4 "b!,	'R3	E"Ar1-	'B2	E"D"a0'0 K#:;4-8($ 9 <14($V ? 
i_c1a(	f+S!Q7	
YNBA&	
X!,b"a8	y#Q*	u+S!Q7	E"BA.		4 "b!,	'R3	E"Ar1-	'B2#, K#:;$. <-.$.N $56<N 7<N~ K#:;P <P4 K#:;,! <,!^ K#:;<,9V*WX; Y <;> $=>K#:;7 < ?76 $:;K#:;( < <(0*> ho  od h-  -` P4 4-8)- 956)-`  y!H	

 #J	
	00 V,v.h/f-	II PQ x H	

 G	

  T2G	
"
# R$
  PQ( R(& 4-8
 9
4 4-8 9& 4-8K#:;F < 9F( 4-8	E(q9
+ 9
+ 4-8(	"J0PQlG-DE?		
 51!	
 H$G$		
 H%H$		
 [96		
 k7-CD6		
G),Z K#:;; <[, 9^;  DY?Q	
 F+Q	
 FyAD	
 T	
!232
 4-8.
 9.
b 4-8W 9W 4-8W 9W@ 4-80 90  4-8( 9((.  4-87 97D + + \ 8 4-8#:;4 < 94(B:$Mr   