
    e!h4              	          d Z ddlZddlmZmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZmZ ddlmZm Z   e       Z! e       Z"d Z#ejH                  jK                  dee z         d        Z&d Z'd Z(d Z)d Z*d Z+ejH                  jK                  dddg      d        Z,d Z-d Z.d Z/d Z0d Z1 ed ed1i dd i!      ejH                  jK                  d"d#d$g      d%               Z2 ed ed1i dd&i!      ejH                  jK                  d"d#d$g      d'               Z3d( Z4ejH                  jK                  d)e      d*        Z5d+ Z6ejH                  jK                  dee z         d,        Z7ejH                  jK                  d-d.d/g      ejH                  jK                  dddg      d0               Z8y)2zD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
    N)Mockpatch)parallel_backend)load_diabetes	load_irismake_classification)IsolationForest)_average_path_length)roc_auc_score)ParameterGridtrain_test_split)check_random_state)assert_allcloseassert_array_almost_equalassert_array_equalignore_warnings)CSC_CONTAINERSCSR_CONTAINERSc                 <   t        j                  ddgddgg      }t        j                  ddgddgg      }t        dgg dddgd      }t               5  |D ]-  }t	        dd	| i|j                  |      j                  |       / 	 d
d
d
       y
# 1 sw Y   y
xY w)z6Check Isolation Forest for various parameter settings.r            )      ?      ?r   TF)n_estimatorsmax_samples	bootstraprandom_stateN )nparrayr   r   r	   fitpredict)global_random_seedX_trainX_testgridparamss        k/var/www/html/diagnosisapp-backend/venv/lib/python3.12/site-packages/sklearn/ensemble/tests/test_iforest.pytest_iforestr*   "   s    hhAA'(GXX1v1v&'F]$PUWD 
	  	FF);FvFJJgfo	  s   3BBsparse_containerc                 ~   t        |       }t        t        j                  dd |      \  }}t	        ddgddgd      } ||      } ||      }|D ]j  }t        dd	| d
|j                  |      }	|	j                  |      }
t        dd	| d
|j                  |      }|j                  |      }t        |
|       l y)z=Check IForest for various parameter settings on sparse input.N2   r   r   r   TF)r   r   
   )r   r   r   )	r   r   diabetesdatar   r	   r"   r#   r   )r$   r+   rngr%   r&   r'   X_train_sparseX_test_sparser(   sparse_classifiersparse_resultsdense_classifierdense_resultss                r)   test_iforest_sparser9   2   s     /
0C&x}}Sb'9LOGV#s4-PQD%g.N$V,M :+ 
*<
@F

#n
 	 +22=A + 
*<
@F

#g, 	 )008>=9:    c                     t         j                  } d}t        j                  t        |      5  t        d      j                  |        ddd       t        j                         5  t        j                  dt               t        d      j                  |        ddd       t        j                         5  t        j                  dt               t        t        j                  d            j                  |        ddd       t        j                  t              5  t               j                  |       j                  | ddd	df          ddd       y# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   vxY w# 1 sw Y   yxY w)
z7Test that it gives proper exception on deficient input.3max_samples will be set to n_samples for estimationmatch  r   Nerrorautor   r   )irisr1   pytestwarnsUserWarningr	   r"   warningscatch_warningssimplefilterr    int64raises
ValueErrorr#   )Xwarn_msgs     r)   test_iforest_errorrO   L   s)   		A
 EH	k	2 1D)--a01		 	 	" 3g{3F+//23 
	 	 	" 8g{3BHHQK044Q78
 
z	" 3a ((1ab523 31 13 38 8
3 3s0   E&6E*8A	E6"2FE'*E36E?Fc            
         t         j                  } t               j                  |       }|j                  D ]O  }|j
                  t        t        j                  t        j                  | j                  d                     k(  rOJ  y)zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)rC   r1   r	   r"   estimators_	max_depthintr    ceillog2shape)rM   clfests      r)   test_recalculate_max_depthrY   b   sd    		A




"C B}}BGGBGGAGGAJ,?$@ AAAABr:   c                     t         j                  } t               j                  |       }|j                  | j
                  d   k(  sJ t        d      }d}t        j                  t        |      5  |j                  |        d d d        |j                  | j
                  d   k(  sJ t        d      j                  |       }|j                  d| j
                  d   z  k(  sJ y # 1 sw Y   dxY w)Nr   i  r@   r<   r=   g?)	rC   r1   r	   r"   max_samples_rV   rD   rE   rF   )rM   rW   rN   s      r)   test_max_samples_attributer\   j   s    		A




"Cqwwqz)))
c
*CDH	k	2 
qwwqz)))
c
*
.
.q
1CsQWWQZ//// s   1C&&C/c                    t        |       }t        t        j                  |      \  }}t	        d|       j                  |      }|j                  d       |j                  |      }|j                  d       |j                  |      }t        ||       t	        d|       j                  |      }|j                  |      }t        ||       y)zCheck parallel regression.r.   r   )n_jobsr   r   r^   r   N)	r   r   r0   r1   r	   r"   
set_paramsr#   r   )r$   r2   r%   r&   ensembley1y2y3s           r)    test_iforest_parallel_regressionre   y   s    
/
0C&x}}3GOGVa6HIMMgVHq!			&	!Bq!			&	!Bb"%a6HIMMgVH			&	!Bb"%r:   c                    t        |       }d|j                  dd      z  }|j                  t        j                  |dz   |dz
  f            }|dd }|j                  ddd	      }t        j                  |dd |f      }t        j                  d
gdz  dgdz  z         }t        d|      j                  |      }|j                  |       }t        ||      dkD  sJ y)z#Test Isolation Forest performs wellg333333?iX  r   Nr?   r   )   r   )lowhighsizer   rh   d   )r   r   g\(\?)r   randnpermutationr    vstackuniformr!   r	   r"   decision_functionr   )	r$   r2   rM   r%   
X_outliersr&   y_testrW   y_preds	            r)   test_iforest_performanceru      s     /
0CciiQA		1q5!a%.12AhG !(;JYY$%*-.FXXqcCi1#)+,F c
<
@
@
IC ##F++F (4///r:   contamination      ?rB   c           	      N   ddgddgddgddgddgddgddgddgg}t        || 	      }|j                  |       |j                  |       }|j                  |      }t	        j
                  |dd        t	        j                  |d d       kD  sJ t        |d
dgz  ddgz  z          y )Nrg   r   r         	   r   rv      )r	   r"   rq   r#   r    minmaxr   )rv   r$   rM   rW   decision_funcpreds         r)   test_iforest_worksr      s     bB8b"X1v1v1v1vAwOA '9
WCGGAJ**1--M;;q>D66-$%}Sb/A(BBBBtQ!WqB4x/0r:   c                      t         j                  } t               j                  |       }|j                  |j
                  k(  sJ y N)rC   r1   r	   r"   r[   _max_samples)rM   rW   s     r)   test_max_samples_consistencyr      s8    		A




"Cs/////r:   c                      t        d      } t        t        j                  d d t        j                  d d |       \  }}}}t        d      }|j                  ||       |j                  |       y )Nr   r-   r.   g?)max_features)r   r   r0   r1   targetr	   r"   r#   )r2   r%   r&   y_trainrs   rW   s         r)    test_iforest_subsampled_featuresr      sb    
Q
C'7crHOOCR0s($GVWf s
+CGGGWKKr:   c                  p   dt        j                  d      t         j                  z   z  dz
  } dt        j                  d      t         j                  z   z  dz
  }t        t	        dg      dg       t        t	        dg      dg       t        t	        d	g      d
g       t        t	        dg      | g       t        t	        dg      |g       t        t	        t        j
                  g d            dd
| |g       t	        t        j                  d            }t        |t        j                  |             y )N       @g      @g?g     0@g}?r   g        r   r   r        )r   r   r   r   )	r    logeuler_gammar   r
   r!   aranger   sort)
result_one
result_twoavg_path_lengths      r)    test_iforest_average_path_lengthr      s     sbnn45GJu67:MMJ(!-u5(!-u5(!-u5(!-
|<(#/*>RXXn56	c:z*
 +299Q<8O(@Ar:   c                     ddgddgddgg} t        d      j                  |       }t               j                  |       }t        |j                  ddgg      |j	                  ddgg      |j
                  z          t        |j                  ddgg      |j	                  ddgg      |j
                  z          t        |j                  ddgg      |j                  ddgg             y )Nr   r   皙?)rv   r   )r	   r"   r   score_samplesrq   offset_)r%   clf1clf2s      r)   test_score_samplesr      s    1v1v1v&G-11':D  )DS#J<(c
|,t||; S#J<(c
|,t||; S#J<($*<*<sCj\*Jr:   c                  <   t        d      } | j                  dd      }t        dd| d      }|j                  |       |j                  d   }|j                  d       |j                  |       t        |j                        dk(  sJ |j                  d   |u sJ y)	z/Test iterative addition of iTrees to an iForestr      r   r/   T)r   r   r   
warm_start)r   N)r   rm   r	   r"   rQ   r`   len)r2   rM   rW   tree_1s       r)   test_iforest_warm_startr      s     Q
C		"aA RcdC GGAJ__QFNNN#GGAJs2%%%??1'''r:   z*sklearn.ensemble._iforest.get_chunk_n_rowsreturn_valuer   )side_effectzcontamination, n_predict_calls)rw   r   )rB   r   c                 >    t        ||       | j                  |k(  sJ y r   r   
call_countmocked_get_chunkrv   n_predict_callsr$   s       r)   test_iforest_chunks_works1r     #     }&89&&/999r:   r/   c                 >    t        ||       | j                  |k(  sJ y r   r   r   s       r)   test_iforest_chunks_works2r     r   r:   c                     t        j                  d      } t               }|j                  |        t         j                  j                  d      }t        |j                  |       dk(        sJ t        |j                  |j                  dd            dk(        sJ t        |j                  | dz         dk(        sJ t        |j                  | dz
        dk(        sJ t        j                  |j                  dd      dd      } t               }|j                  |        t        |j                  |       dk(        sJ t        |j                  |j                  dd            dk(        sJ t        |j                  t        j                  d            dk(        sJ |j                  dd      } t               }|j                  |        t        |j                  |       dk(        sJ t        |j                  |j                  dd            dk(        sJ t        |j                  t        j                  d            dk(        sJ y)z=Test whether iforest predicts inliers when using uniform data)rl   r/   r   r   rl   r/   N)
r    onesr	   r"   randomRandomStateallr#   rm   repeat)rM   iforestr2   s      r)   test_iforest_with_uniform_datar     s    		AGKKN
))


"Cwq!Q&'''wsyyb12a7888wq1u%*+++wq1u%*+++ 			#))Ar"C+AGKKNwq!Q&'''wsyyb12a7888wrwwy12a7888 			!RAGKKNwq!Q&'''wsyyb12a7888wrwwy12a7888r:   csc_containerc                 p    t        ddd      \  }} | |      }t        ddd      j                  |       y	)
zdCheck that Isolation Forest does not segfault with n_jobs=2

    Non-regression test for #23252
    iL rl   r   	n_samples
n_featuresr   r/      r   )r   r   r^   N)r   r	   r"   )r   rM   _s      r)   *test_iforest_with_n_jobs_does_not_segfaultr   =  s7     CaPDAqaAQ?CCAFr:   c                  t   t        j                  d      } t        j                  j	                  d      }| j                  |j                  d      dg      }t        dd      }t        j                         5  t        j                  dt               |j                  |       d	d	d	       y	# 1 sw Y   y	xY w)
zCheck that feature names are preserved when contamination is not "auto".

    Feature names are required for consistency checks during scoring.

    Non-regression test for Issue #25844
    pandasr   r{   a)r1   columnsg?r~   rA   N)rD   importorskipr    r   r   	DataFramerm   r	   rG   rH   rI   rF   r"   )pdr2   rM   models       r)   #test_iforest_preserve_feature_namesr   H  s     
		X	&B
))


"C
#))A,6A$?E		 	 	" g{3		!  s   9,B..B7c                 ,   t        ddd      \  }} | |      }|j                          d}t        d|d      j                  |      }|j	                  |      }|dk  j                         |j                  d   z  t        j                  |      k(  sJ y)	zCheck that `IsolationForest` accepts sparse matrix input and float value for
    contamination.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27626
    r-   r{   r   r   r   r   )r   rv   r   N)	r   sort_indicesr	   r"   rq   sumrV   rD   approx)r+   rM   r   rv   r   
X_decisions         r)   -test_iforest_sparse_input_float_contaminationr   Z  s     JDAqANNMm!	c!f  **1-JN!AGGAJ.&--2NNNNr:   r^   r   r   c           	         ddgddgddgddgddgddgddgddgg}t        | |d	
      }|j                  |       |j                  |       }|j                  |      }t	        j
                  |dd	       t	        j                  |d	d       kD  sJ t        |ddgz  ddgz  z          t        | |d
      }|j                  |       t        d|      5  |j                  |      }d	d	d	       t        |       y	# 1 sw Y   xY w)z5Check that `IsolationForest.predict` is parallelized.ry   rg   r   r   rz   r{   r|   r}   N)r   rv   r^   r   	threadingr_   )	r	   r"   rq   r#   r    r   r   r   r   )	r$   rv   r^   rM   rW   r   r   clf_parallelpred_paralells	            r)   test_iforest_predict_parallelr   n  s!   
 bB8b"X1v1v1v1vAwOA '}TC GGAJ**1--M;;q>D 66-$%}Sb/A(BBBBtQ!WqB4x/0"'}RL Q	+f	5 0$,,Q/0 t]+	0 0s   C::Dr   )9__doc__rG   unittest.mockr   r   numpyr    rD   joblibr   sklearn.datasetsr   r   r   sklearn.ensembler	   sklearn.ensemble._iforestr
   sklearn.metricsr   sklearn.model_selectionr   r   sklearn.utilsr   sklearn.utils._testingr   r   r   r   sklearn.utils.fixesr   r   rC   r0   r*   markparametrizer9   rO   rY   r\   re   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r:   r)   <module>r      s    %   # J J , : ) C ,  ? {?  +^n-LM: N:23,B0&(00 4.91 :10B("(. 0+*+ 9I{;ST: U	
: 0,+, 9I{;ST: U	
:9D .9G :G$ +^n-LMO NO& Aq6*4.9, : +,r:   