
    e!h                     \   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlZd dlZd dlmZmZ d dlmZ d d	lmZ d d
lmZmZmZmZmZmZmZm Z  d dl!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z*m+Z+m,Z, d dl-m.Z.m/Z/m0Z0 dZ1dZ2e1e2z   Z3d Z4d Z5d Z6d Z7d Z8d Z9ejt                  jw                  deef      d        Z<d Z=d Z>d Z?d Z@d ZAd ZBd  ZCd! ZDd" ZEd# ZFd$ ZGd% ZHd& ZId' ZJd( ZKd) ZLd* ZMejt                  j                  e.d+,      d-        ZOd. ZPd/ ZQd0 ZRd1 ZSd2 ZTejt                  jw                  deef      d3        ZUd4 ZVd5 ZWd6 ZXd7 ZYd8 ZZejt                  jw                  deef      d9        Z[d: Z\d; Z]d< Z^d= Z_d> Z`d? Zaejt                  jw                  d@ej                  ej                  ej                  g      dA        ZedB ZfdC ZgdD ZhdE ZidF ZjdG ZkdH ZldI ZmdJ ZndK ZodL Zpejt                  jw                  deeef      dM        Zqejt                  jw                  dNej                  ej                  g      dO        Ztejt                  jw                  dP ee/e0            dQ        Zuejt                  jw                  dRej                  ej                  dSfej                  ej                  dSfej                  ej                  dTfej                  ej                  dTfg      dU        Zxejt                  jw                  dV edWX       edWX       edWX      g      dY        ZydZ Zzd[ Z{e,ejt                  jw                  d\e0      d]               Z|ejt                  jw                  d^eeeg      d_        Z}ejt                  jw                  d^eeeg      ejt                  jw                  d`dae~dbfdceddfg      de               Zejt                  jw                  d^ee ej                  e      g      ejt                  jw                  dfdg dh g      ejt                  jw                  didcdag      dj                      Zejt                  jw                  d^eeeg      dk        Zejt                  jw                  deeeg      ejt                  jw                  dldmdngdddoddpdqdrdsf	ddt ddoddpdudrdsf	ddv ddodwdxdydudzf	ddd{ dodwd| d}drd~f	dddddd ddrd~f	dg      d               Zejt                  jw                  deddddddgfee1ff      d        Zd Zd Zejt                  jw                  d^eeeeg      d        Zejt                  jw                  d\e0      d        Zejt                  jw                  dej                  ej                  g      d        Zy)    N)defaultdict)Mapping)partial)StringIO)product)assert_array_almost_equalassert_array_equal)sparse)clone)ENGLISH_STOP_WORDSCountVectorizerHashingVectorizerTfidfTransformerTfidfVectorizerstrip_accents_asciistrip_accents_unicode
strip_tags)GridSearchCVcross_val_scoretrain_test_split)Pipeline)	LinearSVC)assert_allclose_dense_sparseassert_almost_equalskip_if_32bit)_IS_WASMCSC_CONTAINERSCSR_CONTAINERS)zthe pizza pizza beer copyrightzthe pizza burger beer copyrightz!the the pizza beer beer copyrightzthe burger beer beer copyrightzthe coke burger coke copyrightzthe coke burger burger)zthe salad celeri copyrightz)the salad salad sparkling water copyrightzthe the celeri celeri copyrightzthe tomato tomato salad waterz the tomato salad water copyrightc                 4    t        |       j                         S N)r   upperss    r/var/www/html/diagnosisapp-backend/venv/lib/python3.12/site-packages/sklearn/feature_extraction/tests/test_text.py	uppercaser%   9   s     #))++    c                 &    | j                  dd      S )N   ée)replacer"   s    r$   strip_eacuter+   =   s    99T3r&   c                 "    | j                         S r    splitr"   s    r$   split_tokenizer/   A   s    779r&   c                     dgS )Nthe_ultimate_feature r"   s    r$   lazy_analyzer3   E   s    "##r&   c                     d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d	} d
}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d
}t        |       |k(  sJ y )N   àáâãäåçèéêëaaaaaaceeee   ìíîïñòóôõöùúûüýiiiinooooouuuuy   إu   ا   this is à testthis is a testu   öou   ̀́̂̃ u   ȫ)r   aexpecteds     r$   test_strip_accentsrA   I   s     AH #x///(A H #x/// 	AH #x/// 	AH #x/// 	AH #x/// 	#AH #x/// 	AH #x///r&   c                      d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ y )	Nr5   r6   r7   r8   r9   r=   r:   r;   )r   r>   s     r$   test_to_asciirC   m   sz     AHq!X---(A Hq!X--- 	AHq!X--- 	AHq!X---r&   
Vectorizerc                     | d      j                         }d}g d} ||      |k(  sJ d}g d} ||      |k(  sJ  | d      j                         }t        d	      }g d
} ||      |k(  sJ  | t              j                         }d}g d} ||      |k(  sJ  | t        d      j                         }d}g d} ||      |k(  sJ y )Nasciistrip_accents:   J'ai mangé du kangourou  ce midi, c'était pas très bon.)
aimangedu	kangouroucemidietaitpastresbonz0This is a test, really.

 I met Harry yesterday.)thisistestreallymetharry	yesterdayfile)input'This is a test with a file-like object!)rT   rU   rV   withr[   likeobjectpreprocessoru;   J'ai mangé du kangourou  ce midi,  c'était pas très bon.)
AIMANGEDU	KANGOUROUCEMIDIETAITPASTRESBON)	tokenizerrH   )
zj'airK   rL   rM   rN   zmidi,zc'etaitrQ   rR   zbon.)build_analyzerr   r%   r/   )rD   watextr@   s       r$   test_word_analyzer_unigramsrq      s    	'	*	9	9	;BGDH d8x?DLHd8x	&	!	0	0	2B=>DGHd8x 
	+	:	:	<BHDH d8x 
nG	D	S	S	UBGDH d8xr&   c                  b    t        ddd      j                         } d}g d} | |      |k(  sJ y )Nwordunicode      analyzerrH   ngram_rangerI   )rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   zai mangezmange duzdu kangourouzkangourou cezce midiz
midi etaitz	etait paszpas tresztres bon)r   rn   )ro   rp   r@   s      r$   'test_word_analyzer_unigrams_and_bigramsr{      sA    	yf
n  HDH* d8xr&   c                  p   d} | j                  d      }t        dd      j                         }t        j                  t
              5   ||       d d d        t        ddd      j                         }t        j                  t
              5   ||       d d d        y # 1 sw Y   PxY w# 1 sw Y   y xY w)	NrI   zutf-8ru   rF   )rz   encodingchar      )ry   rz   r}   )encoder   rn   pytestraisesUnicodeDecodeError)rp   
text_bytesro   cas       r$   test_unicode_decode_errorr      s     HDW%J 
Vg	>	M	M	OB	)	* 
: 
Vg
n  
)	* 
:   s   	B 	B, B),B5c                  Z   t        ddd      j                         } d}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ d
}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ t        ddd      j                         } t        d      }g d} | |      d d |k(  sJ y )Nr~   rt   r   rx   u9   J'ai mangé du kangourou  ce midi, c'était pas très bon)zj'az'aizai zi mz ma   )zs tresz tres ztres bzres bozes bon1This 
	is a test, really.

 I met Harry yesterday)thihisis zs iz is)z yesteyesteresterdsterdaterdayr[   r\   ry   rz   r]   r   rn   r   cngarp   r@   s      r$   test_char_ngram_analyzerr      s    yfn 	 GD2H:bq>X%%%AH:bc?h&&&BD2H:bq>X%%%AH:bc?h&&&v6n 	 =>D2H:bq>X%%%r&   c                     t        ddd      j                         } d}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ t        d
dd      j                         } t        d      }g d} | |      d d |k(  sJ y )Nchar_wbrt   r   rx   r   )z thr   r   r   z thir   )r   r   r   r   zerday r   r[   r   zA test with a file-like object!)z a z tetesestzst z tesr   r   r   s      r$   test_char_wb_ngram_analyzerr     s    )n 	 CD3H:bq>X%%%AH:bc?h&&&yfn 	 56D:H:bq>X%%%r&   c                     t        ddd      j                         } d}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ t        d
dd      j                         }t        |      } ||       | |      k(  sJ y )Nrs   rt   r   rx   r   )zthis is testzis test reallyztest really metr   )ztest really met harry yesterdayzthis is test really met harryz"is test really met harry yesterdayr[   r   r   )r   rp   r@   	cnga_filer[   s        r$   test_word_ngram_analyzerr     s    yfn 	 CDDH:bq>X%%%H
 :bc?h&&&v6n  D>DT?d4j(((r&   c                  B   ddd} t        | j                               }t        t        t        t        t        t              fD ]  } ||       }t        |      }|j                  t               t        |t              r|j                  | k(  sJ t        |j                        |k(  sJ |j                  t              }|j                  d   t!        |      k(  sJ  ||       }t        |      }|j#                  |      }t!        |      |j                  d   k(  rJ  y )Nr   rv   pizzabeer
vocabulary)setkeysdictlistiterr   r   intr   fitJUNK_FOOD_DOCS
isinstancer   vocabulary_	transformshapeleninverse_transform)vocabtermstypvvectXinvs          r$   &test_countvectorizer_custom_vocabularyr   6  s    #E

E dD'+s";< &J!, a!##u,,,t''(E111NN>*wwqzSZ'''J!,$$Q'3x1771:%%%&r&   c                     ddg} t        dt        |       fdt               fg      }|j                  t              }t        |j                  d   j                        t        |       k(  sJ |j                  d   t        |       k(  sJ y )Nr   r   countr   tfidfrv   )
r   r   r   fit_transformALL_FOOD_DOCSr   named_stepsr   r   r   )what_we_likepiper   s      r$   /test_countvectorizer_custom_vocabulary_pipeliner   K  s    V$Lo>?&()	
D 	=)At(445\9JJJJ771:\****r&   c                      ddd} d}t        j                  t        |      5  t        |       }|j	                  dg       d d d        y # 1 sw Y   y xY w)Nr   r   z$Vocabulary contains repeated indicesmatchr   pasta_sizilianar   r   
ValueErrorr   r   )r   msgr   s      r$   7test_countvectorizer_custom_vocabulary_repeated_indicesr   X  sN    #E
0C	z	- &%0#$%& & &s   AAc                      ddd} t        j                  t        d      5  t        |       }|j	                  dg       d d d        y # 1 sw Y   y xY w)Nrv   rw   r   zdoesn't contain indexr   r   pasta_verdurar   r   r   s     r$   0test_countvectorizer_custom_vocabulary_gap_indexr   `  sI    #E	z)@	A $%0/"#$ $ $s   A		Ac                     t               } | j                  d       | j                         t        k(  sJ | j                  d       t	        j
                  t              5  | j                          d d d        | j                  d       t	        j
                  t              5  | j                          d d d        g d}| j                  |       | j                         t        |      k(  sJ y # 1 sw Y   xY w# 1 sw Y   JxY w)Nenglish
stop_words_bad_str_stop__bad_unicode_stop_)someotherwords)r   
set_paramsget_stop_wordsr   r   r   r   r   )cvstoplists     r$   test_countvectorizer_stop_wordsr   g  s    		BMMYM'"4444MM-M.	z	" 
MM1M2	z	" 
)HMMXM&#h-///  s   !C3%C?3C<?Dc                  @   t        j                  t        d      5  t        g       } | j	                  dg       d d d        t        j                  t        d      5  t        dd      }|j	                  g d       d d d        y # 1 sw Y   NxY w# 1 sw Y   y xY w)	Nzempty vocabularyr   r   foo      ?r   )max_dfr   )zto be or not to bez
and me toozand so do your   )r   r   s     r$   %test_countvectorizer_empty_vocabularyr   v  s    	z);	< "-% 
z);	< E39=	CDE E	 E Es   B!BBBc                      t               } | j                  t        d d       }| j                  t        dd        }|j                  d   |j                  d   k7  sJ y )Nr   rv   )r   r   r   r   )r   X1X2s      r$   test_fit_countvectorizer_twicer     sV    		B			-+	,B			-+	,B88A;"((1+%%%r&   c                      g d} d}t        |      }|j                  |        g d}|j                         }t        ||       y)zCheck `get_feature_names_out()` when a custom token pattern is passed.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12971
    z&This is the 1st document in my corpus.z This document is the 2nd sample.zAnd this is the 3rd one.zIs this the 4th document?z'[0-9]{1,3}(?:st|nd|rd|th)\s\b(\w{2,})\btoken_pattern)documentonesampleN)r   r   get_feature_names_outr	   )corpusr   
vectorizerr@   feature_names_outs        r$   )test_countvectorizer_custom_token_patternr     sG    
F ?M }=JV$,H"88:((3r&   c                      g d} d}d}t        |      }t        j                  t        |      5  |j	                  |        ddd       y# 1 sw Y   yxY w)zCheck that we raise an error if token pattern capture several groups.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12971
    r   z)([0-9]{1,3}(?:st|nd|rd|th))\s\b(\w{2,})\bz,More than 1 capturing group in token patternr   r   Nr   r   r   r   r   )r   r   err_msgr   s       r$   <test_countvectorizer_custom_token_pattern_with_several_groupr     sO    
F AM<G }=J	z	1 v  s   AAc                  T   g d} d}t        d|       }t        j                  t        |      5  |j	                  |        d d d        t        j                         5  t        j                  dt               |j                  |        d d d        y # 1 sw Y   RxY w# 1 sw Y   y xY w)N)SampleUpperCase
VocabularyzyUpper case characters found in vocabulary while 'lowercase' is True. These entries will not be matched with any documentsT)	lowercaser   r   error)	r   r   warnsUserWarningr   warningscatch_warningssimplefilterr   )r   messager   s      r$   'test_countvectorizer_uppercase_in_vocabr    s     ;J	)  !4JGJ	k	1 #z"# 
	 	 	" )g{3Z() )# #) )s   B,BBB'c                      g dg dg dg} t        dd      j                  |       }g d}|j                  |      }t        ||       y)	z0Check get_feature_names_out for TfidfTransformerrv   rv   rv   rv   rv   r   rv   r   r   Tl2
smooth_idfnorm)r?   cbN)r   r   r   r	   )r   trfeature_names_inr   s       r$   %test_tf_transformer_feature_names_outr    sI    	Iy)A	T	5	9	9!	<B&001AB'):;r&   c                  v   g dg dg dg} t        dd      }|j                  |       j                         }|dk\  j                         sJ t	        |dz  j                  d	
      g d       g dg dg dg} t        dd      }|j                  |       j                         }|dk\  j                         sJ y )Nr	  r
  r  Tr  r  r   rw   rv   axisr   r   r   )r   r   toarrayallr   sumr   r  r   s      r$   test_tf_idf_smoothingr    s    	Iy)A	T	5BQ'')EQJ uaxnn!n4oF 
Iy)A	T	5BQ'')EQJr&   zcno floating point exceptions, see https://github.com/numpy/numpy/pull/21895#issuecomment-1311525881)reasonc                     g dg dg dg} t        dd      }|j                  |       j                         }|dk\  j                         sJ t	        |dz  j                  d	
      g d       g dg dg dg} t        dd      }d}t        j                  t        |      5  |j                  |       j                          d d d        y # 1 sw Y   y xY w)Nr	  r
  r  Fr  r  r   rw   rv   r  r  zdivide by zeror   )	r   r   r  r  r   r  r   r  RuntimeWarning)r   r  r   in_warning_messages       r$   test_tfidf_no_smoothingr"    s     
Iy)A	U	6BQ'')EQJ uaxnn!n4oF 
Iy)A	U	6B)	n,>	? &
##%& & &s   # CCc                      dgdgdgg} t        ddd       }|j                  |       j                         }|d   dk(  sJ |d   |d   kD  sJ |d   |d   kD  sJ |d   dk  sJ |d   dk  sJ y )Nrv   rw   r   TF)sublinear_tfuse_idfr  r   )r   r   r  r  s      r$   test_sublinear_tfr&    s    
qcA3A	tU	FBQ'')E8q==8eAh8eAh8a<<8a<<r&   c                  .	   t        t        d d       } t        d   g}t        t              dz
  }t        d      }|j	                  |       }t        |d      r|j                         }|d|j                  d   f   dk(  sJ t        |j                  	      }||fD ]  }|j                  |      }t        |d      r|j                         }|j                  }|d|d
   f   dk(  sJ |d|d   f   dk(  sJ |d|d   f   dk(  sJ d|vsJ d|vsJ |d|d   f   dk(  sJ |d|d   f   dk(  sJ |d|d   f   dk(  sJ |d|d   f   dk(  rJ  t        d      }	|	j                  |      j                  |      j                         }
t        |	j                        t        |j                        k(  sJ |
j                  |t        |j                        fk(  sJ |	j                        j                         }|j                  t        |      t        |j                        fk(  sJ t        dd      }|j                  |      j                  |      j                         }t        |d      rJ t        d      }t        j                  t               5  |j                  |       d d d        t#        t%        j&                  |d      dg|z         t        t        d d       } t)        d      }|j*                  |_        |j	                  |       j                         }|j,                  rJ t#        |
|       |j                  |      j                         }t#        ||       t        d 	      }t        j                  t               5  |j                  |        d d d        |j/                  dd       |j1                         }d}t3        |      } ||      }||k(  sJ |j/                  dd        t        j                  t               5  |j1                          d d d        d |_        t        j                  t               5  |j5                          d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   \xY w# 1 sw Y   y xY w)!Nrv         ?r   tocsrr   r   rw   r   saladtomatowaterthe	copyrightcokeburgerr   l1r  F)r  r%  idf_Tr%  r  r   rF   )rH   r   rI   _gabbledegook_)rH   rb   _invalid_analyzer_type_)r   r   r   r   r   hasattrr+  r   r   r   r   r  r5  r   r   r   r   r   npr  r   r   fixed_vocabulary_r   build_preprocessorr   rn   )
train_data	test_datan_trainv1counts_trainv2r   counts_testr   t1r   
tfidf_testt2tft3tvtfidf2tfidf_test2v3	processorrp   r@   results                          r$   test_vectorizerrO    s   mCR()Jr"#I- 1$G 
	$B##J/L|W%#))+2>>'223q888 
BNN	3B "X 8kk),;(%++-K]]
1j112a7771j223q8881j112a777 J&&& *,,, 1j001Q6661j223q8881j001Q6661j112a777/84 
t	$BFF< **<8@@BErww<3r~~....;;7C$78888 k*224JIBNN0CDDDD 
tU	3B			'	'	5	=	=	?Br6""" 
$	'B	z	" #
\"# bffRa03%'/B mCR()J	d	#B		BIj)113F####eV, ,,y)113Kj+6 
D	)B	z	" !
Z ! MM5M9%%'IGD"4(Ht_Fv MM 0tMD	z	"  
  .BM	z	" 
 Q# #,! !   
 s0   Q&Q3Q?R&Q03Q<?RRc                     d\  } }}}t        | |||      }|j                  t               |j                  j                  | k(  sJ |j                  j
                  |k(  sJ |j                  j                  |k(  sJ |j                  j                  |k(  sJ d|_        d|_        d|_        d|_        |j                  j                  | k(  sJ |j                  j
                  |k(  sJ |j                  j                  |k(  sJ |j                  j                  |k(  sJ |j                  t               |j                  j                  |j                  k(  sJ |j                  j
                  |j
                  k(  sJ |j                  j                  |j                  k(  sJ |j                  j                  |j                  k(  sJ y )N)r  FFF)r  r%  r  r$  r3  T)r   r   r   _tfidfr  r%  r  r$  )r  r%  r  r$  rI  s        r$   test_tfidf_vectorizer_settersrR  i  s   .G+D':|	7z
B FF>99>>T!!!99'''99:---99!!\111 BGBJBMBO99>>T!!!99'''99:---99!!\111FF>99>>RWW$$$99

***992==00099!!R__444r&   c                     t               } | j                  t              }|j                  }|j                  t        t              | j                  fk(  sJ |j                  | j                  k(  sJ t        j                  |j                        dkD  sJ t        j                  |j                        dk  sJ t        j                  |j                        dkD  sJ t        j                  |j                        dk  sJ t        |j                  d         D ]9  }t        t        j                  j                  |d   j                  d      d       ; t        dd      } | j                  t              }|j                  t        t              | j                  fk(  sJ |j                  | j                  k(  sJ |j                  }||kD  sJ |d|z  k  sJ t        j                  |j                        dkD  sJ t        j                  |j                        dk  sJ t        |j                  d         D ]9  }t        t        j                  j                  |d   j                  d      d       ; y )	Nr(  r   rv   rw   r   ru   r3  )rz   r  )r   r   r   nnzr   r   
n_featuresdtyper:  mindatamaxranger   linalgr  )r   r   	token_nnzi
ngrams_nnzs        r$   test_hashing_vectorizerr_    s   A	M"AI77s=)1<<888877agg 66!&&>B66!&&>A66!&&>A66!&&>A 1771: ?BIINN1Q499a8#>? 	f48A	M"A77s=)1<<888877agg J	!!!I%%% 66!&&>B66!&&>A 1771: ?BIINN1Q499a8#>?r&   c                  4   t        d      } t        j                  t              5  | j	                          d d d        | j
                  rJ | j                  t              }|j                  \  }}t        | j                        |k(  sJ | j	                         }t        |t        j                        sJ |j                  t        k(  sJ t        |      |k(  sJ t!        g d|       t#        |      D ]%  \  }}|| j                  j%                  |      k(  r%J  g d}t        |      } | j	                         }t!        g d|       | j
                  sJ t#        |      D ]%  \  }}|| j                  j%                  |      k(  r%J  y # 1 sw Y   XxY w)Nr)  r*  	r   r2  celerir1  r   r,  	sparklingr-  r.  r   )r   r   r   r   r   r;  r   r   r   r   r   r   r:  ndarrayrV  r`   r	   	enumerateget)r   r   	n_samplesrU  feature_namesidxnamer   s           r$   test_feature_namesrk    s   		$B 
z	" #
  "##### 	'AGGIzr~~*,,,,,.MmRZZ000&(((}+++
	
 	 }- /	Tbnn((..../
E 
E	*B,,.M
	
 	 }- /	Tbnn((..../# #s   FFc                 ~    h d} | dd      }|j                  t               t        |j                        |k(  sJ y )N>   r   r   r,  r2  g333333?   )r   max_features)r   r   r   r   )rD   expected_vocabularyr   s      r$   test_vectorizer_max_featuresrp    s<    > 3Q7JNN=!z%%&*====r&   c                     t        d      } t        d      }t        d       }| j                  t              j                  d      }|j                  t              j                  d      }|j                  t              j                  d      }| j	                         }|j	                         }|j	                         }d|j                         k(  sJ d|j                         k(  sJ d|j                         k(  sJ d|t        j                  |         k(  sJ d|t        j                  |         k(  sJ d|t        j                  |         k(  sJ y )Nrv   rn  r   r   r     r/  )r   r   r   r  r   rY  r:  argmax)	cv_1cv_3cv_Nonecounts_1counts_3counts_None
features_1
features_3features_Nones	            r$   "test_count_vectorizer_max_featuresr~    s;    *D*D40G!!.15515=H!!.15515=H''7;;;CK++-J++-J113M !!!! Jryy23333Jryy23333M"))K"89999r&   c                  L   g d} t        dd      }|j                  |        d|j                  j                         v sJ t	        |j                  j                               dk(  sJ d|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ d	|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ y )
Nabcdeaeatr~   r   ry   r   r?   r   r)  rm  rv   )r   r   r   r   r   r   r>  r   s     r$   test_vectorizer_max_dfr    s   %IF37DHHY$""''))))t$$&'1,,,DKHHYd&&++----t$$&'1,,,DKHHYd&&++----t$$&'1,,,r&   c                  L   g d} t        dd      }|j                  |        d|j                  j                         v sJ t	        |j                  j                               dk(  sJ d|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ d	|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ y )
Nr  r~   rv   )ry   min_dfr?   r   rw   r  g?)r   r   r   r   r   r  r  s     r$   test_vectorizer_min_dfr  )  s   %IF15DHHY$""''))))t$$&'1,,,DKHHYd&&++----t$$&'1,,,DKHHYd&&++----t$$&'1,,,r&   c                     ddg} t        dd      }|j                  |       j                         }t        g d|j	                                t        g dg dg|       t        ddd	
      }|j                  |       j                         }t        g dg dg|       t        ddd	t
        j                        }|j                  |       }|j                  t
        j                  k(  sJ y )Naaabcabbder~   r   r  )r?   r  r  dr)   )r   rv   rv   r   r   )rv   rw   r   rv   rv   T)ry   r   binary)rv   rv   rv   r   r   )rv   rv   r   rv   rv   )ry   r   r  rV  )r   r   r  r	   r   r:  float32rV  )r>  r   r   X_sparses       r$   test_count_binary_occurrencesr  ;  s    '"IF37D9%--/A0$2L2L2NO91= F3tDD9%--/A91= F3t2::VD!!),H>>RZZ'''r&   c                     ddg} t        ddd       }|j                  |       }t        j                  |dd j                        dk(  sJ t        j                  |dd	 j                        d	k(  sJ |j
                  t        j                  k(  sJ t        ddd
d       }|j                  |       }t        j                  |j                        dk(  sJ |j
                  t        j                  k(  sJ t        ddd
d t        j                        }|j                  |       }|j
                  t        j                  k(  sJ y )Nr  r  Fr~   )alternate_signry   r  r   rv   r   rw   T)ry   r  r  r  )ry   r  r  r  rV  )r   r   r:  rY  rX  rV  float64)r>  r   r   s      r$   test_hashed_binary_occurrencesr  O  s   '"IEFNDy!A66!Aa&++!###66!Aa&++!###77bjj    dD 	y!A66!&&>Q77bjj    dRZZD 	y!A77bjj   r&   c                 ~   t         } |        }|j                  |      }|j                  |      }t        |t              sJ |j                         }t        ||      D ]g  \  }}t        j                  t        j                   ||                  }t        j                  t        j                  |            }t        ||       i t        j                  |      sJ |j                  dk(  sJ |j                         }	|j                  |	      }
t        ||
      D ]7  \  }}t        t        j                  |      t        j                  |             9 |j                         }|j                  |      }t        ||      D ]7  \  }}t        t        j                  |      t        j                  |             9 y )Ncsr)r   r   r   r   r   rn   zipr:  sortuniquer	   r
   issparseformatr  tocsc)rD   rX  r   transformed_datainversed_dataanalyzedocinversed_termsr   transformed_data2inversed_data2terms2transformed_data3inversed_data3terms3s                  r$   !test_vectorizer_inverse_transformr  i  s|    DJ!//5001ABMmT***'')G"47 2^		'#,/0>!:;5.12
 ??+,,,""e+++ )002112CDN]N; <v2775>2776?;< )..0112CDN]N; <v2775>2776?;<r&   c                     t         t        z   } dgt        t               z  dgt        t              z  z   }t        | |dd      \  }}}}t	        dt               fdt               fg      }dd	gd
d}t        ||dd      }|j                  ||      j                  |      }	t        |	|       |j                  dk(  sJ |j                  j                  d   }
|
j                  dk(  sJ y )Nr(  rv   g?r   	test_sizerandom_stater   svcrv   rv   ru   hingesquared_hinge)vect__ngram_range	svc__lossr   )n_jobsr   r   )r   NOTJUNK_FOOD_DOCSr   r   r   r   r   r   r   predictr	   best_score_best_estimator_r   rz   rX  targetr=  r>  target_traintarget_testpipeline
parametersgrid_searchpredbest_vectorizers              r$   -test_count_vectorizer_pipeline_grid_selectionr    s    --D TC''1#4E0F*FFF 8Hf!84J	< &/"34uik6JKLH %f-/J xA!DK ??:|4<<YGDt[)
 ""c)))!11==fEO&&&000r&   c                     t         t        z   } dgt        t               z  dgt        t              z  z   }t        | |dd      \  }}}}t	        dt               fdt               fg      }dd	gd
dd}t        ||d      }|j                  ||      j                  |      }	t        |	|       |j                  dk(  sJ |j                  j                  d   }
|
j                  dk(  sJ |
j                  dk(  sJ |
j                   rJ y )Nr(  rv   g?r   r  r   r  r  ru   )r3  r  r  )r  
vect__normr  )r  r   r  )r   r  r   r   r   r   r   r   r   r  r	   r  r  r   rz   r  r;  r  s              r$   'test_vectorizer_pipeline_grid_selectionr    s%   --D TC''1#4E0F*FFF 8Hf!84J	< &/"34uik6JKLH %f-"/J xA>K ??:|4<<YGDt[)
 ""c)))!11==fEO&&&0004'''00000r&   c                      t         t        z   } dgt        t               z  dgt        t              z  z   }t        dt	               fdt               fg      }t        || |d      }t        |g d       y )Nr(  rv   r   r  r   )r   r  )r   r  r   r   r   r   r   r	   )rX  r  r  	cv_scoress       r$   )test_vectorizer_pipeline_cross_validationr    sj    --D TC''1#4E0F*FFF&/"34uik6JKLH$1=Iy/2r&   c                     d} t               }|j                  | g      }|j                  dk(  sJ t        d d      }|j	                  | g      }|j                  dk(  sJ |j
                  |j
                  k(  sJ t        t        j                  |j                        t        j                  |j                               y )Nu   Машинное обучение — обширный подраздел искусственного интеллекта, изучающий методы построения алгоритмов, способных обучаться.)rv      F)r  r  )rv   i   )
r   r   r   r   r   rT  r	   r:  r  rX  )r   r   	X_countedX_hasheds       r$   test_vectorizer_unicoder    s    	1  D""H:.I??g%%%$u=D~~xj)H>>Z''' ==HLL((( rwwy~~.0FGr&   c                      ddg} t        |       }|j                  t              }|j                  t              }t	        |j                         |j                                |j                  sJ y )Nr   rb  r   )r   r   r   r   r   r  r;  )r   r   X_1X_2s       r$   +test_tfidf_vectorizer_with_fixed_vocabularyr    sY    8$Jj1D


]
+C
..
'CckkmS[[];!!!!r&   c                     t               t        d      t        d      t        d      t               t        t              t        t              t        t              j	                  t
              t        t        	      j	                  t
              t               t        t              t               j	                  t
              g} | D ]  }t        j                  |      }t        j                  |      }t        |      |j                  k(  sJ |j                         |j                         k(  sJ t        |j                  t
              |j                  t
                      y )
Nr3  r4  T)r  ru   rz   ra   )ry   rG   )r   r   r   r3   r   r   r+   r   pickledumpsloadstype	__class__
get_paramsr   r   )	instancesorigr#   copys       r$   test_pickling_vectorizerr    s   t$&f-Z0.Z044^Dl377G.n-I  
LL||ADzT^^+++ DOO$5555$~.~.	

r&   factoryc                     t               } | |      }d}t        j                  t        j                  |            } ||      } ||      }||k(  sJ y)z_Tokenizers cannot be pickled
    https://github.com/scikit-learn/scikit-learn/issues/12833
    rI   N)r   r  r  r  )r  vecfunctionrp   roundtripped_functionr@   rN  s          r$   test_pickling_built_processorsr    sS     
Cs|HGD"LLh)?@~H"4(FXr&   c                     t         j                  j                  d      } t        j                  g d      }t	        dd      D ]  }t        | j                  |dd            }t        |      }t        j                  t        j                  |            }|j                  t               |j                  t               t        |j                         |j                                 y Nr   ra  d   r   F)sizer*   r   )r:  randomRandomStatearrayrZ  r   choicer   r  r  r  r   r   r	   r   )rngvocab_wordsx	vocab_setr   unpickled_cvs         r$   -test_countvectorizer_vocab_sets_when_picklingr  3  s     ))


"C((
	
K 1c] 


;Q
FG		2||FLL$45
}'$$&(J(J(L	

r&   c                     t         j                  j                  d      } t        j                  g d      }t	        dd      D ]  }t               }| j                  |dd      }t	        dd      D ]
  }||||   <    t        |      }t        j                  t        j                  |            }|j                  t               |j                  t               t        |j                         |j                                 y r  )r:  r  r  r  rZ  r   r  r   r  r  r  r   r   r	   r   )r  r  r  
vocab_dictr   yr   r  s           r$   .test_countvectorizer_vocab_dicts_when_picklingr  O  s    
))


"C((
	
K 1c] 
V


;Q
>q! 	%A#$JuQx 	%
3||FLL$45
}'$$&(J(J(L	

r&   c                     t               j                  t              } t               j	                  |       }t        j                  |      }t        j                  |      }t        |      |j                  k(  sJ t        |j                  |       j                         |j                  |       j                                y r    )r   r   r   r   r   r  r  r  r  r  r	   r  )r   r  r#   r  s       r$   test_pickling_transformerr  l  s    ''7A!!!$DTA<<?D:'''t))!,4468J8J18M8U8U8WXr&   c                  2   t               j                  t              } t               j	                  |       }t               }|j
                  |_        t        |j                  |       j                         |j                  |       j                                y r    )	r   r   r   r   r   r5  r	   r   r  )r   r  r  s      r$   test_transformer_idf_setterr  u  si    ''7A!!!$DD		DIt~~a(002DNN14E4M4M4OPr&   c                     t        d      } | j                  t               t        | j                  d      }| j                  |_        t        |j                  t              j                         | j                  t              j                                t        | j                  d      }d}t        j                  t        |      5  | j                  |_        d d d        y # 1 sw Y   y xY w)NTr6  r   r%  Fz+`idf_` cannot be set when `user_idf=False`.r   )r   r   r   r   r5  r	   r   r  r   r   r   )r  r  r   s      r$   test_tfidf_vectorizer_setterr  }  s    4(DHH^d&6&6ED		DI~&..0~&..0
 d&6&6FD;G	z	1 II	  s   C''C0c                  &   t        d      } | j                  t               t        | j                  d      }t	        | j
                        }dg|dz   z  }t        j                  t              5  t        |d|       d d d        y # 1 sw Y   y xY w)NTr6  r  r   rv   r5  )
r   r   r   r   r   r5  r   r   r   setattr)r   r  expected_idf_leninvalid_idfs       r$   %test_tfidfvectorizer_invalid_idf_attrr    sw    4(DHH^d&6&6ED499~%+a/0K	z	" +fk*+ + +s   0BBc                      g d} t        |       }t        j                  t              5  |j	                  g        d d d        y # 1 sw Y   y xY w)N)r?   r  r  r?   r?   r   r   r   s     r$   test_non_unique_vocabr    s=    %Ee,D	z	"   s   AAc                      d} t         }d }t        j                  ||       5   |        d d d        y # 1 sw Y   y xY w)Nz?np.nan is an invalid document, expected byte or unicode string.c                  \    t               } | j                  dt        j                  dg       y )Nhello worldhello hello)r   r   r:  nan)hvs    r$   funcz0test_hashingvectorizer_nan_in_docs.<locals>.func  s#     
-?@r&   r   )r   r   r   )r  	exceptionr  s      r$   "test_hashingvectorizer_nan_in_docsr    s?     PGIA 
y	0   s   4=c                  0   t        ddd       } | j                  sJ | j                  ddg      j                         }t	        |j                         g d       | j                  ddg      j                         }t	        |j                         g d       y )NTF)r  r%  r  r  r  )rv   rv   rv   r   )r   r  r   r  r	   ravelr   )r   r   r   s      r$   test_tfidfvectorizer_binaryr    ss    tU>A88O8	67??AAqwwy,/	
m]3	4	<	<	>Brxxz<0r&   c                      t        d      } | j                  t               t        | j                  | j
                  j                         y )NTr6  )r   r   r   r   r5  rQ  )r   s    r$   test_tfidfvectorizer_export_idfr    s0    4(DHH^dii)9)9:r&   c                      t        dg      } t        |       }| j                  t               |j                  t               |j                  | j                  k(  sJ y )Nr/  r   )r   r   r   r   r   )
vect_vocabvect_vocab_clones     r$   test_vectorizer_vocab_cloner    sM     UG4JZ(NN=!''':+A+AAAAr&   c                    d} |        }t        j                  t        |      5  |j                  d       d d d        t        j                  t        |      5  |j	                  d       d d d        |j	                  ddg       t        j                  t        |      5  |j                  d       d d d        y # 1 sw Y   xY w# 1 sw Y   ^xY w# 1 sw Y   y xY w)NzBIterable over raw text documents expected, string object received.r   zhello world!	some textzsome other text)r   r   r   r   r   r   )rD   r  r  s      r$   &test_vectorizer_string_object_as_inputr    s     SG
,C	z	1 *.)* 
z	1   GG[+,-	z	1 &n%& &* *   & &s#   B=C	"C=C	CCX_dtypec                     t        j                  dd| d      }t               j                  |      }|j                  |j                  k(  sJ y N
    N  *   rV  r  )r
   randr   r   rV  )r  r   X_transs      r$   test_tfidf_transformer_typer#    s?    BW2>A ..q1G==AGG###r&   zcsc_container, csr_containerc                 $   t        j                  ddt        j                  d      } | |      } ||      }t	               j                  |      }t	               j                  |      }t        ||       |j                  |j                  k(  sJ y r  )r
   r!  r:  r  r   r   r   r  )csc_containercsr_containerr   X_cscX_csrX_trans_cscX_trans_csrs          r$   test_tfidf_transformer_sparser+    sz     	BRZZbAA!E!E"$2259K"$2259K k:!3!3333r&   z0vectorizer_dtype, output_dtype, warning_expectedTFc                    t        j                  g d      }t        |       }d}|r6t        j                  t
        |      5  |j                  |      }d d d        nHt        j                         5  t        j                  dt
               |j                  |      }d d d        j                  |k(  sJ y # 1 sw Y   xY w# 1 sw Y   'xY w)N)numpyscipysklearnrV  z'dtype' should be used.r   r   )r:  r  r   r   r  r  r   r  r  r  rV  )vectorizer_dtypeoutput_dtypewarning_expectedr   r   warning_msg_matchX_idfs          r$   test_tfidf_vectorizer_typer6    s     	./A '78J1\\+->? 	0,,Q/E	0 	0 $$& 	0!!';7,,Q/E	0 ;;,&&&	0 	0	0 	0s   B82,C8CCr  )rw   rv   r  c                    | j                   }t        j                  d| d      }t        j                  t
        |      5  | j                  dg       d d d        t        j                  t
        |      5  | j                  dg       d d d        t        | t              r7t        j                  t
        |      5  | j                  dg       d d d        y y # 1 sw Y   xY w# 1 sw Y   ]xY w# 1 sw Y   y xY w)NzInvalid value for ngram_range=z/ lower boundary larger than the upper boundary.r   zgood news everyone)rz   reescaper   r   r   r   r   r   r   r   )r  invalid_ranger  s      r$   $test_vectorizers_invalid_ngram_ranger;    s     OOMii
( 89 	9G
 
z	1 (%&'( 
z	1 2/012 #()]]:W5 	2MM/01	2 	2 *( (2 2	2 	2s$   C7C&=C2C#&C/2C;c                     | j                         }| j                         }| j                         }| j                  |||      S r    )r   build_tokenizerr<  _check_stop_words_consistency)	estimatorr   tokenize
preprocesss       r$   r>  r>     sA    ))+J((*H--/J22:z8TTr&   c                     d} d| z  }t               t               t               fD ]]  }|j                  g d       t	        j
                  t        |      5  |j                  dg       d d d        |`t        |      du r]J  t        j                         5  t        j                  dt               j                  dg       d d d        t              J |j                  g d	       t	        j
                  t        |      5  |j                  dg       d d d        y # 1 sw Y   xY w# 1 sw Y   mxY w# 1 sw Y   y xY w)
Nz\['and', 'll', 've'\]z}Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens %s not in stop_words.)you'veyouyou'llANDr   r   r  Fr   )rC  rD  rE  blahrF  )r   r   r   r   r   r  r  r   _stop_words_idr>  r  r  r  )lstrr  r  s      r$   'test_vectorizer_stop_words_inconsistentrJ  '  s7   #D	')-	. 
  !?#46G6IJ ;"DE\\+W5 	/}o.	/ ,S1U:::; 
	 	 	" +g{3=/*+ )-555 NNHNI	k	1 +=/*+ +	/ 	/+ ++ +s$   D&-D2
D>&D/	2D;>Er&  c                 J    | dt         j                        }t         j                  }|j                  j                  |      |_        |j                  j                  |      |_        dddd}t               j                  ||      }||j                  j                  k(  sJ y)z
    Check that CountVectorizer._sort_features preserves the dtype of its sparse
    feature matrix.

    This test is skipped on 32bit platforms, see:
        https://github.com/scikit-learn/scikit-learn/pull/11295
    for more details.
    )r   r   r0  r   rv   rw   )zscikit-learnrU   zgreat!N)r:  int64indicesastypeindptrr   _sort_featuresrV  )r&  r   INDICES_DTYPEr   Xss        r$   7test_countvectorizer_sort_features_64bit_sparse_indicesrS  B  s     	fBHH-A HHM		  /AIxx}-AH"#1:J			)	)!Z	8BBJJ,,,,,r&   	Estimatorc                 .   ddig} |        }t        |      du sJ  | d dg      }t        |      dk(  sJ t        |      J |j                  |        G d d	|       } |dg
      }t        |      dk(  sJ  | d dg      }t        |      du sJ y )Nrp   r  Tc                     | d   S Nrp   r2   r  s    r$   <lambda>z?test_stop_word_validation_custom_preprocessor.<locals>.<lambda>e  s
    1V9 r&   and)rb   r   r   c                       e Zd Zd Zy)Ftest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimatorc                     d S )Nc                     | d   S rW  r2   rX  s    r$   rY  zktest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimator.build_preprocessor.<locals>.<lambda>m  s
    QvY r&   r2   )selfs    r$   r<  zYtest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimator.build_preprocessorl  s    &&r&   N)__name__
__module____qualname__r<  r2   r&   r$   CustomEstimatorr\  k  s    	'r&   rc  r   c                 J    t        j                  d      j                  |       S )Nz\w{1,})r8  compilefindallr  s    r$   rY  z?test_stop_word_validation_custom_preprocessor.<locals>.<lambda>s  s    bjj3;;C@ r&   )rm   r   )r>  r   )rT  rX  r  rc  s       r$   -test_stop_word_validation_custom_preprocessorrh  \  s     [!"D
+C(-555
!4%
IC(-888(-555d') ' eW
-C(-888
@eWC )-555r&   zinput_type, err_type, err_msgfilenamer=   r[   z$'str' object has no attribute 'read'c                     dg}t        j                  ||      5   | d |      j                  |       d d d        y # 1 sw Y   y xY w)N"this is text, not file or filenamer   c                 "    | j                         S r    r-   rX  s    r$   rY  z.test_callable_analyzer_error.<locals>.<lambda>  s    QWWY r&   ry   r\   )r   r   r   )rT  
input_typeerr_typer   rX  s        r$   test_callable_analyzer_errorrp  x  sJ     11D	xw	/ V.jAOOPTUV V Vs	   ?Ary   c                     t        | d      S )Nr)openrg  s    r$   rY  rY    s    T#s^ r&   c                 "    | j                         S r    )readrg  s    r$   rY  rY    s     r&   rn  c                     dg}t        j                  t        t        f      5   | ||      j	                  |       d d d        y # 1 sw Y   y xY w)Nrk  rm  )r   r   FileNotFoundErrorAttributeErrorr   )rT  ry   rn  rX  s       r$   &test_callable_analyzer_change_behaviorry    sL     11D	)>:	; K8:6DDTJK K Ks   AAc                     d }| j                  d      }|j                  d       t        j                  t        d      5   ||d      j                  |g       d d d        y # 1 sw Y   y xY w)Nc                     t        d      )Ntesting)	Exceptionrg  s    r$   ry   z6test_callable_analyzer_reraise_error.<locals>.analyzer  s    	""r&   zfile.txtzsample content
r|  r   r[   rm  )joinwriter   r   r}  r   )tmpdirrT  ry   fs       r$   $test_callable_analyzer_reraise_errorr    sb    
# 	JAGG	y		2 F862@@!EF F Fs   A%%A.zjstop_words, tokenizer, preprocessor, ngram_range, token_pattern,analyzer, unused_name, ovrd_name, ovrd_msgrC  rE  r  r~   z'stop_words'
'analyzer'	!= 'word'c                 "    | j                         S r    r-   r"   s    r$   rY  rY        aggi r&   z'tokenizer'c                 "    | j                         S r    r-   r"   s    r$   rY  rY    r  r&   \w+rs   'token_pattern'zis not Nonec                 "    | j                         S r    r!   r"   s    r$   rY  rY    r  r&   c                 "    | j                         S r    r  r"   s    r$   rY  rY    r  r&   z'preprocessor'zis callableru   c                 "    | j                         S r    r  r"   s    r$   rY  rY    r  r&   z'ngram_range')	NNNr  r  r~   r  r  r  c
                     t         }
 |        }|j                  ||||||       d|d|d|	}t        j                  t        |      5  |j                  |
       d d d        y # 1 sw Y   y xY w)N)r   rm   rb   rz   r   ry   zThe parameter z will not be used since  r   )r   r   r   r  r  r   )rD   r   rm   rb   rz   r   ry   unused_name	ovrd_nameovrd_msgr=  r   r   s                r$   test_unused_parameters_warnr    sw    r  J<DOO!#   	C
 
k	-   s   A&&A/zVectorizer, Xrv   rw   )r   barr   )r   bazc                 l     |        }t        |d      rJ |j                  |       t        |d      rJ y )Nn_features_in_)r9  r   )rD   r   r   s      r$   test_n_features_inr    s<     Jz#3444NN1z#34444r&   c                      t        d      } | j                  ddg      j                  }| j                  ddg      j                  }||k(  sJ y )Nrv   rr  helloworld)r   r   r   )r  vocab1vocab2s      r$   )test_tie_breaking_sample_order_invariancer  %  sN     q
)CWWgw'(44FWWgw'(44FVr&   c                  j    t        dd      } | j                  dg      j                  }|d   dk\  sJ y )Ni@B )rw   r   )rU  rz   z22pcs efuturer   )r   r   rM  )hashingrM  s     r$   2test_nonnegative_hashing_vectorizer_result_indicesr  .  s7    7GG 12::G1:??r&   c                 .     |        }t        |d      rJ y)z0Check that vectorizers do not define set_output.
set_outputN)r9  )rT  r   s     r$   'test_vectorizers_do_not_have_set_outputr  5  s    
 +CsL))))r&   c                    t        j                  ddt        j                  d      } | |      }|j	                         }t               j                  |      }|j                  |d      }t        ||       ||usJ |j                  |d      }||u sJ t        j                  t              5  t        ||       ddd       y# 1 sw Y   yxY w)	zJCheck the behaviour of TfidfTransformer.transform with the copy parameter.r  r  r  r   T)r  FN)r
   r!  r:  r  r  r   r   r   r   r   r   AssertionError)r&  r   r(  X_csr_originaltransformerX_transforms         r$   test_tfidf_transformer_copyr  >  s     	BRZZbAA!E ZZ\N"$((/K''D'9K 7e###''E':K%	~	& <$UN;< < <s   0CCrV  c                     t        d      D cg c]  }t        t        j                               ! }}t	        |       j                  |      }|j                  j                  | k(  sJ yc c}w )zCheck that `idf_` has the same dtype as the input data.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/30016
    i r0  N)rZ  struuiduuid4r   r   r5  rV  )rV  r]  r   r   s       r$   (test_tfidf_vectorizer_perserve_dtype_idfr  S  sX     %*'N3qTZZ\	3A3 u-11!4J??  E))) 	4s   $A+)r  r8  r  r  collectionsr   collections.abcr   	functoolsr   ior   	itertoolsr   r-  r:  r   numpy.testingr   r	   r.  r
   sklearn.baser   sklearn.feature_extraction.textr   r   r   r   r   r   r   r   sklearn.model_selectionr   r   r   sklearn.pipeliner   sklearn.svmr   sklearn.utils._testingr   r   r   sklearn.utils.fixesr   r   r   r   r  r   r%   r+   r/   r3   rA   rC   markparametrizerq   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  xfailr"  r&  rO  rR  r_  rk  rp  r~  r  r  r  r  r  r  r  r  r  r  r  rn   r<  r=  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#  r+  int32rL  r6  r;  r>  rJ  rS  rh  rw  rx  rp  paramry  r  r  r  r  r  r  r  r  r2   r&   r$   <module>r     s;    	   # #      G  	 	 	 T S % ! 
 I H  !22, $!0H.* 9J'KL:  M: z <&&4&().&*
+&$0E&4&&)*<  	M  &&&dN5:#?LD/N 'IJ> K>:4-$-$((!4 'IJ< K<>!1H$1N
3H0"
6 &&**''


8
:YQ +1;B ?O5FG&& RZZ$<=$ >$ "GNN$K44 6	2::t$	2::t$	RZZ'	RZZ'	'' 	f-F+F+22(U+6 .9- : -0 /?4EF662 /?4EF #	&+	!GHVV &' +-CD 
';<K =K /?4EF	F	F ?$5G 	5
 x 
	
 
	
 
	
 
	
 
	

	
qCITUIZ@ 	Qq11Q3GHI	.)55 /?4DFWX** .9< :<( 2::rzz":;* <*r&   