
    sii                     Z	   d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlZddlZddlm
Z ddlmZ dd	l m!Z! d
dl"m#Z#m$Z$m%Z% d
dl&m'Z'm(Z( d
dl)m*Z*m+Z+ d
dl,m-Z. erddl/m0Z1 neZ1 ej2        d          Z3e34                    dd          Z5de6de6deeeej7        ej7        f         ee8e8f         f         ddf         fdZ9defdZ:defdZ;deddfdZ<e5j=        deej7        ej7        f         fd            Z>e5j=        deej7        ej7        f         fd            Z?e5j=        deej7        ej7        f         fd            Z@e5j=        deej7        ej7        f         fd            ZAe5j=        dee1ej7        f         fd            ZBe5j=        d eCdee!jD        ej7        ej7        e!jD        ej7        ej7        e!jD        ej7        ej7        f	         fd!            ZE	 dWd"d#d$d%e6de6d&e6d'eFd(eFd)e6deeej7                 eej7                 eej7                 f         fd*ZGee!jD        ejH        ejI                 ejH        ejI                 f         ZJe	 G d+ d,                      ZK G d- d.e          ZL G d/ d0          ZMd1ejH        ejI                 deejH        ejH        ejH        f         fd2ZN	 dXd4e!jD        d5ejH        ejI                 d6ejH        ejI                 d7eOdejH        ejP                 f
d8ZQd9ee!jD        ejH        ejI                 ejH        ejI                 f         d:ejH        ejP                 deKfd;ZRd<eLdeeKeeK         f         fd=ZSd4e!jD        d5ejH        ejI                 d6ejH        ejI                 d>ejH        ejI                 d?ejH        ejT                 dee!jD        ejH        ejI                 ejH        ejI                 ejH        ejI                 f         fd@ZUdAedBee$         dCeCddfdDZVe5j=        de6de6dEeOdFeFdeee!jD                 ej7        f         f
dG            ZWdHe6dIe6deeC         fdJZXdKdLd"d#ejT        dMdNde6de6dOe6dPeFdEeOdQeOdReFd)e6dSej
        jY        dCeCdee*ej7        f         fdTZZ G dU dVe#          Z[dS )YzUtilities for data generation.    N)ThreadPoolExecutor)	dataclass)TYPE_CHECKINGAnyCallableDict	GeneratorList
NamedTupleOptionalSequenceSetTupleTypeUnion)request)typing)r	   )sparse   )DataIterDMatrixQuantileDMatrix)is_pd_cat_dtypepandas_pyarrow_mapper)	ArrayLike	XGBRanker)train)	DataFramejoblibz
./cachedir)verbose	n_samples
n_featuresreturnc              #   0  K   t          j        d          }t          j                            d          }|                    dd| |z                                | |          }t          j        t          j        t          j	        t          j
        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        g}|D ]H}t          j        ||          }||fV  |                                |                                fV  I|D ]H}t          j        ||          }|                    |          }|                    |          }	||	fV  I|                    dd| |z  	                              | |          }t          j        t@          fD ]}
t          j        ||
          }||fV  t          j        t@          fD ]H}t          j        ||          }|                    |          }|                    |          }	||	fV  Id
S )z*Enumerate all supported dtypes from numpy.pandas  r      lowhighsizedtype   g      ?r+   N)!pytestimportorskipnprandomRandomStaterandintreshapeint32int64byteshortintcint_longlonguint32uint64ubyteushortuintcuint	ulonglongfloat16float32float64halfsingledoublearraytolistr   binomialbool_bool)r!   r"   pdrngorigdtypesr-   Xdf_origdfdtype1dtype2s               Z/var/www/html/bet.cuttalo.com/ml/venv/lib/python3.11/site-packages/xgboost/testing/data.py	np_dtypesrZ   /   sL      
	X	&	&B
)


%
%C;;13Y-C;DDLL: D 	






	
	

	










	
	)F,  ( (HT'''AgkkmmQXXZZ'''''  HT''',,t$$\\!__rk <<3Y%;<<<DD: D 8T"  HT(((Ag8T"  HT(((,,t$$\\!__rk	     c            	   #   \  K   t          j        d          } |                                 |                                 |                                 |                                 |                                 |                                 |                                 | 	                                g}t          j        }|                     dd|dgdd|dgdt          j                  }t          j        d| j        fD ]/}|D ]*}|                     dd|dgdd|dgd|          }||fV  +0t          j        }|                                 |                                 g}|                     d	d
|dgdd
|d	gdt          j                  }t          j        d| j        fD ]s}|D ]n}|                     d	d
|dgdd
|d	gd|          }||fV  |d         }|d         }t#          || j                  sJ t#          || j                  sJ ||fV  ot|                    d          }|j        D ]*}||         j                            t.                    ||<   +t          j        d| j        fD ]<}|                     dd|dgdd|dgd|                                           }||fV  =d| j        fD ]u}dd|dgdd|dgd}	|                     |	|t          j        n|                                           }|                     |	|                                           }||fV  vdS )z/Enumerate all supported pandas extension types.r%   r.   r         f0f1r,   N      ?g       @g      @r`   categoryTF)r0   r1   
UInt8DtypeUInt16DtypeUInt32DtypeUInt64Dtype	Int8Dtype
Int16Dtype
Int32Dtype
Int64Dtyper2   nanr   rF   NAFloat32DtypeFloat64Dtype
isinstanceSeriesastypecolumnscatrename_categoriesintCategoricalDtyperN   BooleanDtype)
rP   rS   NullrR   r-   rV   ser_origsercdatas
             rY   	pd_dtypesr~   j   s     		X	&	&B 	






	F %'FD<<1dAq!T1o66bj   D ru%   	 	E1dAq!T1o>>e   B (NNNN		 6Doo!2!23F<<S$$S#tS,ABB"*   D ru% 
  
  		  		 ES$,S#tS4IJJRW   B (NNNDzHT(Cc29-----h	22222C-		  ;;z""D\ 5 5q'+//44Qru%  \\q$?1aq/::%%''  
 
 Bh ru  UD$/tT47PQQ||DDLbooFWFW|XX\\$boo&7&7\88Bh r[   c            	   #     K   t          j        d          } t          j        d          }t          }d| j        dfD ]}|D ]}|                    d          s|                    d          r-|                     |          s|dk    r|nt          j        }|                     dd|d	gd
d	|dgdt          j	                  }|                     dd|d	gd
d	|dgd|          }||fV  | j        dfD ]}|                     dd|dgdd|dgd| 
                                          }|                     dd|dgdd|dgd|                     |                                                    }||fV  dS )z*Pandas DataFrame with pyarrow backed type.r%   pyarrowNr   rE   rO   r.   r   r]   r^   r_   r,   FT)r0   r1   r   rm   
startswithisnar2   rl   r   rF   rx   
ArrowDtyperN   )rP   parS   ry   r-   	orig_nullrR   rV   s           rY   pd_arrow_dtypesr      s     		X	&	&B		Y	'	'B #F. rua    	 	E	** e.>.>v.F.F $&GGDMMKdaiiRVI<<1i+Aq)Q3GHHj    D
 1dAq!T1o>>e   B (NNNN	   	 	||%t,UD$4MNN//##  
 
 \\%t,UD$4MNN--

++  
 
 Bh	 	r[   rQ   c                    |                      d                              dd          }|                      d          }t          j        |d<   t	          j        t          d          5  t          ||           ddd           n# 1 swxY w Y   t	          j        t          d          5  t          ||           ddd           dS # 1 swxY w Y   dS )	zValidate there's no inf in X.    r/      r^   )   r   zInput data contains `inf`matchN)	r3   r6   r2   infr0   raises
ValueErrorr   r   )rQ   rT   ys      rY   	check_infr      sG   


##Aq))A


AfAdG	z)D	E	E	E  1               
z)D	E	E	E  1                 s$   +BBB.CCCc                     dt           j                            d          t          j        d          } dt
          t                   dt
          t                   dt
          t                   dt           j        ffd}d	t          d
t          dt           j        ffd}|                      |ddgddgddg           |ddgddgddg           |dd           |dd           |dd           |dd            |d!d"           |d#d$           |d%d&          d'	          }||j	        
                    d(g                                                   }|d(                                         }||fS ))zSynthesize a dataset similar to the sklearn California housing dataset.

    The real one can be obtained via:

    .. code-block::

        import sklearn.datasets

        X, y = sklearn.datasets.fetch_california_housing(return_X_y=True)

    iP  i  r%   meanssigmasweightsr#   c                                         t          |d         z            | d         |d                   }                     |j        d         z
  | d         |d                   }t          j        ||gd          S )Nr   )r+   locscaler.   axis)normalrv   shaper2   concatenate)r   r   r   l0l1r!   rQ   s        rY   mixture_2compz-get_california_housing.<locals>.mixture_2comp  s     ZZi'!*,--E!HF1I  
 
 ZZi"(1+5E!HFSTIZVV~r2hQ////r[   meanstdc                 6                         | |f          S )Nr   r   r+   )r   )r   r   r!   rQ   s     rY   normz$get_california_housing.<locals>.norm  s    zzd#YLzAAAr[   g5ŀ]g~(Fv^gr-|E?g3mE^1?gDi-T?gÅv-W?gXcB@g&	@@g6?g](?g8W nx?gd?g|["@g2{e?)r   r   gVb<@g>+)@gZK@g@g)P=?g˧^T?g/E@g@gI@gtbO$@gg9h @gk}v?)		LongitudeLatitudeMedIncHouseAgeAveRooms	AveBedrms
PopulationAveOccupMedHouseValr   )r2   r3   default_rngr0   r1   r
   floatndarrayr   rs   
differenceto_numpy)rP   r   r   rV   rT   r   r!   rQ   s         @@rY   get_california_housingr      s    I
)


%
%C		X	&	&B0E{0$(K0:>u+0	0 0 0 0 0 0 0B5 Bu B B B B B B B B 
&}-#%78Z( 
 &k*#%78Z( 
 d 28JKKK"4:LMMM"39JKKK#4:MNNN$$6<MNNN"4:LMMM4%6<NOOO#	
 	

 
B* 	2:  -112;;==A
=""$$Aa4Kr[   c                  n    t          j        d          } |                                 }|j        |j        fS )z&Fetch the digits dataset from sklearn.sklearn.datasets)r0   r1   load_digitsr}   target)datasetsr}   s     rY   
get_digitsr   *  s5     "#566H!!D9dk!!r[   c                  V    t          j        d          } |                     d          S )z-Fetch the breast cancer dataset from sklearn.r   T)
return_X_y)r0   r1   load_breast_cancer)r   s    rY   
get_cancerr   2  s,     "#566H&&$&777r[   c                     t          j        d          } t          j                            d          }d}d}|                     ||          \  }}|                    d||j                  }t          |j        d                   D ]:}t          |j        d                   D ]}|||f         rt          j	        |||f<   ;||fS )zGenerate a sparse dataset.r      i  g      ?)random_stater.   r   )
r0   r1   r2   r3   r4   make_regressionrM   r   rangerl   )	r   rQ   nsparsityrT   r   flagijs	            rY   
get_sparser   9  s     "#566H
)


$
$CAH##AC#88DAq<<8QW--D171: ! !qwqz"" 	! 	!AAqDz !&!Q$	! a4Kr[   c                  4   t           rddlnt          j        d          t          j                            d          d                                } dt          t          t          t          f         t          f         dt          dj        ffd	} |d
dddddd          | d<    |ddddd          | d<    |dddddd          | d<    |ddd d!d"d#d$d%d          | d&<    |d'd(d)d!d*d+          | d,<    |d-d(d.d/d0d"d1d2d3d          | d4<    |d5d6d7d8d9d:d;          | d<<    |d=d>d?d@d$dAd          | dB<    |dCdDdd"dEd          | dF<    |d@dGdGdHdI          | dJ<   dKt          dLt          dt          dj        ffdM} |dNdOd          | dP<    |dQdRd          | dS<    |dTdUd          | dV<    |dWdXd          | dY<    |dZd[d          | d\<    |d]d^d          | d_<    |d`dad          | db<    |dcddd          | de<    |dfdgd          | dh<    |didjd          | dk<   t          | j                  }                    |           | |         } t	          j        fl          }| j        D ]e}t#          | |         j        j                  r3|| |         j        j                            t          j                  z  }U|| |         j        z  }f|dm|                                z  z  }|dn|                                z
  z  }| |fS )oam  Get a synthetic version of the amse housing dataset.

    The real one can be obtained via:

    .. code-block::

        from sklearn import datasets

        datasets.fetch_openml(data_id=42165, as_frame=True, return_X_y=True)

    Number of samples: 1460
    Number of features: 20
    Number of categorical features: 10
    Number of numerical features: 10
    r   Nr%   r&   i  
name_probadensityr#   c           	         t          	d|z
  z            }t          j        d|z
            dk    o|dk    }|rd|z
  }|| t          j        <   t	          |                                           }t	          |                                           }|dxx         dt          j        |          z
  z  cc<                       |	|          }
	                    |

                    t          d |                              }|S )	Nr.   rb   ư>r   )r+   pc                 ,    t          | t                    S N)rp   str)xs    rY   <lambda>z5get_ames_housing.<locals>.synth_cat.<locals>.<lambda>v  s    As!3!3 r[   r,   )rv   r2   absrl   listkeysvaluessumchoicerq   rw   filter)r   r   n_nullshas_nanr   r   r   r   seriesr!   rP   rQ   s            rY   	synth_catz#get_ames_housing.<locals>.synth_catd  s    i1w;/00&w''$.>7Q; 	*W}H!)JrvJOO%%&&""$$%%	"rvayy JJt)qJ11%%33T::   
 
 r[   gqu ]?gqh.?gsmB<?g5C(?goEb?)1Fam2fmConDuplexTwnhsTwnhsErb   BldgTypegwD?g. ҥ?g)$;?)UnfRFnFing_9?GarageFinishgW歺?gbFx{?gbFx{?gQfL2rf?)CornerCulDSacFR2FR3	LotConfigg?g/ؗ?gf׽?g$A
?g5e?g() l?g[iF?)TypMin2Min1ModMaj1Maj2Sev
Functionalg M?g?gMq?)NoneBrkFaceStoneBrkCmng3f?
MasVnrTypeg3f?gI/j ?g,	PS˦?ge@?gQ~?gZ	%qv?)1Story2Storyz1.5FinSLvlSFoyerz1.5Unfz2.5Unfz2.5Fin
HouseStyleg$	P?gHp?gK$?gՐ?g4*p?)GdTAFaExPogE`o?FireplaceQugș&l??皙?g5e?gunڌ`?)r  r  r  r  r  	ExterCondgn0a?g{gUId?)r  r  r  r  	ExterQualg8 nV?)r  r  r  g(xߢs?PoolQCr   r   c                 0                        | |          }t          d|z
  z            }t          j        d|z
            dk    r-|dk    r'                    |d          }t          j        ||<                       |t          j                  S )	Nr   r.   rb   r   r   Fr+   replacer,   )r   rv   r2   r   r   rl   rq   rG   )	r   r   r   r   r   null_idxr!   rP   rQ   s	         rY   	synth_numz#get_ames_housing.<locals>.synth_num  s    JJ3c	J::i1w;/006#-  4''GaKKzz)'5zIIH&AhKyy"*y---r[   gmtF@gOfK<Q=@	3SsnPorchgݹsΝ?g2Tf?
FireplacesgR u?gP$[r?BsmtHalfBathgvS?g_-?HalfBathgbĈ#F?g+?
GarageCarsg$[Q<@g"$#e?TotRmsAbvGrdg$[Q<{@g%Ǒ|@
BsmtFinSF1ge0OFG@g*Ӛ{7*d@
BsmtFinSF2gNڭ@gCk@	GrLivAreagg6.@gK@ScreenPorchr   g(e@g.A)r   r%   r0   r1   r2   r3   r   r   r   r   r   r   rq   r   rs   shufflezerosrp   r-   rw   rt   codesrr   rG   r   r   r   )	rV   r   r  rs   r   r|   r!   rP   rQ   s	         @@@rY   get_ames_housingr  J  s   "  + **
)


%
%CI	BsEz*E12=B	       . Y	
 	
 		 	BzN #(;;W B~  i		
 	
 	 B{O !y	
 	
 	 B| !y		
 	
 	 B| !y		
 		
 	 B| "		
 	
 		 	B}  i	
 	
 		 	B{O  i		
 	
 	 B{O 9	
 	

 	 BxL.u .5 .5 .RY . . . . . . . .  i 24EsKKB{O y!24FLLB|"#79LcRRB~Y24FLLBzN y!35GMMB|"#46H#NNB~ y!24EsKKB| y!24FLLB|i 13DcJJB{O!	"46H#NNB}2:GKK	GB 		|$$$AZ  bek2#677 	A''
333AAAAA 	QUUWW	$$A	affhh	&&Aq5Lr[   dpathc           	      r   t          j        d          }d}t          j                            | d          }t          j                            |          st          j        ||           t          j	        |d          5 }|
                    |            ddd           n# 1 swxY w Y   |                    t          j                            | d          t          j                            | d	          t          j                            | d
          fdd          \	  }}}}}	}
}}}|||||	|
|||f	S )zFetch the mq2008 dataset.r   z>https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zipz
MQ2008.zip)urlfilenamer)pathNzMQ2008/Fold1/train.txtzMQ2008/Fold1/test.txtzMQ2008/Fold1/vali.txtTF)query_id
zero_based)r0   r1   osr%  joinexistsr   urlretrievezipfileZipFile
extractallload_svmlight_files)r   r   srcr   fx_trainy_train	qid_trainx_testy_testqid_testx_validy_valid	qid_valids                 rY   
get_mq2008r;    s}    "#566H
JCW\\%..F7>>&!! 6f5555		%	% !	%   ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! 	$$GLL 899GLL 788GLL 788	

  	% 	 	
 	
 
s   B$$B(+B(Fr&   )	vary_sizer   n_samples_per_batch	n_batchesuse_cupyr<  r   c                   g }g }g }|r1ddl }	|	j                            t          j        |                    }
nt          j                            |          }
t          |          D ]}|r| |dz  z   n| }|
                    ||          }|
                    |          }|
                    dd|          }|                    |           |                    |           |                    |           |||fS )zMake batches of dense data.r   N
   r.   r(   )	cupyr3   r4   r2   r?   r   randnuniformappend)r=  r"   r>  r?  r<  r   rT   r   wrB  rQ   r   r!   _X_y_ws                   rY   make_batchesrJ  :  s     	A
A
A 2k%%bi&=&=>>i##L119  4=V'!b&00CV	YYy*--YYy!![[QQY[77			a7Nr[   c                       e Zd ZU dZej        ed<   ej        e	j
                 ed<   ej        e	j
                 ed<   ej        e	j                 ed<   ej        e	j
                 ed<   ej        e	j                 ed<   dS )		ClickFoldzCA structure containing information about generated user-click data.rT   r   qidscoreclickposN)__name__
__module____qualname____doc__r   
csr_matrix__annotations__nptNDArrayr2   r7   rF   r8    r[   rY   rL  rL  [  s         MM
{28	RX	;rz"""";rx    	RX	r[   rL  c                   >    e Zd ZU dZeed<   eed<   eed<   defdZdS )	RelDataCVzPSimple data struct for holding a train-test split of a learning to rank dataset.r   testmax_relr#   c                     | j         dk    S )z6Whether the label consists of binary relevance degree.r.   )r]  selfs    rY   	is_binaryzRelDataCV.is_binaryn  s    |q  r[   N)	rQ  rR  rS  rT  RelDatarV  rv   rO   ra  rY  r[   rY   r[  r[  g  sR         ZZNNN
MMMLLL!4 ! ! ! ! ! !r[   r[  c                       e Zd ZdZdeddfdZdej        ej	                 dej        ej
                 dej        ej	                 fdZdS )	PBMa  Simulate click data with position bias model. There are other models available in
    `ULTRA <https://github.com/ULTR-Community/ULTRA.git>`_ like the cascading model.

    References
    ----------
    Unbiased LambdaMART: An Unbiased Pairwise Learning-to-Rank Algorithm

    etar#   Nc                     t          j        g d          | _        t          j        g d          }t          j        ||          | _        d S )N)r  g{Gz?Q?gp=
ף?rb   )
g(\?gQ?gQ?g(\?rg  皙?g)\(?r  g{Gz?gQ?)r2   rK   
click_probpower	exam_prob)r`  re  rk  s      rY   __init__zPBM.__init__}  sM    (#?#?#?@@HHHH
 
	 )S11r[   labelspositionc                    t          j        |d          }t          j        |j                  }d||dk     <   d||t	          | j                  k    <   | j        |         }t          j        |j                  }|j        |j        k    sJ t          j        |d          }d||| j        j        k    <   | j        |         }t           j        	                    d          }|                    |j        d         t           j
                  }t          j        |j        t           j                  }d||||z  k     <   |S )	zSample clicks for one query based on input relevance degree and position.

        Parameters
        ----------

        labels :
            relevance_degree

        T)copyr   r   r&   )r+   r-   r,   r.   )r2   rK   r  r   lenri  r+   rk  r3   r   rF   r7   )	r`  rm  rn  ri  rk  ranksrQ   probclickss	            rY   sample_clicks_for_queryzPBM.sample_clicks_for_query  s    &t,,,Xfl++
vz13vT_---._V,
HV\**	}++++---.0et~**+N5)	i##D))zzv|AbjzAA(*RX(N(N(N01ti*,,-r[   )rQ  rR  rS  rT  r   rl  rW  rX  r2   r7   r8   ru  rY  r[   rY   rd  rd  s  s         2E 2d 2 2 2 2!k"(+!7:{287L!	RX	! ! ! ! ! !r[   rd  r   c           
         t          j        |           } | j        }t           j        dt          j        t          j        | dd         | dd         d                     dz   f         }t          j        t           j        ||f                   }| |         }t          j        |t          j        | j        g                    }|||fS )zzRun length encoding using numpy, modified from:
    https://gist.github.com/nvictus/66627b580c13068589957d6ab0919e66

    r   r.   Nr   T)	equal_nan)	r2   asarrayr+   r_flatnonzeroisclosediffrE  rK   )r   r   startslengthsr   indptrs         rY   rlencoder    s    
 	
1A	AU1bnbj122#2#$&O&O&O%OPPSTTTUFgbeFAI&''GvYFYvrx1122F7F""r[   r  rT   r   rM  sample_ratec                    t           j                            d          }t          | j        d         |z            }t          j        d| j        d         t           j                  }|                    |           |d|         }| |         }||         }||         }	t          j        |	          }
||
         }||
         }|	|
         }	t          dd          }|
                    |||	           |                    |           }|S )	zWe use XGBoost to generate the initial score instead of SVMRank for
    simplicity. Sample rate is set to 0.1 by default so that we can test with small
    datasets.

    r&   r   r,   Nz	rank:ndcghist)	objectivetree_method)rM  )r2   r3   r   rv   r   aranger?   r  argsortr   fitpredict)rT   r   rM  r  rQ   r!   indexX_trainr3  r4  
sorted_idxltrscoress                rY   init_rank_scorer    s     )


%
%CAGAJ,--I1agaj	BBBEKK*9*EhGhGE
I I&&Jj!Gj!G*%I
kv
>
>
>CGGGW)G,,, [[^^FMr[   foldscores_foldc                    | \  }}}|j         t          j        k    sJ t          j        |          }t          j        |j        ft          j                  }t          j        |j        ft          j                  }t          d          }|D ]u}	|	|k    }
|
                    |
j	        d                   }
||
         }t          j
        |          ddd         }|||
<   ||
         }|                    ||          }|||
<   v|j	        d         |j	        d         k    sJ |j	        |j	        f            |j	        d         |j	        d         k    sJ |j	        |j	        f            t          ||||||          S )zSimulate clicks for one fold.r,   rb   )re  r   Nr   )r-   r2   r7   uniqueemptyr+   r8   rd  r6   r   r  ru  rL  )r  r  X_foldy_foldqid_foldqidsrn  rt  pbmqqid_maskquery_scoresquery_positionrelevance_degreesquery_clickss                  rY   simulate_one_foldr    sz   
  $FFH>RX%%%%9XDxbh777HXv{nBH555F
#,,,C  
( 
(=##HN1$566"8,L11$$B$7+"8,223DnUU'x<?hnQ////&,1O///<?fl1o---fl/K---VVX{FHMMMr[   cv_datac           	      v   t          t          | j        | j                            \  }}}t	          j        dgd |D             z             t	          j                  t                    dk    sJ t          j	        |          }t	          j
        |          }t	          j
        |          }t          |||          fdt          dj                  D             }g g g g g g f\  t          j        dz
            D ]}t          ||         ||         ||         f||                   }	                    |	j                                       |	j                                       |	j                                       |	j                                       |	j                                       |	j                   ʈfdt          j        dz
            D             }
t          d          D ](}|
|         ||         k                                    sJ )t                    dk    r;t/          d         d         d         d         d         d                   }d}n0fd	t          t                              D             \  }}||fS )
z6Simulate click data using position biased model (PBM).r   c                 (    g | ]}|j         d          S )r   r  ).0vs     rY   
<listcomp>z#simulate_clicks.<locals>.<listcomp>  s    333AQWQZ333r[   r]   c                 B    g | ]}|d z
           |                  S )r.   rY  )r  r   r  scores_fulls     rY   r  z#simulate_clicks.<locals>.<listcomp>  s/    TTTk&Q-&)34TTTr[   r.   c                      g | ]
}|         S rY  rY  )r  r   s_lsts     rY   r  z#simulate_clicks.<locals>.<listcomp>  s    ???1eAh???r[   r   Nc           
   3      K   | ]<}t          |         |         |         |         |         |                   V  =d S r   )rL  )r  r   X_lstc_lstp_lstq_lstr  y_lsts     rY   	<genexpr>z"simulate_clicks.<locals>.<genexpr>$  sa       
 
 eAha%(E!HeAhaQQ
 
 
 
 
 
r[   )r   zipr   r\  r2   rK   cumsumrq  r   vstackr   r  r   r+   r  rE  rT   r   rM  rN  rO  rP  allrL  )r  rT   r   rM  X_fully_fullqid_fullr  r   r  scores_check_1r   r\  r  r  r  r  r  r  r  r  s                @@@@@@@@rY   simulate_clicksr     s   S5566IAq# Xqc33333344FYvFv;;%]1F^AF~c""H "&&(;;KTTTTTeAv{>S>STTTF/12r2r2/E,E5%u6;?##   !A$!c!f!5vayAATVTVTXTZ   TZ   TX????fkAo(>(>???N1XX 6 6q!VAY.33555555
5zzQ%(E!HeAha%(ERSHUU
 
 
 
 
 
 
 
 
3u::&&
 
 
t $;r[   rt  rP  c           
         t          j        |          }| |         } ||         }||         }||         }t          |          \  }}}t          d|j                  D ]^}||dz
           }	||         }
|	|
k     sJ |	|
f            t          j        ||	|
                   j        dk    sJ |	|
f            ||	|
         }|                                dk    sJ |                                            |                                |j        dz
  k    s?J |                                |j        |t          j        ||	|
                   f            t          j        |          }| |	|
         |         | |	|
<   ||	|
         |         ||	|
<   ||	|
         |         ||	|
<   ||	|
         |         ||	|
<   `| |||f}|S )z,Sort data based on query index and position.r.   r   )r2   r  r  r   r+   r  minmax)rT   r   rM  rt  rP  r  r  _r   begend	query_posr}   s                rY   sort_ltr_samplesr  +  s    CJ	*AJF
j/C
j/CC==LFAq1fk"" 0 0QUmQiSyyy3*yyyySW&&+q0003*000CL	}}!###Y]]__###}}).1"4444MMOONIc#c'l##	7
444 Z	**
s3wZ
+#c'
 S/*5s3ws3wZ
+#c'
3s7|J/CGfaDKr[   DTypeDMatrixTdevicec                 h   t           j                                        } | |                    ddd                              t           j                                      dd                    }t          |d          r|j        dddf         }n|dddf         }|} ||||	          }t          j
        t          d
          5  t          d|d|           ddd           n# 1 swxY w Y   t          |d          s, | |                                                    dd                    }||k                                    sJ |j        j        j        du sJ |j        j        j        du sJ |                    |j        	            | |                                                    dd                    }||j        k                                    sJ |}|                    |           |                                }	|                    |                    d|j                             |                                }
|
|	k                                    sJ |                    t           j                  }|                    |           |                                }||	k                                    sJ |                    dddd          }t          j
        t          d
          5  |                    |           ddd           dS # 1 swxY w Y   dS dS )zRun tests for base margin.r   rb   d   r/   2   r   ilocN)base_marginz.*base_margin.*r   r  )r  r  FTr.   r   )r2   r3   r   r   rr   rF   r6   hasattrr  r0   r   r   train_fnget_base_marginr  Tflagsc_contiguousf_contiguousset_infoset_base_marginr+   rG   )r  r  r  rQ   rT   r   r  Xygotbm_colbm_rowbm_f64s               rY   run_base_margin_infor  \  s   
)


!
!CcjjCcj**11"*==EEb!LLMMAq& F111a4LaaadGK	!QK	0	0	0B	z);	<	<	< @ @6::B???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ 1f ,eB&&((00Q7788{"'')))))}"/58888}"/47777
...eB&&((00B7788{}$))+++++ 
;'''##%%
;..q+2BCCDDD##%%& %%''''' "((44
;'''##%%& %%''''' ii1a++]:-?@@@ 	, 	,{+++	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	,7, ,s$   
C**C.1C.L%%L),L)r   as_densec                     t          t          j        d          sWt          j                            d          }t	          j         dz
  |d          }|                    dd           }||fS t          t          j                              dt          d	t          j
        f fd
}g }t                    5 }	t                    D ]+}
|                    |	                    ||
                     ,	 ddd           n# 1 swxY w Y   g }g }|D ]C}|                                \  }}|                    |           |                    |           Dt!          |          k    sJ t	          j        |d          }t          j        |          }|                    |j        d         |j        d         f          j        }t          j        |d          }|j        d          k    sJ |j        d         k    sJ |j        d          k    sJ |rQ|                                }|j        d          k    sJ |j        d         k    sJ t          j        ||dk    <   ||fS ||fS )zMake sparse matrix.

    Parameters
    ----------

    as_dense:

      Return the matrix as np.ndarray with missing values filled by NaN

    r   r&   rb   csr)mr   r   r   format        r   t_idr#   c                    t           j                            d| z            }
z  }| 
dz
  k    r	| |z  z
  }n|}t          j        	|dz
  |                                          }t          j        	df          }t          |j        d                   D ]b}|j        |dz            |j        |         z
  }|dk    r<||d d |f         	                                |                    	df          z  dz  z  }c||fS )Nr&   r.   rb   )r  r   r   r   r   rh  )
r2   r3   r   r   tocscr  r   r   r  toarray)r  rQ   thread_sizen_features_tlocrT   r   r   r+   r"   r!   	n_threadsr   s           rY   
random_cscz*make_sparse_regression.<locals>.random_csc  s   i##D4K00 I-9q=  (4++==OO)OM(N	
 
 

 %'' 	
 Hi^$$qwqz"" 	J 	JA8AE?QXa[0DqyyQqqq!tW__&&YN)C)CCcII!tr[   )max_workersNr  r   r.   r   )r  r2   r3   r4   r   r   r  multiprocessing	cpu_countrv   
csc_matrixr   r   rE  submitresultrq  hstackrx  r6   r   r  r   r  rl   )r!   r"   r   r  rQ   rT   r   r  futuresexecutorr   	X_results	y_resultsr1  r  arrr  s   ```             @rY   make_sparse_regressionr    s    29m,, 
i##D))M(N
 
 
 JJ3c	J::!t O-//<<I !2         . G			2	2	2 ;hy!! 	; 	;ANN8??:q99::::	;; ; ; ; ; ; ; ; ; ; ; ; ; ; ; II  xxzz1y>>Y&&&&#]9UCCCC

9A			171:qwqz*++-A
qqA9Q<9$$$$9Q<:%%%%71:"""" kkmmy|y((((y|z))))C1HAv6Ms   <DDD	n_stringsseedc                 x   d}t                      }t          j                            |          }t	          |          | k     rjd                    |                    t          t          j	                  |d                    }|
                    |           t	          |          | k     jt          |          S )zGenerate n unique strings.r    Tr  )setr2   r3   r   rq  r)  r   r   stringascii_lettersadd)r  r  name_lenunique_stringsrQ   
random_strs         rY   unique_random_stringsr    s    H"uuN
)


%
%C
n

	
)
)WWJJtF011$JOO
 

 	:&&&	 n

	
)
) r[   r  rb   cpu)r   	cat_ratior  r   	cat_dtyper  n_categoriesonehotr  r  r  c          	         t          j        d          }
t          j                            |          }t          j                            |dz             }|
                                }t          |          D ]X}|                    d|d          d         }|dk    rt          j        |t          j	                  r;t          j
        t          ||                    }|                    || d          }n-t          j        d|          }|                    d||           }|
                    |d	          |t!          |          <   |t!          |                   j                            |          |t!          |          <   |                    d||           }|
                    ||j        	          |t!          |          <   Zt          j        | f
          }|j        D ]C}t-          ||         j        |
j                  r|||         j        j        z  }8|||         z  }D|dz  }|dk    rt          |          D ]}|                    d| dz
  t3          | |z                      }t          j        |j        ||f<   t9          |j        j        |                   r4|t          j        |j        j        |         j                  j         k    sJ |j!        d         |k    sJ |r|
"                    |          }|r1tG          |j                  }|$                    |           ||         }|	dk    r8|	dv sJ ddl%}ddl&}|'                    |          }|
                    |          }||fS )a/  Generate categorical features for test.

    Parameters
    ----------
    n_categories:
        Number of categories for categorical features.
    onehot:
        Should we apply one-hot encoding to the data?
    sparsity:
        The ratio of the amount of missing values over the number of all entries.
    cat_ratio:
        The ratio of features that are categorical.
    shuffle:
        Whether we should shuffle the columns.
    cat_dtype :
        The dtype for categorical features, might be string or numeric.

    Returns
    -------
    X, y
    r%   r.   r/   r   Tr  r(   rc   r,   r  r  r  )cudagpuN)(r0   r1   r2   r3   r4   r   r   rM   
issubdtypestr_rK   r  r   r  r5   rq   r   rt   set_categoriesr-   r  rs   rp   rw   r  rv   rl   r  r   rS   r  
categoriesr+   r   get_dummiesr   r  cudfrB  from_pandas)r!   r"   r  r  r   r  r  r   r  r  rP   rQ   row_rngrV   r   r   r  r|   numlabelcolr  rs   r  rB  s                            rY   make_categoricalr    s\   D 
	X	&	&B )


-
-Ci##L1$455G	B: 9 9a33A6Q;;}Y00 N  X&;L!&L&LMM
NN:ItNLLYq,77
OO9OMM1J77Bs1vvJCFF66zBBBs1vvJJ//al/KKC3ci88Bs1vvJJHI<(((Ez  bgmR%899 	RW[&&EERWEE	QJE#~~z"" 	T 	TAOOIMI4H0I0I $  E !#BGE1Hry~a011 T#ry1B1M'N'N'SSSSS8A;*$$$$  ^^B rz""   [((((b!!

5!!u9r[   c                        e Zd ZdZddddededee         dee         d	ed
ee         ddf fdZ	de
defdZddZdeeej        ej        f         eee         f         fdZ xZS )IteratorForTestzCIterator for testing streaming DMatrix. (external memory, quantile)FN)on_hostmin_cache_page_bytesrT   r   rF  cacher  r  r#   c                    t          |          t          |          k    sJ || _        || _        || _        d| _        t                                          |||           d S )Nr   )cache_prefixr  r  )rq  rT   r   rF  itsuperrl  )r`  rT   r   rF  r  r  r  	__class__s          rY   rl  zIteratorForTest.__init__T  sr     1vvQ!5 	 	
 	
 	
 	
 	
r[   
input_datac                 D   | j         t          | j                  k    rdS t          j        t
          d          5   || j        | j                  | j        | j                  d            d d d            n# 1 swxY w Y    || j        | j                                                  | j        | j                                                  | j        r$| j        | j                                                  nd            t          j
                     | xj         dz  c_         dS )NFzKeyword argumentr   )r}   r  weightr.   T)r  rq  rT   r0   r   	TypeErrorr   rp  rF  gccollect)r`  r   s     rY   nextzIteratorForTest.nexti  s8   7c$&kk!!5]9,>??? 	? 	?Jtvdgtw>>>	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	
%%''&/&&((-1V=46$'?'')))	
 	
 	
 	

 	
1ts   .A55A9<A9c                     d| _         d S )Nr   )r  r_  s    rY   resetzIteratorForTest.resetz  s    r[   c                 D   t          | j        d         t          j                  rt          j        | j        d          }nt          j        | j        d          }t          j        | j        d          }| j        rt          j        | j        d          }nd}|||fS )zReturn concatenated arrays.r   r  r  r   N)	rp   rT   r   rU  r  r2   r   r   rF  )r`  rT   r   rF  s       rY   	as_arrayszIteratorForTest.as_arrays}  s     dfQi!233 	/dfU333AAtvA...AN46***6 	tvA...AAA!Qwr[   )r#   N)rQ  rR  rS  rT  r   r   r   rO   rv   rl  r   r&  r(  r   r   r2   r   r   rU  r   r*  __classcell__)r  s   @rY   r  r  Q  s       MM .2
 
 

 
 H	
 }
 
 'sm
 

 
 
 
 
 
*x D    "   	uRZ!223Y@SS	T       r[   r  )F)r  )\rT  r$  r  r(  r  r,  concurrent.futuresr   dataclassesr   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   urllibr   numpyr2   r0   rW  numpy.randomRNGscipyr   corer   r   r   r}   r   r   sklearnr   r   trainingr   r  r%   r   
DataFrameTr1   r   Memorymemoryrv   r   r   rZ   r~   r   r   r  r   r   r   r   r  r   rU  r;  rO   rJ  rX  r7   rb  rL  r[  rd  r  r   rF   r  r  r  r8   r  r  r  r  	DTypeLiker  r  rY  r[   rY   <module>r:     s	   $ $ 				     				   1 1 1 1 1 1 ! ! ! ! ! !                                               ) ) ) ) ) )       5 5 5 5 5 5 5 5 5 5 9 9 9 9 9 9 9 9 * * * * * * * * ( ( ( ( ( ( .......J		X	&	&	|Q	/	/88 #8uU2:rz12E$*4EEFdRS8 8 8 8v>9 > > > >B7 7 7 7 7t
3 
4 
 
 
 
 4bj"*&< = 4 4 4 4n "E"*bj01 " " " " 8E"*bj01 8 8 8 8 E"*bj01      s%
BJ 67 s s s sl 55

JJ
JJ
JJ
5 5 5 5x 	     	   4
T"*-tBJ/??@   < !3;rx#8#+bh:OO
P        	! 	! 	! 	! 	!
 	! 	! 	!4 4 4 4 4 4 4 4n#BH% #%S[#+0U*V # # # #& 	! !!
{28! 
RX	! 	!
 	[! ! ! !HN
!3;rx#8#+bh:OO
PNRZ(N N N N NF(Y (5HY<O1O+P ( ( ( (V..
{28. 
RX	. K!	.
 
RX	. 
KKK. . . .b+, +,DM +,3 +,SW +, +, +, +,^ RR #R/4R@DR
5"#RZ/0R R R Rj S    S	        , %'X] ] ]]] ]
 ] ] ] ] ] y"] ] 9bj !] ] ] ]@9 9 9 9 9h 9 9 9 9 9r[   