
    rii5                        d Z ddlZddlmZ ddlmZmZmZmZm	Z	m
Z
mZmZmZmZ ddlZddlZddlZddlZddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lm Z m!Z! ddl"m#Z#m$Z$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z-  ej.        d          Z/eee0ef                  Z1g dZ2 G d de$          Z3edej4        dej5        de
ej4        e0f         fd            Z6edej4        ddde
ej4        df         fd            Z6dej4        de	ej5                 de
ej4        e	e0         f         fdZ6de0dz  dej4        dej5        dej5        de	ej5                 de	ej5                 de
ej4        ej5        ej5        e	ej5                 e	ej5                 f         fdZ7dee         dee0ee         f         fd Z8d!e1dee0ee         f         fd"Z9d#e1d$e	e#         d%e	e         d&e	e         de
ee0ee         f         e	eeef                  f         f
d'Z:d#e	e1         de;fd(Z<d)e;dej=        fd*Z>dAd+Z?dd,d-e	e         d%e	e         d.e	e         d/e@d0eAd#e	e1         d1eAd2e;d3e	eA         d4e	e%         d$e	e#         d5e	e         de&fd6ZBd-e	e         d%e	e         d.e	e         d/e@d0eAd2e;d#e	e1         d$e	e#         d5e	e         de%fd7ZCd8e;dede%fd9ZDd:eEd;eAd<eEd=eeA         d>ee0         d?eAd$e	e#         de
e%ee
e%e0f                  f         fd@ZFdS )Bz)Copyright 2019-2025, XGBoost contributors    N)Sequence)
AnyCallableDictListOptionalTupleTypeVarUnioncastoverload)	dataframe   )
collective)
Categories)FeatureNamesFeatureTypes)concatimport_cupy)BoosterDataIterDMatrixQuantileDMatrix)
is_on_cuda)get_model_categoriespick_ref_categories)	_RefErrorz[xgboost.dask])labelweightbase_marginqidlabel_lower_boundlabel_upper_boundc                        e Zd ZdZ	 	 	 ddee         dee         deee	e
f                  dee         deee                  ddf fd	Zd
edee         fdZdefdZddZdedefdZ xZS )DaskPartitionIterz.A data iterator for the `DaskQuantileDMatrix`.Ndatafeature_namesfeature_typesfeature_weightskwargsreturnc           	         t           t          d           f}|| _        t          D ]G}t	          | ||                    |d                      t          t          | |          |          sJ H|| _        || _	        || _
        t          | j        t                     sJ d| _        t                                          d           d S )Nr   T)release_data)r   type_datametasetattrget
isinstancegetattr_feature_names_feature_types_feature_weights_itersuper__init__)	selfr&   r'   r(   r)   r*   typesk	__class__s	           W/var/www/html/bet.cuttalo.com/ml/venv/lib/python3.11/site-packages/xgboost/dask/data.pyr:   zDaskPartitionIter.__init__4   s     4::&
 	7 	7AD!VZZ400111gdA..666666 ,+ /$*h/////
d+++++    attrc                 \    t          | |          t          | |          | j                 S d S N)r4   r8   )r;   rA   s     r?   _getzDaskPartitionIter._getM   s.    4*4&&tz22tr@   c                 &    | j         | j                 S )z5Utility function for obtaining current batch of data.)r/   r8   r;   s    r?   r&   zDaskPartitionIter.dataR   s    z$*%%r@   c                     d| _         dS )zReset the iteratorr   N)r8   rF   s    r?   resetzDaskPartitionIter.resetV   s    


r@   
input_datac           	            j         t           j                  k    rdS  fdt          D             } |d                                 d j         j         j        d|  xj         dz  c_         dS )zYield next batch of dataFc                 <    i | ]}|                     |          S  )rD   ).0r=   r;   s     r?   
<dictcomp>z*DaskPartitionIter.next.<locals>.<dictcomp>`   s%    000a!TYYq\\000r@   N)r&   groupr'   r(   r)      TrL   )r8   lenr/   r0   r&   r5   r6   r7   )r;   rI   r*   s   `  r?   nextzDaskPartitionIter.nextZ   s    :TZ((500004000
 	
-- 1	
 	
 	
 	
 	
 	

a

tr@   )NNNr+   N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r:   strrD   r&   rH   r   boolrR   __classcell__)r>   s   @r?   r%   r%   1   s)       88
 15CG)-, ,3i,  -,  lJ&> ?@	,
 "#, 49%, 
, , , , , ,2 #    
&c & & & &   x D        r@   r%   dfcolr+   c                     d S rC   rL   r[   r\   s     r?   _add_columnr_   m   s    ORsr@   c                     d S rC   rL   r^   s     r?   r_   r_   q   s    KN3r@   c                     || |fS d}|j          d| }|| j        v r|dz  }|j          d| }|| j        v  | j        di ||i} | |fS )Nr   _rP   rL   )namecolumnsassign)r[   r\   trailsuids       r?   r_   r_   u   s     {3wFX
 
 
 
 C


!$$F$$ 

 
	 	 c3Z	 	 Bs7Nr@   devicer!   ysample_weightr    c                 "   t          |          \  }t          |          \  }t          |          \  }t          |          \  }	| | dk    rdnd}
t          j                            d|
i          5                                  |                             d          j                                        j        j        |<   	                    |          
                    |          |                                         }|j                                        j                                        }t!          |          }t#          ||d         d	z   gz             }                    |d
|                                          ddd           n# 1 swxY w Y   |         }|         }fd||	fD             \  }}d ||||	fD             }                    |d	                                          ||||fS )zA function to prevent query group from being scattered to different
    workers. Please see the tutorial in the document for the implication for not having
    partition boundary based on query groups.

    Ncpup2ptaskszdataframe.shuffle.methodcategory)byrP   F)drop	divisionsc              3   `   K   | ](}| t          t          j        |                   nd V  )d S rC   )r   ddSeries)rM   rg   r[   s     r?   	<genexpr>z!no_group_split.<locals>.<genexpr>   sM       " "BECORY3   " " " " " "r@   c                     g | ]}||S rC   rL   )rM   rg   s     r?   
<listcomp>z"no_group_split.<locals>.<listcomp>   s    NNNCcoCooor@   )axis)r_   daskconfigsetpersistastypecatas_knowncodessort_valuesgroupbycountindexcomputevaluestolistsortedtuple	set_indexrr   )rh   r[   r!   ri   rj   r    qid_uidy_uidw_uidbm_uidshufflecntdivuidss    `            r?   no_group_splitr      sX     b#&&KBB""IBB..IBR--JB ~5eegG	4g>	?	?  ZZ\\k((448AACCGM7^^w^''jj!!'*0022i!!(//11SkkC3r7Q;-'((\\  
 
 '))	 	              " W+C
5	A" " " "JOQW" " "M; ONGUE6:NNND	A			&	&	(	(BsA}k11s   ;DF##F'*F'r*   c                     
                       d          }|J t          |          }t          |d                   rddlm
 nddlm
 dt          dt          t          t          f         f fddt          dt          j        f
fdfd	t          |          D             }t          |          }|j        j        r S t                              d
t#          j                    |           t          |          }t          |          r)t'                      }|                    |j                  }nt+          j        |j                  }|j        |ddf         }t/          |d          r|j        |ddf         }n||ddf         }                     d|gi           t3          |j                  D ])\  }}	|	 v sJ                      |	||	         gi           * S )z>Sort worker-local data by query ID for learning to rank tasks.r&   Nr   )	DataFrameir+   c                      dt           t          t                            dt           t                   f fdfdt          D             }d |                                D             }|S )zDReturn a dictionary containing all the meta info and all partitions.rA   r+   c                     | |          S d S rC   rL   )rA   r   s    r?   rD   z0sort_data_by_qid.<locals>.get_dict.<locals>._get   s    Aw4r@   c           	      P    i | ]"}|                      |d                     #S rC   )r2   )rM   rc   rD   r*   s     r?   rN   z6sort_data_by_qid.<locals>.get_dict.<locals>.<dictcomp>   s3    HHH4D$$vzz$5566HHHr@   c                     i | ]
\  }}|||S rC   rL   )rM   r=   vs      r?   rN   z6sort_data_by_qid.<locals>.get_dict.<locals>.<dictcomp>   s    CCCAQ]1]]]r@   )r   r   r   listr0   items)r   data_optr&   rD   r*   s   `  @r?   get_dictz"sort_data_by_qid.<locals>.get_dict   s    	xS	* 	x~ 	 	 	 	 	 	
 IHHHH4HHHCC!1!1CCCr@   c                 0     |           } |          S rC   rL   )r   r&   r   r   s     r?   map_fnz sort_data_by_qid.<locals>.map_fn   s    x{{yr@   c                 &    g | ]} |          S rL   rL   )rM   r   r   s     r?   ry   z$sort_data_by_qid.<locals>.<listcomp>   s!    444&&))444r@   a  [r%d]: Sorting data with %d partitions for ranking. This is a costly operation and will increase the memory usage significantly. To avoid this warning, sort the data based on qid before passing it into XGBoost. Alternatively, you can use set the `allow_group_split` to False.iloc)r2   rQ   r   cudfr   pandasintr   rX   r   pdranger   r!   is_monotonic_increasingLOGGERwarningcollget_rankr   argsortnpr   hasattrupdate	enumeraterd   )r*   
data_partsn_parts
meta_partsdfqdfxcp
sorted_idxr   cr   r   r   s   `         @@@r?   sort_data_by_qidr      s:   F##J!!!*ooG*Q-   %"""""""$$$$$$C DdO      # ",        5444U7^^444J


C
w& 
NN	T 	  " 

C# )]]ZZ((

Z((

(:qqq=
!CsF !hz111}%*aaa- 
MM6C5/"""#+&& % %1F{{{{q3q6(m$$$$Mr@   list_of_partsc                      t           t                    sJ i dt          dt          ddf fd}t	                     D ]'\  }} ||d           t
          D ]} |||           (                    dd          }|t          di S )	z8Convert list of dictionaries into a dictionary of lists.r   rc   r+   Nc                     ||          v r|          |         }nd }|&|vrg |<   |                              |           d S d S rC   )append)r   rc   partr   results      r?   r   z!_get_worker_parts.<locals>.append  si    =### #D)DDD6!!!t4L%%%%% r@   r&   r!   rL   )r3   r   r   rX   r   r0   r2   r   )r   r   r   rb   r=   r!   r   s   `     @r?   _get_worker_partsr      s    mT*****#%F&# &S &T & & & & & & & -((  1q& 	 	AF1aLLLL	 **UD
!
!C
!++F++Mr@   partsmodelr(   xy_catsc                     t          |           }|d         d         }t          |||          \  }}t          |||          }||fS )Nr&   r   )r   r   r   )r   r   r(   r   unzipped_dictXrb   
model_catss           r?   _extract_datar     sQ     &e,,Mfa A(E=AAMAz$Q
G<<J*$$r@   c                    | )t          | d                             d                    }nd}t          t          j        t          j        |gt
          j                  t          j        j	                  d                   }|S )Nr   r&   F)dtype)
r   r2   rY   r   	allreducer   arrayint32OpMAX)r   is_cudas     r?   _get_is_cudar   '  sg    U1X\\&11224>"(G9BH"E"E"Etw{SSTUVWWGNr@   r   c                 z    | r$t                      }|                    d          }nt          j        d          }|S )N)r   r   )r   emptyr   )r   r   r   s      r?   _make_emptyr   1  s:     !]]    Lr@   c                  l    t          j                    } t                              d| j                   d S )NzWorker %s has an empty DMatrix.)distributed
get_workerr   r   address)workers    r?   _warn_emptyr   :  s-    #%%F
NN4fnEEEEEr@   )refr'   r)   missingnthreadmax_binenable_categoricalmax_quantile_batchesr   Xy_catsc           
          t          |          }|1t                       t          t          |          | |||	||          S t	          ||
||          \  }}t          t          di ||| |d||||	||          S )N)r'   r(   r   r   r   r   )r(   r'   r)   )r   r   r   r   r   r   rL   )r   r   r   r   r   r%   )r'   r(   r)   r   r   r   r   r   r   r   r   r   r   r   r   s                  r?   _create_quantile_dmatrixr   ?  s     5!!G}  ''1!5
 
 
 	
 !.eUM7 S SM: 	
 	
	
$'+		
 	
 	
 	
 -1   r@   c        	            t          |          }	|.t                       t          t          |	          | ||          S t	          d          }
dt
          t          |
                  dt          |
         fd}t          ||||          \  }}i }|                                D ]\  }} ||          }|||<   t          di ||| ||||dS )	zdGet data that local to worker from DaskDMatrix.

    Returns
    -------
    A DMatrix object.

    N)r'   r(   r   Tr&   r+   c                 V    t          d | D                       rd S t          |           S )Nc              3      K   | ]}|d u V  	d S rC   rL   )rM   r   s     r?   rw   z:_create_dmatrix.<locals>.concat_or_none.<locals>.<genexpr>  s&      --tt|------r@   )anyr   )r&   s    r?   concat_or_nonez'_create_dmatrix.<locals>.concat_or_none  s2    ------- 	4d||r@   )r   r'   r(   r   r   r)   rL   )	r   r   r   r   r
   r   r   r   r   )r'   r(   r)   r   r   r   r   r   r   r   r   r   r   r   concated_dictkeyvaluer   s                     r?   _create_dmatrixr   m  s    & 5!!G}  ''1	
 
 
 	
 	AXhqk2 x{    
 !.eUM7 S SM:$&M#))++  
UN5!!c  
# -'    r@   is_quantilec                 6    | rt          di |S t          di |S )NrL   )r   r   )r   r*   s     r?   _dmatrix_from_list_of_partsr     s2     2'11&111$$V$$$r@   	train_reftrain_idrefsevals_id
evals_name	n_threadsc          
         t          di | ||d d}g }|                                }	t          |          D ]\  }
}||
         |k    r|                    |||
         f           /|                    dd           7|d         |k    rt          t                    |d= t          di ||||	|d}nt          di |||	|d}|                    |||
         f           ||fS )N)r   r   r   r   )r   r   r   r   )r   r   r   rL   )r   get_categoriesr   r   r2   
ValueErrorr   )r   r   r   r   r   r   r   Xyevalsr   r   r   eval_xys                r?   _get_dmatricesr    s`    
% 
 


&eT
 
 
 
B
 (*E!!GD// / /3A;(""LL"jm,---775$+5zX%% +++E
1  (b'   GG 2  ('   G 	gz!}-....u9r@   rS   )GrW   loggingcollections.abcr   typingr   r   r   r   r   r	   r
   r   r   r   r{   r   numpyr   r   r   r   ru    r   r   _data_utilsr   _typingr   r   compatr   r   corer   r   r   r   r&   r   sklearnr   r   trainingr   	getLoggerr   rX   
_DataPartsr0   r%   r   rv   r_   r   r   r   r   rY   r   ndarrayr   r   floatr   r   r   r   dictr  rL   r@   r?   <module>r     s   / /  $ $ $ $ $ $                                                 ! ! ! ! ! ! $ $ $ $ $ $ 0 0 0 0 0 0 0 0 ( ( ( ( ( ( ( ( > > > > > > > > > > > >       ? ? ? ? ? ? ? ?            		+	,	,$sCx.!
  9 9 9 9 9 9 9 9x 
 RBL Rry RU2<;L5M R R R 
 R 
 NBL Nt NblD6H0I N N N 
 N
#BI.
2<#&'    02$J02
02 
02 
y	02
 BI&02 ")$02 L")RY(;Xbi=PP02 02 02 02fEtCy ET#tCy.-A E E E EPZ Dd3i4H    4
%
%G
% L)
% j!	
%
 4T#Y%j0H*I!JJK
% 
% 
% 
%,      "*    F F F F  "+ + +L)+ L)+ c]	+
 + + J+ + + #3-+ 
'	+ G+ j!+ + + + +\3L)3 L)3 c]	3
 3 3 3 J3 G3 j!3 3 3 3 3l%T %S %W % % % %$$$ $ sm	$
 $ $ G$ 7Dw|,--.$ $ $ $ $ $r@   