
    sii0                        d Z ddlmZmZ ddlmZmZmZmZm	Z	m
Z
mZmZmZ ddlZddlZddlmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ dej         dej!        fdZ" edd          Z# e#dddddd          Z$de
eej!                          de
ej!                 fdZ%deej&                 deej&        e'e(gdf         ddfdZ) G d de          Z*dej&        defd Z+d!ee'e	ej!                 f         d"e
e,         d#ee'ef         d$e
e         d%ee'ef         defd&Z-deej&                 d'e
ee'                  d"e
e,         d(e(d)ee'ef         d*e(d+e(deee
e         f         fd,Z.	 	 d2d.ed!ed/e
e         d0e(dej!        f
d1Z/dS )3z*Utilities for processing spark partitions.    )defaultdict
namedtuple)	AnyCallableDictIteratorListOptionalSequenceTupleUnionN)
csr_matrix   )	ArrayLikeconcat)DataIterDMatrixQuantileDMatrix)XGBModel   )
get_loggerseriesreturnc                 Z    |                      d          }t          j        |          }|S )zStack a series of arrays.F)copy)to_numpynpstack)r   arrays     X/var/www/html/bet.cuttalo.com/ml/venv/lib/python3.11/site-packages/xgboost/spark/data.pystack_seriesr"      s'    OOO''EHUOOEL    Alias)datalabelweightmarginvalidqidvaluesr&   r'   
baseMarginvalidationIndicatorr*   seqc                 (    | rt          |           S dS )z&Concatenate the data if it's not None.Nr   )r.   s    r!   concat_or_noner0      s    
 c{{4r#   iteratorappendc                 p   dt           j        dt          ddffd}d}| D ]}|t          j        |j        v }|du rt          j        |j        v sJ |rD|j        |t          j                  ddf         }|j        |t          j                 ddf         }n|d}} ||d           | ||d           dS )znExtract partitions from pyspark iterator. `append` is a user defined function for
    accepting new partition.partis_validr   Nc                      | t           j        |            | t           j        |            | t           j        |            | t           j        |            | t           j        |           d S )N)aliasr%   r&   r'   r(   r*   )r4   r5   r2   s     r!   	make_blobz#cache_partitions.<locals>.make_blob+   sw    tUZ***tU[(+++tU\8,,,tU\8,,,tUY)))))r#   TF)pd	DataFrameboolr7   r)   columnsloc)r1   r2   r8   has_validationr4   trainr)   s    `     r!   cache_partitionsr@   %   s   * * * * * * * * * &*N # #!"[DL8NT!!;$,.... 	&Hd5;//23EHT%+.12EE5E	%IeT"""# #r#   c                        e Zd ZdZdeeef         dee         de	ddf fdZ
deeej                          deej                 fdZd	edefd
ZddZ xZS )PartIterz7Iterator for creating Quantile DMatrix from partitions.r%   	device_idkwargsr   Nc                     d| _         || _        || _        || _        t	                                          d           d S )Nr   T)release_data)_iter
_device_id_data_kwargssuper__init__)selfr%   rC   rD   	__class__s       r!   rL   zPartIter.__init__H   sB     
#
d+++++r#   c                     |sd S | j         Ldd l}dd l}|j        j                            | j                    |                    || j                           S || j                 S Nr   )rH   cudfcupycudaruntime	setDevicer:   rG   )rM   r%   rQ   cps       r!   _fetchzPartIter._fetchR   sn     	4?&KKK GO%%do666>>$tz"2333DJr#   
input_datac                    | j         t          | j        t          j                           k    rdS  |d|                     | j        t          j                           |                     | j                            t          j        d                     |                     | j                            t          j        d                     |                     | j                            t          j	        d                     |                     | j                            t          j
        d                     d| j         | xj         dz  c_         dS )NFr%   r&   r'   base_marginr*   r   T )rG   lenrI   r7   r%   rW   getr&   r'   r(   r*   rJ   )rM   rX   s     r!   nextzPartIter.nexta   s   :TZ
344445
 	
TZ
344++djnnU[$??@@;;tz~~elDAABBDJNN5<$F$FGGDJNN59d;;<<	
 	
 l	
 	
 	
 	

a

tr#   c                     d| _         d S rP   )rG   )rM   s    r!   resetzPartIter.reseto   s    


r#   )r   N)__name__
__module____qualname____doc__r   strr	   r
   intr   rL   r   r9   r:   rW   r   r;   r_   ra   __classcell__)rN   s   @r!   rB   rB   E   s        AA,dO,08,IL,	, , , , , , 8HR\$:;  @V        x D           r#   rB   r4   c                    g dgg }}}d}t          | j        | j        | j        | j                  D ]\  }}}}|dk    rt          |          }	|}
|}n1t          |          }	t          j        |	t          j	                  }
|}|dk    r|	}||	k    sJ |
                    |
           |
                    |d         t          |
          z              |
                    |           t          j        |          }t          j        |          }t          j        |          }t          |||ft          |           |f          S )Nr   )dtype)shape)zipfeatureVectorTypefeatureVectorSizefeatureVectorIndicesfeatureVectorValuesrg   r]   r   arangeint32r2   r    concatenater   )r4   csr_indices_listcsr_indptr_listcsr_values_list
n_featuresvec_type	vec_size_vec_indices
vec_valuesvec_sizecsr_indices
csr_valuescsr_indptr_arrcsr_indices_arrcsr_values_arrs                  r!   )_read_csr_matrix_from_unwrapped_spark_vecr   s   sg   9;aS"oJ8;! 	9 9 + +4)[* q==9~~H%K#JJ :H)HBH===K#J??!JX%%%%,,,r2S5E5EEFFFz****Xo..Nn%566O^O44N	.9#d))ZAX   r#   r%   dev_ordinalmetarefparamsc                     | s#t          t          j        d          |          S t          | |fi |}t          |fi |d|i}|S )z+Handle empty partition for QuantileDMatrix.r   r   )r   r   )r   r   emptyrB   )r%   r   r   r   r   itms          r!   make_qdmr      se      :rx//S9999	$	,	,t	,	,B..f..#...AHr#   feature_colsuse_qdmrD   enable_sparse_data_optimhas_validation_colc           	      b   t          t                    t          t                    ddt          j        dt          dt
          ddffd}dt          j        dt          dt
          ddffd}d	t          t          t          t          j	                 f         d
t          t          t          f         dt          fd}	|r|}
dv rd         dk    sJ n|}
dt          t          t          t          f         t          t          t          t          t          t
          f         f         f         ffd} |            \  }}&|r$t!          | |
           t#          ||d|          }ne|st!          | |
            |	          }nD&|r$t!          | |
           t#          ||d|          }nt!          | |
            |	          }|r'|rt#          ||||          }n|r |	          nd}nd}|,|                                |                                k    sJ ||fS )a~  Create DMatrix from spark data partitions.

    Parameters
    ----------
    iterator :
        Pyspark partition iterator.
    feature_cols:
        A sequence of feature names, used only when rapids plugin is enabled.
    dev_ordinal:
        Device ordinal, used when GPU is enabled.
    use_qdm :
        Whether QuantileDMatrix should be used instead of DMatrix.
    kwargs :
        Metainfo for DMatrix.
    enable_sparse_data_optim :
        Whether sparse data should be unwrapped
    has_validation:
        Whether there's validation data.

    Returns
    -------
    Training DMatrix and an optional validation DMatrix.
    r   r4   namer5   r   Nc                    |t           j        k    s	|| j        v r|t           j        k    r" |          j        d         dk    r	|          }nA| |         j        d         dk    r(| |         }|t           j        k    rt	          |          }nd }|t           j        k    r(|&dk    r|j        d         |j        d         k    sJ |d S |r|                             |           d S |                             |           d S d S Nr   r   )r7   r%   r<   rl   r"   r2   )r4   r   r5   r    r   rx   
train_data
valid_datas       r!   append_mz0create_dmatrix_from_partitions.<locals>.append_m   s(   5:!5!5
"" ,&,Q/!33.2<.@d!!$q((T
5:%%(//Euz!!e&7??!&QJ!U[^3333} /4 ''.....4 ''.....5 "6!5r#   c                 N   |t           j        k    s	|| j        v r|t           j        k    r6t          |           }dk    r|j        d         |j        d         k    sJ n| |         }|r|                             |           d S |                             |           d S d S r   )r7   r%   r<   r   rl   r2   )r4   r   r5   r    rx   r   r   s       r!   append_m_sparsez7create_dmatrix_from_partitions.<locals>.append_m_sparse   s     5:!5!5uz!!A$GG??!&QJ!U[^33333T
 /4 ''.....4 ''..... "6!5r#   r+   rD   c           	      v   t          |           dk    rBt          d                              d           t          ddt	          j        d          i|S t          | t          j                           }t          | 	                    t          j
        d                     }t          | 	                    t          j        d                     }t          | 	                    t          j        d                     }t          | 	                    t          j        d                     }t          d|||||d|S )Nr   XGBoostPySparkz_Detected an empty partition in the training data. Consider to enable repartition_random_shuffler%   r   rZ   r\   )r]   r   warningr   r   r   r0   r7   r%   r^   r&   r'   r(   r*   )r+   rD   r%   r&   r'   r(   r*   s          r!   makez,create_dmatrix_from_partitions.<locals>.make  s   v;;!'((00.  
 ;; 0 0;F;;;fUZ011vzz%+t<<==

5< > >??

5< > >??VZZ	48899 
U6v3
 
RX
 
 	
r#   missingg        c                  j    d} i }i }                                 D ]\  }}|| v r|||<   |||<   ||fS )N)max_binr   silentnthreadenable_categorical)items)non_data_keysnon_data_paramsr   kvrD   s        r!   split_paramsz4create_dmatrix_from_partitions.<locals>.split_params  s_    

 LLNN 	 	DAqM!!%&""Q_$$r#   )r   listr9   r:   rf   r;   r   r	   r   ndarrayr   r   r   r   rg   floatr@   r   num_col)r1   r   r   r   rD   r   r   r   r   r   	append_fnr   r   r   dtraindvalidrx   r   r   s    `  `           @@@r!   create_dmatrix_from_partitionsr      s   D /:$.?.?J.9$.?.?JJ/r| /3 /$ /4 / / / / / / / / /</bl /# / /$ / / / / / / / /"
T#tBJ//0 
$sCx. 
W 
 
 
 
$   #	F""vi'8C'?'?'?'?'?	%%S#XS%UD@P:Q5Q0R RS % % % % % %*  <>>LD&G9---":{D$OO		!'	!9---j&))		'	9---*k4vFF9---j&))   	N(0Kvv) )FF 2DMTT*f---FF~~6>>#3#333336>r#   Fmodelr[   strict_shapec           	          |                      d          }t          ||| j        | j        | j        | j        | j                  }|                                                     |dd||          S )z4Predict contributions with data with the full model.N)r[   r   r   feature_typesfeature_weightsr   TF)pred_contribsvalidate_featuresiteration_ranger   )	_get_iteration_ranger   r   n_jobsr   r   r   get_boosterpredict)r   r%   r[   r   r   data_dmatrixs         r!   r   r   U  s     0066O)- 3  L &&'! '   r#   )NF)0re   collectionsr   r   typingr   r   r   r   r	   r
   r   r   r   numpyr   pandasr9   scipy.sparser   _typingr   compatr   corer   r   r   sklearnr   utilsr   Seriesr   r"   r$   r7   r0   r:   rf   r;   r@   rB   r   rg   r   r   r   r\   r#   r!   <module>r      s   0 0 / / / / / / / / X X X X X X X X X X X X X X X X X X X X X X         # # # # # #             5 5 5 5 5 5 5 5 5 5             rz     	
7QRRh<9NPUVV"*!56 8BJ;O    #r|$#.6c47PRV7V.W#	# # # #@+ + + + +x + + +\(BL (Z ( ( ( (V
sD$$
%# sCx. 
'		
 cN    er|$e 8C=)e #	e
 e cNe #e e 7HW%%&e e e eV (,	 
 )$ 	
 Z     r#   