
    Iri1                       d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
 d dlZd dlZd dlZd dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ dd	lmZ d d
lmZ ddlmZ  ddlm!Z"  e
d          Z#e G d d                      Z$ G d d          Z% G d d          Z& ed           G d d                      Z'd Z(d Z)d Z*d Z+d Z,d Z- ej.        e,ej/        g          Z0 ej.        e,ej1        g          Z2 ej.        e-ej3        g          Z4 G d  d!          Z5 G d" d#          Z6 e7            Z8 G d$ d%          Z9d?d'Z:d?d(Z;d?d)Z< G d* d+          Z= G d, d-e=          Z> G d. d/e=          Z?d?d0Z@d?d1ZAd2 ZBd3 ZCd4 ZD e6            ZE eeE          ZFd5 ZGd6 ZH G d7 d8          ZI G d9 d:ejJ                  ZK G d; d<          ZL G d= d>ee#                   ZMdS )@    )annotationsN)TupleListDictCallableTypeVar)	dataclass)TritonSemantic)KernelInterface)TensorDescriptor   )InterpreterError)partial   )interpreter)irTc                  r    e Zd ZU dZded<   ded<    ej        e          Zded<   d	 Z	d
 Z
d Zd Zd ZdS )TensorHandlez
        data: numpy array
        dtype: triton type, either pointer_type or scalar_type.
        we don't store block_type here because the shape information is already available in the data field
        attr: a dictionary of attributes
    znp.arraydataztl.dtypedtype)default_factoryr   attrc                    t          | j        | j                  s4t          d| j        j        dz   d| j        j         d| j                   d S )Nznumpy data itemsize (   z) bits) exceeds dtype primitive_bitwidth (z bits) for triton type )_validate_np_data_sizer   r   
ValueErroritemsizeprimitive_bitwidthselfs    r/var/www/html/bestrading.cuttalo.com/models/btc_v9/venv/lib/python3.11/site-packages/triton/runtime/interpreter.py__post_init__zTensorHandle.__post_init__&   s~    %di<< 	e dTY5G!5K d d!%!>d dW[Wad d e e e	e 	e    c                N    t          | j                                                  S N)boolr   allr    s    r"   __bool__zTensorHandle.__bool__+   s    DIMMOO$$$r$   c                b    | j         }t          |d          r|j        }t          |d          |S )N
element_ty)r   hasattrr+   )r!   r   s     r"   get_element_tyzTensorHandle.get_element_ty.   s<    
e\** 	%$E e\** 	%r$   c                Z    t          | j                                        | j                  S r&   )r   r   copyr   r    s    r"   clonezTensorHandle.clone4   s     DINN,,dj999r$   c                    || j         |<   d S r&   )r   )r!   keyvalues      r"   set_attrzTensorHandle.set_attr7   s    	#r$   N)__name__
__module____qualname____doc____annotations__dataclassesfielddictr   r#   r)   r-   r0   r4    r$   r"   r   r      s           NNNOOO""4888D8888e e e
% % %  : : :    r$   r   c                      e Zd Zd Zd ZdS )BlockPointerHandlec                Z    || _         || _        || _        || _        || _        || _        d S r&   )baseshapestridesoffsetsblock_shapeorder)r!   rA   rB   rC   rD   rE   rF   s          r"   __init__zBlockPointerHandle.__init__=   s1    	
&


r$   c                   | j                                         }|j        dz  }t          j        | j         j        | j                  }t          j        | j        t                    }t          t          | j                            D ]}dgt          | j                  z  }| j        |         ||<   | j        |         j        t          j        | j        |                   z                       |          }|||z  | j        |         j        z                      t          j                  z   }||v r ||| j        |         j        k     z  |dk    z  }t%          || j         j        j                  }||fS )Nr   r   r   r   )rA   r-   r   npbroadcast_tor   rE   onesr'   rangelenrD   arangereshaperC   astypeuint64rB   r   r   scalar)	r!   boundary_checkdtype_ttn_bytesptrsmasksdim
bcast_dimsoffs	            r"   materialize_pointersz'BlockPointerHandle.materialize_pointersE   sL   9++---2ty~t/?@@(555T-..// 	J 	JCs4#3444J".s3JsO<$)BId6Fs6K,L,LLUUV`aaC7S=4<+<+AAII")TTTDn$$tz#';!;<qID$)/"899U{r$   N)r5   r6   r7   rG   r\   r=   r$   r"   r?   r?   ;   s2              r$   r?   c                  $    e Zd ZddZd	 ZddZdS )TensorDescHandlerA   r   rB   List[TensorHandle]rC   rE   	List[int]c                t    || _         t          |          | _        || _        || _        || _        || _        d S r&   )rA   rN   ndimrB   rC   rE   padding)r!   rA   rB   rC   rE   rc   s         r"   rG   zTensorDescHandle.__init__W   s7    	JJ	
&r$   c                &   | j         j                                        dz  dk    s
J d            t          | j                  | j        k    sJ t          | j                  | j        k    sJ | j        dk    s
J d            | j         j        j        }|j	        dz  }| j        d d         D ]1}|j                                        |z  }|dz  dk    s
J d            2| j        d         j                                        dk    s
J d	            d S )
N   r   zbase must be 16-byte alignedr   z"descriptor cannot be 0 dimensionalr   zstride must be 16-byte alignedzlast dim must be contiguous)
rA   r   itemrN   rC   rb   rE   r   r+   r   )r!   	scalar_tyr   stridebyte_strides        r"   validatezTensorDescHandle.validate`   s'   y~""$$r)Q...0N...4<  DI----4#$$	1111yA~~~C~~~IO.	/14l3B3' 	K 	KF +**,,x7K#q(((*J((((|B$))++q0002O00000r$   rD   c                r   t          |          | j        k    sJ | j        j        j        }|j        dz  }|d         j        |z  dz  dk    s
J d            t          j        | j        j        | j	                  }t          j
        | j	        t                    }t          t          | j	                            D ]}dgt          | j	                  z  }| j	        |         ||<   ||         j        t          j        | j	        |                   z                       |          }|||z  | j        |         j        z                      t          j                  z   }|d|k    z  || j        |         j        k     z  }|j        t          j        k    sJ t'          || j        j        j                  }||fS )Nr   rf   re   r   z*block offset start must be 16-byte alignedrI   r   )rN   rb   rA   r   r+   r   r   rJ   rK   rE   rL   r'   rM   rO   rP   rC   rQ   rR   rB   r   rS   )	r!   rD   rh   r   rW   rX   rY   rZ   r[   s	            r"   r\   z%TensorDescHandle.materialize_pointersm   s   7||ty((((IO.	/14 8+r1Q6668d666ty~t/?@@(555T-..// 	F 	FCs4#3444J".s3JsO3<$ry1A#1F'G'GGPPQ[\\C8c>DL,=,BBJJ29UUUDQ#X&#
30D*DEEEzRY&&&&D$)/"899U{r$   N)rA   r   rB   r_   rC   r_   rE   r`   )rD   r_   )r5   r6   r7   rG   rk   r\   r=   r$   r"   r^   r^   U   sN           P P P     r$   r^   T)frozenc                      e Zd ZU dZded<   dZded<   dZded<   dZd	ed
<   dZded<   dZ	ded<   dZ
d	ed<   dZded<   dZded<   dZd	ed<   dS )InterpreterOptionsNr<   extern_libsFr'   debugTsanitize_overflowstrarch)fp8e5fp8e5b16fp8e4nvfp8e4b8fp8e4b15z
Tuple[str]supported_fp8_dtypesr=   !deprecated_fp8_dot_operand_dtypestf32default_dot_input_precision)r|   tf32x3ieeeallowed_dot_input_precisionsr   intmax_num_imprecise_acc_defaultr   backend_name)r5   r6   r7   rp   r9   rq   rr   rt   rz   r{   r}   r   r   r   r=   r$   r"   ro   ro      s         KE"""""D'^^^^^46%6666'-----/I IIII)*!****%L%%%%%%r$   ro   c                    t          |t          j                  rdS | j        dz  }|j        }|dk     rd}||k    rdS dS )NTr   F)
isinstancetlpointer_typer   r   )np_arraytl_dtypenp_dtype_bitwidthtl_dtype_bitwidths       r"   r   r      s[    (BO,, t )A- 3 1,,,u4r$   c                    | t           j        k    rt           j        S | t           j        k    rt           j        S | t           j        k    rt           j        S | t           j        k    rt           j        S | S r&   )	rJ   uint8int8uint16int16uint32int32rR   int64rI   s    r"   _get_signed_np_dtyper      sW    w	x	x	xLr$   c                
   t          | t          j                  rt          j        t          j                  S i t          j        t          j        t                    t          j        t          j        t          j                  t          j	        t          j        t          j	                  t          j
        t          j        t          j
                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  t          j        t          j        t          j                  i}t          | t          j                  rJt          | j        t          j                  rt          j        t          j                  S || j                 S ||          S r&   )r   r   r   rJ   r   rR   int1r'   float16float32float64r   r   r   r   r   r   r   bfloat16float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15
block_typer+   )tt_dtypenp_typess     r"   _get_np_dtyper      sX   (BO,, #x	"""
$

BHRZ(( 	
BHRZ(( 	
BHRZ((	
 	"'"" 	"(28$$ 	"(28$$ 		28BI&& 	"(28$$ 		28BI&& 	"(28$$ 		28BI&& 	RXbi((  	RXbh''!" 	**#$ 	rx))%& 	rx))'( 	**) H, (BM** -h)2?;; 	'8BI&&&+,,Hr$   c                   t          t          d|j                   }t          t          d|j                   }t          j        |                                 |          }||j        dz
  z	  dz  }|j        |j        z
  dz
  }|j        |j        z
  dz
  }	|d|j        z  dz
  z  }
|j        }|j        }||j        z	  d|z  dz
  z                      t          j                  }|dk    }t          j	        |          rt          j
        |t          j                  }t          |j                  D ]}|
|z	  dz  }|j        |z
  ||dk    <   |
dk    }d||         z
  ||<   ||z
  |||z  <   |
|         ||         z  d|j        z  dz
  z  |
|<   t          j        dt          j        ||z
  |z   d|	z  dz
                      }|                    |          }|                    |          }|j        |j        k    rl|
|j        |j        z
  z	  d|j        z  dz
  z  }|t          j        j        k    r!|
d|j        |j        z
  dz
  z  z  }||dk    z   }|                    |          }n3|
                    |          |j        |j        z
  z  d|j        z  dz
  z  }|dk    }t          j	        |          r||j        z	  d|z  dz
  z                      t          j                  }|dk    }||z  }t          j
        |t          j                  }d|z
  ||         |z
  z
  ||<   ||         ||         z	  d|j        ||         z
  z  z  ||<   ||j        dz
  z  ||j        z  z  |z  }|                    | j                  S )NuintrI   r   r   )getattrrJ   r   
frombuffertobytesfp_mantissa_widthexponent_biasrQ   r   any
zeros_likerM   maximumminimum_irROUNDING_MODERTNErP   rB   )inputinput_dtypeoutput_dtyperounding_modeinput_uint_dtypeoutput_unint_dtype	input_binsigninput_exponent_widthoutput_exponent_widthsignificand
bias_inputbias_outputexponentsubnormal_indexbit_posi	bit_indexzero_significand_indexexponent_outputsign_outputsignificand_outputcut_offnon_zero_exponent_indexshiftoutputs                             r"   _convert_floatr      s   r#J+*H#J#JKK %ML,K%M%MNNemmoo5EFFFI+81<=ED&9K<YY\]](;l>\\_``[%B BaGHK*J,Kk;;FZAZ^_@_`hhikiqrrH!mO	vo 6
 -	:::{455 	H 	HA%*d2I&1&Ca&GGIN##!,!1$%(@$@!=G+=U'/9:(3O(DP_H`(`+//14(6O$ jBJ:0E0SWX\qWquvVv$w$wxxO%,,-?@@O++011K%(GGG)k.KlNl.lm,00A57C-222!Q;+H<Ki+ilm+m%noG!3w{!C/667IJJ)001CDD+=@]]_#$(F#F!"KM &*O	vo L
 +"??QJ^E^bcDcdllmomuvv"*a-),CCirx888"#k/h6OR\6\!]o/A//RV[\kVl/l,053IIJ/L?+l=AB<99;=OPF>>%+&&&r$   c                *    t          j        |           S r&   )matherfxs    r"   _erfr   
  s    8A;;r$   c                F    t          |           t          |          z  dz	  S )N@   )r   )abs     r"   
_umulhi_64r     s     FFSVVO""r$   )otypesc                  $    e Zd Zed             ZdS )ExtraFunctionsc                j    t          j        |j                            | j        ||          |          S r&   )r   tensorbuildercreate_fp_to_fphandle)r   dst_tyfp_downcast_rounding	_semantics       r"   _convert_custom_typesz$ExtraFunctions._convert_custom_types  s-    y*::5<Qeffhnooor$   N)r5   r6   r7   staticmethodr   r=   r$   r"   r   r     s2        p p \p p pr$   r   c                  8   e Zd Zej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j	        ej        j	        iZ
ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        i
ZddZd Zd Zd Zd Zd Zd	 Zd
 Zd Z d Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/d Z0d Z1d Z2d Z3d Z4d  Z5d! Z6d" Z7d# Z8d$ Z9d% Z:d& Z;d' Z<d( Z=d) Z>d* Z?d+ Z@d, ZAd- ZBd. ZCd/ ZDd0 ZEd1 ZFd2 ZGd3 ZHd4 ZId5 ZJd6 ZKd7 ZLd8 ZMd9 ZNd: ZOd; ZPd< ZQd= ZRd> ZSd? ZTd@ ZUdA ZVdB ZWdC ZXdD ZYdE ZZdF Z[dG Z\dH Z]dI Z^dJ Z_dK Z`dL ZadM ZbdN ZcdO ZddP ZedQ ZfdR ZgdS ZhdT ZidU ZjdV ZkdW ZldX ZmdY ZndZ Zod[ Zpd\ Zqd] Zrd^ Zsd_ Ztd` Zuda Zvdb Zwdc Zxdd Zyde Zzdf Z{dg Z|eKZ}eKZ~dh Zdi Zdj Zdk Zdl Zdm Zdn Zdo Zdp Zdq Zdr Zds Zdt Zdu Zdv Zdw Zdx Zdy Zdz Zd{ Zd| Zd} Zd~ Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z	 dddZddZddZddZddZd ZdS )InterpreterBuilderreturnNonec                    d | _         t                      | _        i | _        t          j        | j        d<   d | j        d<   d S )Nconvert_custom_typesc                    dS )N)r   r   r   r=   )lhsTyperhsTypes     r"   <lambda>z-InterpreterBuilder.__init__.<locals>.<lambda>;  s    I r$   min_dot_size)rt   ro   optionscodegen_fnsr   r   r    s    r"   rG   zInterpreterBuilder.__init__6  sE    	)++3A3W/0+M+M(((r$   c                    || j         d         k     st          d          || j         d         k     st          d          || j         d         k     st          d          |||f| _        d S )Nr   zx >= grid_dim[0]r   zy >= grid_dim[1]r   zz >= grid_dim[2])grid_dimr   grid_idxr!   r   yzs       r"   set_grid_idxzInterpreterBuilder.set_grid_idx=  sv    4=###/0004=###/0004=###/000Aq	r$   c                    |||f| _         d S r&   )r   )r!   nxnynzs       r"   set_grid_dimzInterpreterBuilder.set_grid_dimF  s    Rr$   c                    t           j        S r&   )r   r   r    s    r"   get_half_tyzInterpreterBuilder.get_half_tyK  
    zr$   c                    t           j        S r&   )r   r   r    s    r"   get_bf16_tyzInterpreterBuilder.get_bf16_tyN  
    {r$   c                    t           j        S r&   )r   r   r    s    r"   get_float_tyzInterpreterBuilder.get_float_tyQ  r   r$   c                    t           j        S r&   )r   r   r    s    r"   get_double_tyz InterpreterBuilder.get_double_tyT  r   r$   c                    t           j        S r&   )r   r   r    s    r"   get_int1_tyzInterpreterBuilder.get_int1_tyW  	    wr$   c                    t           j        S r&   )r   r   r    s    r"   get_int8_tyzInterpreterBuilder.get_int8_tyZ  r
  r$   c                    t           j        S r&   )r   r   r    s    r"   get_uint8_tyzInterpreterBuilder.get_uint8_ty]  	    xr$   c                    t           j        S r&   )r   r   r    s    r"   get_int16_tyzInterpreterBuilder.get_int16_ty`  r  r$   c                    t           j        S r&   )r   r   r    s    r"   get_uint16_tyz InterpreterBuilder.get_uint16_tyc  
    yr$   c                    t           j        S r&   )r   r   r    s    r"   get_int32_tyzInterpreterBuilder.get_int32_tyf  r  r$   c                    t           j        S r&   )r   r   r    s    r"   get_uint32_tyz InterpreterBuilder.get_uint32_tyi  r  r$   c                    t           j        S r&   )r   r   r    s    r"   get_int64_tyzInterpreterBuilder.get_int64_tyl  r  r$   c                    t           j        S r&   )r   rR   r    s    r"   get_uint64_tyz InterpreterBuilder.get_uint64_tyo  r  r$   c                    t           j        S r&   )r   r   r    s    r"   get_fp8e4nv_tyz!InterpreterBuilder.get_fp8e4nv_tyr  
    }r$   c                    t           j        S r&   )r   r   r    s    r"   get_fp8e4b15_tyz"InterpreterBuilder.get_fp8e4b15_tyu  
    ~r$   c                    t           j        S r&   )r   r   r    s    r"   get_fp8e4b8_tyz!InterpreterBuilder.get_fp8e4b8_tyx  r  r$   c                    t           j        S r&   )r   r   r    s    r"   get_fp8e5_tyzInterpreterBuilder.get_fp8e5_ty{  r  r$   c                    t           j        S r&   )r   r   r    s    r"   get_fp8e5b16_tyz"InterpreterBuilder.get_fp8e5b16_ty~  r"  r$   c                ,    t          j        ||          S r&   )r   r   )r!   elt_ty
addr_spaces      r"   
get_ptr_tyzInterpreterBuilder.get_ptr_ty  s    vz222r$   c                ,    t          j        ||          S r&   )r   r   )r!   r   rB   s      r"   get_block_tyzInterpreterBuilder.get_block_ty  s    }UE***r$   c                t    t          t          j        |gt          j                  t          j                  S NrI   )r   rJ   arraybool_r   r   r!   r3   s     r"   get_int1zInterpreterBuilder.get_int1  s'    BHeWBH===rwGGGr$   c                t    t          t          j        |gt          j                  t          j                  S r0  )r   rJ   r1  r   r   r3  s     r"   	get_uint8zInterpreterBuilder.get_uint8  '    BHeWBH===rxHHHr$   c                t    t          t          j        |gt          j                  t          j                  S r0  )r   rJ   r1  r   r   r3  s     r"   get_int8zInterpreterBuilder.get_int8  s'    BHeWBG<<<bgFFFr$   c                t    t          t          j        |gt          j                  t          j                  S r0  )r   rJ   r1  r   r   r3  s     r"   
get_uint16zInterpreterBuilder.get_uint16  '    BHeWBI>>>	JJJr$   c                t    t          t          j        |gt          j                  t          j                  S r0  )r   rJ   r1  r   r   r3  s     r"   	get_int16zInterpreterBuilder.get_int16  r7  r$   c                t    t          t          j        |gt          j                  t          j                  S r0  )r   rJ   r1  r   r   r3  s     r"   
get_uint32zInterpreterBuilder.get_uint32  r<  r$   c                t    t          t          j        |gt          j                  t          j                  S r0  )r   rJ   r1  r   r   r3  s     r"   	get_int32zInterpreterBuilder.get_int32  r7  r$   c                t    t          t          j        |gt          j                  t          j                  S r0  )r   rJ   r1  rR   r   r3  s     r"   
get_uint64zInterpreterBuilder.get_uint64  r<  r$   c                t    t          t          j        |gt          j                  t          j                  S r0  )r   rJ   r1  r   r   r3  s     r"   	get_int64zInterpreterBuilder.get_int64  r7  r$   c                t    t          t          j        |gt          j                  t          j                  S r0  )r   rJ   r1  r   r   r3  s     r"   get_fp16zInterpreterBuilder.get_fp16  '    BHeWBJ???LLLr$   c                t    t          t          j        |gt          j                  t          j                  S r0  )r   rJ   r1  r   r   r3  s     r"   get_fp32zInterpreterBuilder.get_fp32  rI  r$   c                t    t          t          j        |gt          j                  t          j                  S r0  )r   rJ   r1  r   r   r3  s     r"   get_fp64zInterpreterBuilder.get_fp64  rI  r$   c                f    t          t          j        dgt          |                    |          S Nr   rI   )r   rJ   r1  r   )r!   types     r"   get_null_valuez!InterpreterBuilder.get_null_value  s+    BHaSd0C0CDDDdKKKr$   c                    | j         t          d          t          t          j        | j         |         gt          j                  t          j                  S )Nzgrid_idx is NonerI   )r   r   r   rJ   r1  r   r   r!   axiss     r"   create_get_program_idz(InterpreterBuilder.create_get_program_id  sF    = /000BHdmD&9%:"(KKKRXVVVr$   c                    t          t          j        | j        |         gt          j                  t
          j                  S r0  )r   rJ   r1  r   r   r   rS  s     r"   create_get_num_programsz*InterpreterBuilder.create_get_num_programs  s/    BHdmD&9%:"(KKKRXVVVr$   c                    t          t          j        |j        t                    t
          j                  }d }|                     ||||||          S r0  )r   rJ   	ones_liker   r'   r   r   create_masked_load)r!   ptr_0_1is_volatilemaskothers          r"   create_loadzInterpreterBuilder.create_load  sG    BL>>>HH&&sD%RMMMr$   c                    t          t          j        |j        t                    t
          j                  }|                     |||d d           S r0  )r   rJ   rY  r   r'   r   r   create_masked_store)r!   r[  valr\  r]  r_  s         r"   create_storezInterpreterBuilder.create_store  s@    BL>>>HH''S$dCCCr$   c                
   |                                 }t          |          }|)t          t          j        |j        |          |          }t          j        |j        |j        |j        |          }	t          |	|          S r0  )r-   r   r   rJ   r   r   _interpreterload)
r!   rW   r_  r`  cache_modifiereviction_policyr^  rU   dtype_nprets
             r"   rZ  z%InterpreterBuilder.create_masked_load  sq    &&(( **= ty!I!I!I8TTE	49ej(KKC***r$   c                L    t          j        |j        |j        |j                  S r&   )rg  storer   )r!   rW   r3   r_  ri  rj  s         r"   rc  z&InterpreterBuilder.create_masked_store  s    !$)UZCCCr$   c                   |j         j        }|j        }|t          j        k    r|t          j        k    s |t          j        k    r\|t          j        k    rLt          |j        ||d                               t          |                    }t          ||j                  S t          |j        
                    t          |                    |j                  S r&   )r   rS   r   r   r   r   r   viewr   r   rQ   )r!   srcdst_typesrc_element_typedst_element_typer   s         r"   	cast_implzInterpreterBuilder.cast_impl  s    9+#?++0@BJ0N0N
**/?2;/N/N!#(,<>NPTUUZZ[hiq[r[rssDho666h0G0G H H(/ZZZr$   c                .    |                      ||          S r&   ru  r!   rq  rr  s      r"   r   zInterpreterBuilder.<lambda>      $..h2O2O r$   c                .    |                      ||          S r&   rw  rx  s      r"   r   zInterpreterBuilder.<lambda>  ry  r$   c                .    |                      ||          S r&   rw  rx  s      r"   r   zInterpreterBuilder.<lambda>  ry  r$   c                .    |                      ||          S r&   rw  rx  s      r"   r   zInterpreterBuilder.<lambda>  ry  r$   c                .    |                      ||          S r&   rw  rx  s      r"   r   zInterpreterBuilder.<lambda>  s    sH0M0M r$   c                .    |                      ||          S r&   rw  rx  s      r"   r   zInterpreterBuilder.<lambda>  ry  r$   c                .    |                      ||          S r&   rw  )r!   rq  rr  	is_signeds       r"   r   zInterpreterBuilder.<lambda>  s    T^^CQY=Z=Z r$   c                    |j         j        }|j        }t          |j        |||                              t          |                    }t          ||j                  S r&   )r   rS   r   r   rp  r   r   )r!   rq  rr  r   rs  rt  r   s          r"   r   z"InterpreterBuilder.create_fp_to_fp  sV    9+#?ch(8:JMZZ__`mnv`w`wxxD(/222r$   c                v    t          |j                            t          |                    |j                  S r&   )r   r   rp  r   rS   rx  s      r"   create_bitcastz!InterpreterBuilder.create_bitcast  s*    CHMM-*A*ABBHOTTTr$   c                     ||j         |j                   }|j        j        }t          ||          s"|                    t          |                    }t          ||          S r&   r   r   rS   r   rQ   r   r   )r!   lhsrhsopr   r   s         r"   	binary_opzInterpreterBuilder.binary_op  s\    CHch''9#%fh77 	<]]=#:#:;;FFH---r$   c                D    |                      ||t          j                  S r&   r  rJ   addr!   r  r  s      r"   r   zInterpreterBuilder.<lambda>  s    S"&)I)I r$   c                D    |                      ||t          j                  S r&   r  rJ   multiplyr  s      r"   r   zInterpreterBuilder.<lambda>      S"+)N)N r$   c                D    |                      ||t          j                  S r&   r  rJ   divider  s      r"   r   zInterpreterBuilder.<lambda>  s    S"))L)L r$   c                D    |                      ||t          j                  S r&   r  rJ   fmodr  s      r"   r   zInterpreterBuilder.<lambda>      S"')J)J r$   c                D    |                      ||t          j                  S r&   r  rJ   subtractr  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>      sC(M(M r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  s    S")1T1T r$   c                .    |                      ||          S r&   create_idivr  s      r"   r   zInterpreterBuilder.<lambda>      )9)9#s)C)C r$   c                .    |                      ||          S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  s    sC(H(H r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   )r  rJ   
left_shiftr  s      r"   r   zInterpreterBuilder.<lambda>   s    sC(O(O r$   c                D    |                      ||t          j                  S r&   )r  rJ   right_shiftr  s      r"   r   zInterpreterBuilder.<lambda>  s    S".)Q)Q r$   c                D    |                      ||t          j                  S r&   r  rJ   r   r  s      r"   r   zInterpreterBuilder.<lambda>      $..c2:*N*N r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>      T^^Cbj-Q-Q r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>      DNN3RZ,P,P r$   c                D    |                      ||t          j                  S r&   r  rJ   r   r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>	  r  r$   c                D    |                      ||t          j                  S r&   r  rJ   
less_equalr  s      r"   r   zInterpreterBuilder.<lambda>
      DNN3R],S,S r$   c                D    |                      ||t          j                  S r&   r  rJ   lessr  s      r"   r   zInterpreterBuilder.<lambda>      DNN3RW,M,M r$   c                D    |                      ||t          j                  S r&   r  rJ   greater_equalr  s      r"   r   zInterpreterBuilder.<lambda>      DNN3REU,V,V r$   c                D    |                      ||t          j                  S r&   r  rJ   greaterr  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  rJ   equalr  s      r"   r   zInterpreterBuilder.<lambda>  s    4>>#sBH+M+M r$   c                D    |                      ||t          j                  S r&   r  rJ   	not_equalr  s      r"   r   zInterpreterBuilder.<lambda>  s    4>>#sBL+Q+Q r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>      DNN3RX,N,N r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>      DNN3R\,R,R r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    |                      ||t          j                  S r&   )r  rJ   bitwise_andr  s      r"   r   zInterpreterBuilder.<lambda>       sC(P(P r$   c                D    |                      ||t          j                  S r&   )r  rJ   bitwise_xorr  s      r"   r   zInterpreterBuilder.<lambda>!  r  r$   c                D    |                      ||t          j                  S r&   )r  rJ   
bitwise_orr  s      r"   r   zInterpreterBuilder.<lambda>"  s    t~~c3'N'N r$   c                    t          |j        t          j        |j        |j                  z
  |j        z  |j        j                  S r&   )r   r   rJ   r  r   rS   r  s      r"   r  zInterpreterBuilder.create_idiv&  s9     SX#((C(CCPRUR[Rbcccr$   c                $   t          |j        j                  }t          |j        j                  }|j                            |          |_        |j                            |          |_        |                     ||t
          j                  S r&   )r   r   r   rQ   r  rJ   r  )r!   r  r  	lhs_dtype	rhs_dtypes        r"   create_ashrzInterpreterBuilder.create_ashr,  sf    (88	(88	8??9--8??9--~~c3777r$   c                   |j         j        }|t          j        k    s|t          j        k    r2t          t          |j         |j                   |j        j                  S t          t          d|j	        dz  dz             }|j         
                    |          }|j         
                    |          }t          j        ||          |j	        dz  z	  }t          |
                    |          |j        j                  S )Nr   r   r   )r   r   rJ   r   rR   r   np_umulhi_u64rS   r   r   rQ   r  )r!   r  r  r   compute_dtypelhs_datarhs_dataret_datas           r"   create_umulhiz InterpreterBuilder.create_umulhi4  s    BH 2 2ch A A39CSTTT#B(Gu~/AA/E(G(GHHMx}55Hx}55H{8X665>A;MNH 6 6	8HIIIr$   c                     ||j         |j         |j                   }|j        j        }t          ||          s"|                    t          |                    }t          ||          S r&   r  )r!   r  r  r`  r  r   r   s          r"   
ternary_opzInterpreterBuilder.ternary_op@  s`    CHch
33;%%fh77 	<]]=#:#:;;FFH---r$   c                F    |                      |||t          j                  S r&   )r  rJ   clip)r!   arglohipropagate_nanss        r"   r   zInterpreterBuilder.<lambda>I  s    doocSUWY[][b>c>c r$   c                F    |                      |||t          j                  S r&   )r  rJ   where)r!   condr  r  s       r"   r   zInterpreterBuilder.<lambda>J  s    sCQSQY1Z1Z r$   c                `    t          |j        |j        z  |j        z   |j        j                  S r&   r   r   r   rS   r   s       r"   
create_fmazInterpreterBuilder.create_fmaL  s%    AFQVOaf4agnEEEr$   c                R    t           ||j                  |j        j                  S r&   r  )r!   r  r  s      r"   unary_opzInterpreterBuilder.unary_opP  s!    BBsxLL#)*:;;;r$   c                    |j         }|j        dz
  }t          t          d|j                   }|j                            |          }d|z  dz
  }||z                      t          |                    }t          ||j         j                  S )Nr   r   )	r   r   r   rJ   r   rp  r   r   rS   )r!   r  rU   mask_bitwidthnp_uint_dtyper   r_  rl  s           r"   create_fabszInterpreterBuilder.create_fabsS  s    9 3a7$H8+F$H$HIIx}}]++]"a'd{  x!8!899C!1222r$   c                B    |                      |t          j                  S r&   )r  rJ   cosr!   r  s     r"   r   zInterpreterBuilder.<lambda>]      4==bf#=#= r$   c                B    |                      |t          j                  S r&   )r  rJ   expr	  s     r"   r   zInterpreterBuilder.<lambda>^  r
  r$   c                B    |                      |t          j                  S r&   )r  rJ   exp2r	  s     r"   r   zInterpreterBuilder.<lambda>_      DMM#rw$?$? r$   c                B    |                      |t          j                  S r&   )r  rJ   absr	  s     r"   r   zInterpreterBuilder.<lambda>`  s    DMM#rv$>$> r$   c                B    |                      |t          j                  S r&   )r  rJ   floorr	  s     r"   r   zInterpreterBuilder.<lambda>a  s    T]]3%A%A r$   c                B    |                      |t          j                  S r&   )r  rJ   ceilr	  s     r"   r   zInterpreterBuilder.<lambda>b  r  r$   c                B    |                      |t          j                  S r&   )r  rJ   logr	  s     r"   r   zInterpreterBuilder.<lambda>c  r
  r$   c                B    |                      |t          j                  S r&   )r  rJ   log2r	  s     r"   r   zInterpreterBuilder.<lambda>d  r  r$   c                B    |                      |t          j                  S r&   r  rJ   sqrtr	  s     r"   r   zInterpreterBuilder.<lambda>e  s    DMM#rw,G,G r$   c                B    |                      |t          j                  S r&   r  r	  s     r"   r   zInterpreterBuilder.<lambda>f  r  r$   c                B    |                      |t          j                  S r&   )r  rJ   sinr	  s     r"   r   zInterpreterBuilder.<lambda>g  r
  r$   c                    |j         j        t          j        k    rt	          |j                   nt          |j                   }t          ||j        j                  S r&   )r   r   rJ   r   np_erf_fp32np_erf_fp64r   rS   )r!   r  rl  s      r"   
create_erfzInterpreterBuilder.create_erfi  sG    '*x~'C'Ck#(###UXU]I^I^C!1222r$   c                j    t          dt          j        |j                  z  |j        j                  S Nr   )r   rJ   r  r   r   rS   r	  s     r"   create_rsqrtzInterpreterBuilder.create_rsqrtm  s(    A 1 11393CDDDr$   c                f    t          |j                            |          |j        j                  S r&   )r   r   rP   r   rS   )r!   r  rB   allow_reorders       r"   r   zInterpreterBuilder.<lambda>q  s(    \#(JZJZ[`JaJacfclcs=t=t r$   c                f    t          t          j        |j        |          |j        j                  S r&   )r   rJ   	transposer   r   rS   )r!   r  perms      r"   create_transzInterpreterBuilder.create_transs  s%    BL488#):JKKKr$   c                <   |j         }|j         }|j        j        dk    r|j                                        s)|j        j        dk    r|j                                        r|t	          ||j        t
          j        d                               t          j                  }t	          ||j        t
          j        d                               t          j                  }t          t          j
        |||j         j                  |j         z   |j        j                  S )Nr   rI   )r   r   r   is_floatingr   r   r   rp  rJ   r   matmulrS   )r!   r   r   dinput_precisionmax_num_imprecise_acca_datab_datas           r"   
create_dotzInterpreterBuilder.create_dotv  s    G&!++0C0C0E0E+G&!++0C0C0E0E+#FAGRZFFKKBJWWF#FAGRZFFKKBJWWFBIffAFLIIIAFRTUT[Tbcccr$   c                t    t          t          j        ||t          j                  t          j                  S r0  )r   rJ   rO   r   r   )r!   ret_tystartstops       r"   create_make_rangez$InterpreterBuilder.create_make_range  s'    BIeTBBBBHMMMr$   c                   |8t          t          j        |j        t                    t
          j                  }t          j        |j        |j        j                  }t          j        |j        |j        t          j	        |j                            }t          j
        ||d|f|          d         }|dxx         t          j        |j                                                  z  cc<   t          |t
          j                  S )NrI   r   )binsrM   weights)r   rJ   rY  r   r'   r   r   r   r  r   	histogramlogical_notsumr   )r!   r   r<  r_  dummy_weightsr>  s         r"   create_histogramz#InterpreterBuilder.create_histogram  s    <TYd C C CRWMMD TYdioFFF x	49bmDI.F.FGGLDD	=YYYZ[\	!ty1155777Irx000r$   c                t    t          t          j        |j        |j        |          |j        j                  S )NrT  )r   rJ   take_along_axisr   r   rS   )r!   rq  indicesrT  s       r"   create_gatherz InterpreterBuilder.create_gather  s/    B.sxDQQQSVS\Scdddr$   c                    |                                 }|j        }t          d|dz            }t          |j        ||j                            t          j                  z  z   |j                  S )Nr   r   )	r-   r   maxr   r   rQ   rJ   rR   r   )r!   r[  offsetrU   element_bitwidthelement_bytewidths         r"   create_addptrz InterpreterBuilder.create_addptr  se    %%''#6#3q#899CH'86;;M;Mbi;X;X'XXZ]Zcdddr$   c                   |                     |          \  }}|                                }	t          |	          }
|d }n|t          j        j        k    r*t          t          j        |j	        |
          |	          }n_|t          j        j
        k    r8t          t          j        |j	        t          d          |
          |	          }nt          d|           |                     ||||||          S )NrI   nanzunsupported padding option )r\   r-   r   r   PADDING_OPTIONPAD_ZEROr   rJ   r   r   PAD_NAN	full_likefloatr   rZ  )r!   r[  rT   padding_optionri  rj  r^  rW   rX   rU   rk  r`  s               r"   create_tensor_pointer_loadz-InterpreterBuilder.create_tensor_pointer_load  s    ..~>>e&&(( **!EEs1::: ty!I!I!I8TTEEs1999 diuX!V!V!VX`aaEEK>KKLLL&&tUE>?\ghhhr$   c                d    |                     |          \  }}|                     |||||          S r&   r\   rc  )r!   r[  r3   rT   ri  rj  rW   rX   s           r"   create_tensor_pointer_storez.InterpreterBuilder.create_tensor_pointer_store  s5    ..~>>e''eUNO\\\r$   c                f    t          t          j        |j        |          |j        j                  S r&   )r   rJ   expand_dimsr   r   rS   )r!   r  rT  s      r"   create_expand_dimsz%InterpreterBuilder.create_expand_dims  s%    BN38T::CI<LMMMr$   c                f    t          t          j        |j        |          |j        j                  S r&   )r   rJ   rK   r   r   rS   )r!   r  rB   s      r"   create_broadcastz#InterpreterBuilder.create_broadcast  s%    BOCHe<<ci>NOOOr$   c                r    t          t          j        |j        |j        g          |j        j                  S r&   )r   rJ   concatenater   r   rS   r  s      r"   
create_catzInterpreterBuilder.create_cat  s*    BNCHch+?@@#)BRSSSr$   c                v    t          t          j        |j        |j        gd          |j        j                  S )Nrf   rD  )r   rJ   stackr   r   rS   r  s      r"   create_joinzInterpreterBuilder.create_join  s/    BHch%9CCCSYEUVVVr$   c                    t          |j        d         |j        j                  t          |j        d         |j        j                  fS )N).r   ).r   r  )r!   rd  s     r"   create_splitzInterpreterBuilder.create_split  s<    SXf-sy/?@@,sxX^O_adajaqBrBrssr$   c           	     r   |j         }t          |j        t          j                  rLt          t          j        ||j        d         t          |j                            |j        j
                  S t          t          j        ||j        t          |j                            |j        j
                  S rO  )rB   r   r   r   r   r   rJ   fullr   r   rS   )r!   r7  r  rB   s       r"   create_splatzInterpreterBuilder.create_splat  s    ci// 	lsx{-PSPYBZBZ [ [ []`]f]mnnnsx}SY?W?W X X XZ]ZcZjkkkr$   c           	         t          t          j        d|j        d         t	          |j                            |j        j                  S )Nr   r   rI   )r   rJ   rh  r   r   r   rS   r	  s     r"   create_unsplatz!InterpreterBuilder.create_unsplat  s:    BGE38A;mCI>V>VWWWY\YbYijjjr$   c                    || j         vrt          d|           | j         |         }t          t          j        |j        |j        |j        |          |j        j                  S )Nunsupported semantic )ir_sem_to_interpreter_semr   r   rg  
atomic_casr   r   rS   )r!   r[  cmprd  semscopes         r"   create_atomic_casz$InterpreterBuilder.create_atomic_cas  sc    d444:S::;;;,S1L3CHchRUVVX[XaXhiiir$   c           	         || j         vrt          d|           || j        vrt          d|           | j         |         }| j        |         }t          t	          j        ||j        |j        |j        |          |j        j                  S )Nzunsupported rmwOp rn  )	ir_rmw_op_to_interpreter_rmw_opr   ro  r   rg  
atomic_rmwr   r   rS   )r!   rmwOpr[  rd  r_  rr  rs  s          r"   create_atomic_rmwz$InterpreterBuilder.create_atomic_rmw  s    <<<9%99:::d444:S::;;;4U;,S1L3E38SXtyZ]^^`c`i`pqqqr$   c                     t          d          )Nz4extern_elementwise not supported in interpreter modeNotImplementedError)r!   libNamelibPathsymbolargListretTypeisPures          r"   create_extern_elementwisez,InterpreterBuilder.create_extern_elementwise  s    !"XYYYr$   c                     t          d          )Nz,inline_asm not supported in interpreter moder{  )r!   	inlineAsmconstraintsvaluesrP  r  packs          r"   create_inline_asmz$InterpreterBuilder.create_inline_asm  s    !"PQQQr$   c                   d| j         d          d| j         d          d| j         d          d}|r|d| z  }|rt          j        dd	 i
           |D ]}t          |d|j         z              |rt          j        d 
           d S d S )N(r   z, r   r   ) r(   c                    d| dS )N0x02xr=   r   s    r"   r   z1InterpreterBuilder.create_print.<locals>.<lambda>  s    LLLL r$   )	formatter)r   rJ   set_printoptionsprintr   )r!   prefixhexr  isSignedmsgr3   s          r"   create_printzInterpreterBuilder.create_print  s    
 N$-"MMdmA&6MM$-:JMMM 	 <v<<C 	K52H2H*IJJJJ 	* 	*E#(EJ((()))) 	0$//////	0 	0r$   c                     |sJ |             d S r&   r=   )r!   	conditionmessages      r"   create_assertz InterpreterBuilder.create_assert  s    &&W,&&&&&r$   c                    |s
J d            d S )NzAssume failedr=   )r!   r  s     r"   create_assumez InterpreterBuilder.create_assume  s    ))/)))))r$   c                    d S r&   r=   r    s    r"   create_barrierz!InterpreterBuilder.create_barrier  s    r$   c                B    d |D             }t          ||||||          S )Nc                6    g | ]}|                                 S r=   r0   .0rJ  s     r"   
<listcomp>z<InterpreterBuilder.create_make_block_ptr.<locals>.<listcomp>  s     <<<&v||~~<<<r$   )r?   )r!   rA   rB   rC   rD   rE   rF   new_offsetss           r"   create_make_block_ptrz(InterpreterBuilder.create_make_block_ptr  s.    <<G<<<!$w[RWXXXr$   c                t   t          |j                  t          |          k    rt          d          d |j        D             }t          |j        |j        |j        ||j        |j                  }t          t          |                    D ](}|j        |         xj
        ||         j
        z  c_
        )|S )Nz len(ptr.offsets) != len(offsets)c                6    g | ]}|                                 S r=   r  r  s     r"   r  z5InterpreterBuilder.create_advance.<locals>.<listcomp>  s     @@@&v||~~@@@r$   )rN   rD   r   r?   rA   rB   rC   rE   rF   rM   r   )r!   r[  rD   r  rl  r   s         r"   create_advancez!InterpreterBuilder.create_advance  s    s{s7||++?@@@@@CK@@@ 39ck;PSP_adajkks7||$$ 	3 	3AKN71:?2
r$   zerorA   r   rB   r_   rC   tensor_shaper`   r  r'   rc   rs   c                T    t          |||||          }|                                 |S r&   )r^   rk   )r!   rA   rB   rC   r  r  rc   descs           r"   create_make_tensor_descriptorz0InterpreterBuilder.create_make_tensor_descriptor  s)    eWlGLLr$   r  r^   rF  c                &   t          |t                    sJ |                    |          \  }}|                                }t	          |          }|j        }	|	t          j        j        k    r*t          t          j        |j        |          |          }
n_|	t          j        j        k    r8t          t          j        |j        t          d          |          |          }
nt!          d|	           |                     |||
||d          S )NrI   rO  zunsupported padding F)ri  rj  r^  )r   r^   r\   r-   r   rc   r   rP  rQ  r   rJ   r   r   rR  rS  rT  r   rZ  )r!   r  rF  ri  rj  rW   r_  rU   rk  rc   r`  s              r"   create_descriptor_loadz)InterpreterBuilder.create_descriptor_load  s	   $ 011111..w77
d&&(( **,c(111 ty!I!I!I8TTEE*222 diuX!V!V!VX`aaEE=G==>>>&&tT57FTY ' [ [ 	[r$   r3   c                d    |                     |          \  }}|                     |||d d           S r&   rX  )r!   r  r3   rF  rW   r_  s         r"   create_descriptor_storez*InterpreterBuilder.create_descriptor_store"  s5    ..w77
d''eT4FFFr$   	x_offsetsy_offsetc                   |j         j        j        }t          |          }t	          j        |j        j        d         |j        d         g|          }d }d }	t          |j                  D ]E\  }
}t          |t          j                  |g}|                     ||||	          j        ||
d d f<   Ft          ||          S )Nr   rf   rI   )rA   r   r+   r   rJ   zerosr   rB   rE   	enumerater   r   r   r  )r!   r  r  r  rP  r   np_dtyperesultri  rj  r   x_offsetrF  s                r"   create_descriptor_gatherz+InterpreterBuilder.create_descriptor_gather&  s    	* ''9>/2D4DR4HIQYZZZ$Y^44 	l 	lKAx#Hbh77BG66tWnVeffkF1aaa4LLFE***r$   c                    t          |j                  D ]X\  }}t          |j        |         |j                  }t          |t          j                  |g}|                     |||           Yd S r&   )r  r   r   r   r   r   r  )	r!   r  r3   r  r  r   r  slicerF  s	            r"   create_descriptor_scatterz,InterpreterBuilder.create_descriptor_scatter1  sq    $Y^44 	? 	?KAx A<<E#Hbh77BG((ug>>>>	? 	?r$   c                   t          |          }d|j        v r*t          t          j        dd|          |j                  S |t          j        k    r*t          t          j        dd|          |j                  S t          d|           )Nr   r   rf   rI   Tzunsupported type )r   namer   rJ   rh  rS   r2  	TypeError)r!   rP  np_types      r"   get_all_ones_valuez%InterpreterBuilder.get_all_ones_value8  s    %%GL  2W = = =t{KKK  4w ? ? ?MMM666777r$   Nr   r   )r  )rA   r   rB   r_   rC   r_   r  r`   r  r'   rc   rs   )r  r^   rF  r_   )r  r^   r3   r   rF  r_   )r  r^   r  r   r  r   )r  r^   r3   r   r  r   r  r   )r5   r6   r7   r   MEM_SEMANTICACQUIRErg  RELEASERELAXEDACQUIRE_RELEASEro  	ATOMIC_OPADDRMW_OPFADDMINUMINMAXUMAXANDORXORXCHGrv  rG   r   r   r   r  r  r  r	  r  r  r  r  r  r  r  r  r  r!  r$  r&  r(  r,  r.  r4  r6  r9  r;  r>  r@  rB  rD  rF  rH  rK  rM  rQ  rU  rW  ra  re  rZ  rc  ru  create_si_to_fpcreate_ui_to_fpcreate_fp_to_sicreate_fp_to_uicreate_fp_extcreate_fp_trunccreate_int_castr   r  r  create_faddcreate_fmulcreate_fdivcreate_fremcreate_fsub
create_mulcreate_precise_divfcreate_sdivcreate_udivcreate_sremcreate_urem
create_add
create_sub
create_shlcreate_lshrcreate_minsicreate_minuicreate_minimumfcreate_minnumfcreate_maxsicreate_maxuicreate_maximumfcreate_maxnumfcreate_icmpSLEcreate_icmpSLTcreate_icmpSGEcreate_icmpSGTcreate_icmpULEcreate_icmpULTcreate_icmpUGEcreate_icmpUGTcreate_icmpEQcreate_icmpNEcreate_fcmpOLTcreate_fcmpOGTcreate_fcmpOLEcreate_fcmpOGEcreate_fcmpOEQcreate_fcmpONEcreate_fcmpULTcreate_fcmpUGTcreate_fcmpULEcreate_fcmpUGEcreate_fcmpUEQcreate_fcmpUNE
create_and
create_xor	create_orcreate_int_to_ptrcreate_ptr_to_intr  r  r  r  create_clampfcreate_selectr   r  r  
create_cos
create_expcreate_exp2create_iabscreate_floorcreate_ceil
create_logcreate_log2create_precise_sqrtcreate_sqrt
create_sinr#  r&  create_reshaper,  r5  r:  rB  rG  rM  rV  rY  r\  r^  ra  rd  rf  ri  rl  rt  ry  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r=   r$   r"   r   r   !  s	        ,";"C ,";"C ,";"C(,*C*S	! 	<.2L/4<.2L/4<.2L/4<.2,-0<.2L/4'#N N N N" " "% % %
                                    3 3 3+ + +H H HI I IG G GK K KI I IK K KI I IK K KI I IM M MM M MM M ML L LW W W
W W WN N N
D D D+ + +D D D[ [ [ POOOOOOOOOOOMMMOOOZZO3 3 3U U U. . . JIKNNKLLKJJKNNKMMJTTCCKCCKJJKJJKHHJMMJOOJQQKNNLNNLQQOPPNNNLNNLQQOPPNSSNMMNVVNPPNSSNMMNVVNPPNMMMQQMMMNPPNSSNVVNNNNRRNMMNPPNSSNVVNNNNRRNPPJPPJNNI&&d d d8 8 8	J 	J 	J. . . dcMZZMF F F< < <3 3 3 >=J==J??K>>KAAL??K==J??KGG??K==J3 3 3E E E utNL L Ld d dN N N1 1 1$e e e
e e ei i i] ] ]N N NP P PT T TW W Wt t tl l lk k kj j jr r rZ Z ZR R R0 0 0' ' '* * *  Y Y Y
   `f    [ [ [ [ G G G G	+ 	+ 	+ 	+? ? ? ?8 8 8 8 8r$   r   c                  *    e Zd ZdZddZdd
ZddZdS )_LangPatchScopez2Tracks patched attributes so they can be restored.r   r   c                    g | _         d S r&   )_changesr    s    r"   rG   z_LangPatchScope.__init__H  s    :<r$   objobjectr  rs   r3   c                    t          ||t                    }| j                            |||f           t	          |||           d S r&   )r   _MISSINGr"  appendsetattr)r!   r#  r  r3   originals        r"   r4   z_LangPatchScope.set_attrK  sH    3h//c42333T5!!!!!r$   c                    | j         rQ| j                                         \  }}}|t          u rt          ||           nt	          |||           | j         Od S d S r&   )r"  popr&  delattrr(  )r!   r#  r  r)  s       r"   restorez_LangPatchScope.restoreP  sv    m 	-"&-"3"3"5"5Cx8##T""""T8,,, m 	- 	- 	- 	- 	-r$   Nr  )r#  r$  r  rs   r3   r$  r   r   )r5   r6   r7   r8   rG   r4   r-  r=   r$   r"   r   r   E  sV        <<= = = =" " " "
- - - - - -r$   r   rs  c                d    t          |          |dfd
}|                    | ||           d S )N)memberc                T     | |i d |                                 D             diS )Nc                &    i | ]\  }}|d k    ||S )r   r=   )r  kvs      r"   
<dictcomp>z1_patch_attr.<locals>.<lambda>.<locals>.<dictcomp>\  s;     AV AV AVEIQDEDTDT BCADTDTDTr$   r   items)r/  argskwargssemantics      r"   r   z_patch_attr.<locals>.<lambda>[  sd     :kAV AVMS\\^^AV AV AV:k :k bj:k :k :k r$   )r
   r4   )r#  r  r/  r   rs  
new_memberr9  s         @r"   _patch_attrr;  Y  sW    g&&H&, l l l l l lJ 
NN3j)))))r$   c                    t          j        |           D ]7\  }}t          j                            |          rt          | ||||           8d S r&   )inspect
getmembersr   core
is_builtinr;  )pkgr   rs  r  r/  s        r"   _patch_builtinrB  b  sY    *3// ; ;f7f%% 	;T67E:::; ;r$   c                    d d }|                     | dd            |                     | dfd           |                     | dd            |                     | d	d
            |                     | dt          |                     d S )Nc                R    | j         j        }|j        dk    rt          |          ndS )Nr   T)r   r   sizer'   )r!   r   s     r"   	_get_boolz%_patch_lang_tensor.<locals>._get_boolj  s(    { "Y!^^tDzzz5r$   c                   t          t          j        | j        j                  | j        j                  }| j                                        sJ t          | j        j	                  }|d         |d         c|d<   |d<   t          j                            | j        |          }t          j                            ||          S )Nrf   )r   rJ   r*  r   r   r   rP  is_blocklistrB   r   r?  r   r   )r!   r   rE   res_tys       r"   _get_transposez*_patch_lang_tensor.<locals>._get_transposep  s    bl4;+;<<dk>OPPy!!#####49?+++6r?KO(BR##DJ<<w~~ff---r$   	__index__c                4    t          | j        j                  S r&   )r   r   r   r    s    r"   r   z$_patch_lang_tensor.<locals>.<lambda>x  s    S9I5J5J r$   r)   c                     |           S r&   r=   )r!   rF  s    r"   r   z$_patch_lang_tensor.<locals>.<lambda>y  s    IIdOO r$   __repr__c                4    t          | j        j                  S r&   )reprr   r   r    s    r"   r   z$_patch_lang_tensor.<locals>.<lambda>z  s    D9I4J4J r$   __str__c                4    t          | j        j                  S r&   )rs   r   r   r    s    r"   r   z$_patch_lang_tensor.<locals>.<lambda>{  s    3t{7G3H3H r$   r   )r4   property)r   rs  rL  rF  s      @r"   _patch_lang_tensorrV  h  s    6 6 6. . . 
NN6;(J(JKKK	NN6:'C'C'C'CDDD	NN6:'J'JKKK	NN69&H&HIII	NN63 8 899999r$   c                  ,    e Zd Zd Zd Zd Zd Zd ZdS )ReduceScanOpInterfacec                "    || _         || _        d S r&   )rT  
combine_fn)r!   rT  rZ  s      r"   rG   zReduceScanOpInterface.__init__  s    	$r$   c                ^    |(|t          |          k    rt          d| d|           d S d S )Nzaxis z out of bounds for shape )rN   r   )r!   rB   rT  s      r"   
check_axisz ReduceScanOpInterface.check_axis  sE    E

 2 2KTKKEKKLLL  2 2r$   c                    |D ]`}t          |t          j        j                  st	          dt          |                     |                     |j        | j                   ad S )Nzinput must be a tensor, got )	r   r   r?  r   r   rP  r\  rB   rT  )r!   r   r  s      r"   check_tensorz"ReduceScanOpInterface.check_tensor  sj     	2 	2Cc27>22 M !KS		!K!KLLLOOCIty1111	2 	2r$   c                `   t          |          }t          |d          rD|j        r=|                    |          }t	          j        |t          |j                            }nt          j        |g|          }|}t          j	        
                    t          ||j                  |          S )NrB   rI   )r   r,   rB   rQ   r   r   rJ  rJ   r1  r?  r   r   rS   )r!   rl  r   r  ret_types        r"   	to_tensorzReduceScanOpInterface.to_tensor  s     ''3   	SY 	**X&&C}UDOO<<HH(C5111CHw~~l3==xHHHr$   c                   t          |t                    s|                     |f          d         S |                     |           |                     |          }t          |t
          t          f          rt          |          n|fS Nr   )r   tupleapplyr^  
apply_implrJ  )r!   r   rl  s      r"   re  zReduceScanOpInterface.apply  sw    %'' 	,::ui((++%   ooe$$'dE];;HuSzzz#Hr$   N)r5   r6   r7   rG   r\  r^  ra  re  r=   r$   r"   rX  rX    sj        % % %M M M2 2 2I I II I I I Ir$   rX  c                  >     e Zd Z fdZd Zd ZddZd Zd Z xZ	S )		ReduceOpsc                Z    t                                          ||           || _        d S r&   )superrG   	keep_dims)r!   rT  rZ  rk  	__class__s       r"   rG   zReduceOps.__init__  s(    z***"r$   c                    g }|D ]f}||                     |           d}|                     |                     |j        j                                        |j                             gt          |          |fS rc  )r'  ra  r   r   flattenr   rd  )r!   r   rT  rl  r   s        r"   unravelzReduceOps.unravel  s     	S 	SD

4    

4>>$+*:*B*B*D*DdjQQRRRRSzz4r$   c                      j         }                      j                   \  }g }g }d         j        j        j        }|d|         ||dz   d          z   }D ]Y}|                    |j        j                   |                    t          j        ||j        j        j                             Zt          |d         j
                  D ]}	t          j        |	|          d|         |dz   d          z   t           fdt          |          D                       }
|         dk    rMt          t          |                    D ]/}|
|         j        j                                        ||         <   0t           fdt          |          D                       }  j        j        g ||
R  }t%          |t                    s|fn|}t          t          |                    D ]\}t%          ||         t&          j        j                  r$||         j        j                                        n||         ||         <   ]g }t          |          D ]\  }	} j        rM|t          j        ||          }nKt          t          |                    D ]}t          j        |d          }n||                                }|                                         ||	         j                             |S )Nr   r   rI   c              3  l   K   | ].\  }}                     |         |         j                  V  /d S r&   ra  r   )r  iir0  r   input_indexr!   s      r"   	<genexpr>z+ReduceOps.generic_reduce.<locals>.<genexpr>  sB      ssTYTVXYq~uRy O Ossssssr$   c              3  l   K   | ].\  }}                     |         |         j                  V  /d S r&   rr  )r  oior   output_indexr!   s      r"   ru  z+ReduceOps.generic_reduce.<locals>.<genexpr>  sB      !w!wW\WY[\$..<%)/"R"R!w!w!w!w!w!wr$   )rT  ro  r   r   rB   r'  rJ   r  r   rM   rE  unravel_indexrd  r  rN   rg   rZ  fnr   r   r?  r   rk  r[  ra  )r!   r   original_axisrT  
input_dataoutput_datainput_shapeoutput_shaper  r   input_tuplej	acc_tuplecombine_fn_retrl  r   _rt  ry  s   ``               @@r"   generic_reducezReduceOps.generic_reduce  sE   	ll5$)44t
Aho*0"1T6*[-CC 	T 	TCcjo...rxCJO<QRRRSSSSz!})** 	H 	HA*1k::K&qv.TAXYY1GGLssssss]fgq]r]rsssssK4 A%%s;//00 U UA3>q>3H3M3R3R3T3TKN<00U "!w!w!w!w!w!w`iju`v`v!w!w!www	!3!3!MY!M!M!M!M6@QV6W6Wk^..]k	s;//00 H HAV`!!bgnW6 W6 4H9Q<3F3K3P3P3R3R3R;DQ<  N<00H  -- 	= 	=GAt~ 	# ,>$55DD"3{#3#344 7 7!~dA667 &yy{{JJt~~dE!HN;;<<<<
r$   Nc                   t          |t                    r|d         n|}d }d }|r;|                      ||j        j        | j        | j                  |j                  }|r@|                      ||j        j        | j        | j                  t          j	                  }||||fS ||S ||S t          d          )Nr   rT  keepdimsz-val_reduce_op and idx_reduce_op are both None)r   rd  ra  r   r   rT  rk  r   r   r   r   )r!   r   val_reduce_opidx_reduce_oprd  idxs         r"   min_maxzReduceOps.min_max  s    &ue44?a% 	y..u|/@ty[_[i!j!j!jlqlwxxC 	v..u|/@ty[_[i!j!j!jlnltuuC?s8O_J_JLMMMr$   c                    |                      t          j        |j        j        | j        | j                  |j                  S )Nr  )ra  rJ   r@  r   r   rT  rk  r   r!   r   s     r"   r@  zReduceOps.sum  s6    ~~bfU\%6TYQUQ_```bgbmnnnr$   c                   | j         t          j        j        k    r2|                     |d         t
          j        t
          j                  S | j         t          j        j        k    r2|                     |d         t
          j	        t
          j
                  S | j         t          j        j        k    r(|                     |d         t
          j        d           S | j         t          j        j        k    r(|                     |d         t
          j        d           S | j         t          j        j        k    r|                     |d                   S |                     |          S )Nr   )r  r  )rZ  r   standard_argmin_combine_tie_break_leftr  rJ   minargmin_argmax_combine_tie_break_leftrI  argmax_elementwise_maxnanmax_elementwise_minnanmin_sum_combiner@  r  r  s     r"   rf  zReduceOps.apply_impl  s   ?bkHHH<<abi<XXX_ JJJ<<abi<XXX_ <<<<<a	QU<VVV_ <<<<<a	QU<VVV_ 88888E!H%%% &&u---r$   r&   )
r5   r6   r7   rG   ro  r  r  r@  rf  __classcell__rl  s   @r"   rh  rh    s        # # # # #     ) ) )VN N N N$o o o. . . . . . .r$   rh  c                  6     e Zd Z fdZd Zd Zd Zd Z xZS )ScanOpsc                Z    t                                          ||           || _        d S r&   )rj  rG   reverse)r!   rT  rZ  r  rl  s       r"   rG   zScanOps.__init__  s(    z***r$   c                    |                      t          j        |j        j        | j                  |j                  gS NrD  rI   )ra  rJ   cumsumr   r   rT  r   r  s     r"   r  zScanOps.cumsum  s5    ry):KKKSXS^__``r$   c                    |                      t          j        |j        j        | j                  |j                  gS r  )ra  rJ   cumprodr   r   rT  r   r  s     r"   r  zScanOps.cumprod
  s5    rz%,*;$)LLLTYT_``aar$   c           	         g }g }d         j         j        j        }D ]Y}|                    |j         j                   |                    t	          j        ||j         j        j                             Zt          |d         j                  D ]}t	          j	        ||          t           fdt          |          D                       } j                 dk    rMt          t          |                    D ]/}||         j         j                                        ||         <   0t           fdt          t                              D                       t           fdt          |          D                       }	  j        j        g |	|R  }
t#          |
t                    s|
fn|
}	t          t          |                    D ]\}t#          |	|         t$          j        j                  r$|	|         j         j                                        n|	|         ||         <   ]g }t          |          D ]9\  }}|                                         ||         j                             :|S )Nr   rI   c              3  l   K   | ].\  }}                     |         |         j                  V  /d S r&   rr  )r  rs  r0  indexr   r!   s      r"   ru  z'ScanOps.generic_scan.<locals>.<genexpr>  s?      ffur1%%)/BBffffffr$   c              3  T   K   | ]"}|j         k    r|         d z
  n|         V  #dS )r   NrD  )r  r   r  r!   s     r"   ru  z'ScanOps.generic_scan.<locals>.<genexpr>  s>      "k"kTU1	>>58a<<uQx"k"k"k"k"k"kr$   c              3  l   K   | ].\  }}                     |         |         j                  V  /d S r&   rr  )r  rw  rx  r   
prev_indexr!   s      r"   ru  z'ScanOps.generic_scan.<locals>.<genexpr>  sB      !u!uUZUWYZ$..:b	"P"P!u!u!u!u!u!ur$   )r   r   rB   r'  rJ   r  r   rM   rE  rz  rd  r  rT  rN   rg   rZ  r{  r   r   r?  r   ra  )r!   r   r}  r~  rB   r  r   r   r  r  r  rl  r  r  s   ``          @@r"   generic_scanzScanOps.generic_scan  s   
a$* 	M 	MCcjo...rxSZ_5JKKKLLLLz!})** 	H 	HA$Q..EffffffPYZdPePefffffDTY1$$s;//00 G GA,0GN,?,D,D,F,FKN5))G #"k"k"k"k"kY^_bch_i_iYjYj"k"k"kkk
!!u!u!u!u!u!u^ghs^t^t!u!u!uuu	!3!3!FY!F!F!F!F6@QV6W6Wk^..]k	s;//00 H HAOY!!bgnP6 P6 -HIaL,?,D,I,I,K,K,K;DQ<  N5))H  -- 	= 	=GAtJJt~~dE!HN;;<<<<
r$   c           	     B   g }| j         rW|D ]S}|                    |                     t          j        |j        j        | j                  |j                             Tn|}| j	        t          j        j        k    r|                     |d                   }nK| j	        t          j        j        k    r|                     |d                   }n|                     |          }| j         r4|D ]1}t          j        |j        j        | j                  |j        _        2|S )NrD  r   )r  r'  ra  rJ   flipr   r   rT  r   rZ  r   r  r  r  _prod_combiner  r  )r!   r   	new_inputr  rl  s        r"   rf  zScanOps.apply_impl+  s   	< 	 f f  
di0X0X0XZ]Zc!d!deeeef I?bk666++il++CC_ 999,,y|,,CC ##I..C< 	K K K"$'#*/	"J"J"J

r$   )	r5   r6   r7   rG   r  r  r  rf  r  r  s   @r"   r  r    sz            a a ab b b  <      r$   r  c                
   dd}dd}|                      t          d|           |                      t          d|           |                      t          j        d|           |                      t          j        d|           d S )NFc                J    t          |||                              |           S r&   )rh  re  )r   rT  rZ  rk  r8  s        r"   _new_reducez'_patch_reduce_scan.<locals>._new_reduceC  s"    z955;;EBBBr$   c                J    t          |||                              |           S r&   )r  re  )r   rT  rZ  r  r8  s        r"   	_new_scanz%_patch_reduce_scan.<locals>._new_scanF  s"    tZ1177>>>r$   reduceassociative_scan)F)r4   r   r?  )rs  r  r  s      r"   _patch_reduce_scanr  ?  s    C C C C? ? ? ? 
NN2x---	NN2)9555	NN27Hk222	NN27.	:::::r$   c                   d }dd}dd}d }|                     | d|           |                     | d|           |                     | d|           |                     | d	t                     |                     | j        d
|           |                     | dt          |d                     |                     | dt          |d                     |                     | dt          |d                     t	          |           d S )Nc                F   | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         d	k    r|	                                S | j         d
k    r|
                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S | j         dk    r|                                S t%          d|  d          )Nvoidr   r   r   r   r   r   r   r   rR   ru   rw   ry   fp16bf16fp32fp64zfail to convert z to ir type)r  get_void_tyr	  r  r  r  r  r  r  r  r  r&  r  r!  r   r  r  r  r   )r!   r   s     r"   
_new_to_irz$_patch_lang_core.<locals>._new_to_irQ  s+   9&&(((Y&  &&(((Y&  &&(((Y'!!'')))Y'!!'')))Y(""((***Y'!!'')))Y(""((***Y'!!'')))Y(""((***Y'!!'')))Y)##))+++Y*$$**,,,Y&  &&(((Y&  &&(((Y&  '')))Y&  ((***=D===>>>r$   c                B    |d}|d| }}n| |}}t          |||          S )Nr   r   )rM   )arg1arg2stepr8  r8  ends         r"   
_new_rangez$_patch_lang_core.<locals>._new_rangey  s7    <D<D3EEt3EUC&&&r$    c                    | s
J |            d S r&   r=   )r  r  s     r"   _new_static_assertz,_patch_lang_core.<locals>._new_static_assert  s    Sr$   c                X   t          | t          j                  s| S t          |t          t          f          s|gn|}d |D             }t          |          t          dt          | j                            k    rt          d|           | j	        
                    ||           | S )Nc                T    g | ]%}t          |t          j                  r|j        n|&S r=   )r   r   	constexprr3   r  r3  s     r"   r  z7_patch_lang_core.<locals>._set_attr.<locals>.<listcomp>  s/    PPPAZ2<88?!''aPPPr$   r   z$len(values) != len(input.shape) for )r   r   r   rJ  rd  rN   rI  rB   r   r   r4   )r   r  r  s      r"   	_set_attrz#_patch_lang_core.<locals>._set_attr  s    %++ 	L!+FT5M!B!BN&PPPPPv;;#aU[!1!12222JDJJKKKdF+++r$   rM   static_rangestatic_assertstatic_printto_irmultiple_ofztt.divisibilityr  max_contiguousztt.contiguitymax_constancyztt.constancy)NN)r  )r4   r  r   r   r  )langrs  r  r  r  r  s         r"   _patch_lang_corer  O  s;   $? $? $?P' ' ' '   
 
 
 
NN4*---	NN4444	NN4*<===	NN4///	NN4:w
333	NN4	@Q(R(R(RSSS	NN4)79?+S+S+STTT	NN4').*Q*Q*QRRRur$   c                   t                      }d | j                                        D             }t          |          dk    s
J d            |D ]~}t	          |t
          |           t	          |j        t
          |           |t          k    rt	          |j        t
          |           t          |j        |           t          ||           t	          t          j        j        t
          |           |S )Nc                j    g | ]0\  }}t          j        |          |t          t          j        fv .|1S r=   )r=  ismoduler   r?  )r  r  r3   s      r"   r  z_patch_lang.<locals>.<listcomp>  sA    pppxq%W=Me=T=TpY^cegignboYoYoUYoYoYor$   r   z:triton.language must be visible from within jit'd function)r   __globals__r6  rN   rB  interpreter_builderr   r   r   rV  r  r?  tensor_descriptor_base)r{  rs  langsr  s       r"   _patch_langr    s    Epp2>#7#7#9#9pppEu::???X??? & &t0%888t{$7???2::49&95AAA4;...u%%%%2713FNNNLr$   c                v    t          | d          r t          |           | n t          |           |          S )N_fields)r,   rP  )r  contentss     r"   _tuple_creater    s;     $+3	#:#:S9499h	S		(@S@SSr$   c                V   t          | t                    rt          j        t          j        j                            |           d           }t          j	        }d| cxk    rdk     rn nt          j	        }nid| cxk    rdk     rn nt          j
        }nLd| cxk    rdk     rn nt          j        }n/d| cxk    rdk     rn nt          j        }nt          d|            t          t          j        | g|          |          }t          j        ||          S t#          | d	          rt          j        t          j        j                            |           d           }t          t          j        |                                 gt          j                  |          }t          j        ||          S t          | t&                    r#t)          | t+          t,          |                     S t          | t.                    rd
 | j        D             }| j        d         dk    sJ t          j        d          |d<   t5          t7                                }|                    t-          | j                  d | j        D             |d | j        D             | j                   S | S )Ni   l        l        l         l            l            zUnsupported integer value rI   data_ptrc                ,    g | ]}t          |          S r=   _implicit_cvtr  ss     r"   r  z!_implicit_cvt.<locals>.<listcomp>  s     999=##999r$   rf   r   c                ,    g | ]}t          |          S r=   r  r  s     r"   r  z!_implicit_cvt.<locals>.<listcomp>  s     5Z5Z5Z1mA6F6F5Z5Z5Zr$   c                6    g | ]}t          j        |          S r=   )r   r  )r  r   s     r"   r  z!_implicit_cvt.<locals>.<listcomp>  s6     <V <V <V@A =?LOO <V <V <Vr$   )rA   rB   rC   rE   rU  )!r   r   r   	str_to_tytritonruntimejitmangle_typerJ   r   r   r   rR   r   r   r1  r   r,   r  rd  r  mapr  r   rC   r  r
   r   make_tensor_descriptorrA   rB   rE   rc   )r  tyr   r   rC   r9  s         r"   r  r    s   #s %\&.,88==tDDS    5     HEEc!!!!E!!!!!IEEs""""U"""""HEEc!!!!E!!!!!IEE?#??@@@bhuE:::B??y$$$sJ s\&.,88==tDDbh'7ryIII2NNy$$$	C		 
sS#mS"9"9:::	C)	*	* s99S[999{2!####l1oo!"4"6"677..M#(4K4K5Z5ZPSPY5Z5Z5Zdk<V <VEH_<V <V <Vfifq / s s 	s Jr$   c                \    t          | t          j        j        j                  r| j        S | S r&   )r   r  r  r  TensorWrapperrA   )ts    r"   _unwrap_tensorr    s(    !V^'566 vHr$   c                    t          |t          j        j        j                  r*t          j        j                            | |j                  S | S r&   )r   r  r  r  r  r   )r  original_tensors     r"   _rewrap_tensorr    s@    /6>#5#CDD J~!//?3HIIIHr$   c                  *    e Zd Zg fdZd Zd Zd ZdS )GridExecutorc                    ddl m || _        || _        || _        || _        fd|j                                        D             fd|D             | _        d S )Nr   )_normalize_tyc                .    i | ]\  }}| |          S r=   r=   )r  r  r  r  s      r"   r4  z)GridExecutor.__init__.<locals>.<dictcomp>  s)    ^^^xtR4r!2!2^^^r$   c                F    g | ]}                     |          d k    |S )r  )get)r  r  r9   s     r"   r  z)GridExecutor.__init__.<locals>.<listcomp>  s2    bbbD9L9LT9R9RVa9a9a49a9a9ar$   )	r  r  r{  	arg_namesgridpre_run_hooksr9   r6  
constexprs)r!   r{  r  r  r  r9   r  s        @@r"   rG   zGridExecutor.__init__  sz    &&&&&&"	*^^^^2CUC[C[C]C]^^^bbbbIbbbr$   c                    i fdfd|D             }i }|                                 D ]\  }} |          ||<   ||fS )Nc                R   t          | t                    rt          | t          |                     S t          | t                    r5t	           | j                  | j        | j        | j        | j	                  S t          | d          s| S t          |           }|                                                                vr=|                                }|                                |                                <   |                                                                         }|                    dd          }|                    ||                                |                                |                                           t)          ||           }|S )Nr  r   cpu)device)r  )r   rd  r  r  r   rA   rB   rC   rE   rc   r,   r  untyped_storager  r  	new_emptyset_storage_offsetrE  ri   r  )r  unwrapped_argstoragecpu_arg_to_cpustoragess       r"   r  z,GridExecutor._init_args_hst.<locals>._to_cpu  s}   #u%% $S#gs*;*;<<<C!122 	'GCH%%IKOK   S*-- 
*3//M,,..7799II'7799/6{{}}))++,}<<>>GGIIJG#--a->>GLL-">">"@"@-BTBTBVBVXeXlXlXnXnooo$WcBBBGNr$   c                &    g | ]} |          S r=   r=   )r  r  r  s     r"   r  z/GridExecutor._init_args_hst.<locals>.<listcomp>  s!    555SGGCLL555r$   r5  )	r!   args_devr8  args_hst
kwargs_hstr2   r3   r  r  s	          @@r"   _init_args_hstzGridExecutor._init_args_hst  s    	 	 	 	 	 	2 6555H555 
 ,,.. 	- 	-JC%gennJsOO##r$   c                  
 i 
fd
t          ||          D ]\  }} 
||           |                                D ]\  }}||         }	 
||	                                           D ]\  }}|                    |           d S )Nc                   t          | d          rot          |           t          |          }} |                                 |                                f|                                                                 <   d S t	          | t
                    r$t          | |          D ]\  } } | |           d S t	          | t                    r | j        |j                   d S d S )Nr  )	r,   r  r  r  r   rd  zipr   rA   )arg_devarg_hst	_from_cpur  s     r"   r!  z1GridExecutor._restore_args_dev.<locals>._from_cpu  s    w
++ 6#1'#:#:N7<S<SBIBYBYB[B[]d]t]t]v]vAw0022;;==>>>GU++ 6*-gw*?*? 0 0&WgIgw////0 0G%566 6	',555556 6r$   )r  r6  r  copy_)r!   r  r  r8  r  r  r   r2   	kwarg_dev	kwarg_hstr!  r  s             @@r"   _restore_args_devzGridExecutor._restore_args_dev  s    		6 		6 		6 		6 		6 		6 !$Hh 7 7 	( 	(GWIgw'''' %llnn 	, 	,NC"3IIi++++"*//"3"3 	# 	#WgMM'""""	# 	#r$   c                V    t          j         j                  fd|                                D             }                     ||          \  }} j        D ]
} ||i | t           j                  }	 t          j         j        g|R i |} fd|                                D             }t           j	                  r 	                    |          n j	        }t          |          dk    s
J d            |ddt          |          z
  z  z   }t          j        |  	 t          |d                   D ][}	t          |d                   D ]C}
t          |d                   D ]+}t                              |	|
|             j        d	i | ,D\nF# t          $ r9}t           j        j        j        r t)          t+          |                    |d }~ww xY w	 |                                 n# |                                 w xY w                     ||||           d S )
Nc                .    i | ]\  }}|j         v ||S r=   )r7  )r  r2  r3  argspecs      r"   r4  z)GridExecutor.__call__.<locals>.<dictcomp>6  s+    GGG41aQ',5F5F!Q5F5F5Fr$   c                L    i | ] \  }}||j         v r|nt          |          !S r=   )r	  r  )r  r  r  r!   s      r"   r4  z)GridExecutor.__call__.<locals>.<dictcomp>B  s:    oooU^UY[^D!8!8##mC>P>Pooor$      z#grid must have at most 3 dimensionsrk  r   r   r   r=   )r=  getfullargspecr{  r6  r  r  r  getcallargscallabler  rN   r  r   rM   r   	Exceptionr  knobscompilationfront_end_debuggingr   rR  r-  r%  )r!   r  r8  r  r  hookpatch_scoper7  r  r   r   r   er(  s   `            @r"   __call__zGridExecutor.__call__1  sj    (11GGGG6<<>>GGG#228VDD*& 	* 	*DD()j))))!$'**	" &twHHHHZHHDoooobfblblbnbnoooD&.ty&9&9H499T???tyDt99>>>#H>>>%1s4yy=11D,d33	7tAw , ,A"47^^ , ,!&tAw , ,A/<<Q1EEE#DGOOdOOOO,,,
  7 7 7<+? &tAww//Q67, !!!!K!!!!x6:FFFFFs2   <B-G8 *A1F G8 
G&4GGG8 8HN)r5   r6   r7   rG   r  r%  r5  r=   r$   r"   r   r     sb        :< c c c c"$ "$ "$H# # #2$G $G $G $G $Gr$   r   c                      e Zd Zd ZdS )ASTTransformerc           	        g }|j         D ]}||                     |          gz  }t          |          dk    rt          d          t	          j        t	          j        t	          j        dt	          j                              dt	          j                              |j	        t	          j
        d          gg 	          |_	        |S )
Nr   z&Multiple assignments are not supportedinterpreter_semantic)idctxra  )r3   r   r;  F)r3   )funcr7  keywords)targetsvisitrN   r   astCall	AttributeNameLoadr3   Constant)r!   nodenamestargets       r"   visit_AssignzASTTransformer.visit_AssignZ  s    l 	* 	*Fdjj(())EEu::>>EFFF XSX1GSXZZ%X%X%X_j#&8::/ / /6:j#,UZB[B[B[5\gik k k
 r$   N)r5   r6   r7   rI  r=   r$   r"   r7  r7  X  s#            r$   r7  c                  L    e Zd Z e            Zd Zd Zd Zd Zd Z	d Z
d ZdS )	FunctionRewriterc                >    || _         || _        d| _        d| _        d S )Nr  r   )r{  r8  filenamedef_file_lineno)r!   r{  r8  s      r"   rG   zFunctionRewriter.__init__k  s%    $%r$   c                `   	 t          j        | j                  \  }}n# t          $ r
 | j        cY S w xY w|                                 \  | _        | _        |                     |          | _        | 	                    |          }| 
                    |          }|                     |          S r&   )r=  getsourcelinesr{  r.  _get_jit_fn_file_linerM  rN  	_find_def
def_lineno_prepare_source_transform_ast_compile_and_exec)r!   linesr  rq  transformed_asts        r"   rewrite_astzFunctionRewriter.rewrite_astr  s    	-dg66HE11 	 	 	7NNN	 /3.H.H.J.J+t+..//""5))--c22%%o666s    33c                D    ddl m}m}  | || j                            S )Nr   )get_jit_fn_file_lineJITFunction)r  r[  r\  r{  )r!   r[  r\  s      r"   rQ  z&FunctionRewriter._get_jit_fn_file_line  s7    ::::::::##KK$8$8999r$   c                    d}t          |          D ]1\  }}|                                                    d          r|dz   }2|S )Nr   zdef r   )r  strip
startswith)r!   rW  rS  r   lines        r"   rR  zFunctionRewriter._find_def  sP    
 '' 	# 	#GAtzz||&&v.. #U
r$   c                x    || j         dz
  d          }d                    |          }t          j        |          S )Nr   r  )rS  jointextwrapdedent)r!   rW  rq  s      r"   rT  z FunctionRewriter._prepare_source  s8    do)**+ggenns###r$   c                    t          j        |          }| j                            |          }t          j        |           | j        dz
  }t          j        ||           |S r%  )r@  parseast_transformerr?  fix_missing_locationsrN  increment_lineno)r!   rq  
parsed_astrX  
inc_linenos        r"   rU  zFunctionRewriter._transform_ast  s^     Ys^^
.44Z@@!/222)A-
_j999r$   c                    t          || j        d          }i | j        }| j        j        }t                                                      D ]\  }}||vr|||<   t          |||           || j        j                 S )Nexec)rM  mode)	compilerM  r8  r{  r  globalsr6  rm  r5   )r!   rX  compiled_codelocal_namespace
fn_globalsr2   r3   s          r"   rV  z"FunctionRewriter._compile_and_exec  s    $-fUUU)T[/W(
!))//++ 	( 	(JC*$$"'
3]J888tw/00r$   N)r5   r6   r7   r7  rg  rG   rY  rQ  rR  rT  rU  rV  r=   r$   r"   rK  rK  h  s        $n&&O& & &7 7 7(: : :  $ $ $
	 	 	1 1 1 1 1r$   rK  c                  T    e Zd ZU i Zded<   ddZd Zd Zd Ze	d	             Z d
 Z
dS )InterpretedFunctionzDict[Callable, Callable]rewritten_fnr   r   c                    || _         t          |fi || _        || _        g | _        t          j        |          }d |j                                        D             | _	        d S )Nc                    g | ]	}|j         
S r=   r  r  s     r"   r  z0InterpretedFunction.__init__.<locals>.<listcomp>  s    HHHQ!&HHHr$   )
r{  rK  rewriterr8  r  r=  	signature
parametersr  r  )r!   r{  r8  rz  s       r"   rG   zInterpretedFunction.__init__  sg    (66v66%b))	HH)*>*E*E*G*GHHHr$   c               v    |rd S |                                  } t          || j        || j                  |i |S r&   )rewriter   r  r  )r!   r  warmupr7  r8  r{  s         r"   runzInterpretedFunction.run  sE     	F\\^^I|Bd6HII4ZSYZZZr$   c                \    t          |          sJ | j                            |           d S r&   )r-  r  r'  )r!   r2  s     r"   add_pre_run_hookz$InterpretedFunction.add_pre_run_hook  s1    ~~!!$'''''r$   c                    | j         | j        vr&| j                                        | j        | j         <   | j        | j                  S r&   )r{  rv  ry  rY  r    s    r"   r}  zInterpretedFunction.rewrite  s?    7$+++)-)B)B)D)DDdg& ))r$   c                    | j         j        S r&   )r{  r5   r    s    r"   r5   zInterpretedFunction.__name__  s    wr$   c                    t          | j                   |                                 }	  ||i |S # t          $ r"}t	          t          |                    |d }~ww xY wr&   )r  r{  r}  r.  r   rR  )r!   r7  r8  r{  r4  s        r"   r5  zInterpretedFunction.__call__  sn    DG\\^^	32t&v&&& 	3 	3 	3"477++2	3s   2 
AAANr  )r5   r6   r7   rv  r9   rG   r  r  r}  rU  r5  r=   r$   r"   ru  ru    s         -/L////I I I I[ [ [( ( (* * *
     X 3 3 3 3 3r$   ru  )rs  r   )N
__future__r   r@  rc  r=  typingr   r   r   r   r   r   numpyrJ   r  triton.languagelanguager   r:   r	   triton.language.semanticr
   triton.runtime.jitr   triton.tools.tensor_descriptorr   errorsr   	functoolsr   _C.libtritonr   rg  r   r   r   r   r?   r^   ro   r   r   r   r   r   r   	vectorizer   r!  r   r"  rR   r  r   r   r$  r&  r   r;  rB  rV  rX  rh  r  r  r  r  r  r  r  r9  r  r  r   NodeTransformerr7  rK  ru  r=   r$   r"   <module>r     sz   " " " " " " 



   7 7 7 7 7 7 7 7 7 7 7 7 7 7                 ! ! ! ! ! ! 3 3 3 3 3 3 . . . . . . ; ; ; ; ; ; $ $ $ $ $ $       6 6 6 6 6 6 $ $ $ $ $ $GCLL        @       4( ( ( ( ( ( ( (V $
& 
& 
& 
& 
& 
& 
& 
&   	 	 	  @=' =' ='@  
# # # bl4555bl4555Z<<<p p p p p p p p^8 ^8 ^8 ^8 ^8 ^8 ^8 ^8B 688- - - - - - - -(* * * *; ; ; ;: : : :.I I I I I I I ID]. ]. ]. ]. ].% ]. ]. ].@; ; ; ; ;# ; ; ;|; ; ; ; K K K K\  T T T  D )(** %~&9::     mG mG mG mG mG mG mG mG`    S(    B1 B1 B1 B1 B1 B1 B1 B1J'3 '3 '3 '3 '3/!, '3 '3 '3 '3 '3r$   