
    Iri%\                     ,   d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlZd dlmZmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd d	lmZ d
efdZde de
j!        fdZ" ej#                    dde fd            Z$ ej#                    de fd            Z%de fdZ& ej#                    de fd            Z' ej#        d          d             Z(de fdZ) ed           G d d                      Z* G d de          Z+dS )    )BaseBackend	GPUTargetLanguage)irpassesllvmnvidia)knobs)
PTXASError)	dataclassN)AnyDictTupleOptional)
ModuleType)Pathtargetc                 L    dt           t          t          t          f         fd}|S )Nreturnc                 f    | j         j        }|j         j        }||k    s
J d            |dk    rdS dS )Nz%lhs and rhs bitwidth must be the same   )   r       )r   r      )scalarprimitive_bitwidth)lhs_typerhs_typelhs_bitwidthrhs_bitwidths       w/var/www/html/bestrading.cuttalo.com/models/btc_v9/venv/lib/python3.11/site-packages/triton/backends/nvidia/compiler.pycheck_dot_compatibilityz-min_dot_size.<locals>.check_dot_compatibility   sE    99|+++-T+++1::    )r   int)r   r"   s     r!   min_dot_sizer%      s0    uS#s]7K     #"r#   archr   c                 R    | dk    rt           j        j        nt           j        j        S )Nd   )r
   r	   ptxas_blackwellptxas)r&   s    r!   	get_ptxasr+   "   s    +/3;;5<''EL<NNr#   P   c                     t           j        j        }||S t          j        t          |           j        dg                              d          }|S )Nz	--versionutf-8)r
   r	   mock_ptx_version
subprocesscheck_outputr+   pathdecode)r&   mock_verversions      r!   get_ptxas_versionr6   &   sI    |,H%y';[&IJJQQRYZZGNr#   c                 4   t          | t                    sJ t          t          |                     d                    \  }}|dk    r|dk     rd|z   S d|z   dz
  S |dk    rd|z   S |dk    rd	|z   S |d
k    rd}||d
z
  dz  z   |z   S t          d| z             )zK
    Get the highest PTX version supported by the current CUDA driver.
    .      r,   r      F   
   ?      Z   z?Triton only support CUDA 10.0 or higher, but got CUDA version: )
isinstancestrmapr$   splitRuntimeError)cuda_versionmajorminorbase_ptxs       r!   ptx_get_versionrJ   /   s    
 lC(((((sL..s3344LE5{{199::>!{{Ez{{Ez{{52:++e33
X[gg
h
hhr#   c                 ^    | j         }|#t          |          j        }t          |          }|S N)ptx_versionr+   r5   rJ   )optionsr&   rM   rF   s       r!   get_ptx_version_from_optionsrO   G   s1    %K .%l33r#   c                 P    t          | |          }t          d|          }d| }|S )NV   z+ptx)rO   min)rN   r&   rM   llvm_ptx_versionfeaturess        r!   get_featuresrU   O   s6    .w==K 2{++(&((HOr#   c                     t          | d          5 }t          j        |                                                                          cd d d            S # 1 swxY w Y   d S )Nrb)openhashlibsha256read	hexdigest)r2   fs     r!   	file_hashr^   ]   s    	dD		 4Q~affhh''11334 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4s   8AAA
capabilityc                 $    | dk    rdnd}d|  | S )Nr@   a sm_ )r_   suffixs     r!   sm_arch_from_capabilityrf   c   s(    "$$SS"F%%V%%%r#   T)frozenc                      e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   d	Z	e
e         ed
<   d	Zeed<   ej        j        Ze
e         ed<   d	Ze
e         ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZee         ed<   dZee         ed<   dZeed<   dZee         ed<   d	Zeed<   d	Zeed<   dZeed<   dZ eed <   dZ!eed!<   d	Z"eed"<   d#Z#eed$<   d% Z$d& Z%d	S )'CUDAOptions   	num_warpsr   num_ctas   
num_stagesr   	warp_sizeNmaxnregrM   ptx_optionsir_overrideTenable_fp_fusionenable_reflect_ftzFlaunch_cooperative_grid
launch_pdl)fp8e5fp8e4b15supported_fp8_dtypesrd   !deprecated_fp8_dot_operand_dtypestf32default_dot_input_precision)r{   tf32x3ieeebf16x3bf16x6allowed_dot_input_precisionsmax_num_imprecise_acc_defaultextern_libsdebugcudabackend_namesanitize_overflowr&   rb   instrumentation_modec                    t          t                    j        dz  }| j        i nt	          | j                  }|                    dd           s&t          j        j        pt          |dz            |d<   t                              | dt          |                                                     | j        dk    r| j        | j        dz
  z  dk    s
J d            d S )Nlib	libdevicezlibdevice.10.bcr   r   r   znum_warps must be a power of 2)r   __file__parentr   dictgetr
   r	   libdevice_pathrB   object__setattr__tupleitemsrk   )selfdefault_libdirr   s      r!   __post_init__zCUDAOptions.__post_init__   s    h.6 ,4bb$t?O:P:P{D11 	n',|'B'mc.[lJlFmFmK$4k6G6G6I6I0J0JKKK~!!t~!9K'LQR&R&R&R/ 'S&R&R&R&Rr#   c                 v   t          | j                  }t          d t          |d                   D                       |d<   d                    d t          |                                          D                       }t          j        |                    d                    	                                S )Nc              3   >   K   | ]\  }}|t          |          fV  d S rL   )r^   ).0kvs      r!   	<genexpr>z#CUDAOptions.hash.<locals>.<genexpr>   s1      (h(htq!!Yq\\):(h(h(h(h(h(hr#   r   _c                 "    g | ]\  }}| d | S )-rd   )r   namevals      r!   
<listcomp>z$CUDAOptions.hash.<locals>.<listcomp>   s&    SSSID#4#SSSr#   r.   )
r   __dict__r   sortedjoinr   rY   rZ   encoder\   )r   	hash_dictkeys      r!   hashzCUDAOptions.hash   s    ''	#((h(hviXeNfGgGg(h(h(h#h#h	- hhSS	@Q@Q9R9RSSSTT~cjj1122<<>>>r#   )&__name__
__module____qualname__rk   r$   __annotations__rl   rn   ro   rp   r   rM   r
   r	   ptxas_optionsrq   rB   rr   rs   boolrt   ru   rv   ry   r   rz   r|   r   r   r   r   r   r   r   r&   r   r   r   rd   r#   r!   ri   ri   i   s        IsHcJIs "GXc]!!!K!&!;K#;;;!%K#%%%!d!!!####$)T)))J'<%*<<<46%uSz666'----/] %*]]]*.!4...KE4L#"t"""D# "#"""0 0 0? ? ? ? ?r#   ri   c                       e Zd ZdZedefd            Zd ZdefdZ	deddf fdZ
defdZd	 Zd
 Zdeeef         fdZd Zed             Zed             Zd Zd Zd Zd Zd Z ej                    d             Z xZS )CUDABackendNr   c                     | j         dk    S )Nr   )backend)r   s    r!   supports_targetzCUDABackend.supports_target   s    ~''r#   c                     d}t          j        ||          }|st          d|           t          |                    d                    S )Nz	^sm(\d+)$z(TRITON_OVERRIDE_ARCH must have the form r   )re	fullmatch
ValueErrorr$   group)r   r&   patternmatchs       r!   _parse_archzCUDABackend._parse_arch   sP    Wd++ 	SQQQRRR5;;q>>"""r#   r   c                 @    |                      |j                  }d| S )Ncuda:)r   r&   )r   rN   r_   s      r!   get_target_namezCUDABackend.get_target_name   s%    %%gl33
#z###r#   c                 X    t                                          |           d| _        d S )Ncubin)super__init__
binary_ext)r   r   	__class__s     r!   r   zCUDABackend.__init__   s&       !r#   c                    dv rd         dk    rdd<   dt           j        j        pd| j        j         i}|                    fdt          j                                        D                        t          | 
                    |d                             }|                    dd	          d	k    r|d
k     rt          d| d          d|vrSt          t          j                  }|dk    r|                    d           t!          t#          |                    |d<   d|vr|d
k    rd|d<   d|vrt           j        j        |d<   |d
k    rdnd|d<   t          di |S )Nr   consanTr   r&   smc                 :    i | ]}|v |         ||         S rL   rd   )r   r   optss     r!   
<dictcomp>z-CUDABackend.parse_options.<locals>.<dictcomp>   s7    uuuATUY]T]T]aefgahatQQatatatr#   rl   r   r@   zBnum_ctas > 1 requires NVIDIA SM90+ (Hopper). Current target is sm_zM. This configuration will fail. Please set num_ctas=1 or target an SM90+ GPU.ry   Y   fp8e4nvrz   )rx   rs   i   @r   r   rd   )r
   runtimeoverride_archr   r&   updateri   __dataclass_fields__keysr$   r   r   r   setry   addr   r   languagedefault_fp_fusion)r   r   argsr_   ry   s    `   r!   parse_optionszCUDABackend.parse_options   s   !T))d3I.Jh.V.V DM3N7NDK<L7N7NOuuuu)I)N)N)P)Puuuvvv))$v,7788
88J""Q&&:?? O6@O O O Q Q Q "--#&{'G#H#H R$((333+08L1M1M+N+ND'(.d::R<J89T))',~'GD#$9Cr9I9Iq,-""T"""r#   c                 *    |j         |j        |j        fS rL   )rk   rl   shared)r   metadatas     r!   pack_metadatazCUDABackend.pack_metadata   s    O
 	
r#   c                     dd l mc mc m} t	          |                     |j                            }|dk    r|j        n|j        t          | j
                  d}|S )Nr   r,   )convert_custom_typesr%   )triton.language.extra.cudar   extrar   r$   r   r&   convert_custom_float8_sm80convert_custom_float8_sm70r%   r   )r   rN   r   r_   codegen_fnss        r!   get_codegen_implementationz&CUDABackend.get_codegen_implementation   s~    111111111111))',7788
 0:R/?/?D++TEd%%
 

 r#   c                     ddl m} d|iS )Nr   )r   ztriton.language.extra.libdevice)r   r   )r   r   s     r!   get_module_mapzCUDABackend.get_module_map   s    88888819==r#   c                     t          j        |           t          j        r!t          j                            |           d S d S rL   )r	   load_dialectsr   instrumentation)r   ctxs     r!   r   zCUDABackend.load_dialects   sE    S!!!& 	;'55c:::::	; 	;r#   c                    t          j        | j                  }|                                 t          j                            |           t          j                            |           |dz  dk     rt          j        	                    |           t          j        
                    |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           |                    | d           | S )Nr=   	   	make_ttir)r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_symbol_dceadd_loop_unrollrun)modr   optr_   pms        r!   r   zCUDABackend.make_ttir   s   _S[))
!!"%%%..r222aK@@DDD''+++###))"---b!!!$$R(((##B'''
sK   
r#   c                 f   |j         E|                     dt          j        | j                                      |j                              t          j        | j                  }|                                }|dz  dk    }t          j	        
                    |d| |j        d|j                   t          j                            |           t          j                            ||           t           j        j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            ||dk               t           j        j                            |           t          j	                            |           |dz  dv r2t          j                            |           t          j                            |           t          j	                            |           t          j                            |           t          j                            |           t           j        j                            ||j         |           t          j        !                    ||j                    t          j        "                    |           t          j        #                    ||j         |           n|dz  dk    rt          j                            |           t          j                            |           t          j	                            |           t          j        $                    |           t          j        %                    |d           t           j        j        &                    |           t          j        !                    ||j                    t          j        "                    |           t          j        '                    ||j                    t          j        #                    ||j         |           t          j        (                    |           t          j                            |           t          j        %                    |d	           t           j        j        )                    |           nt          j	                            |           t          j                            |           t          j	                            |           t          j        *                    |           t          j                            ||dk               t          j        +                    |           t           j        j        ,                    |           |dz  d
k    r$t           j        j        -                    |           t          j                            |           t           j        j        .                    |           t          j        /                    |           t          j        0                    |           t          j	                            |           t          j        1                    |           t           j        j        2                    ||           t           j        j        3                    |           t          j        4                    |           t          j        5                    |           t          j                            |           |6                    | d           | 7                                |d<   | S )Nzttg.maxnregr=   r   r   r   r,   )r   r   FTr   
make_ttgirtensordesc_meta)8rp   set_attrr   builderr   get_int32_attrr   r   r   r   add_convert_to_ttgpuirrk   rl   ttgpuiradd_coalesceadd_f32_dot_tcr	   	ttnvgpuiradd_plan_ctaadd_remove_layout_conversionsadd_optimize_thread_localityadd_accelerate_matmuladd_optimize_dot_operands add_optimize_descriptor_encodingadd_loop_aware_cseadd_fuse_nested_loopsr   r   add_triton_licm add_combine_tensor_select_and_ifhopperadd_hopper_warpspecrn   add_assign_latenciesadd_schedule_loopsadd_pipelineadd_optimize_accumulator_initadd_hoist_tmem_allocadd_promote_lhs_to_tmemadd_warp_specializeadd_optimize_partition_warpsadd_remove_tmem_tokensadd_prefetchadd_coalesce_async_copyadd_optimize_tmem_layoutsadd_tma_loweringadd_interleave_tmemadd_reduce_data_duplicationadd_reorder_instructionsr   add_fence_insertionadd_lower_mmaadd_sccpr   r   get_tensordesc_metadata)r   r   r   r_   r   dump_enabledemuTF32s          r!   r  zCUDABackend.make_ttgir   s    ;"LL
3;(?(?(N(Ns{([([\\\_S[))((#q(**2/Cz/C/CS]TVX[Xdeee##B'''%%b'222,,R00044R88833B777,,R00044R88800Z25EFFF@@DDD&&r***v%%N00444M++B///K''+++M++B///N;;B???M 44RVVVN//CNCCCN--b111N''CNLIIII2##N00444M++B///K''+++N88<<<N//E:::M#;;B???N//CNCCCN--b111N..r3>BBBN''CNLIIIN77;;;N;;B???N//D999M#::2>>>>K''+++''+++&&r***##B'''00Z25EFFF..r22299"===q  M#44R88844R88833B777222666//333&&r***$$R(((33B
CCC--b111r"""b!!!''+++
sL!!!&)&A&A&C&C"#
r#   c                    |}t          j        |j                  }|                                 t          j                            |           t          j                            |           t          j                            |           t          j        j
                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           |                    |d           |                                |d<   |S )Ngluon_to_ttgirr  )r   r   r   r   r   gluonr   add_infer_coalesced_encodingsadd_resolve_auto_encodingsr	   r  r$  r   r   r*  r   r  r  r  r   r+  )r   srcr   rN   r_   r   r   s          r!   r/  zCUDABackend.gluon_to_ttgir@  s"   _S[))
  $$$222666//33300444&&r***r"""&&r***&&r***77;;;
s$%%%&)&A&A&C&C"#
r#   c                    t          || j        j                  }|}t          j        |j                  }|                                 t          j        	                    |           t          j        
                    |           t          j                            |           t          j                            |           t          j        j                            |||           t          j        j                            |           t          j        j                            |           t(          j        j        dk    rt          j                            |           t          j                            |           t          j        j                            ||           t4          j        r&t4          j                            d||j                   t          j        j                            |||           t          j                            |           t          j                             |           t          j        j        !                    |           t          j        j        "                    |           t          j                            |           t          j                             |           t          j        #                    |           t          j        $                    |           t(          j        j%        s0t(          j        j&        st          j'        (                    |           t4          j        r&t4          j                            d||j                   |)                    |d           t(          j        j&        rt(          j        j%        sbt          j        |j                  }|                                 t          j'        (                    |           |)                    |d           t          j        |j                  }|                                 t          j'        *                    |           |)                    |d           tW          j,                     tW          j                    }t(          j        j-        rt]          d          tW          j/        ||          }	ta          |          }
tc          || j        j                  }d}t          j2                     tW          j3        |	||
|           |j4        rt          j5        |	           |j6        r:t          j7        |	          r&d	 |j6        D             }tW          j8        |	|           tW          j9        |	tV          j:                   |;                    d
          }|||d<   |;                    d          |d<   |;                    d          |d<   |;                    d          |d<   |;                    d          |d<   |;                    d          pd|d<   |;                    d          pd|d<   ty          |	          }~	~|S )Nr   ttgpuir_to_llvmirllvmir_to_llvm	make_llirzmake_llir.disable_line_infoz,make_llir.dump_ir_extract_di_local_variableszYAddress Sanitizer Error: Address sanitizer is currently only supported on the AMD backendnvptx64-nvidia-cudac                     g | ]\  }}|S rd   rd   )r   r   r2   s      r!   r   z)CUDABackend.make_llir.<locals>.<listcomp>  s    BBBltTTBBBr#   zttg.total-num-warpsrk   z
ttg.sharedr   zttg.tensor_memory_size	tmem_sizezttg.global_scratch_memory_sizeglobal_scratch_sizez#ttg.global_scratch_memory_alignmentglobal_scratch_alignzttg.profile_scratch_memory_sizer   profile_scratch_sizez$ttg.profile_scratch_memory_alignmentr   profile_scratch_align)=rO   r   r&   r   r   r   r   r   r  r  add_allocate_warp_groupsconvertadd_scf_to_cfr0  r   r	   add_allocate_shared_memory_nvr  add_allocate_tensor_memoryadd_check_matmul_two_ctar
   compilationr   add_concurrency_sanitizer"add_allocate_global_scratch_memoryadd_proxy_fence_insertionr   r   patchadd_to_llvmirr   r   r   add_nvgpu_to_llvmadd_warp_specialize_to_llvmr   add_nvvm_to_llvmdisable_line_info"dump_ir_extract_di_local_variablesllvmiradd_di_scoper   add_di_local_variabler   init_targetsenable_asanrE   	to_modulerf   rU   set_short_ptrattach_datalayoutrt   set_nvvm_reflect_ftzr   has_extern_depslink_extern_libsoptimize_moduleOPTIMIZE_O3get_int_attrrB   )r   r3  r   rN   r_   rM   r   r   r   llvm_modprocrT   triplepathstotal_num_warpsrets                   r!   r7  zCUDABackend.make_llirS  sW   27DK<LMM_S[))
77;;;//333$$R(((  $$$;;B
KXXX::2>>>88<<<1X==N44R88899"===99"jIII& 	T'--.A2s{SSS++B
KHHH''+++b!!!11"555;;B???''+++b!!!$$R(((''+++ 2 	+5;L;o 	+M&&r***& 	Q'--.>CKPPP
sK   ? 	H$6 ;_S[11!!!**2...s9::: --BOOM//333FF3FGGG 	,..( 	mkm m m>#w//&z22)9::&xx@@@% 	2'111 	36#9(#C#C 	3BBg.ABBBE!(E222Xt'7888 **+@AA&$3H[! --l;; # 0 01I J J*-*:*:;[*\*\&'+.+;+;<a+b+b'(+.+;+;<]+^+^+cbc'(,/,<,<=c,d,d,ihi()(mm
r#   c           	         t          || j        j                  }d}t          |          }t	          || j        j                  }dg}	t          j        |||||	|j        d          }
t          j	        d|
          }t          |          dk    sJ |d         |d<   |dz   d	|dz   }t          j        d
d| |
t          j                  }
t          j        dd| |
t          j                  }
t          j        j        st          j        dd|
          }
t          j        j        rt%          d           t%          |
           |
S )Nr8  znvptx-mad-wide-optFz(.visible .entry ([a-zA-Z_][a-zA-Z0-9_]*)r   r   r   r=   r8   z\.version \d+\.\d+z	.version )flagsz\.target sm_\d+z.target sm_z,\s*debug|debug,\s*rb   z // -----// NVPTX Dump //----- //)rO   r   r&   rf   rU   r   translate_to_asmrs   r   findalllensub	MULTILINEr
   rE  rO  r	   
dump_nvptxprint)r   r3  r   r   r_   rM   r`  r_  rT   re  rc  namess               r!   make_ptxzCUDABackend.make_ptx  sN   238HII&&z22T[%566%&#CxH\^cdd
FLL5zzQ 8$b;;;r>;;f*,E,E,EsRTR^___f')Cz)C)CSPRP\]]] C 	: &/S99C<" 	4555#JJJ
r#   c           
         t          | j        j                  j        }t	          j        ddd          5 }t	          j        ddd          5 }|                    |           |                                 |j        dz   }g }	t          j
        j        r|	dd	gz  }	nt          j        j        r|	d
gz  }	n|	dgz  }	|j        rg ndg}
t          |          }t          j        j        rddgng }|j        r|j                            d          ng }|g|	|
d||d| |j        d|}	 t%          j        |dd|           t          j        j        rMt+          |j                  5 }t-          |                                           d d d            n# 1 swxY w Y   t0          j                            |j                  rt1          j        |j                   t0          j                            |j                  rt1          j        |j                   n
# t$          j        $ r}t+          |j                  5 }|                                }d d d            n# 1 swxY w Y   t0          j                            |j                  rt1          j        |j                   |j        dk    rd}n%|j        dt:          j        z   k    rd}n
d|j         }| d| dd                    |           d}t-          d| d| d           tA          |          d }~ww xY wt+          |d          5 }|                                }d d d            n# 1 swxY w Y   t0          j                            |          rt1          j        |           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |S ) NFwz.ptx)deletemodere   rz.logz.oz	-lineinfoz-suppress-debug-infoz-gz--fmad=falsez--opt-level0 z-vz--gpu-name=z-oT)check	close_fdsstderr   z!Internal Triton PTX codegen error   z`ptxas` raised SIGSEGVz`ptxas` failed with error code z
`ptxas` stderr:
z
Repro command: 
zC

================================================================
z

zy
================================================================
please share the reproducer above with Triton project.
rW   )!r+   r   r&   r2   tempfileNamedTemporaryFilewriteflushr   r
   rE  rN  r	   disable_ptxas_optrs   rf   rq   rD   r0   r   dump_ptxas_logrX   rl  r[   osexistsremoveCalledProcessError
returncodesignalSIGSEGVr   r   )r   r3  r   r   r_   r*   fsrcflogfbin
debug_infofmadr&   disable_optptx_extra_options	ptxas_cmdlog_fileelogerrorr]   r   s                        r!   
make_cubinzCUDABackend.make_cubin  s_   $+*++0(COOO G	 SW'u3vNNNG	 RVJJsOOOJJLLL9t#DJ 2 ,{,BCC

/ ,tf$

 {m+
-C22N3CD*:66D 38,2PX=#..VXK ?Bo U 5 5c : : :SU "%)+/2=@QSgaeSgSgimirI$(ydSSSS<. /di /Hhmmoo.../ / / / / / / / / / / / / / / 7>>$),, )Idi(((7>>$),, )Idi(((0 ( ( ($)__ *"--//C* * * * * * * * * * * * * * *7>>$),, )Idi(((<3&&?EE\S6>%9994EELalLLE! C C-0C C+.88I+>+>C C C    
       !'''5(8 dD!! !Q! ! ! ! ! ! ! ! ! ! ! ! ! ! !w~~d##  	$OG	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	  G	 P s   N<CN%.=H +"FH FH  F!A=H N% L'/L"I$	L"$I((L"+I(,B6L""L''N%:MN%MN%"M#6N%N<%N)	)N<,N)	-N<<O O c                 b                          j                  |t          j        k    r fd|d<    fd|d<   n|t          j        k    r
 fd|d<    fd|d<    fd|d	<    fd
|d<   t
          j        j        %t
          j                             ||           d S d S )Nc                 4                         | |          S rL   )r   r3  r   r_   rN   r   s     r!   <lambda>z(CUDABackend.add_stages.<locals>.<lambda>  s    4>>#xQXZd3e3e r#   r   c                 4                         | |          S rL   )r  r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>  s    DOOCSZ\f4g4g r#   ttgirc                 4                         | |          S rL   )r/  r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>  s    D4G4GXW^`j4k4k r#   c                 4                         | |          S rL   )r7  r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>   s    t~~c8WV`/a/a r#   llirc                 H                         | |j        j                  S rL   )rn  r   r&   r3  r   rN   r   s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>!  s     dmmC7TXT_Td.e.e r#   ptxc                 H                         | |j        j                  S rL   )r  r   r&   r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>"  s     XwX\XcXh0i0i r#   r   )r   r&   r   TRITONGLUONr
   r   add_stages_inspection_hook)r   stagesrN   r   r_   s   ` ` @r!   
add_stageszCUDABackend.add_stages  s    %%gl33
x&&eeeeeeF6NggggggF7OO''kkkkkkF7Oaaaaaaveeeeeuiiiiiw=3?M44T67HV`aaaaa @?r#   c                 V    t          | j        j                  }| d| j        j         S )Nr   )r6   r   r&   )r   r5   s     r!   r   zCUDABackend.hash&  s-    #DK$455..DK,...r#   )r   r   r   r   staticmethodr   r   r   rB   r   r   r   r   r   r   r   r   r   r   r   r  r/  r7  rn  r  r  	functools	lru_cacher   __classcell__)r   s   @r!   r   r      s       O(	 ( ( ( \(# # #$# $ $ $ $"y "T " " " " " "#S # # # #>
 
 
  >S*_ 5 > > > >; ; ;
   \  G G \GR  &^ ^ ^@  4J J JXb b b Y/ / / / / / /r#   r   )r,   ),triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   triton.runtime.errorsr   dataclassesr   r  typingr   r   r   r   typesr   rY   r   r|  r  r  r0   pathlibr   r%   r$   
NvidiaToolr+   r  r6   rJ   rO   rU   r^   rf   ri   r   rd   r#   r!   <module>r     s   E E E E E E E E E E 8 8 8 8 8 8 8 8 8 8 8 8       , , , , , , ! ! ! ! ! !     - - - - - - - - - - - -        				   				          # # # # #OC OE, O O O O  C     iS i i i i.     
 
 
 
 
 T4 4 4
& & & & & $)? )? )? )? )? )? )? )?XS/ S/ S/ S/ S/+ S/ S/ S/ S/ S/r#   