
    IriY                        d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d	efd
Zd Zd Z ed           G d d                      Z G d de          ZdS )    )BaseBackend	GPUTargetLanguage)irpassesllvmamd)knobs)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                     d S )Nc                     dS )N)   r   r    )lhs_typerhs_types     t/var/www/html/bestrading.cuttalo.com/models/btc_v9/venv/lib/python3.11/site-packages/triton/backends/amd/compiler.py<lambda>z"get_min_dot_size.<locals>.<lambda>   s    i     r   r   s    r   get_min_dot_sizer      s     0//r   c                 f    t           j        j        | dk    p	| dk    o|du nt           j        j        S )Ngfx942gfx950T)r
   r	   use_block_pingpong)archuse_async_copys     r   is_pingpong_schedule_enabledr#      s:    -5 HM!1!Ln6L;@9;WXr   c                 R    t           j        j        | dk    nt           j        j        S )Nr   )r
   r	   use_in_thread_transposer!   s    r   is_in_thread_transpose_enabledr'      s#    !&!B!JDHPUPYPqqr   T)frozenc                   f   e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   d	Z	e
ed
<   dZeed<   dZeed<   d	Zeed<   dZee         ed<   dZee         ed<   dZeed<   dZee         ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d Zeed!<   d"Zeed#<   d$ Zd% Zd	S )&
HIPOptions   	num_warpsr   waves_per_eu   
num_stagesr   num_ctasNextern_libsFdebugTsanitize_overflowr!   )fp8e4nvfp8e5fp8e5b16fp8e4b8supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypesieeedefault_dot_input_precision)r:   bf16x3bf16x6allowed_dot_input_precisionsenable_fp_fusionlaunch_cooperative_gridmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_name instrumentation_modenoneschedule_hintc                    t          | j        dd                   }|dk    rdnd}t                              | d|           | j        dk    r| j        | j        dz
  z  dk    s
J d	            | j        d
k    rD| j        dk    r9t          j        d| j         d           t                              | dd           t          t                    j
        dz  }| j        i nt          | j                  }dD ]}t          || dz            ||<   t                              | dt          |                                                     d S )N   
       @   	warp_sizer   r   znum_warps must be a power of 2r   zckpack is deprecated starting from gfx950 and will be removed in later releases. So for now kpack = z7 will be overwritten to 1 to make transitioning easier.rB   lib)ocmlocklz.bcr1   )intr!   object__setattr__r,   rB   warningswarnr   __file__parentr1   dictstrtupleitems)self	gfx_majorrQ   default_libdirr1   rR   s         r   __post_init__zHIPOptions.__post_init__N   su   	!B$((	#r//BBr	4i888~!!t~!9K'LQR&R&R&R, 'S&R&R I!!
aM zvz  wA  z  z  z   tWa000h.6 ,4bb$t?O:P:P# 	A 	AC">sKKK#?@@K4k6G6G6I6I0J0JKKKKKr   c                     d                     d | j                                        D                       }t          j        |                    d                                                    S )N_c                 "    g | ]\  }}| d | S )-r   ).0namevals      r   
<listcomp>z#HIPOptions.hash.<locals>.<listcomp>b   s&    OOOID#4#OOOr   utf-8)join__dict__r_   hashlibsha256encode	hexdigest)r`   keys     r   hashzHIPOptions.hasha   sX    hhOO9L9L9N9NOOOPP~cjj1122<<>>>r   ) __name__
__module____qualname__r,   rU   __annotations__r-   r/   r0   r1   r\   r2   boolr3   r!   r]   r8   r   r9   r;   r>   r?   r@   rA   rB   rC   rD   rF   rH   rJ   rc   rt   r   r   r   r*   r*      s        IsL#JHcKE4"t"""D#
 (S%*RRR46%uSz666'----/K %*KKK!d!!!$)T))) !#!!!E3NNN$$$$)*!3***L# "#"""*  M3L L L&? ? ? ? ?r   r*   c                       e Zd ZdZdZedefd            Zdeddf fdZde	fdZ
defdZd	 Zd
 Zdee	ef         fdZd Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zd Z ej                    d             Z xZ S )
HIPBackendNFr   c                     | j         dk    S )NrE   )backendr   s    r   supports_targetzHIPBackend.supports_targetj   s    ~&&r   returnc                     t                                          |           t          |j        t                    sJ d| _        d S )Nhsaco)super__init__
isinstancer!   r]   
binary_ext)r`   r   	__class__s     r   r   zHIPBackend.__init__n   s>       &+s+++++!r   c                     d|j          S )Nhip:r&   r`   optionss     r   get_target_namezHIPBackend.get_target_names   s    $gl$$$r   c                    dt           j        j        p| j        j        i}                    dd          dk    r:t          j        | j        j                  st          d| j        j                   | j        j        dk    rNt          t          j                  }|                    dh           t          t          |                    |d<   dvr)t          t          t          j                            |d<   | j        j        d	k    rOt          t          j                  }|                    d
dh           t          t          |                    |d<   dvrt           j        j        |d<   |                    fdt          j                                        D                        t          di |S )Nr!   r0   r   znum_ctas > 1 not supported on r   tf32r>   r8   r   r6   r7   r9   r?   c                 :    i | ]}|v |         ||         S Nr   )rh   koptss     r   
<dictcomp>z,HIPBackend.parse_options.<locals>.<dictcomp>   s7    uuuASTX\S\S\aefgahatQQatatatr   r   )r
   runtimeoverride_archr   r!   getr	   supports_multi_cta_launch
ValueErrorsetr*   r>   updater^   sortedr8   r9   languagedefault_fp_fusion__dataclass_fields__keys)r`   r   argsr>   r9   s    `   r   parse_optionszHIPBackend.parse_optionsv   s   3Gt{7GH88J""Q&&s/LT[M]/^/^&Pdk>NPPQQQ ;x''+.z/V+W+W((//99938@\9]9]3^3^D/0!--+0
8W1X1X+Y+YD'(;x''03J4`0a0a--44j)5LMMM8=fEf>g>g8h8hD45T))',~'GD#$uuuu)H)M)M)O)Ouuuvvv!!D!!!r   c                 *    |j         |j        |j        fS r   )r,   r0   shared)r`   metadatas     r   pack_metadatazHIPBackend.pack_metadata   s    O
 	
r   c                 .    dt          | j                  iS )Nmin_dot_size)r   r   r   s     r   get_codegen_implementationz%HIPBackend.get_codegen_implementation   s     0 = =>>r   c                     ddl m} d|iS )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )r`   r   s     r   get_module_mapzHIPBackend.get_module_map   s    77777719==r   c                     t          j        |           t          j        r!t          j                            |           d S d S r   )r	   load_dialectsr{   instrumentation)r`   ctxs     r   r   zHIPBackend.load_dialects   sE    #% 	:&44S99999	: 	:r   c                     dd l }d}t          | d          r|                                 |k    S t          | |j                  r:t          | d          r*|                                                                 |k    S dS )Nr   i	ptr_rangeuntyped_storageF)torchhasattrr   r   Tensorr   size)argr   
MAX_INT_32s      r   is_within_2gbzHIPBackend.is_within_2gb   s    
3$$ 	1==??j00c5<(( 	>WS:K-L-L 	>&&((--//:==ur   c                 F    t          j        |           }d| v r|ddggz  }|S )NSztt.pointer_rangerO   )r   
parse_attr)descrets     r   r   zHIPBackend.parse_attr   s3    $T**$;;',--C
r   c                     t          j        | fi |}t          j        j        rt
                              |           r|dz  }|S )Nr   )r   get_tensor_specializationr
   r	   use_buffer_opsr{   r   )r   kwargsr   s      r   r   z$HIPBackend.get_tensor_specialization   sJ    3CBB6BB9# 	
(@(@(E(E 	3JC
r   c                    t          j        | j                  }|                                 t          j                            |           t          j                            |           t          j        	                    |           t          j        
                    |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           |                    | d           | S )N	make_ttir)r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_triton_licmadd_symbol_dceadd_loop_unrollrun)modr   r   pms       r   r   zHIPBackend.make_ttir   s   _S[))
!!"%%%..r222<<R@@@''+++###))"---b!!!##B'''$$R(((##B'''
sK   
r   c                    t          j        | j                  }|                                 t          j                            |d|j         |j        |j	        |j
                   |                    | d           t          j        | j                  }|                                 d}t          j                            |           t          j                            ||           t          j                            |           t          j                            |           t"          j        j                            ||j        |j        |j                   t          j                            |           t"          j        j                            |           t"          j        j                            ||j                   t"          j        j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t8          j        j        }t=          |j        |          }t"          j        j                            ||j                    t"          j        j        !                    |||           |r*t"          j        j        "                    ||j                   t          j                            |           |j#        $                                dk    rB|j#        %                    d          D ]'}t"          j        j        &                    ||           (t          j                            |           t          j        '                    |           tQ          |j                  rCt"          j        j        )                    |           t          j                            |           t"          j        j        *                    |           |r5|j         dk    r*t"          j        j        +                    ||j                    t8          j        j,        rt"          j        j        -                    |           t          j                            |           t"          j        j        .                    ||j        t8          j        j/        t8          j        j0                   t"          j        j        1                    |           t          j                            |           t          j        2                    |           t          j        3                    |           |                    | d           | 4                                |d<   | S )	Nr   make_ttgir_earlyFrI   ,r   
make_ttgirtensordesc_meta)5r   r   r   r   r   r   add_convert_to_ttgpuirr!   r,   rQ   r0   r   ttgpuiradd_coalesceadd_f32_dot_tcadd_remove_layout_conversionsadd_optimize_thread_localityr	   add_accelerate_matmulrA   rB   add_optimize_epilogueadd_optimize_dot_operandsadd_hoist_layout_conversionsadd_fuse_nested_loopsr   r   r   r
   r"   r#   add_schedule_loopsr/   add_pipelineadd_coalesce_async_copyrJ   lowersplitinsert_instruction_sched_hintsadd_reduce_data_duplicationr'   add_in_thread_transposeadd_reorder_instructionsadd_block_pingpongr   add_canonicalize_pointersadd_convert_to_buffer_opsuse_buffer_atomics%buffer_ops_analyze_small_tensor_rangeadd_fold_true_cmpir   r   get_tensordesc_metadata)r   r   r   r   emuTF32r"   r    hints           r   r   zHIPBackend.make_ttgir   s}   _S[))
**2/Dgl/D/DgFWY`Yj+2+;	= 	= 	=
s&'''_S[))
##B'''%%b'22244R88833B777
00W\7C_ahanooo44R888
00444
44RFFF
77;;;,,R000''+++##B'''''+++19',WW
--b'2DEEE
''N<NOOO 	IJ66r7<HHH''+++ &&((F22-33C88 L L
"AA"dKKKK44R888222666)',77 	=J66r:::N88<<<
33B777 	J'"4q"8"8J11"g6HIII9# 	J88<<<M++B///J88	,	?	   	
--b111''+++b!!!$$R(((
sL!!!&)&A&A&C&C"#
r   c                 2   | }t          j        |j                  }|                                 t          j                            |           t          j                            |           t          j        	                    |           t          j
                            |           t          j                            |           t          j                            |           |                    |d           |                                |d<   |S )Ngluon_to_ttgirr   )r   r   r   r   r   gluonr   add_resolve_auto_encodingsr   add_sccpr   add_loop_aware_cser   r    add_combine_tensor_select_and_ifr   r   )srcr   r   r   r   s        r   r   zHIPBackend.gluon_to_ttgir  s    _S[))
  $$$//333r"""&&r***&&r***77;;;
s$%%%&)&A&A&C&C"#
r   c                 `   | }t          j        |j                  }|                                 t          j        j                            ||j                   d}t          j        j        	                    ||j        |           t
          j
                            |           t
          j                            |           t
          j
                            |           t          j        j                            |           t           j        r&t           j                            d||j                   d}t          j        j                            ||j        |           t
          j                            |           t
          j                            |           t
          j
                            |           t
          j
                            |           t
          j                            |           t
          j                            |           t
          j                            |           |j                                        dk    r0t          j        j                            ||j        |j                   t           j        r&t           j                            d||j                   t<          j        j         s0t<          j        j!        st
          j"        #                    |           t          j        j        $                    ||           |%                    |d           t<          j        j!        rt<          j        j         sbt          j        |j                  }|                                 t
          j"        #                    |           |%                    |d           t          j        |j                  }|                                 t
          j"        &                    |           |%                    |d           tO          j(                     tO          j                    }tO          j)        ||          t	          j*                   d	}t<          j        j+        rd
}tO          j,        t          j-        |j        |           t	          j.        |j                   t	          j/        d           t	          j0        dd           t	          j0        dd           t	          j0        dd           t	          j0        d|j1        dk               d 2                                D             }	|	d         3                    t          j4                   |	d         5                    dd|j6        |j1        z              d|j        7                    d          v r|	d         5                    dd           |	d         5                    dd           |	d         5                    d|j8         d|j8                    |j9        rdnd}
|	d         5                    d|
           t<          j        j+        r5|	d         :                    d
           |	d         ;                                 t	          j<        |	d                    t<          j        j+        rgt{          t|                    j?        d z  }t          |d!z            t          |d"z            t          |d#z            g}tO          jA        |           nB|jB        r;fd$|jB        D             }t          |          dk    rtO          jA        |           tO          jD        tN          jE        |j        d	g |jF                   t	          jG        |j                  rQ|	d         H                    d%           |	d         H                    d&           |	d         H                    d'           t<          j        jI        rt	          jJ        |	d                    | K                    d(          |d)<   | K                    d*          pd|d+<   | K                    d,          pd-|d.<   t	          jL                   t	          jM                   t                    S )/Nr   ttgpuir_to_llvmirTrI   llvmir_to_llvm	make_llirzmake_llir.disable_line_infoz,make_llir.dump_ir_extract_di_local_variablesrG   +xnacki  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rP   c                 :    g | ]}|                                 |S r   )is_declaration)rh   fns     r   rk   z(HIPBackend.make_llir.<locals>.<listcomp>x  s)    PPPbB<M<M<O<OPrPPPr   zamdgpu-flat-work-group-sizez1,zmemory-bound-attentionr   zamdgpu-sched-strategyziterative-ilpzuniform-work-group-sizetruezamdgpu-waves-per-euz, zpreserve-signr:   zdenormal-fp-math-f32rR   z
asanrtl.bczocml.bczockl.bcc                 D    g | ]\  }}t          j        |          |S r   )r	   need_extern_lib)rh   ri   pathllvm_mods      r   rk   z(HIPBackend.make_llir.<locals>.<listcomp>  s1    iiiltTSEXYacgEhEhiTiiir   zamdgpu-no-workgroup-id-xzamdgpu-no-workgroup-id-yzamdgpu-no-workgroup-id-zz
ttg.sharedr   zttg.profile_scratch_memory_sizeprofile_scratch_sizez$ttg.profile_scratch_memory_alignmentr   profile_scratch_align)Nr   r   r   r   r	   r   r   add_update_async_wait_countr!   add_optimize_lds_usageconvertadd_scf_to_cfr   r   add_index_to_llvmiradd_allocate_shared_memoryr{   r   patchadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   rJ   r   lower_instruction_sched_hintsr/   r
   compilationdisable_line_info"dump_ir_extract_di_local_variablesllvmiradd_di_scopeadd_builtin_func_to_llvmirr   add_di_local_variabler   init_targets	to_moduleattach_target_tripleenable_asanattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrQ   get_functionsset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr,   r   r-   rC   add_fn_target_featureadd_fn_asan_attrset_all_fn_arg_inregr   rZ   r[   r]   link_extern_libsr1   lenoptimize_moduleOPTIMIZE_O3r?   has_architected_sgprsremove_fn_attrscalarize_packed_fops#add_scalarize_packed_fops_llvm_passget_int_attrcleanup_bitcode_metadatadisable_print_inline)r   r   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   target_featuresfnsdenormal_moderb   pathsr  s                @r   r  zHIPBackend.make_llir  s   _S[))

66r7<HHH 
11"glOTTT$$R(((  $$$**2...
55b999% 	S&,,-@"ckRRR 	
((W\9EEE''+++b!!!''+++**2...''+++b!!!$$R((( &&((F22J<<RwOabbb % 	P&,,-=r3;OOO 2 	+5;L;o 	+M&&r***
55b)DDD
sK   ? 	H$6 ;_S[11!!!**2...s9::: --BOOM//333FF3FGGG 	,..>#w// ***( 	'&Ox):GL/ZZZ 	Hgl333Hc***%h0H%PPP%h0QSWXXX%h0H%PPP%h0H'J[_aJabbb QPH2244PPPA >???A8:dw?PQXQb?b:d:deee#w'<'B'B3'G'GGGF6HHHA4f=== 	A0W5I2c2cWMa2c2cddd+2+EQ6A1=AAA( 	&F((222F##%%%
 	 Q(((( 	7!(^^2U:NN\122NY.//NY.//E
 !(E2222  	7iiiig.AiiiE5zzA~~%h666Xt'7r2wOghhh $W\22 	>F!!"<===F!!"<===F!!"<===9* 	<3CF;;; !--l;;+.+;+;<]+^+^+cbc'(,/,<,<=c,d,d,ihi()$X... 	 ***8}}r   c           	         t          j        d|           }t          |          dk    sJ |d         |d<   g }d|j        v rdnd}t	          j        |                     d                                                    }|d         d	z   |z   }t          j	        | t          j        |j        |||j        |          }t          j        | t          j        |j        |||j        |           t          j        | t          j        |j        |||j        d
          }	t          j
        j        rt#          d           t#          |	           |	S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   ri   gfx11z-real-true16rG   rl   re   Fz!// -----// AMDGCN Dump //----- //)refindallr7  r!   ro   rp   rq   rr   r   translate_to_mirr	   r+  r?   dump_sched_dagtranslate_to_asmr
   dump_amdgcnprint)
r   r   r   namesflagsfeaturesir_hashdump_file_idre   amdgcns
             r   make_amdgcnzHIPBackend.make_amdgcn  s:   
 
QSVWW5zzQ 8%,%<%<>>".G!4!455??AAQx#~/!#s'8',RWY`Yq".0 0C!2GL(ESZSk(	* 	* 	*&sC,=w|XW\^e^v',. .9  	5666&MMMr   c                 V   d}t           j        j        rd}t          j        | |j        |          }t          j                    5 }t          j                    5 }t          |j	        d          5 }|
                    |           d d d            n# 1 swxY w Y   t          j        |j	        |j	                   d d d            n# 1 swxY w Y   t          |j	        d          5 }|                                }	d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |	S )NrG   r  wbrb)r
   r  r)  r	   assemble_amdgcnr!   tempfileNamedTemporaryFileopenri   write
link_hsacoread)
r   r   r   rC  r   tmp_outtmp_infd_infd_outr   s
             r   
make_hsacozHIPBackend.make_hsaco  s   ( 	'&O#CGG(** 	$g,.. :&&+t,, 'KK&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' 'v{GL999: : : : : : : : : : : : : : : glD)) $Vkkmm$ $ $ $ $ $ $ $ $ $ $ $ $ $ $	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 
s~   DC.BCBCB"C:DC
	
DC
	D&D;DD	DD	DD"%D"c                 $    |t           j        k    r fd|d<    fd|d<   n|t           j        k    r	 fd|d<    fd|d<    fd|d	<    fd
|d<   t          j        j        %t          j                             ||d            d S d S )Nc                 2                         | |          S r   )r   r   r   r   r`   s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    4>>#xQX3Y3Y r   r   c                 2                         | |          S r   )r   rh  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    DOOCSZ4[4[ r   ttgirc                 2                         | |          S r   )r   rh  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    D4G4GXW^4_4_ r   c                 2                         | |          S r   )r  rh  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    t~~c8W/U/U r   llirc                 2                         | |          S r   )rV  rh  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    1A1A#xQX1Y1Y r   rU  c                 2                         | |          S r   )re  rh  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    Xw0W0W r   r   )r   TRITONGLUONr
   r   add_stages_inspection_hook)r`   stagesr   r   s   ` ` r   
add_stageszHIPBackend.add_stages  s    x&&YYYYYF6N[[[[[F7OO''_____F7OUUUUUvYYYYYxWWWWWw=3?M44T67HVZ[[[[[ @?r   c                     | j          S r   r   )r`   s    r   rt   zHIPBackend.hash  s    +r   )!ru   rv   rw   r   %supports_native_tensor_specializationstaticmethodr   r~   r   r]   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  rV  re  rt  	functools	lru_cachert   __classcell__)r   s   @r   r{   r{   f   s3       O,1)'	 ' ' ' \'"y "T " " " " " "
%# % % % %"S " " " "2
 
 
? ? ?>S*_ 5 > > > >
: : :
   \   \   \   \  < < \<|   \  Z Z \Zx   \.   \
\ 
\ 
\ Y             r   r{   )triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   dataclassesr   typingr   r   r   typesr   ro   r[  rI  rx  rX   pathlibr   r   r#   r'   r*   r{   r   r   r   <module>r     s   E E E E E E E E E E 5 5 5 5 5 5 5 5 5 5 5 5       ! ! ! ! ! ! # # # # # # # # # #         				           0Y 0 0 0 0X X X
r r r $D? D? D? D? D? D? D? D?NI  I  I  I  I  I  I  I  I  I r   