
    n
qiV              
          d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d dlZd d	lmZ g d
Z ee          Z G d dee          Z e
d           G d d                      Zdeedf         dededz  deedf         fdZ ed          Z  ed          Z!dee e!f         dededz  dee e!f         fdZ"dededdfdZ#deedf         de$e         deedf         fdZ%dede&ee'f         ddfd Z(dedede$e         fd!Z)de$e         ddfd"Z*de$e         ddfd#Z+dedede$e         fd$Z,dede$e         fd%Z-dede$e         fd&Z.dede$e         fd'Z/dede$e         fd(Z0 ed)          Z1 ed*          Z2d+ee2         d,ee2ge1f         de&e1e$e2         f         fd-Z3d.ede$e         fd/Z4d.ede$e         fd0Z5d1ede$e         fd2Z6d1ede$e         fd3Z7defd4Z8dedefd5Z9d1ede$e         fd6Z:d1edefd7Z;d8edefd9Z<d1edefd:Z=d;ede$e         fd<Z>d=ee         defd>Z?de$e         fd?Z@d@ede$e         fdAZAde$e         fdBZBdS )C    N)defaultdict)CallableIterable)asdict	dataclass)Enum)wraps)	getLogger)	ParamSpecTypeVar)signpost_event)AffinityMode)maybe_wrap_command_args_with_numa_bindingmaybe_wrap_with_numa_bindingNumaOptionsc                   "    e Zd ZdZdZdZdZdZdS )r   zW
    See behavior description for each affinity mode
    in torch.distributed.run.
    nodesocket	exclusivezcore-complexN)__name__
__module____qualname____doc__NODESOCKET	EXCLUSIVECORE_COMPLEX     r/var/www/html/bestrading.cuttalo.com/services/ml-inference/venv/lib/python3.11/site-packages/torch/numa/binding.pyr   r      s.         
 DFI!LLLr   r   T)frozenc                   *    e Zd ZU eed<   	 dZeed<   dS )r   affinity_modeF!should_fall_back_if_binding_failsN)r   r   r   r   __annotations__r$   boolr   r   r    r   r   &   s6          /4%t33333r   r   command_args.	gpu_indexnuma_optionsreturnc                    || S | |t          |          d}	 t          ||          }t          | |          }t          ddi |d|i           |S # t          $ r t          ||	           | cY S w xY w)
a  
    Wraps command arguments with numactl to apply NUMA CPU binding.

    This function prepends numactl with appropriate CPU affinity flags to the
    provided command arguments, binding the process to CPUs associated with
    the specified GPU's NUMA node.

    Args:
        command_args: The original command arguments to wrap.
        gpu_index: The index of the GPU that will be used by the subprocess.
        numa_options: Configuration for NUMA binding behavior. If None, returns
            the original command_args unchanged.

    Returns:
        Tuple of command arguments, potentially wrapped with numactl for NUMA binding.
        Returns the original command_args if numa_options is None or if binding fails
        and fallback is enabled.
    N)r'   r(   r)   r(   r)   original_command_argslogical_cpu_indicesnuma_bindingapply_successwrapped_commandcategoryname
parametersr)   logger_kwargs)r   &_get_validated_logical_cpus_to_bind_to_assemble_numactl_command_argsr   	Exception_handle_exception)r'   r(   r)   kwargsr/   wrapped_command_argss         r    r   r   4   s    0  %|,, FD%
 
 

  >". 3 
  
  
 	# !#7 	
 	
 	
 	
 $#   |6JJJJs   ;A A54A5_TParams_TReturnfuncc                      S t                     dt          j        dt          j        dt          f fd            }|S )a  
    Wraps a function to apply NUMA CPU binding before execution.

    This decorator applies NUMA CPU affinity to all threads in the current process
    before calling the wrapped function, binding them to CPUs associated with the
    specified GPU's NUMA node.

    Args:
        func: The function to wrap with NUMA binding.
        gpu_index: The index of the GPU that will be used.
        numa_options: Configuration for NUMA binding behavior. If None, returns
            the original function unchanged.

    Returns:
        A wrapped function that applies NUMA binding before execution, or the
        original function if numa_options is None.
    Nargsr=   r*   c                  6    t                      | i |S )Nr,   ),_maybe_apply_numa_binding_to_current_process)rC   r=   rA   r(   r)   s     r    wrappedz-maybe_wrap_with_numa_binding.<locals>.wrapped   s7    4%	
 	
 	
 	

 tT$V$$$r   )r	   r?   rC   r=   r@   )rA   r(   r)   rF   s   ``` r    r   r   r   sj    . 
4[[%x} % %H % % % % % % % [% Nr   c           	          | t          |          d}	 t          | |          }t          |           t          ddi |dt	          |          i           d S # t
          $ r t          ||           Y d S w xY w)Nr,   r/   r0   r1   r/   r3   r7   )r   r9   4_bind_all_threads_in_current_process_to_logical_cpusr   _get_ranges_str_from_intsr;   r<   )r(   r)   r=   r/   s       r    rE   rE      s     |,, F
KD%
 
 

 	= 3	
 	
 	
 	
 	# %'@AT'U'U 	
 	
 	
 	
 	
 	
  K K K|6JJJJJJJKs   AA A;:A;r.   r/   c                 0    ddt          |           g| R S )Nnumactlz--physcpubind=)rJ   r-   s     r    r:   r:      s5     	I23FGGII 
  r   r8   c                     t          ddi |dt          j                    i           t                              d|           | j        r.t                              dt          j                               d S  )Nr0   apply_exception	tracebackr3   z)Failed to apply NUMA binding for input=%rzHContinuing executing without applying NUMA binding, despite exception %s)r   rO   
format_exclogger	exceptionr$   warningr7   s     r    r<   r<      s     

-//
 
    @-PPP5 V ""	
 	
 	
 	 
r   c                 H    t          | |          }t          |           |S )Nr,   rH   )_get_logical_cpus_to_bind_to_raise_if_binding_invalid)r(   r)   r/   s      r    r9   r9      s8    
 7,   2EFFFFr   c                 n    t          j        d          t          d          | st          d          d S )NrL   z(numactl CLI is required for NUMA bindingz+Must bind to a non-empty set of CPU indices)shutilwhichRuntimeErrorrH   s    r    rV   rV      sG     |I&EFFF JHIIIJ Jr   c                 $   t          j        d          }t          j        d|            t          j        d          D ]Q}	 t	          |          }t          j        |          }||k    rt          j        ||            B# t
          $ r Y Nw xY wd S )Nr   z/proc/self/task)ossched_getaffinitysched_setaffinitylistdirintr;   )r/   original_main_thread_affinitytid_strtidtid_affinitys        r    rI   rI      s    
 %'$8$;$;!
 /000:/00  	g,,C/44L <<<$S*=>>> 	 	 	D	 s   >B  
BBc                 f   |j         t          j        k    rt          |           }n|j         t          j        k    rt          |           }nd|j         t          j        k    rt          |           }n>|j         t          j        k    rt          |           }nt          d|j          d          |S )z
    Args:
        gpu_index: The index of the GPU that will be used by the subprocess.
            Example: 0
        numa_options: See NumaOptions for details.

    Returns:
        Set of logical CPU indices to bind to.
    r(   zAffinity mode z not supported.)r#   r   r   !_node_get_logical_cpus_to_bind_tor   #_socket_get_logical_cpus_to_bind_tor   &_exclusive_get_logical_cpus_to_bind_tor   )_core_complex_get_logical_cpus_to_bind_to
ValueError)r(   r)   logical_cpuss      r    rU   rU     s     !\%66689MMM		#|':	:	::YOOO		#|'=	=	==	RRR		#|'@	@	@@9UUUU,*DUUUVVVr   c                 B    t          |           }t          |          S )z-
    Core logic of 'node' numa strategy.
    rf   numa_node_index)"_get_numa_node_index_for_gpu_index._get_allowed_logical_cpu_indices_for_numa_node)r(   ro   s     r    rg   rg   '  s.     99MMMO9'   r   c                     t          |           }t          |          }t          |          }t                      }|D ]%}|                    t          |                     &|S )z/
    Core logic of 'socket' numa strategy.
    rf   rn   )socket_index)rp   _get_socket_index_for_numa_node'_get_numa_node_indices_for_socket_indexsetupdaterq   )r(   numa_node_index_of_gpurs   numa_node_indicesrl   ro   s         r    rh   rh   2  s     @)TTT2.  L @!   55L, 
 
: /  	
 	
 	
 	
 r   c                    t          |           }t          |          }t          |          }|                    |           }t	          |          }t          |d           }t          t          |                                                    }t          |          t          |          z  }t          |          t          |          z  }|dk     r7t          dt          |           d|ddt          |           d	z             ||z  t          ||          z   }||z   ||k     rdnd
z   }	d t          |                                          ||	         D             }
|
S )z2
    Core logic of 'exclusive' numa strategy.
    rf   rn   c                 <    t          t          |                     S Nlogical_cpu_index)min6_get_logical_cpu_indices_sharing_same_physical_core_asr}   s    r    <lambda>z8_exclusive_get_logical_cpus_to_bind_to.<locals>.<lambda>[  s%    #B"3  #
 #
 r      zThere are only z# physical cores on numa_node_index=,z but there are z% GPUs associated with this NUMA node.r   c                     h | ]	}|D ]}|
S r   r   ).0r/   r~   s      r    	<setcomp>z9_exclusive_get_logical_cpus_to_bind_to.<locals>.<setcomp>  sA     , , , "5, ,
 	 	, , , ,r   )rp   _get_gpu_indices_for_numa_nodesortedindexrq   	_group_bydictitemslenrZ   r   listvalues)r(   ro   gpu_indicesoriginal_gpu_relative_indexallowed_logical_cpu_indices,physical_core_to_allowed_logical_cpu_indicesnum_physical_cores_per_gpu(num_gpus_to_give_one_extra_physical_corestartend$logical_cpu_indices_for_original_gpus              r    ri   ri   I  s    99MMMO0QQQK%%K"-"3"3I">">"P'# # # 4=#	
 	
4 40 48;AACCDD4 40 "%4" "	[		"
 0340 0K0, "A%%wc"NOOwwdswwwWK 0 0WWWX
 
 	
 (*DDs#%MH H E 	
$	% +-UUU A	
 , ,#'8??AA$
 $

)$, , ,( 0/r   c                    t          |           }t          |          }t          |          }|                    |           }t	          |          }t          |d           }t          t          |                                d                     }|t          |          z  }t          |
                                          |         }|S )z
    Core logic of 'core-complex' numa strategy.

    Each GPU is assigned a full core complex (group of cores sharing L3 cache)
    within its affined NUMA node.
    rf   rn   c                 <    t          t          |                     S r|   )r   1_get_logical_cpus_sharing_same_max_level_cache_asr}   s    r    r   z;_core_complex_get_logical_cpus_to_bind_to.<locals>.<lambda>  s%    #="3  #
 #
 r   c                 >    t          | d                    | d         fS )Nr   r   )r   )items    r    r   z;_core_complex_get_logical_cpus_to_bind_to.<locals>.<lambda>  s    s47||mT!W5 r   )key)rp   r   r   r   rq   r   r   r   r   r   r   )r(   ro   r   r   r   .max_level_cache_to_allowed_logical_cpu_indicescache_index_for_original_gpur   s           r    rj   rj     s    99MMMO0QQQK%%K"-"3"3I">">"P'# # # 6?#	
 	
6 62 6::@@BB 65		
 	
 	
6 62 $?6B B $  ,06==??, ,",$( 0/r   KVr   get_keyc                     t          t                    }| D ](} ||          }||                             |           )|S )z2
    Groups elements with same key into sets.
    )r   rv   add)r   r   key_to_valuesvaluer   s        r    r   r     sO     -8,<,<M & &genncu%%%%r   r~   c                     d|  d}t          |          5 }t          |                                          cd d d            S # 1 swxY w Y   d S )N/sys/devices/system/cpu/cpuz/topology/thread_siblings_list)open_get_set_of_int_from_ranges_strread)r~   "thread_siblings_list_absolute_pathfs      r    r   r     s     	X&7WWW ' 
0	1	1 9Q.qvvxx889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9s   !AAAc                    d|  d}d}t                      }t          j        |          D ]}|                    d          r|dd                                          s5t          j                            ||          }t          j                            |d          }t          |          5 }|                                	                                dvr	 d d d            	 d d d            n# 1 swxY w Y   t          j                            |d          }t          |          5 }	t          |	                                          }
d d d            n# 1 swxY w Y   |
|k    rB|
}t          j                            |d	          }t          |          5 }t          |                                          }d d d            n# 1 swxY w Y   |S )
Nr   z/cacher      type>   DataUnifiedlevelshared_cpu_list)rv   r\   r_   
startswith	isdecimalpathjoinr   r   stripr`   r   )r~   cpu_cache_dir_absolute_path	max_level$logical_cpus_sharing_max_level_cacheentrycache_index_absolute_pathtype_absolute_path	type_filelevel_absolute_path
level_filer   shared_cpu_list_absolute_pathshare_cpu_list_files                r    r   r     s    	@&7???   I+.55(788  (( 	abb	0C0C0E0E 	$&GLL1Le$T$T!  W\\*CVLL$%% 	~~%%''/BBB	 	 	 	 	 	 	B	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 !gll+DgNN%&& 	+*
))**E	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+I	(*%'8)
 )
% /00 	4G3R#((**4 40	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
 0/s6   /*C22C6	9C6	,"EE	!E	"GG	G	ro   c                 H    t          |           }t                      }||z  S Nrn   )0_get_cpu_indices_for_numa_node_MAYBE_NOT_ALLOWED+_get_allowed_cpu_indices_for_current_thread)ro   all_cpu_indicesallowed_cpu_indicess      r    rq   rq     s4    F'  O FGG000r   c                     d|  d}	 t          |          5 }|                                }ddd           n# 1 swxY w Y   n&# t          $ r}t          d| d          |d}~ww xY wt	          |          S )z
    Returns:
        Indices of all CPUs associated with numa_node_index. However, the list
        is not filtered based on whether the thread is allowed to use them.
    z/sys/devices/system/node/nodez/cpulistNz:Could not determine CPUs corresponding to numa_node_index=.)r   r   FileNotFoundErrorrZ   r   )ro   cpulist_absolute_pathr   cpu_range_stres        r    r   r     s     VOUUU'(( 	%AFFHHM	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	%   LLLL
 
	 +=999s3   A 8A <A < A 
A'A""A'c                  >    t           j                                        S )N)torchcudadevice_countr   r   r    _get_gpu_countr     s    :""$$$r   c                 f   t           j                            |           }|j        }|j        }|j        }|dd|dd|dd}d| d}t          |          5 }t          t          |	                                
                                          d          cd d d            S # 1 swxY w Y   d S )N04x:02xz.0z/sys/bus/pci/devices/z
/numa_noder   )r   r   get_device_propertiespci_domain_id
pci_bus_idpci_device_idr   maxr`   r   r   )r(   device_propertiesdomainbusdevicepci_addrpci_numa_node_absolute_pathr   s           r    rp   rp     s   
88CC,F

&C,F 777s7777777H"N("N"N"N	)	*	* -a 3qvvxx~~''((!,,	- - - - - - - - - - - - - - - - - -s   AB&&B*-B*c                 R      fdt          t                                D             S )Nc                 <    h | ]}t          |           k    |S )rf   )rp   )r   r(   ro   s     r    r   z1_get_gpu_indices_for_numa_node.<locals>.<setcomp>/  s9       -	BBBoUU 	UUUr   )ranger   rn   s   `r    r   r   .  s<       ~//00   r   c                 B    t          |           }t          |          S Nrn   )	cpu_index)._get_arbitrary_allowed_cpu_index_for_numa_node_get_socket_index_for_cpu)ro   arbitrary_cpu_indexs     r    rt   rt   6  s.    H'   %/BCCCCr   r   c                    d|  d}	 t          |          5 }t          |                                                                          cd d d            S # 1 swxY w Y   d S # t          $ r}t          d|           |d }~ww xY w)Nr   z/topology/physical_package_idz)Could not determine socket for cpu_index=)r   r`   r   r   r   rZ   )r   package_id_absolute_pathr   r   s       r    r   r   >  s    NiNNN R*++ 	)qqvvxx~~''((	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) R R RIYIIJJPQQRs:   A$ 3A
A$ AA$ AA$ $
B.BBc                 <    t          t          |                     S r   )r   rq   rn   s    r    r   r   I  s"    6WWW  r   
ranges_strc                    t                      }|                     d          D ]}|                                }|sd|v r]|                    d          \  }}t          |          t          |          }}|                    t          ||dz                        z|                    t          |                     |S )z
    Util for parsing a string of int ranges, as in a sysfs file.

    Args:
        ranges_str: E.g., "0-2,4,6-7"

    Returns:
        E.g., {0, 1, 2, 4, 6, 7}
    r   -r   )rv   splitr   r`   rw   r   r   )r   ints	range_str	start_strend_strr   r   s          r    r   r   O  s     UUD%%c** 	% 	%	OO%%	 	)!*!5!5IwYW3EKKeS1W--....HHS^^$$$$Kr   r   c                    | sdS t          |           }g }|d         x}}|dd         D ]I}||dz   k    r|}||k    r|                    |            n|                    | d|            |x}}J||k    r|                    |            n|                    | d|            d                    |          S )z
    Convert a set of integers to a compact string with ranges.

    Args:
        ints: E.g., {0, 1, 2, 4, 6, 7}

    Returns:
        E.g., "0-2,4,6-7"
     r   r   Nr   r   )r   appendr   )r   sorted_intsrangesr   prevnums         r    rJ   rJ   g  s      r,,KFq>!ED122  $(??DD}}j))))////000EDD }}j!!!!''''(((88Fr   c                      t          d          5 } |                                 }d d d            n# 1 swxY w Y   t          |          S )Nz!/sys/devices/system/node/possible)r   r   r   )r   possible_nodes_strs     r    !_get_systemwide_numa_node_indicesr    s    	1	2	2 &aVVXX& & & & & & & & & & & & & & & ++=>>>s   155rs   c                     t                      }t                      }|D ];}t          |          }| t          |          k    r|                    |           <|S r   )r  rv   r   r   r   )rs   systemwide_numa_node_indicesmatching_numa_node_indicesro   r   s        r    ru   ru     st    #D#F#F !$7 < <L+
 
 
 4?RSSSSS&**?;;;%%r   c                  *    t          j        d          S )Nr   )r\   r]   r   r   r    r   r     s     """r   )Cr\   rX   rO   collectionsr   collections.abcr   r   dataclassesr   r   enumr   	functoolsr	   loggingr
   typingr   r   r   torch._utils_internalr   __all__r   rQ   strr   r   tupler`   r   r?   r@   r   rE   rv   r:   r   objectr<   r9   rV   rI   rU   rg   rh   ri   rj   r   r   r   r   r   rq   r   r   rp   r   rt   r   r   r   rJ   r  ru   r   r   r   r    <module>r     s   				      # # # # # # . . . . . . . . ) ) ) ) ) ) ) )                   % % % % % % % %  0 0 0 0 0 0   
8			" 	" 	" 	" 	"3 	" 	" 	" $
4 
4 
4 
4 
4 
4 
4 
47S/7 7 $	7
 38_7 7 7 7t 9Z  7:#
8X%
&# # $	#
 h !# # # #LKK%0K	K K K K>#CHoDGH
38_   
 
15c6k1B
	
 
 
 
.

 
 	X	
 
 
 
Jc#h J4 J J J JH	   8  	X	   8C CH    c c#h    .C0 C0S C0 C0 C0 C0L,0C ,0CH ,0 ,0 ,0 ,0^ GCLLGCLLhqk HaS!V,< aQi    99X9 9 9 9#0#0X#0 #0 #0 #0L1s 1sSVx 1 1 1 1::X: : : :&% % % % %-S -S - - - -$s s3x    D D D D D DRC RC R R R Rs s     C    0!HSM !c ! ! ! !H?3s8 ? ? ? ?&S &SX & & & &#SX # # # # # #r   