
    vh                    2   d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z! ddl"mZ# ddl$m%Z& ddl$m'Z' ddl$m(Z) ddl$m*Z+ ddl,m-Z- ddl.m/Z0 ejb                  Z1e'jd                  Z2e'jf                  Z3ejh                  e5cZ5Z6ejn                  e8cZ8Z9 ejt                  d      Z;ejx                  ejz                  e;<   ddZ>d Z?e;j                  e?       e;j                  d        ZB ejt                  d      ZCddZDd  ZEeCj                  eE       eCj                  d!        ZF G d" d#ej                        ZH ejt                  d$      ZI	 	 dd%ZJ  e&j                  eI      eJ       eIj                  dd&       ZMdd'	 dd(ZNdd'dd)ZOdd'dd*ZPdd'dd+ZQdd'dd,ZRdd'dd-ZSdd'dd.ZTdd'dd/ZU ejt                  d0      ZVeVj                  d1        ZWd2 ZX e&j                  eV      d3        ZY ejt                  d4      ZZeZj                  d5         e-j                  eZd6        d7 Z]eZj                  d8        Z^ ejt                  d9      Z_e_j                  d:         e-j                  e_d;        dd<Z`e_j                  d=        Za ejt                  d>      Zbebj                  d?        Zcd@ Zdedej                  eb<   dA Zfefej                  eb<   dB Zh	 ddDZi e1eidEF      Zj e&j                  eb      dG        Zk ejt                  dH      Zlelj                  dI        ZmdJ Znenej                  el<   dK Zoeoej                  el<    e&j                  el      dL        ZpdddddCdM	 ddNZqdddOdP	 ddQZrdddRddSZs	 	 d	 ddTZt ejt                  dU      ZudCdVdWZveuj                  dX        ZwdCdV	 ddYZx e-j                  euex        G dZ d[ej                        Zz ez       Z{ej                  j                  ez       ej                  j                  ez       ej                  j                  ez       ej                   j                  ez        ejt                  d\      ZdEe_        dd]Z	 	 	 	 dd^Zej                  dd_       Zej                  dd`       Zda Z ejb                  ee      ej                  e<    ejb                  e-j                  e      db        Zej                  dc        Z ejt                  dd      ZdEe_        dedf	 	 	 	 	 	 	 	 	 ddgZej                  dh        Zdi Z  e&j                   e      e        ejb                  e-j                  e      dj        Zdk Z G dl dmej                        Z	 ddnZdo Z ejt                  dp      ZdCe_        dq Zej                  dr        Zds Z  e&j                  e      e        ejt                  dt      ZdEe_        	 ddej6                  ddu	 	 	 	 	 	 	 ddvZej                  	 	 ddw       Z	 	 	 	 ddxZeej                  e<   dy Z  e&j                  e      e        ejt                  dz      ZdEe_        ddd{Zej                  d|        Z	 	 	 	 dd}Zeej                  e<   d~ Z  e&j                  e      e       y)zPallas-specific JAX primitives.    )annotationsN)Hashable)AnyCallableSequence)lax)	tree_util)ad_util)api_util)callback)core)dtypes)effects)linear_util)pretty_printer)state)util)ad)batching)partial_eval)	discharge)indexing)types)
primitives)mlir
program_idc                .    t         j                  |       S )a  Returns the kernel execution position along the given axis of the grid.

  For example, with a 2D `grid` in the kernel execution corresponding to the
  grid coordinates `(1, 2)`,
  `program_id(axis=0)` returns `1` and `program_id(axis=1)` returns `2`.

  The returned value is an array of shape `()` and dtype `int32`.

  Args:
    axis: the axis of the grid along which to count the program.
  axis)program_id_pbindr   s    U/opt/face_recognition/venv/lib/python3.12/site-packages/jax/_src/pallas/primitives.pyr   r   ;   s     
				%%    c                   |j                  d      }t        j                         }|r||   j                  S t        j                         }|j                  |      }t        j                  j                  t        | dt        |            S Nr    r   )poppallas_corecurrent_grid_envindex
axis_framesizejax_core	Primitivebind_with_tracer    dict)trace_paramsr   grid_envframes         r"   program_id_bind_with_tracer6   I   sp    	F	$))+(D>

 
 
"% jj!				+	+L%Tt_	UUr#   c                 J    t        j                  dt        j                        S Nr&   r-   ShapedArrayjnpint32r2   s    r"   _program_id_abstract_evalr>   V       			b#))	,,r#   num_programsc                .    t         j                  |       S )z2Returns the size of the grid along the given axis.r   )num_programs_pr!   r   s    r"   r@   r@   \   s    			$		''r#   c                @   |j                  d      }t        j                         }|r||   j                  S t        j                         }|j                  |      }|t        j
                  u r0t        j                  j                  t        | dt        |            S |S r%   )r'   r(   r)   r,   r+   dynamic_grid_dimr-   r.   r/   rB   r0   )r1   r2   r3   r   r4   r5   r,   s          r"   _num_programs_bind_with_tracerE   `   s    	F	$))+(D>

 
 
"%	D	$	[)))--neRSWYY	+r#   c                 J    t        j                  dt        j                        S r8   r9   r=   s    r"   _num_programs_abstract_evalrG   n   r?   r#   c                  (    e Zd ZdZdZdZdZdZdZdZ	y)	AtomicOpTypexchgaddmaxminandorxorN)
__name__
__module____qualname__XCHGADDMAXMINANDORXORr&   r#   r"   rI   rI   r   s%    	$####"#r#   rI   
atomic_rmwc               L   ~|j                  |      \  }}}}t        |      dkD  rt        d      |d   }	|t        |t        j                  k(  rd }
nS|t        j
                  k(  rt        j                  }
n/|t        j                  k(  rt        j                  }
nt        |      t        d |	j                  D              r|	j                  }|D cg c]$  }t        |t               xr |j                  dk(  & }}|D cg c]   }t        |t              r|j                  n|" }}t!        d |D              }t#        j$                  |||      }t!        d	 |D              }||   } |
||      }t#        j&                  |||
      }t!        d |D              }||   }nat        d |	j                  D              r?||	j                     }|j(                  |	j                     j+                   |
||            }nt        |fdt        |       dz
  z  z   |fS c c}w c c}w )N   zOnly one indexer is supported.r   c                    | |z   S Nr&   )xys     r"   <lambda>z,_atomic_rmw_discharge_rule.<locals>.<lambda>   s
    !a% r#   c              3  Z   K   | ]#  }t        |t              xs |j                    % y wr_   
isinstanceSliceshape.0ss     r"   	<genexpr>z-_atomic_rmw_discharge_rule.<locals>.<genexpr>   %     D1*Q

-agg+
-D   )+r&   c              3  X   K   | ]"  }t        |t              r|j                  nd  $ ywr]   Nre   rf   r,   rh   s     r"   rk   z-_atomic_rmw_discharge_rule.<locals>.<genexpr>   "     O!*Q"6A=O   (*slice_sizesc              3  :   K   | ]  }|rd n
t        d         y wr_   sliceri   scalars     r"   rk   z-_atomic_rmw_discharge_rule.<locals>.<genexpr>   s     RFE$K7R   start_indicesc              3  :   K   | ]  }|rd n
t        d        ywr   Nrv   rx   s     r"   rk   z-_atomic_rmw_discharge_rule.<locals>.<genexpr>        OVt4Orz   c              3  >   K   | ]  }t        |t                 y wr_   re   rf   rh   s     r"   rk   z-_atomic_rmw_discharge_rule.<locals>.<genexpr>        
9z!U##
9   r_   )	unflattenlenNotImplementedErrorrI   rU   rV   r;   maximumrW   minimumallindicesre   rf   rg   starttupler   dynamic_slicedynamic_update_sliceatset)in_avals	out_avals	args_treeatomic_type	args_flatrefindexersvalmaskidxmonoidr   rj   scalar_dimsslice_startsrt   out_onesval_indexerx_newout_indexerouts                        r"   _atomic_rmw_discharge_ruler   ~   s    &00;#xd]Q
>
??#	
L$$$Fl&&&[[Fl&&&[[F
k
**DDDkkGGNO!z!U++=2=OKODKLqz!U3AGG:LLLOwOOK  lLHRkRRK
k
C
h
C$$S#\JEO;OOK
;
C

9S[[
99
ckk
CFF3;;##F3$45E

Gs8}q01	13	66 PLs   )H%H!c                   | j                  |      \  }}}}|j                  t        j                  d      k(  r,|t        j                  k7  rt        d|j                   d      |j                  t        j                  d      t        j                  d      t        j                  d      t        j                  hv r&t        d|j                   d|j                   d      t        |d	| iS )
Nfloat16z`atomic_z` does not support f16.boolint8int16z` does not support .r   )	r   dtyper;   rI   rU   
ValueErrorvaluebfloat16_swap_abstract_eval)r   r   
avals_flatr   r2   s        r"   _atomic_abstract_evalr      s    $$Z0,#q!QYY#))I&&;,:J:J+J
x 1 122IJ
KKYY	ii	ii	ii	ll	  
;$$%%81E  
j	>I	>>r#   )r   c                   t        j                  | |d      \  }}t        j                  ||||f      \  }}t	        j
                  |||dS )Nr[   )r   r   )spget_ref_and_transformsr	   tree_flattenatomic_rmw_pr!   )	x_ref_or_viewr   r   r   r   x_ref
transformsr   r   s	            r"   _atomic_rmwr      sX    //S,% #//
C0NO)Y			I;
 r#   c               >    t        | |||t        j                        S )zAtomically exchanges the given value with the value at the given index.

  Args:
    x_ref_or_view: The ref to operate on.
    idx: The indexer to use.
    mask: TO BE DOCUMENTED.

  Returns:
    The value at the given index prior to the aupdate.
  r   r   )r   rI   rT   r   r   r   r   s       r"   atomic_xchgr      s"     
S#Dl6G6G
 r#   c               >    t        | |||t        j                        S )zAtomically computes ``x_ref_or_view[idx] += val``.

  Args:
    x_ref_or_view: The ref to operate on.
    idx: The indexer to use.
    mask: TO BE DOCUMENTED.

  Returns:
    The value at the given index prior to the atomic operation.
  r   )r   rI   rU   r   s       r"   
atomic_addr      "     
S#Dl6F6F
 r#   c               >    t        | |||t        j                        S )a  Atomically computes ``x_ref_or_view[idx] = max(x_ref_or_view[idx], val)``.

  Args:
    x_ref_or_view: The ref to operate on.
    idx: The indexer to use.
    mask: TO BE DOCUMENTED.

  Returns:
    The value at the given index prior to the atomic operation.
  r   )r   rI   rV   r   s       r"   
atomic_maxr      r   r#   c               >    t        | |||t        j                        S )a  Atomically computes ``x_ref_or_view[idx] = min(x_ref_or_view[idx], val)``.

  Args:
    x_ref_or_view: The ref to operate on.
    idx: The indexer to use.
    mask: TO BE DOCUMENTED.

  Returns:
    The value at the given index prior to the atomic operation.
  r   )r   rI   rW   r   s       r"   
atomic_minr      r   r#   c               >    t        | |||t        j                        S )zAtomically computes ``x_ref_or_view[idx] &= val``.

  Args:
    x_ref_or_view: The ref to operate on.
    idx: The indexer to use.
    mask: TO BE DOCUMENTED.

  Returns:
    The value at the given index prior to the atomic operation.
  r   )r   rI   rX   r   s       r"   
atomic_andr     r   r#   c               >    t        | |||t        j                        S )zAtomically computes ``x_ref_or_view[idx] |= val``.

  Args:
    x_ref_or_view: The ref to operate on.
    idx: The indexer to use.
    mask: TO BE DOCUMENTED.

  Returns:
    The value at the given index prior to the atomic operation.
  r   )r   rI   rY   r   s       r"   	atomic_orr     s      
S#Dloo
 r#   c               >    t        | |||t        j                        S )zAtomically computes ``x_ref_or_view[idx] ^= val``.

  Args:
    x_ref_or_view: The ref to operate on.
    idx: The indexer to use.
    mask: TO BE DOCUMENTED.

  Returns:
    The value at the given index prior to the atomic operation.
  r   )r   rI   rZ   r   s       r"   
atomic_xorr   %  r   r#   
atomic_casc                   |j                   |j                   k7  s|j                  |j                  k7  rt        d      | j                  rt        d      |j                  rt        d      |j                  rt        d      t        j                  |j                  |j                         t        j                  d      hfS )Nz1cmp and val must have identical dtypes and shapeszref must be scalar.zcmp must be scalar.zval must be scalar.r   )r   rg   r   r-   r:   r   WriteEffect)ref_avalcmp_avalval_avals      r"   _atomic_cas_abstract_evalr   6  s    ^^x~~%8>>)I
H
II^^
*
++^^
*
++^^
*
++			hnnhnn	=@Q@QRS@T?U	UUr#   c                0    t         j                  | ||      S )a  Performs an atomic compare-and-swap of the value in the ref with the
  given value.

  Args:
    ref: The ref to operate on.
    cmp: The expected value to compare against.
    val: The value to swap in.

  Returns:
    The value at the given index prior to the atomic operation.
  )atomic_cas_pr!   )r   cmpr   s      r"   r   r   C  s     
		3S	))r#   c                H    ~ ~t        j                  ||k(  ||      }|d d f|fS r_   )r;   where)r   r   r   r   r   new_vals         r"   _atomic_cas_discharge_ruler   Q  s/    	IIcSj#s+'
4		##r#   max_contiguousc                    | S r_   r&   r`   r2   s     r"   rb   rb   Y  s     r#   c                    |gS r_   r&   r2   r`   __s      r"   rb   rb   Z  s    QC r#   c                t    t        |t        t        f      s|f}t        j	                  | t        |            S N)values)re   listr   max_contiguous_pr!   r`   r   s     r"   r   r   \  s1    	FT5M	*YF			qv		77r#   c                    | S r_   r&   avalr2   s     r"   _max_contiguous_abstract_evalr   a      	+r#   multiple_ofc                    | S r_   r&   r   s     r"   rb   rb   g  s    a r#   c                    |gS r_   r&   r   s      r"   rb   rb   h  s    ! r#   c                l    t        |t              r|fn
t        |      }t        j	                  | |      S r   )re   intr   multiple_of_pr!   r   s     r"   r   r   j  s.    "63/F9U6]&			Af		--r#   c                    | S r_   r&   r   s     r"   _multiple_of_abstract_evalr   n  r   r#   masked_loadc                    | j                  |      \  }}}}t        j                  |d   j                         |j                        t        j                  d      hfS )Nr   )r   r-   r:   get_indexer_shaper   r   
ReadEffect)r   r   r2   r   r   s        r"   _load_abstract_evalr   u  sW    !++J7#xA8B<99;SYYG
 r#   c           	        | j                   \  }t        j                  | j                  d   | j                        \  }}}}t        j                  |g||j                        }|t        j                  d      t        j                  |||      g}	|U|	t        j                  d      t        j                  d      t        j                  t        j                  ||            gz  }	|U|	t        j                  d      t        j                  d      t        j                  t        j                  ||            gz  }	t        j                  |	      S )Nr   print_shapes <-  mask=zother=)outvarsr	   tree_unflattenr3   invarsr-   pp_varsr   pptextr   pp_ref_transformspp_varconcat)
eqncontextsettingsra   r`   r   r   otherlhsresults
             r"   _load_pp_ruler  ~  s   
{{"!'66szz+7N7:zzC!XtU 	!gH4I4IJ#	ggfo7Ax0&
 




g./ F
 



w/0 F
 
6	r#   c           	     @   |j                  |       \  }}}}|j                  |      \  }}	}	}
|
t        j                  |
      }
t        j                  t        j                  ||||f      d|i|t        j                  t        j                  ||||
f      d|i|fS Nr   )r   r
   instantiateload_pr!   r	   tree_leaves)primalstangentsr   r3   
ref_primalr   r   other_primalref_tangentr2   other_tangents              r"   	_load_jvpr    s    -6-@-@-I**hl%.%8%8%B"+q!]''6Mkk  *hl!KL 
 kk  +x}!MN 
 r#   c                   t        j                  |t         j                        r%t        j                  | t         j                  |      S t        j                  |t
        j                        rt        j                  | d|      S t        j                  |t         j                        r4t        j                  | t        j                  |      j                  |      S t        j                  |t         j                        rt        j                  | d|      S t        j                  |t
        j                        rt        j                  | d|      S t        |      )Nr   F)r;   
issubdtypefloatingfullnanr(   SEMAPHORE_INTERPRET_DTYPEintegeriinforM   r   semaphore_dtyper   rg   r   s     r"   uninitialized_valuer$    s    ^^E3<<(88E377E** ~~e[BBC88E1e$$
~~eS[[)88E399U+//77
~~eSXX&88E5%((
~~e[88988E1e$$E""r#   Fc                    t        d |D              }|rt        d |D              }t        d| j                        }t        j                  | ||      } | S )a  
  DynamicSlice and DynamicUpdateSlice adjust the start index in cases where the
  requested slice overruns the bounds of the array. This pads the array with
  uninitialised values such that the requested slice will never overrun.

  For example, if arr is [1.,2.,3.,4.] and a slice of size 4, start index 2 is
  requested then the result will be [3.,4.,NaN,NaN] after padding, rather than
  [1.,2.,3.,4.] from the unpadded array

  unpad=True performs the inverse operation
  c              3  &   K   | ]	  }d |d f  ywr~   r&   )ri   
slice_sizes     r"   rk   z?_pad_values_to_avoid_dynamic_slice_oob_shift.<locals>.<genexpr>  s     J
!Z+Js   c              3  4   K   | ]  \  }}}| | | f  y wr_   r&   )ri   lowhighinteriors       r"   rk   z?_pad_values_to_avoid_dynamic_slice_oob_shift.<locals>.<genexpr>  s-      H4T8 !D4%(3 Hs   r&   r#  )padding_configpadding_value)r   r$  r   r   pad)r   rt   unpadr,  r-  s        r"   ,_pad_values_to_avoid_dynamic_slice_oob_shiftr0    s^     JkJJ.
 H8FH HN%BekkB-
''%!/ -/% 
,r#   T)r/  c          
        ~|j                  |      \  }}}}t        |      dkD  rt        d      |d   }	t        d |	j                  D              r3|	j                  D ]-  }
t        |
t              s|
j                  dkD  s$t        d       |	j                  }|D 
cg c]"  }
t        |
t               xr |
j                   $ }}
|D 
cg c]   }
t        |
t              r|
j                  n|
" }}
t        d |D              }t        ||      }t        j                  t        j                        }t!        j"                  ||D 
cg c]  }
t        j$                  |
|       c}
|      }t        d |D              }||   }n2t        d	 |	j                  D              r||	j                     }nt        ||t        j&                  |||      }d
t        |       z  |fS c c}
w c c}
w c c}
w )Nr]   -Only one indexer supported in discharge rule.r   c              3  Z   K   | ]#  }t        |t              xs |j                    % y wr_   rd   rh   s     r"   rk   z'_load_discharge_rule.<locals>.<genexpr>  rl   rm   Unimplemented stride support.c              3  X   K   | ]"  }t        |t              r|j                  nd  $ ywro   rp   rh   s     r"   rk   z'_load_discharge_rule.<locals>.<genexpr>  rq   rr   rs   c              3  :   K   | ]  }|rd n
t        d        ywr~   rv   rx   s     r"   rk   z'_load_discharge_rule.<locals>.<genexpr>  r   rz   c              3  >   K   | ]  }t        |t                 y wr_   r   rh   s     r"   rk   z'_load_discharge_rule.<locals>.<genexpr>  r   r   r_   )r   r   r   r   r   re   rf   striderg   r   r   r0  r   canonicalize_dtyper;   int64r   r   astyper   )r   r   r   r   r2   r   r   r   r
  r   rj   r   r   r   rt   	idx_dtyper   r   r   s                      r"   _load_discharge_ruler=    s   (229=#xu]Q
M
NN#DDD[[ C	Au	!((Q,!"ABBC kkGELMz!U++;AGG;MKMDKLqz!U3AGG:LLLOwOOK 7sK
HC))#))4I  	)56Aszz!Y6H
 O;OOK
;
C

9S[[
99
ckk
C
	%+
))D#u
%C	3x=	 #	%%+ NL 7s   'G#%G(G-
masked_swapc           	        | j                  |      \  }}}}|d   j                         }||j                  k7  r)t        d|j                   d|j                   d| d      |j                  |j                  k7  r&t        d|j                   d|j                   d      t        j                  ||j                        t        j                  d      hfS )	Nr   z%Invalid shape for `swap`. Ref shape: z. Value shape: z. Indices: z. z%Invalid dtype for `swap`. Ref dtype: z. Value dtype: r   )	r   r   rg   r   r   r-   r:   r   r   )r   r   r2   r   r   r   expected_output_shapes          r"   r   r     s    #--j9#xa"2,88:cii'

/		{ ;		{+hZr	;  	YY#))

/		{ ;		{"	& 
 0#))<
 r#   c                n   | j                   \  }| j                  d   j                  | j                        \  }}}}t	        j
                  |||      }t        |t        j                        rRt        j                  |t        j                  d      t        j                  t        j                  ||            g      S t        j                  |g||j                        }|t        j                  d      |t        j                  d      |t        j                  d      t        j                  t        j                  ||            g}	|U|	t        j                  d      t        j                  d      t        j                  t        j                  ||            gz  }	t        j                  |	      S )Nr   r   r   z, r   r   )r   r3   r   r   r   r  re   r-   DropVarr  r  r  r  r  r   )
r  r  r	  ra   r`   r   r   r   x_ir  s
             r"   _swap_pp_rulerD    sR    {{"!::k2<<SZZH!XsD
Wa2#8##$99
g!>?A B B sG(2G2GH!ggdm	ggfo	ggdmgghooc7+,& 




g./ F
 
6	r#   c          	     <   |j                  |       \  }}}}|j                  |      \  }}	}
}	t        j                  |
      }
t        j                  t        j                  ||||f      d|i|t        j                  t        j                  |||
|f      d|i|fS r  )r   r
   r  swap_pr!   r	   r  )r  r  r   r3   r  r   
val_primalr   r  r2   val_tangents              r"   	_swap_jvprI  7  s    +4+>+>w+G(*h
D#,#6#6x#@ +q+q##K0+kk  *h
D!IJ 
 kk  +xd!KL 
 r#   c                  ~|j                  |      \  }}}}t        |      dkD  rt        d      |d   }	t        d |	j                  D              re|	j                  D ]-  }
t        |
t              s|
j                  dkD  s$t        d       |	j                  }t        |      D 
cg c]#  \  }}
t        |
t              s|
j                  s|% }}}
|D 
cg c]   }
t        |
t              r|
j                  n|
" }}
t        d |D              }t        ||      }t        j                  |||      }t        j                   ||      }|0|}t        j"                  |||      }t        j"                  |||      }t        j$                  ||      }t        j&                  |||      }t)        ||      }nt        d	 |	j                  D              rj||	j                     }|0|}t        j"                  |||      }t        j"                  |||      }|j*                  |	j                     j-                  |      }nt        |fd
t        |       dz
  z  z   |fS c c}
}w c c}
w )Nr]   r2  r   c              3  Z   K   | ]#  }t        |t              xs |j                    % y wr_   rd   rh   s     r"   rk   z'_swap_discharge_rule.<locals>.<genexpr>S  rl   rm   r4  c              3  X   K   | ]"  }t        |t              r|j                  nd  $ ywro   rp   rh   s     r"   rk   z'_swap_discharge_rule.<locals>.<genexpr>_  rq   rr   rs   r{   c              3  >   K   | ]  }t        |t                 y wr_   r   rh   s     r"   rk   z'_swap_discharge_rule.<locals>.<genexpr>m  r   r   r_   )r   r   r   r   r   re   rf   r8  	enumeraterg   r   r   r0  r   r   r;   squeezer   expand_dimsr   ._unpad_values_to_avoid_dynamic_slice_oob_shiftr   r   )r   r   r   r   r2   r   r   r   r   r   rj   r   ir   r   rt   r   out_r   s                      r"   _swap_discharge_rulerT  L  s0   &00;#xd]Q
M
NN#DDD[[ C	Au	!((Q,!"ABBC kkG g&Aq!U#AGG 	
K 
 ELLqz!U3AGG:LLLOwOOK 7sK
HC


C;
GC
++c;
'CdIIdC%cIIdC&c
//#{
+C$$S#\JE:5+NE

9S[[
99
ckk
CdIIdC%cIIdC&cFF3;;##C(E

Gs8}q01	13	66;
 Ms   )(I%I!)r   r
  cache_modifiereviction_policyvolatilec                   t        j                  | |d      \  }}t        j                  ||||f      \  }	}
t	        j
                  |	|
|||dS )av  Returns an array loaded from the given index.

  If neither ``mask`` nor ``other`` is specified, this function has the same
  semantics as ``x_ref_or_view[idx]`` in JAX.

  Args:
    x_ref_or_view: The ref to load from.
    idx: The indexer to use.
    mask: An optional boolean mask specifying which indices to load.
      If mask is ``False`` and ``other`` is not given, no assumptions can
      be made about the value in the resulting array.
    other: An optional value to use for indices where mask is ``False``.
    cache_modifier: TO BE DOCUMENTED.
    eviction_policy: TO BE DOCUMENTED.
    volatile: TO BE DOCUMENTED.
  load)r   rU  rV  is_volatile)r   r   r	   r   r  r!   )r   r   r   r
  rU  rV  rW  r   r   r   r   s              r"   rY  rY  y  s`    $ //sFK%"//j$&)Y 
#%
 r#   swapr   rV  _function_namec                   t        j                  | ||      \  }}t        j                  ||||f      \  }}	t	        j
                  ||	|dS )zSwaps the value at the given index and returns the old value.

  See :func:`~jax.experimental.pallas.load` for the meaning of the arguments.

  Returns:
    The value stored in the ref prior to the swap.
  )r   rV  )r   r   r	   r   rF  r!   )
r   r   r   r   rV  r]  r   r   r   r   s
             r"   r[  r[    sX     //S.% #//
C0NO)Y	I
 r#   )r   rV  c               &    t        | ||||d      }y)ztStores a value at the given index.

  See :func:`~jax.experimental.pallas.load` for the meaning of the arguments.
  storer\  N)r[  )r   r   r   r   rV  r2   s         r"   r`  r`    s    
 =#s!#!r#   c                F   | j                   dk7  s|j                   dk7  rt        d      |rdnd}|sdnd}|C|t        d      |rt        j                  j                  nt        j                  j
                  }d	d}t        j                   || j                         ||j                              }	t        j                  |	t        j                        rt        j                  nt        j                  }
t        j                  j                  | ||f|ffdf||
      S )
N   z`a` and `b` must be 2D arrays.r   r]   z5Only one of allow_tf32 and precision can be specifiedc                L    | t         j                  k(  rt         j                  S | S )z5Ugly workaround to support float8_e4m3b11fnuz in dot.)r;   float8_e4m3b11fnuzr   r   s    r"   
_handle_f8zdot.<locals>._handle_f8  s    &&&\\Lr#   )r&   r&   )dimension_numbers	precisionpreferred_element_type)r   zjax.typing.DTypeLike)ndimr   r   	PrecisionHIGHHIGHESTr;   promote_typesr   r  r   r<   float32jaxdot_general)abtrans_atrans_b
allow_tf32rh  lhs_contract_dimrhs_contract_dimrf  r   	out_dtypes              r"   dotrz    s    ffkqvv{
5
66!Qq%Q1NOO&0""cmm6K6KI 

Jqww/AGG1D
E%>>%=cii3;;)			+-0@/BCXN& 
 
 r#   
reciprocalapproxc               0    t         j                  | |      S )Nr|  )reciprocal_pr!   r`   r}  s     r"   r{  r{    s    			1V		,,r#   c                   ~| S r_   r&   r  s     r"   _reciprocal_abstract_evalr    s
    	
(r#   c               N    ddd} t        j                  |d      | ||      S )NFr|  c                   |rOt        j                  | j                  t         j                              j                  t         j                        S t        j                  |       S r_   )r;   r{  r;  r   ro  r  s     r"   _reciprocalz._reciprocal_lowering_rule.<locals>._reciprocal  s?    ^^AHHS\\23::3;;GG>>!r#   multiple_results)r   	lower_fun)ctxr`   r}  r  s       r"   _reciprocal_lowering_ruler    s/      % 
 
=e	<	1V
 r#   c                      e Zd Zd Zy)PrintEffectc                     y)NPrintr&   )selfs    r"   rb   zPrintEffect.<lambda>  s    r#   N)rQ   rR   rS   __str__r&   r#   r"   r  r    s     'r#   r  debug_printc                    d}| r<t        t        t        j                         j	                  |                   ^}}}|du}t        j                  || |dS )a  Prints values from inside a Pallas kernel.

  Args:
    fmt: A format string to be included in the output. The restrictions on the
      format string depend on the backend:

      * On GPU, when using Triton, ``fmt`` must not contain any placeholders
        (``{...}``), since it is always printed before any of the values.
      * On GPU, when using the experimental Mosaic GPU backend, ``fmt`` must
        contain a placeholder for each value to be printed. Format specs and
        conversions are not supported. All values must be scalars.
      * On TPU, if all inputs are scalars: If ``fmt`` contains placeholders,
        all values must be 32-bit integers. If there are no placeholders, the
        values are printed after the format string.
      * On TPU, if the input is a single vector, the vector is printed after
        the format string. The format string must end with a single placeholder
        ``{}``.
    *args: The values to print.
  FN)fmthas_placeholders)nextiterstring	Formatterparsedebug_print_pr!   )r  argsr  r2   
field_names        r"   r  r    sX    ( T&"2"2"4":":3"?@AAzA!-			Ts=M	NNr#   c           
        d}t        j                         j                  |       D ]*  \  }}}}||dz  }|s|rt        d      |s!t        d       t	        |      |k7  r$t        d| d|dk(  rdnd d	t	        |             y )
Nr   r]   zDThe format string should not contain any format specs or conversionszDThe format string should not reference arguments by position or namezThe format string expects z	 argument rj   z
, but got )r  r  r  r   r   	TypeError)r  r  n_placeholdersr2   fieldspec
conversions          r"   check_debug_print_formatr    s     .$*$4$4$6$<$<S$A 
 ajnz
P  
P 
 	Y. 

$^$4 5'1,2#6jT	M  !r#   c                R    |rt         | j                  |        yt        | g|  yr8   )printformat)r  r  r  s      r"   debug_print_implr  +  s/    	*#**d
 
 
#	r#   c                    ~~ ~g t         hfS r_   )debug_print_effect)r  r  avalss      r"   debug_print_abstract_evalr  4  s    S"	 !	!!r#   c                h   t        d t        | |      D              }d }g }t        |      D ]G  }t        t	        j
                  ||      ||       }|j                  t        j                  |i |       I t        | D cg c]  }t        j                  |       }}|dt        |      z  fS c c}w )z3Unrolls the print primitive across the mapped axis.c              3  F   K   | ]  \  }}|	|j                   |     y wr_   )rg   )ri   r`   rR  s      r"   rk   z,debug_print_batching_rule.<locals>.<genexpr><  s      K$!QQ]1771:Ks   
!!c                \    |t         j                  u r|S t        j                  || |d      S )NF)r   keepdims)r   
not_mappedr   index_in_dim)rR  dimargs      r"   get_arg_at_dimz1debug_print_batching_rule.<locals>.get_arg_at_dim?  s,    
h!!!jCu==r#   )r   )r  ziprangemap	functoolspartialappendr  r!   r;   stackr   )	r  dimsr3   	axis_sizer  outsrR  args_idxxss	            r"   debug_print_batching_ruler  :  s    K#dD/KK)> 
$ 9a9$$^Q7tDHKK""H7789 #&t*	-B#))B-	-$	-	tc$i	 
.s   B/c           	         t        j                  | t        j                  t        j
                  fi |d t        |      | j                  | j                  d      \  }}}|S )NT)has_side_effect)	r   emit_python_callbackr  r  r  implr   avals_in	avals_out)r  r  r3   r  r2   s        r"   debug_print_lowering_ruler  R  sV    ..	**5f5

4j	ll	mm,&!Q 
-r#   c                D    t        d t        ||      D              } | | S )Nc              3  V   K   | ]!  \  }}|rt        j                  ||      n| # y wr_   )state_typesTransformedRef)ri   rr  ts      r"   rk   z'wrap_with_transforms.<locals>.<genexpr>f  s0      
!Q +,k  A&2s   '))r   r  )fr   r  new_argss       r"   wrap_with_transformsr  d  s,     dJ' ( 
Hr#   
run_scopedr&   )collective_axesc               r   t        |t              s|f}t        j                  ||f      \  }}t	        j
                  t        j                  | t	        j                  d| ||            |      \  }}|D cg c]  }|j                          }	}|	D cg c]*  }t        |t        j                        r|j                  n|, }
}t        d |	D              }t        ||      }t        j                  ||
      \  }}}\   t!        j"                  |||d}t        j$                   |       |      S c c}w c c}w )a  Calls the function with allocated references and returns the result.

  The positional and keyword arguments describe which reference types
  to allocate for each argument. Each backend has its own set of reference
  types in addition to :class:`jax.experimental.pallas.MemoryRef`.

  When `collective_axes` is specified, the same allocation will be returned for
  all programs that only differ in their program ids along the collective axes.
  It is an error not to call the same `run_scoped` in all programs along that
  axis.
  zpallas run_scoped)
debug_infoc              3  l   K   | ],  }t        |t        j                        r|j                  nd  . yw)r&   N)re   r  r  r   )ri   r  s     r"   rk   zrun_scoped.<locals>.<genexpr>  s0      
 !K$>$>?allRGs   24jaxprr  )re   r   r	   r   r   flatten_funlu	wrap_initr  get_ref_avalr  r  r   r  petrace_to_jaxpr_dynamicrun_scoped_pr!   r   )r  r  r   kw_types
flat_typesin_treeflat_funout_tree_thunkr  	ref_avalsr  ref_transformsr  r2   constsr   s                   r"   r  r  q  s=   " 
OU	+&(O!..x/@A*g%11ll1&112E23UHFG 	(N *44Aq~~4)4 
 ![778aeea?%    . "(N;( 228UC%FB6P#		!	!."2C	88# 5s   6D//D4c                   ~~| j                   D ch c]@  }t        |t         j                        r"|j                  t	        | j
                        k\  s|B }}| j                  D cg c]  }|j                   c}|fS c c}w c c}w r_   )r   re   JaxprInputEffectinput_indexr   	constvarsr   r   )r  r  r  effnonlocal_effectsvs         r"   _run_scoped_abstract_evalr    s{    
O 

S'22
3ooU__!55	 
   --	(Q!&&	(*:	:: 
)s   AA>&Bc          	        ~|rt        d      t        |      }t        j                  |      }t        |j                        }t        j                  |g | dgt        |j                        z  z         \  }	}
|
rt        d      t        j                  |	|      }	t        j                  ||	|d}|d | }||d  }t        | |      D cg c]4  \  }}|r+t        |t        j                        r|j                  d      nd 6 }}}t        |      t        |      k(  sJ t        |       dt        |              ||fS c c}}w )Nz:run_scoped discharge does not support collective_axes yet.Fshould_discharge4Cannot handle new consts created by state discharge.r  r   z != )r   r   r  convert_constvars_jaxprr   state_dischargedischarge_stater   convert_invars_to_constvarsr  r!   r  re   r(   AbstractMemoryRefr'   )r  r   r   r  r  r   
num_constsjaxpr_noconstnum_return_valuesdischarged_body
new_constsr   return_valuesref_outputsshouldr   updatess                    r"   _run_scoped_discharge_ruler    sn    
D  9~* ,,U3--//0 / ? ?'5'C4E*EE!/:
 
>@ @ 22?JO/ 		#
 (()-%&'+
 %((8($CE FD #z$8U8U'VkooaE' E 
WX	&L3w<.S]O(LL	&	-		Es   9E c                  |rt        d      t        j                  |      }t        |j                        t        j                  |g d      \  }|rt        d      fd} t        j                  |d      | g| S )NzGrun_scoped lowering outside of Pallas does not support collective_axes.Tr  r  c                    t        |       }j                  |d  D cg c]  }|j                   }}|D cg c]"  }t        |j                  |j
                        $ }}t        j                  g g| | }|d  S c c}w c c}w r_   )r   r   r   r$  rg   r   r-   
eval_jaxpr)	lower_fun_argsr  r  
body_avalsr   	init_valsr   r  r  s	          r"   
_lower_funz-_run_scoped_lowering_rule.<locals>._lower_fun  s    ^$J"1"8"8"EFQ!&&FJF,68$( %

DJJ  8I 8


or
ON
OY
OC!!""	 G8s
   A>'Br  )
r   r  r  r   r   r  r  r   r   r  )	r  r  r  r  r  r  r	  r  r  s	          @@r"   _run_scoped_lowering_ruler
    s    
	  ,,U3--//0 / ? ?R$!0/:->@ @# 
;
T	:3	F	FFr#   c                n    t        | t        j                        r| j                  | j                  fS | dfS r8   )re   r   r  r   r   )r   s    r"   _get_ref_and_transformsr    s.    U))*77CNN""	b.r#   c                      e Zd ZdZdZy)DeviceIdTypemeshlogicalN)rQ   rR   rS   MESHLOGICALr&   r#   r"   r  r    s    	$'r#   r  c                   |/t         j                  t         j                  t         j                  h}t	        | t
        j                        st        d| d|        | j                  }|r|d   j                         }|rt        d| d|       | j                  t        fd|D              st        d| d| d      y )	NzCannot z on a non-semaphore Ref: r   z on a non-()-shaped semaphore: c              3  J   K   | ]  }t        j                  |        y wr_   )r;   r  )ri   sem_type	sem_dtypes     r"   rk   z"check_sem_avals.<locals>.<genexpr>  s$      
 
nnY)s    #zMust z$ semaphores of the following types: r   )r(   	semaphorebarrier_semaphorer  re   r   AbstractRefr   rg   r   r   any)sem_avalsem_transforms_avalsnameallowed_semaphore_types	sem_shaper  s        @r"   check_sem_avalsr     s     $%%--	 
He//	0
wtf$=hZH
IInn)$R(::<I
wtf$CI;O
PPnn)	 - 
 
v #$A	' 	
r#   c                    | j                   |j                   k(  rt        j                  | |      S t        | j                         dk(  r| S t	        d| j                    d|j                          )zEHelper function for indexing into a semaphore during state_discharge.r   zSemaphore value shape z does not match aval shape )rg   r  transform_arrayr   r   )	ref_valuer   r   s      r"   _transform_semaphorer$  $  si    __&**9jAA
9??q 

  1 2NN	 r#   semaphore_readc                    t        |       \  }}||g}t        j                  |      \  }}t        j                  |d|iS r  )r  r	   r   semaphore_read_pr!   )sem_or_viewr   r   r  	flat_argsr   s         r"   r%  r%  5  sE    +K8/#z
z	$"//5)Y					?Y	??r#   c                X    ~~ t        j                  dt        j                  d            S )Nr&   r<   )r-   r:   r;   r   )r   r  s     r"   _semaphore_read_abstract_evalr+  ;  s%    
 Y			b#))G"4	55r#   c                   ~|j                  |      \  }}t        ||| d         }|j                  t        j                        }dt        |       z  |fS )Nr   r_   )r   r$  r;  r;   r<   r   )r   r   r   r)  r   r   	sem_values          r"   _semaphore_read_discharge_ruler.  C  sV     )))43
"3
HQK@)syy))	3x=	 )	++r#   semaphore_signal)	device_iddevice_id_type
core_indexc                   t        |       \  }}t        j                  |t        j                        }|||||g}t	        j
                  |      \  }}	t        j                  ||	|d y )Nre  )r   r1  )r  r;   asarrayr<   r	   r   semaphore_signal_pr!   )
r(  incr0  r1  r2  r   r   r  r)  r   s
             r"   r/  r/  U  sb     ,K8/#zCsyy)#
z3	:	6$"//5)Y#r#   c                J   ~t        j                  | |      \  }}}}}t        ||d       |j                  t	        j                  d      k7  rt        d      |It        j                  |      }|D ]/  }	|	j                  t	        j                  d      k7  s&t        d       g S )Nsignalr<   zMust signal an int32 value.z$`device_id`s must be an int32 value.)r	   r   r   r   r;   r   r  )
r   r1  r  r  r  
value_avaldevice_id_avalscore_index_avaldevice_id_flat_avalsr   s
             r"   _semaphore_signal_abstract_evalr=  h  s      y%0(0(;7++
2
33 $00A$ A	syy)	)?@@A 
)r#   c                D   ~| j                   }| j                  d   }t        j                  ||      \  }}}}}	t	        j
                  t	        j                  d      t	        j                  d      t        j                  |||      t	        j                  d      t	        j                  t        j                  ||            g      }
|t        j                  |      }|s|
S t	        j                  t        j                  |d   |            g}|dd  D ]^  }|j                  t	        j                  d             |j                  t	        j                  t        j                  ||                   ` t	        j
                  |
t	        j
                  |      g      }
|
S )Nr   r/  r   r   r]   )r   r3   r	   r   r  r  r  r   r  r-   r  r  r  )r  r  r	  r   treesemsem_transformsr   
device_idsr2   r   flat_device_idsdevice_ids_ppr0  s                 r"   _semaphore_signal_pp_eqnrE    sW    ::&	K	 $ tV,	
		gg !ggcl7C8ggclgghooeW-. 	# ++J7OjWWX___Q-?IJKM$QR( I	2773<(2778??9g#FGHI ))S"))M23
4C	*r#   c               .   ~~|j                  |      \  }}}}}	|t        d      |	t        d      t        ||| d         }
|j                  t        j
                        }t        j                  |||
|z         \  }}|fdt        |       dz
  z  z   dfS )NzRemote signal not implemented.z&Multiple core support not implemented.r   r_   r]   r&   )	r   r   r$  r;  r(   r  r  transform_swap_arrayr   )r   r   r   r1  r)  r   r   r6  r0  r2  r-  r2   new_sem_values                r"    _semaphore_signal_discharge_rulerI    s    
 2;2E2Ei2P/3
CJ
>
??
F
GG"3
HQK@)

;889#$99	:y3!] 	Gs8}q'89	92	==r#   semaphore_waitc                    t        |       \  }}t        j                  |t        j                        }|||g}t	        j
                  |      \  }}t        j                  |d|i y )Nre  r   )r  r;   r4  r<   r	   r   semaphore_wait_pr!   )r(  decr   r   r  r)  r   s          r"   rJ  rJ    sX    +K8/#zCsyy)#
z3	$"//5)Y8i8r#   c                    t        j                  | |      \  }}}t        ||d       |j                  t	        j                  d      k7  rt        d      g S )Nwaitr<   zMust wait an int32 value.)r	   r   r   r   r;   r   )r   r  r  r  r9  s        r"   _semaphore_wait_abstract_evalrP    sV    /8/G/G0,( * (0&97++
0
11	)r#   c                   ~| j                   }| j                  d   }t        j                  ||      \  }}}t	        j
                  t	        j                  d      t	        j                  d      t        j                  |||      t	        j                  d      t	        j                  t        j                  ||            g      S )Nr   rJ  r   )r   r3   r	   r   r  r  r  r   r  r-   r  )r  r  r	  r   r?  r@  rA  r   s           r"   _semaphore_wait_pp_eqnrR    s     ::&	K	 $
 tV,			ggggcl7C8ggclgghooeW-. 
 r#   c                   ~|j                  |      \  }}}t        ||| d         }|j                  t        j                        }t        j                  ||||z
        \  }}	|	fdt        |       dz
  z  z   dfS )Nr   r_   r]   r&   )r   r$  r;  r(   r  r  rG  r   )
r   r   r   r)  r   r   rM  r-  r2   rH  s
             r"   _semaphore_wait_discharge_rulerT    s     $..y93
C"3
HQK@)

;889#$99	:y3!] 	Gs8}q'89	92	==r#   )r   r   return	jax.Array)r   r   rU  int | jax.Array)r   rI   )r   
Any | Noner   rI   )r   rX  )r`   rV  r   zSequence[int] | intrU  rV  )F)rU  rV  )rU  None)FFNN)rt  r   ru  r   rv  zbool | None)r  zmlir.LoweringRuleContext)r  strr  zjax.typing.ArrayLike)r  r   r  rZ  r  r   )r  r   r  rZ  r  r   )
r  zCallable[..., Any]r   r   r  zHashable | tuple[Hashable, ...]r  r   rU  r   r_   )r]   )r6  rW  r0  z4int | jax.Array | None | tuple[int | jax.Array, ...]r1  r  r2  zint | jax.Array | None)r1  r  )r  zjax_core.JaxprEqnr  zjax_core.JaxprPpContextr	  zjax_core.JaxprPpSettings)rM  rW  )__doc__
__future__r   enumr  r  collections.abcr   typingr   r   r   rp  r   r	   jax._srcr
   r   r   r   r-   r   r   r   r  r   r  r   r   jax._src.interpretersr   r   r   r  jax._src.pallasr(   jax._src.stater   r  r   r   r  r   r   jax.interpretersr   	jax.numpynumpyr;   r  rf   	NDIndexersafe_mapr  
unsafe_mapsafe_zipr  
unsafe_zipr.   r    ragged_mask_no_op_ruleragged_prop_rulesr   r6   def_bind_with_tracedef_abstract_evalr>   rB   r@   rE   rG   EnumrI   r   r   register_discharge_ruledef_effectful_abstract_evalr   r   r   r   r   r   r   r   r   r   r   r   r   r   def_implregister_loweringr   r   r   r   r   r  r   r  pp_eqn_rulesr  primitive_jvpsr$  r0  rQ  r=  rF  r   rD  rI  rT  rY  r[  r`  rz  r  r{  r  r  Effectr  r  lowerable_effectsadd_typecontrol_flow_allowed_effectsremat_allowed_effects"custom_derivatives_allowed_effectsr  r  r  r  r  r  r  primitive_batchersr  transformation2r  r  r  r  r  register_partial_discharge_ruler
  r  r  r   r$  r'  r%  r+  r.  r5  r  r/  r=  rE  rI  rL  rJ  rP  rR  rT  r&   r#   r"   <module>r     s   & "    $ * * 
      %   & )   $ * 4 / 7 # / + ! 


	--Z--Z!x!!,/+3+J+J  < (&	V     !; <-  - $##N3(  " "#@ A!!- "-499  "x!!,/&7=I&7R 6 ' ' ' 56P Q ))? *?  @D ) @D   ?C   ?C   ?C   ?C   >B   ?C  "x!!,/))	V *	V* )((6$ 7$
 &8%%&67    * +   ')? @8
 ## $ #""=1   ' (   }&< =.    ! 
		M	* ## $2 !.  f & &  & #" 7<0 29.d2< . )((0"& 1"&J 
		M	* ## $&8 !.  f $ &  &  )((0)7 1)7X &*d2;< +/#," ,0 # 6;266 "x!!,/ " -    16
	!
   |%> ?!'.. ! !]     " "; /  $ $ - -k :    & &{ 3  * * 3 3K @ #""=1!% O6	).   **" +"
 & .?Y->->}.  M *
 4))=9
 :
"   "x!!,/ $  8:+9+9+9 5+9 	+9
 	+9\ )); *; , ^ > / / / =  4))<8G 9G2499  CG8
 &8%%&67 $)  !@ ##6 $6, : ' ' '(8 9"
 (X''(:; &*  #
  GK#/#4#4)-	 D	
 ! '& %% ! &0&='?< -E  ( )>" < ' ' '(: ;$
 &8%%&67 $(  !9 ## $&='?$ +A  & '> : ' ' '(8 9"r#   