
    Q
ieS                     (   d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZ d dlmZmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ 	 d dlmZ dZn# e$ r dZY nw xY w ej         e!          Z" G d de          Z# G d de          Z$dS )    N)Enum)Path)AnyCallableDictListOptionalUnion)	BaseModel
ConfigDictFieldfield_validator)	Annotated)EmbeddingsCachearray_to_buffer)VectorDataType)deprecated_argument)ImageTFc                   *    e Zd ZdZdZdZdZdZdZdZ	dS )	Vectorizersazure_openaiopenaicoheremistralvertexaihfvoyageaiN)
__name__
__module____qualname__r   r   r   r   r   r   r        C:\Users\Dell Inspiron 16\Desktop\tws\AgrotaPowerBi\back-agrota-powerbi\mcp-client-agrota\venv\Lib\site-packages\redisvl/utils/vectorize/base.pyr   r      s1        !LFFGH	BHHHr#   r   c                      e Zd ZU dZeed<   dZeed<   dZee	e
          edd          f         ed	<    ed
          Ze	e         ed<    ed          Zedefd            Z ed          ed                         Z edd          	 	 	 	 	 d/dedede	e         dededeee         ef         fd            Z edd          	 	 	 	 	 	 d0de	ee                  de	ee                  de	e         de
dededeeee                  ee         f         fd            Z edd          	 	 	 	 	 d/dedede	e         dededeee         ef         fd            Z edd          	 	 	 	 	 	 d0de	ee                  de	ee                  de	e         de
dededeeee                  ee         f         fd            Z  edd          d1dededee         fd            Z! edd          	 	 	 d2de	ee                  de	ee                  de
deee                  fd             Z" edd          d1dededee         fd!            Z# edd          	 	 	 d2de	ee                  de	ee                  de
deee                  fd"            Z$dee         dede%ee	ee                           ee         ee
         f         fd#Z&dee         dede%ee	ee                           ee         ee
         f         fd$Z'dee         d%eee                  d&e(deddf
d'Z)dee         d%eee                  d&e(deddf
d(Z*d3d)e+d*e
de	e         fd+Z,d,e	ee                  dedefd-Z-dedeeef         fd.Z.dS )4BaseVectorizera  Base RedisVL vectorizer interface.

    This class defines the interface for vectorization with an optional
    caching layer to improve performance by avoiding redundant API calls.

    Attributes:
        model: The name of the embedding model.
        dtype: The data type of the embeddings, defaults to "float32".
        dims: The dimensionality of the vectors.
        cache: Optional embedding cache to store and retrieve embeddings.
    modelfloat32dtypeNTr   )strictgtdims)defaultcache)arbitrary_types_allowedreturnc                     dS )zReturn the type of vectorizer.baser"   )selfs    r$   typezBaseVectorizer.type6   s	     vr#   c                     	 t          |                                           n2# t          $ r% t          d| dd t           D                        w xY w|S )z$Validate the data type is supported.zInvalid data type: z. Supported types are: c                 6    g | ]}|                                 S r"   )lower).0ts     r$   
<listcomp>z.BaseVectorizer.check_dtype.<locals>.<listcomp>C   s!    DgDgDgSTQWWYYDgDgDgr#   )r   upper
ValueError)clsr)   s     r$   check_dtypezBaseVectorizer.check_dtype;   sw    	5;;==)))) 	 	 	ieiiDgDgXfDgDgDgii  	 s	   !$ /AtextcontentF
preprocess	as_buffer
skip_cachec                 V   |p|}|st          d          | ||          }| j        |s	 | j                            |                     |          | j                  }|rDt
                              d| j                    |                     |d         || j                  S nA# t          $ r4}t
          
                    dt          |                      Y d}~nd}~ww xY w|                    di           }	 | j        |fi |}
| j        {|sy	 | j                            |                     |          | j        |
|	           nA# t          $ r4}t
          
                    d	t          |                      Y d}~nd}~ww xY w|                     |
|| j                  S )
a  Generate a vector embedding for content.

        Args:
            content: The content to convert to a vector embedding
            text: The text to convert to a vector embedding (deprecated - use `content` instead)
            preprocess: Function to apply to the content before embedding
            as_buffer: Return the embedding as a binary buffer instead of a list
            skip_cache: Bypass the cache for this request
            **kwargs: Additional model-specific parameters

        Returns:
            The vector embedding as either a list of floats or binary buffer

        Examples:
            >>> embedding = text_vectorizer.embed("Hello world")
            >>> embedding = image_vectorizer.embed(Image.open("test.png"))
        No content provided to embed.Nr@   
model_namez!Cache hit for content with model 	embeddingz!Error accessing embedding cache: metadatar@   rG   rH   rI   z"Error storing in embedding cache: )r<   r.   get_serialize_for_cacher'   loggerdebug_process_embeddingr)   	Exceptionwarningstrpop_embedsetr3   r@   r?   rA   rB   rC   kwargscache_resultecache_metadatarH   s              r$   embedzBaseVectorizer.embedG   s   6 /T 	><=== ! j))G :!*!
M#z~~ 55g>>4:  .       LL!QTZ!Q!QRRR22$[19dj  
  M M MK3q66KKLLLLLLLLM  J33DK22622	 :!*!N
 55g>>#z'+	       N N NLCFFLLMMMMMMMMN &&y)TZHHHs0   A9B( (
C&2*C!!C&6E 
F*FFtextscontents
   
batch_sizec                 F    |p|}|sg S fd|D             }n|}                      ||          \  }	}
}|
rY|                    di           }  j        d|
|d|}                     |
|||           t	          ||          D ]
\  }}||	|<    fd|	D             S )a  Generate vector embeddings for multiple items efficiently.

        Args:
            contents: List of content to convert to vector embeddings
            texts: List of texts to convert to vector embeddings (deprecated - use `contents` instead)
            preprocess: Function to apply to each item before embedding
            batch_size: Number of items to process in each API call
            as_buffer: Return embeddings as binary buffers instead of lists
            skip_cache: Bypass the cache for this request
            **kwargs: Additional model-specific parameters

        Returns:
            List of vector embeddings in the same order as the inputs

        Examples:
            >>> embeddings = vectorizer.embed_many(["Hello", "World"], batch_size=2)
        Nc                 &    g | ]} |          S r"   r"   r8   itemrA   s     r$   r:   z-BaseVectorizer.embed_many.<locals>.<listcomp>   #    !H!H!Ht**T"2"2!H!H!Hr#   rI   r]   r_   c                 H    g | ]}                     |j                  S r"   rO   r)   r8   embrB   r3   s     r$   r:   z-BaseVectorizer.embed_many.<locals>.<listcomp>   -    WWW''Y
CCWWWr#   r"   )_get_from_cache_batchrS   _embed_many_store_in_cache_batchzipr3   r]   r\   rA   r_   rB   rC   rW   processed_contentsresultscache_missescache_miss_indicesrZ   new_embeddingsidxrH   s   `  ` `          r$   
embed_manyzBaseVectorizer.embed_many   s#   8 $u 	I !!H!H!H!Hx!H!H!H!) 594N4N
5
 5
11
  	)#ZZ
B77N-T- %* @F N
 &&nnj  
 #&&8."I"I ) )Y( XWWWWwWWWWr#   c                 ~  K   |p|}|st          d          | ||          }| j        |s	 | j                            |                     |          | j                   d{V }|rDt
                              d| j                    |                     |d         || j                  S nA# t          $ r4}t
          
                    dt          |                      Y d}~nd}~ww xY w|                    di           }	 | j        |fi | d{V }
| j        |s	 | j                            |                     |          | j        |
|	           d{V  nA# t          $ r4}t
          
                    d	t          |                      Y d}~nd}~ww xY w|                     |
|| j                  S )
a  Asynchronously generate a vector embedding for an item of content.

        Args:
            content: The content to convert to a vector embedding
            text: The text to convert to a vector embedding (deprecated - use `content` instead)
            preprocess: Function to apply to the content before embedding
            as_buffer: Return the embedding as a binary buffer instead of a list
            skip_cache: Bypass the cache for this request
            **kwargs: Additional model-specific parameters

        Returns:
            The vector embedding as either a list of floats or binary buffer

        Examples:
            >>> embedding = await vectorizer.aembed("Hello world")
        rE   NrF   z'Async cache hit for content with model rH   z0Error accessing embedding cache asynchronously: rI   rJ   z1Error storing in embedding cache asynchronously: )r<   r.   agetrL   r'   rM   rN   rO   r)   rP   rQ   rR   rS   _aembedasetrV   s              r$   aembedzBaseVectorizer.aembed   s     4 /T 	><=== ! j))G :!*!%)Z__ 55g>>4: &5 & &               LL!W4:!W!WXXX22$[19dj  
    Os1vvOO         J33&$,w99&99999999	 :!*!
joo 55g>>#z'+	 &              PAPP        &&y)TZHHHs0   A?B0 0
C.:*C))C.%<E" "
F ,*FF c                 n   K   |p|}|sg S fd|D             }n|}                      ||           d{V \  }	}
}|
re|                    di           }  j        d|
|d| d{V }                     |
|||           d{V  t	          ||          D ]
\  }}||	|<    fd|	D             S )a"  Asynchronously generate vector embeddings for multiple items efficiently.

        Args:
            contents: List of content to convert to vector embeddings
            texts: List of texts to convert to vector embeddings (deprecated - use `contents` instead)
            preprocess: Function to apply to each item before embedding
            batch_size: Number of texts to process in each API call
            as_buffer: Return embeddings as binary buffers instead of lists
            skip_cache: Bypass the cache for this request
            **kwargs: Additional model-specific parameters

        Returns:
            List of vector embeddings in the same order as the inputs

        Examples:
            >>> embeddings = await vectorizer.aembed_many(["Hello", "World"], batch_size=2)
        Nc                 &    g | ]} |          S r"   r"   rb   s     r$   r:   z.BaseVectorizer.aembed_many.<locals>.<listcomp>2  rd   r#   rI   re   c                 H    g | ]}                     |j                  S r"   rg   rh   s     r$   r:   z.BaseVectorizer.aembed_many.<locals>.<listcomp>L  rj   r#   r"   )_aget_from_cache_batchrS   _aembed_many_astore_in_cache_batchrn   ro   s   `  ` `          r$   aembed_manyzBaseVectorizer.aembed_many  s     8 $u 	I !!H!H!H!Hx!H!H!H!) ;?:U:U
;
 ;
 5
 5
 5
 5
 5
 5
11
  	)#ZZ
B77N#44#4 $%*$ $@F$ $      N
 --nnj        
 #&&8."I"I ) )Y( XWWWWwWWWWr#    c                     t           )z.Generate a vector embedding for a single item.NotImplementedError)r3   r?   r@   rW   s       r$   rT   zBaseVectorizer._embedN  s
     "!r#   c                     t           )z0Generate vector embeddings for a batch of items.r   r3   r]   r\   r_   rW   s        r$   rl   zBaseVectorizer._embed_manyS  s
     "!r#   c                 \   K   t                               d            | j        dd|p|i|S )z=Asynchronously generate a vector embedding for a single item.z@This vectorizer has no async embed method. Falling back to sync.r@   r"   )rM   rQ   rT   )r3   r@   r?   rW   s       r$   ry   zBaseVectorizer._aembed^  sD       	N	
 	
 	
 t{==7?d=f===r#   c                 ^   K   t                               d            | j        d|p||d|S )z?Asynchronously generate vector embeddings for a batch of items.zEThis vectorizer has no async embed_many method. Falling back to sync.re   r"   )rM   rQ   rl   r   s        r$   r   zBaseVectorizer._aembed_manyf  sV       	S	
 	
 	
  t 
&:
 
AG
 
 	
r#   c           	      ,    dgt          |          z  }g }g }|s j        ,||t          t          t          |                              fS 	  j                             fd|D              j                  }t          t          ||                    D ]@\  }\  }}	|	r|	d         ||<   |                    |           |                    |           At          
                    dt          |          t          |          z
   dt          |                      nl# t          $ r_}
t                              dt          |
                      |}t          t          t          |                              }Y d}
~
nd}
~
ww xY w|||fS )a  Get vector embeddings from cache and track cache misses.

        Args:
            contents: List of content to get from cache
            skip_cache: Whether to skip cache lookup

        Returns:
            Tuple of (results, cache_misses, cache_miss_indices)
        Nc              3   B   K   | ]}                     |          V  d S NrL   r8   cr3   s     r$   	<genexpr>z7BaseVectorizer._get_from_cache_batch.<locals>.<genexpr>  1      II1$33A66IIIIIIr#   r]   rG   rH   zCache hits: 
, misses: z*Error accessing embedding cache in batch: )lenr.   listrangemgetr'   	enumeratern   appendrM   rN   rP   rQ   rR   r3   r]   rC   rq   rr   rs   cache_resultsir@   rX   rY   s   `          r$   rk   z$BaseVectorizer._get_from_cache_batchv  s    &3x==(  	A+Hd5X+?+?&@&@@@	< JOOIIIIIII: ,  M /8Hm8T8T.U.U 1 1**G\ 1!-k!:GAJJ ''000&--a0000LL_s8}}s</@/@@__CP\L]L]__     	< 	< 	<NNPAPPQQQ#L!%eCMM&:&:!;!;		< &888s   CD% %
F/AF		Fc           	      <   K   dgt          |          z  }g }g }|s j        ,||t          t          t          |                              fS 	  j                             fd|D              j                   d{V }t          t          ||                    D ]@\  }\  }}	|	r|	d         ||<   |                    |           |                    |           At          
                    dt          |          t          |          z
   dt          |                      nl# t          $ r_}
t                              dt          |
                      |}t          t          t          |                              }Y d}
~
nd}
~
ww xY w|||fS )a  Asynchronously get vector embeddings from cache and track cache misses.

        Args:
            contents: List of content to get from cache
            skip_cache: Whether to skip cache lookup

        Returns:
            Tuple of (results, cache_misses, cache_miss_indices)
        Nc              3   B   K   | ]}                     |          V  d S r   r   r   s     r$   r   z8BaseVectorizer._aget_from_cache_batch.<locals>.<genexpr>  r   r#   r   rH   zAsync cache hits: r   z9Error accessing embedding cache in batch asynchronously: )r   r.   r   r   amgetr'   r   rn   r   rM   rN   rP   rQ   rR   r   s   `          r$   r   z%BaseVectorizer._aget_from_cache_batch  s      &3x==(  	A+Hd5X+?+?&@&@@@	<"&*"2"2IIIIIII: #3 # #      M /8Hm8T8T.U.U 1 1**G\ 1!-k!:GAJJ ''000&--a0000LLeS]]S5F5F%FeeRUVbRcRcee     	< 	< 	<NNTCPQFFTT   $L!%eCMM&:&:!;!;	< &888s   CD- -
F7AFF
embeddingsrI   c                     |s j         dS 	  fdt          ||          D             } j                             |           dS # t          $ r5}t                              dt          |                      Y d}~dS d}~ww xY w)a$  Store a batch of vector embeddings in the cache.

        Args:
            contents: List of content that was embedded
            embeddings: List of vector embeddings
            metadata: Metadata to store with the embeddings
            skip_cache: Whether to skip cache storage
        Nc                 T    g | ]$\  }}                     |          j        |d %S rJ   rL   r'   r8   r@   ri   rI   r3   s      r$   r:   z8BaseVectorizer._store_in_cache_batch.<locals>.<listcomp>  R        !GS  $88AA"&*!$ (	   r#   itemsz(Error storing batch in embedding cache: )r.   rn   msetrP   rM   rQ   rR   r3   r]   r   rI   rC   cache_itemsrY   s   `  `   r$   rm   z$BaseVectorizer._store_in_cache_batch  s      	+F	P     %(*$=$=  K JOO+O..... 	P 	P 	PNNNc!ffNNOOOOOOOOO	Ps   8A	 	
B*BBc                 &   K   |s j         dS 	  fdt          ||          D             } j                             |           d{V  dS # t          $ r5}t                              dt          |                      Y d}~dS d}~ww xY w)a3  Asynchronously store a batch of vector embeddings in the cache.

        Args:
            contents: List of content that was embedded
            embeddings: List of vector embeddings
            metadata: Metadata to store with the embeddings
            skip_cache: Whether to skip cache storage
        Nc                 T    g | ]$\  }}                     |          j        |d %S r   r   r   s      r$   r:   z9BaseVectorizer._astore_in_cache_batch.<locals>.<listcomp>	  r   r#   r   z7Error storing batch in embedding cache asynchronously: )r.   rn   amsetrP   rM   rQ   rR   r   s   `  `   r$   r   z%BaseVectorizer._astore_in_cache_batch  s        	+F	     %(*$=$=  K *"""55555555555 	 	 	NNR#a&&RR        	s   >A 
B*BBseqsizec              #      K   t          dt          |          |          D ]/}fd||||z            D             V   ||||z            V  0dS )a  Split a sequence into batches of specified size.

        Args:
            seq: Sequence to split into batches
            size: Batch size
            preprocess: Optional function to preprocess each item

        Yields:
            Batches of the sequence
        r   Nc                 &    g | ]} |          S r"   r"   )r8   chunkrA   s     r$   r:   z+BaseVectorizer.batchify.<locals>.<listcomp>%  s#    LLLUzz%((LLLr#   )r   r   )r3   r   r   rA   poss      ` r$   batchifyzBaseVectorizer.batchify  s       CHHd++ 	, 	,C%LLLLc#d
:J6KLLLLLLL#d
*+++++		, 	,r#   rH   c                 .    ||rt          ||          S |S )z@Process the vector embedding format based on the as_buffer flag.r   )r3   rH   rB   r)   s       r$   rO   z!BaseVectorizer._process_embedding)  s(       9&y%888r#   c                    t          |t                    r|S t          |t                    r|S t          |t                    r|                                S t
          rSt          |t                    r>t          j                    }|	                    |d           |
                                S t          dt          |           d          )z&Convert content to a cacheable format.PNG)formatzContent type z is not supported for caching.)
isinstancerR   bytesr   
read_bytes_PILLOW_INSTALLEDr   ioBytesIOsavegetvaluer   r4   )r3   r@   buffers      r$   rL   z#BaseVectorizer._serialize_for_cache2  s    gs## 		%N'' 	%N&& 	%%%''' 	%:gu#=#= 	%Z\\FLLL...??$$$!IDMMIII
 
 	
r#   )NNNFF)NNNr^   FF)r   r   )NNr^   r   )/r   r    r!   __doc__rR   __annotations__r)   r,   r   r	   intr   r.   r   r   model_configpropertyr4   r   classmethodr>   r   r   r   boolr
   r   floatr   r[   rv   r{   r   rT   rl   ry   r   tuplerk   r   r   rm   r   r   r   rO   rL   r"   r#   r$   r&   r&   #   sJ        
 
 JJJE3?CD)HSM55#;#;#;;
<CCC',uT':':':E8O$::::d;;;Lc    X _W  [  ++ )- AI AIAI AI X&	AI
 AI AI 
tE{E!	"AI AI AI ,+AIF *-- )-%))- ;X ;X49%;X S	";X X&	;X
 ;X ;X ;X 
tDK $u+-	.;X ;X ;X .-;Xz ++ )- DI DIDI DI X&	DI
 DI DI 
tE{E!	"DI DI DI ,+DIL *-- )-%))- ;X ;X49%;X S	";X X&	;X
 ;X ;X ;X 
tDK $u+-	.;X ;X ;X .-;Xz ++" "3 "c "T%[ " " " ,+" *-- )-%)	" "49%" S	"" 	" 
d5k	" " " .-" ++> >S >S >DQVK > > > ,+> *-- )-%)	
 
49%
 S	"
 	
 
d5k	
 
 
 .-
,9S	,9/3,9	tHT%[)*DItCy@	A,9 ,9 ,9 ,9\.9S	.9/3.9	tHT%[)*DItCy@	A.9 .9 .9 .9`Ps)P e%P 	P
 P 
P P P PB!s)! e%! 	!
 ! 
! ! ! !F, ,D , ,(9K , , , ,"!$u+.;?HK   
C 
E%*4E 
 
 
 
 
 
r#   r&   )%r   loggingenumr   pathlibr   typingr   r   r   r   r	   r
   pydanticr   r   r   r   typing_extensionsr   #redisvl.extensions.cache.embeddingsr   redisvl.redis.utilsr   redisvl.schema.fieldsr   redisvl.utils.utilsr   	PIL.Imager   r   ImportError	getLoggerr   rM   r   r&   r"   r#   r$   <module>r      s   				              = = = = = = = = = = = = = = = = B B B B B B B B B B B B ' ' ' ' ' ' ? ? ? ? ? ? / / / / / / 0 0 0 0 0 0 3 3 3 3 3 3     
 
	8	$	$    $   ^
 ^
 ^
 ^
 ^
Y ^
 ^
 ^
 ^
 ^
s   A A#"A#