
    Q
iD                         d dl Z d dlmZmZmZmZmZmZ d dlm	Z	 d dl
mZmZmZ d dlmZ erd dlmZ d dlmZ  G d d	e          ZdS )
    N)TYPE_CHECKINGAnyDictListOptionalUnion)
ConfigDict)retrystop_after_attemptwait_random_exponential)retry_if_not_exception_type)EmbeddingsCache)BaseVectorizerc            
       .    e Zd ZdZ ed          Z	 	 	 	 d(dedee         d	ed
ed         f fdZ	e
defd            Zdedeee         ef         fdZdedeee         ef         fdZdee         fdZdee         fdZdefdZdefdZdee         dee         dee         fdZdedee         fdZ e edd           ed            ee          !          	 d)dee         d"ee         deee                  fd#            Z dedee         fd$Z! e edd           ed            ee          !          	 d)dee         d"ee         deee                  fd%            Z"dedeeef         f fd&Z#e
defd'            Z$ xZ%S )*VoyageAIVectorizera
  The VoyageAIVectorizer class utilizes VoyageAI's API to generate
    embeddings for text and multimodal (text / image / video) data.

    This vectorizer is designed to interact with VoyageAI's /embed and /multimodal_embed APIs,
    requiring an API key for authentication. The key can be provided
    directly in the `api_config` dictionary or through the `VOYAGE_API_KEY`
    environment variable. User must obtain an API key from VoyageAI's website
    (https://dash.voyageai.com/). Additionally, the `voyageai` python
    client must be installed with `pip install voyageai`. For image embeddings, the Pillow
    library must also be installed with `pip install pillow`.

    The vectorizer supports both synchronous and asynchronous operations, allows for batch
    processing of content and flexibility in handling preprocessing tasks.

    You can optionally enable caching to improve performance when generating
    embeddings for repeated text inputs.

    .. code-block:: python

        from redisvl.utils.vectorize import VoyageAIVectorizer

        # Basic usage
        vectorizer = VoyageAIVectorizer(
            model="voyage-3-large",
            api_config={"api_key": "your-voyageai-api-key"} # OR set VOYAGE_API_KEY in your env
        )
        query_embedding = vectorizer.embed(
            content="your input query text here",
            input_type="query"
        )
        doc_embeddings = vectorizer.embed_many(
            contents=["your document text", "more document text"],
            input_type="document"
        )

        # Multimodal usage - requires Pillow and voyageai>=0.3.6

        vectorizer = VoyageAIVectorizer(
            model="voyage-multimodal-3.5",
            api_config={"api_key": "your-voyageai-api-key"} # OR set VOYAGE_API_KEY in your env
        )
        image_embedding = vectorizer.embed_image(
            "path/to/your/image.jpg",
            input_type="query"
        )
        video_embedding = vectorizer.embed_video(
            "path/to/your/video.mp4",
            input_type="document"
        )

        # With caching enabled
        from redisvl.extensions.cache.embeddings import EmbeddingsCache
        cache = EmbeddingsCache(name="voyageai_embeddings_cache")

        vectorizer = VoyageAIVectorizer(
            model="voyage-3-large",
            api_config={"api_key": "your-voyageai-api-key"},
            cache=cache
        )

        # First call will compute and cache the embedding
        embedding1 = vectorizer.embed(
            content="your input query text here",
            input_type="query"
        )

        # Second call will retrieve from cache
        embedding2 = vectorizer.embed(
            content="your input query text here",
            input_type="query"
        )

    T)arbitrary_types_allowedvoyage-3-largeNfloat32model
api_configdtypecacher   c                 l    t                                          |||            | j        |fi | dS )ao  Initialize the VoyageAI vectorizer.

        Visit https://docs.voyageai.com/docs/embeddings to learn about embeddings and check the available models.

        Args:
            model (str): Model to use for embedding. Defaults to "voyage-3-large".
            api_config (Optional[Dict], optional): Dictionary containing the API key.
                Defaults to None.
            dtype (str): the default datatype to use when embedding content as byte arrays.
                Used when setting `as_buffer=True` in calls to embed() and embed_many().
                Defaults to 'float32'.
            cache (Optional[EmbeddingsCache]): Optional EmbeddingsCache instance to cache embeddings for
                better performance with repeated items. Defaults to None.

        Raises:
            ImportError: If the voyageai library is not installed.
            ValueError: If the API key is not provided.

        Notes:
            - Multimodal models require voyageai>=0.3.6 to be installed for video embeddings, as well as
                ffmpeg installed on the system. Image embeddings require pillow to be installed.

        )r   r   r   N)super__init___setup)selfr   r   r   r   kwargs	__class__s         C:\Users\Dell Inspiron 16\Desktop\tws\AgrotaPowerBi\back-agrota-powerbi\mcp-client-agrota\venv\Lib\site-packages\redisvl/utils/vectorize/voyageai.pyr   zVoyageAIVectorizer.__init__^   sD    > 	uE???J))&)))))    returnc                     d| j         v S )z/Whether a multimodal model has been configured.
multimodalr   r   s    r    is_multimodalz VoyageAIVectorizer.is_multimodal   s     tz))r!   
image_pathc                     | j         st          d          	 ddlm} n# t          $ r t	          d          w xY w | j        |                    |          fi |S )zhEmbed an image (from its path on disk) using VoyageAI's multimodal API. Requires pillow to be installed.z/Cannot embed image with a non-multimodal model.r   )ImagezXPillow library is required for image embedding. Please install with `pip install pillow`)r'   
ValueErrorPILr*   ImportErrorembedopen)r   r(   r   r*   s       r    embed_imagezVoyageAIVectorizer.embed_image   s    ! 	PNOOO	!!!!!!! 	 	 	;  	
 tz%**Z00;;F;;;    9
video_pathc                     | j         st          d          	 ddlm} n# t          $ r t	          d          w xY w|                    || j                  } | j        |fi |S )zEmbed a video (from its path on disk) using VoyageAI's multimodal API.

        Requires voyageai>=0.3.6 to be installed, as well as ffmpeg to be installed on the system.
        z/Cannot embed video with a non-multimodal model.r   Videobvoyageai>=0.3.6 is required for video embedding. Please install with `pip install voyageai>=0.3.6`r%   )r'   r+   voyageai.video_utilsr5   ModuleNotFoundError	from_pathr   r.   )r   r2   r   r5   videos        r    embed_videozVoyageAIVectorizer.embed_video   s    
 ! 	PNOOO	2222222" 	 	 	%D  	 *   
 
 tz%**6***r1   c                      | j         |fi | | j        r#| j        j        | _        | j        j        | _        n"| j        j        | _        | j        j        | _        |                                 | _	        dS )zBSet up the VoyageAI client and determine the embedding dimensions.N)
_initialize_clientr'   _clientmultimodal_embed	_embed_fn_aclient
_aembed_fnr.   _set_model_dimsdims)r   r   r   s      r    r   zVoyageAIVectorizer._setup   su     	 
55f555 	2!\:DN"m<DOO!\/DN"m1DO ((**			r!   c                    |i }	 ddl m}m} n# t          $ r t          d          w xY w|r|                    d          nt          j        d          }|st          d           |dd|i|| _         |dd|i|| _	        dS )	a  
        Setup the VoyageAI clients using the provided API key or an
        environment variable.

        Args:
            api_config: Dictionary with API configuration options
            **kwargs: Additional arguments to pass to VoyageAI clients

        Raises:
            ImportError: If the voyageai library is not installed
            ValueError: If no API key is provided
        Nr   )AsyncClientClientz]VoyageAI vectorizer requires the voyageai library. Please install with `pip install voyageai`api_keyVOYAGE_API_KEYzfVoyageAI API key is required. Provide it in api_config or set the VOYAGE_API_KEY environment variable. )
voyageairF   rG   r-   getosgetenvr+   r>   rA   )r   r   r   rF   rG   rH   s         r    r=   z%VoyageAIVectorizer._initialize_client   s     J	444444444 	 	 	=  	 *4TJNN9%%%CS9T9T 	  	[  
 v88g888#>>G>v>>s    )c                    	 |                      dd          }t          |          S # t          t          f$ r$}t	          dt          |                     d}~wt          $ r$}t	          dt          |                     d}~ww xY w)z
        Determine the dimensionality of the embedding model by making a test call.

        Returns:
            int: Dimensionality of the embedding model

        Raises:
            ValueError: If embedding dimensions cannot be determined
        zdimension checkdocument)
input_typez+Unexpected response from the VoyageAI API: Nz*Error setting embedding model dimensions: )_embedlenKeyError
IndexErrorr+   str	Exception)r   	embeddingkees       r    rC   z"VoyageAIVectorizer._set_model_dims   s    	T$5*MMIy>>!*% 	V 	V 	VT3r77TTUUU 	T 	T 	TR#a&&RRSSS	Ts    %( B	AB	%BB	c                 H    | j         dv rdS | j         dv rdS | j         dv rdS dS )z
        Determine the appropriate batch size based on the model being used.

        Returns:
            int: Recommended batch size for the current model
        )zvoyage-2z	voyage-02H   )zvoyage-3-litezvoyage-3.5-lite   )zvoyage-3z
voyage-3.5
      r%   r&   s    r    _get_batch_sizez"VoyageAIVectorizer._get_batch_size   sB     :2222Z???2Z55521r!   contentsrQ   
truncationc                 .   t          |t                    st          d          | j        s,|r*t          |d         t                    st          d          ||dvrt          d          |$t          |t
                    st          d          dS dS )a+  
        Validate the inputs to the embedding methods.

        Args:
            contents: List of items to embed
            input_type: Type of input (document or query)
            truncation: Whether to truncate long inputs

        Raises:
            TypeError: If inputs are invalid
        z[Must pass in a list of str, PIL.Image.Image, or voyageai.video_utils.Video values to embed.r   z+Must pass in a list of str values to embed.N)rP   queryznMust pass in a allowed value for voyageai embedding input_type. See https://docs.voyageai.com/docs/embeddings.z*Truncation (optional) parameter is a bool.)
isinstancelist	TypeErrorr'   rV   bool)r   ra   rQ   rb   s       r    _validate_inputz"VoyageAIVectorizer._validate_input  s     (D)) 	m   ! 	Kh 	Kz(1+s7S7S 	KIJJJ!j8M&M&MA   !*Z*F*F!HIII "!!!r!   contentc                 0     | j         |gfi |}|d         S )a"  
        Generate a vector embedding for a single item using the VoyageAI API.

        Args:
            content: Item to embed - must be one of str, PIL.Image.Image, or voyageai.video_utils.Video. Images and
                video require a multimodal model to be configured.
            **kwargs: Additional parameters to pass to the VoyageAI API

        Returns:
            List[float]: Vector embedding as a list of floats

        Raises:
            TypeError: If parameters are invalid
            ValueError: If embedding fails
        r   )_embed_manyr   rj   r   results       r    rR   zVoyageAIVectorizer._embed  s*    " "!7)66v66ayr!      <   )minmax   )waitstopr
   
batch_sizec                 
   ddl m} |                    dd          }|                    dd          }|                     |||           ||                                 }	 g }|                     ||          D ]=} | j        | j        r|gn|f| j        ||d|}	|	                    |	j
                   >|S # |$ r%}
t          dt          |
                     |
d}
~
wt          $ r}
t          d	|
           d}
~
ww xY w)
a  
        Generate vector embeddings for a batch of items using the VoyageAI API.

        Args:
            contents: List of items to embed - each item must be one of str, PIL.Image.Image, or
                voyageai.video_utils.Video. Images and video require a multimodal model to be configured.
            batch_size: Number of items to process in each API call
            **kwargs: Additional parameters to pass to the VoyageAI API

        Returns:
            List[List[float]]: List of vector embeddings as lists of floats

        Raises:
            TypeError: If `contents` is not a list, or parameters are invalid
            ValueError: If embedding fails
        r   InvalidRequestErrorrQ   Nrb   Tr   rQ   rb   Invalid input for embedding: Embedding texts failed: )voyageai.errorry   popri   r`   batchifyr@   r'   r   extend
embeddingsrg   rV   rW   r+   r   ra   rv   r   ry   rQ   rb   r   batchresponserZ   s              r    rl   zVoyageAIVectorizer._embed_many3  sk   0 	766666ZZd33
ZZd33
 	Xz:>>> --//J	=!Jx<< 
7 
7)4>#'#5@5 *))    !!("56666" 	M 	M 	MDCFFDDEE1L 	= 	= 	=;;;<<<	=s%   !AB9 9D> CD+C==Dc                 @   K    | j         |gfi | d{V }|d         S )a1  
        Asynchronously generate a vector embedding for a single item using the VoyageAI API.

        Args:
            content: Item to embed - must be one of str, PIL.Image.Image, or voyageai.video_utils.Video. Images and
                video require a multimodal model to be configured.
            **kwargs: Additional parameters to pass to the VoyageAI API

        Returns:
            List[float]: Vector embedding as a list of floats

        Raises:
            TypeError: If parameters are invalid
            ValueError: If embedding fails
        Nr   )_aembed_manyrm   s       r    _aembedzVoyageAIVectorizer._aembedj  s@      " )t('==f========ayr!   c                   K   ddl m} |                    dd          }|                    dd          }|                     |||           ||                                 }	 g }|                     ||          D ]C} | j        | j        r|gn|f| j        ||d| d{V }	|	                    |	j
                   D|S # |$ r%}
t          dt          |
                     |
d}
~
wt          $ r}
t          d	|
           d}
~
ww xY w)
a  
        Asynchronously generate vector embeddings for a batch of items using the VoyageAI API.

        Args:
            contents: List of items to embed - each item must be one of str, PIL.Image.Image, or
                voyageai.video_utils.Video. Images and video require a multimodal model to be configured.
            batch_size: Number of texts to process in each API call
            **kwargs: Additional parameters to pass to the VoyageAI API

        Returns:
            List[List[float]]: List of vector embeddings as lists of floats

        Raises:
            TypeError: If `contents` is not a list, or parameters are invalid
            ValueError: If embedding fails
        r   rx   rQ   Nrb   Trz   r{   r|   )r}   ry   r~   ri   r`   r   rB   r'   r   r   r   rg   rV   rW   r+   r   s              r    r   zVoyageAIVectorizer._aembed_many~  s     0 	766666ZZd33
ZZd33
 	Xz:>>> --//J	=!Jx<< 
7 
7!0#'#5@5" *))" " " "       !!("56666" 	M 	M 	MDCFFDDEE1L 	= 	= 	=;;;<<<	=s%   #AC D
 C&&D
3DD
c                     	 ddl m} n# t          $ r t          d          w xY wt          ||          r|                                S t                                          |          S )z&Convert content to a cacheable format.r   r4   r6   )r7   r5   r8   re   to_bytesr   _serialize_for_cache)r   rj   r5   r   s      r    r   z'VoyageAIVectorizer._serialize_for_cache  s    	2222222" 	 	 	%D  	 gu%% 	&##%%%ww++G444s   
 $c                     dS )NrK   rJ   r&   s    r    typezVoyageAIVectorizer.type  s    zr!   )r   Nr   N)N)&__name__
__module____qualname____doc__r	   model_configrV   r   r   r   propertyrh   r'   r   r   floatbytesr0   r;   r   r=   intrC   r`   r   ri   rR   r
   r   r   r   rg   rl   r   r   r   r   __classcell__)r   s   @r    r   r      s       H HT :d;;;L &%)-1!* !*!* TN!* 	!*
 )*!* !* !* !* !* !*F *t * * * X*<c <d5k5>P8Q < < < <+c +d5k5>P8Q + + + +,+$ + + + +$?Xd^ $? $? $? $?LT T T T T(     JS	J/7}JJRSW.J J J J8c U    ( U$$333"")))44   @D0= 0=S	0=/7}0=	d5k	0= 0= 0= 
0=dS tE{    ( U$$333"")))44   @D0= 0=S	0=/7}0=	d5k	0= 0= 0= 
0=d5C 5E%*4E 5 5 5 5 5 5 c    X    r!   r   )rM   typingr   r   r   r   r   r   pydanticr	   tenacityr
   r   r   tenacity.retryr   .redisvl.extensions.cache.embeddings.embeddingsr   redisvl.utils.vectorize.baser   r   rJ   r!   r    <module>r      s    				 B B B B B B B B B B B B B B B B       G G G G G G G G G G 6 6 6 6 6 6 ONNNNNN 7 7 7 7 7 7t t t t t t t t t tr!   