
    Q
i3                         d dl Z d dlmZ d dlmZmZmZmZmZm	Z	 d dl
mZ d dlmZmZmZ d dlmZ erd dlmZ d dlmZ  G d	 d
e          ZdS )    N)cached_property)TYPE_CHECKINGAnyDictListOptionalUnion)
ConfigDict)retrystop_after_attemptwait_random_exponential)retry_if_not_exception_type)EmbeddingsCache)BaseVectorizerc            
           e Zd ZdZ ed          Z	 	 	 	 d$dedee         d	ed
ed         f fdZ	e
defd            Zed             Zdedeee         ef         fdZdedeee         ef         fdZdee         fdZdee         fdZdefdZ e edd           ed           eeef                    dedee         fd            Z  e edd           ed           eeef                    	 d%dee         d edeee                  fd!            Z!dedeeef         f fd"Z"e
defd#            Z# xZ$S )&VertexAIVectorizera	  The VertexAIVectorizer uses Google's VertexAI embedding model
    API to create embeddings.

    This vectorizer is tailored for use in
    environments where integration with Google Cloud Platform (GCP) services is
    a key requirement.

    Utilizing this vectorizer requires an active GCP project and location
    (region), along with appropriate application credentials. These can be
    provided through the `api_config` dictionary or set the GOOGLE_APPLICATION_CREDENTIALS
    env var. Additionally, the vertexai python client must be
    installed with `pip install google-cloud-aiplatform>=1.26`.

    You can optionally enable caching to improve performance when generating
    embeddings for repeated inputs.

    .. code-block:: python

        # Basic usage
        vectorizer = VertexAIVectorizer(
            model="textembedding-gecko",
            api_config={
                "project_id": "your_gcp_project_id", # OR set GCP_PROJECT_ID
                "location": "your_gcp_location",     # OR set GCP_LOCATION
            })
        embedding = vectorizer.embed("Hello, world!")

        # With caching enabled
        from redisvl.extensions.cache.embeddings import EmbeddingsCache
        cache = EmbeddingsCache(name="vertexai_embeddings_cache")

        vectorizer = VertexAIVectorizer(
            model="textembedding-gecko",
            api_config={
                "project_id": "your_gcp_project_id",
                "location": "your_gcp_location",
            },
            cache=cache
        )

        # First call will compute and cache the embedding
        embedding1 = vectorizer.embed("Hello, world!")

        # Second call will retrieve from cache
        embedding2 = vectorizer.embed("Hello, world!")

        # Batch embedding of multiple texts
        embeddings = vectorizer.embed_many(
            ["Hello, world!", "Goodbye, world!"],
            batch_size=2
        )

        # Multimodal usage
        from vertexai.vision_models import Image, Video

        vectorizer = VertexAIVectorizer(
            model="multimodalembedding@001",
            api_config={
                "project_id": "your_gcp_project_id", # OR set GCP_PROJECT_ID
                "location": "your_gcp_location",     # OR set GCP_LOCATION
            }
        )
        text_embedding = vectorizer.embed("Hello, world!")
        image_embedding = vectorizer.embed(Image.load_from_file("path/to/your/image.jpg"))
        video_embedding = vectorizer.embed(Video.load_from_file("path/to/your/video.mp4"))

    T)arbitrary_types_allowedtextembedding-geckoNfloat32model
api_configdtypecacher   c                 l    t                                          |||            | j        |fi | dS )a  Initialize the VertexAI vectorizer.

        Args:
            model (str): Model to use for embedding. Defaults to
                'textembedding-gecko'.
            api_config (Optional[Dict], optional): Dictionary containing the
                API config details. Defaults to None.
            dtype (str): the default datatype to use when embedding text as byte arrays.
                Used when setting `as_buffer=True` in calls to embed() and embed_many().
                Defaults to 'float32'.
            cache (Optional[EmbeddingsCache]): Optional EmbeddingsCache instance to cache embeddings for
                better performance with repeated texts. Defaults to None.

        Raises:
            ImportError: If the google-cloud-aiplatform library is not installed.
            ValueError: If the API key is not provided.
            ValueError: If an invalid dtype is provided.
        )r   r   r   N)super__init___setup)selfr   r   r   r   kwargs	__class__s         C:\Users\Dell Inspiron 16\Desktop\tws\AgrotaPowerBi\back-agrota-powerbi\mcp-client-agrota\venv\Lib\site-packages\redisvl/utils/vectorize/vertexai.pyr   zVertexAIVectorizer.__init__V   sD    4 	uE???J))&)))))    returnc                     d| j         v S )z/Whether a multimodal model has been configured.
multimodal)r   r   s    r!   is_multimodalz VertexAIVectorizer.is_multimodalt   s     tz))r"   c                     | j         r ddlm} |                    | j                  S ddlm} |                    | j                  S )z3Get the appropriate client based on the model type.r   )MultiModalEmbeddingModel)TextEmbeddingModel)r'   vertexai.vision_modelsr)   from_pretrainedr   vertexai.language_modelsr*   )r   r)   r*   s      r!   _clientzVertexAIVectorizer._clienty   s`      	HGGGGGG+;;DJGGG??????!11$*===r"   
image_pathc                 |    | j         st          d          ddlm}  | j        |                    |          fi |S )zIEmbed an image (from its path on disk) using a VertexAI multimodal model.z/Cannot embed image with a non-multimodal model.r   )Image)r'   
ValueErrorr+   r1   embedload_from_file)r   r/   r   r1   s       r!   embed_imagezVertexAIVectorizer.embed_image   W    ! 	PNOOO000000tz%..z::EEfEEEr"   
video_pathc                 |    | j         st          d          ddlm}  | j        |                    |          fi |S )zHEmbed a video (from its path on disk) using a VertexAI multimodal model.z/Cannot embed video with a non-multimodal model.r   )Video)r'   r2   r+   r9   r3   r4   )r   r7   r   r9   s       r!   embed_videozVertexAIVectorizer.embed_video   r6   r"   c                 T     | j         |fi | |                                 | _        dS )zBSet up the VertexAI client and determine the embedding dimensions.N)_initialize_client_set_model_dimsdims)r   r   r   s      r!   r   zVertexAIVectorizer._setup   s6     	 
55f555((**			r"   c                    |r|                     d          nt          j        d          }|r|                     d          nt          j        d          }|st          d          |st          d          |r|                     d          nd}	 d	dl}|                    |||
           dS # t          $ r t          d          w xY w)a  
        Setup the VertexAI client using the provided config options or
        environment variables.

        Args:
            api_config: Dictionary with GCP configuration options
            **kwargs: Additional arguments for initialization

        Raises:
            ImportError: If the google-cloud-aiplatform library is not installed
            ValueError: If required parameters are not provided
        
project_idGCP_PROJECT_IDlocationGCP_LOCATIONzzMissing project_id. Provide the id in the api_config with key 'project_id' or set the GCP_PROJECT_ID environment variable.zMissing location. Provide the location (region) in the api_config with key 'location' or set the GCP_LOCATION environment variable.credentialsNr   )projectrB   rD   zVertexAI vectorizer requires the google-cloud-aiplatform library. Please install with `pip install google-cloud-aiplatform>=1.26`)getosgetenvr2   vertexaiinitImportError)r   r   r   r@   rB   rD   rI   s          r!   r<   z%VertexAIVectorizer._initialize_client   s(    -7WJNN<(((BIFV<W<W 	 +5SJNN:&&&")N:S:S 	  	B    	@   8BKjnn]333t	OOOMM"X;        	 	 	R  	s   B1 1Cc                    	 |                      d          }t          |          S # t          t          f$ r$}t	          dt          |                     d}~wt          $ r$}t	          dt          |                     d}~ww xY w)z
        Determine the dimensionality of the embedding model by making a test call.

        Returns:
            int: Dimensionality of the embedding model

        Raises:
            ValueError: If embedding dimensions cannot be determined
        zdimension checkz+Unexpected response from the VertexAI API: Nz*Error setting embedding model dimensions: )_embedlenKeyError
IndexErrorr2   str	Exception)r   	embeddingkees       r!   r=   z"VertexAIVectorizer._set_model_dims   s    	T$566Iy>>!*% 	V 	V 	VT3r77TTUUU 	T 	T 	TR#a&&RRSSS	Ts    #& BAB#BB   <   )minmax   )waitstopr   contentc                 l   	 | j         rddlm}m} t	          |t
                    r1 | j        j        dd|i|}|j        t          d          |j        S t	          ||          r1 | j        j        dd|i|}|j
        t          d          |j
        S t	          ||          r< | j        j        dd|i|}|j        t          d	          |j        d         j        S t          d
           | j        j        |gfi |d         j        S # t          $ r}t          d|           d}~ww xY w)a_  
        Generate a vector embedding for a single input using the VertexAI API.

        Args:
            content: Input to embed
            **kwargs: Additional parameters to pass to the VertexAI API

        Returns:
            List[float]: Vector embedding as a list of floats

        Raises:
            ValueError: If embedding fails
        r   r1   r9   contextual_textNz)No text embedding returned from VertexAI.imagez*No image embedding returned from VertexAI.videoz*No video embedding returned from VertexAI.zJInvalid input type for multimodal embedding. Must be str, Image, or Video.zEmbedding input failed:  )r'   r+   r1   r9   
isinstancerQ   r.   get_embeddingstext_embeddingr2   image_embeddingvideo_embeddingsrS   	TypeErrorvaluesrR   )r   r]   r   r1   r9   resultrU   s          r!   rM   zVertexAIVectorizer._embed   s   &&	=! "R????????gs++ 8T\8  (/  F ,4()TUUU!00// 8T\8  %  F -5()UVVV!11// 8T\8  %  F .6()UVVV!215??#8   3t|2G9GGGGJQQ 	= 	= 	=;;;<<<	=s,   AD A D AD $-D 
D3D..D3
   contents
batch_sizec                    | j         rt          d          t          |t                    st	          d          |r*t          |d         t
                    st	          d          	 g }|                     ||          D ]4} | j        j        |fi |}|	                    d |D                        5|S # t          $ r}t          d|           d}~ww xY w)a  
        Generate vector embeddings for a batch of texts using the VertexAI API.

        Args:
            contents: List of texts to embed
            batch_size: Number of texts to process in each API call
            **kwargs: Additional parameters to pass to the VertexAI API

        Returns:
            List[List[float]]: List of vector embeddings as lists of floats

        Raises:
            ValueError: If embedding fails
        zEBatch embedding is not supported for multimodal models with VertexAI.z+Must pass in a list of str values to embed.r   c                     g | ]	}|j         
S rc   )rj   ).0rs     r!   
<listcomp>z2VertexAIVectorizer._embed_many.<locals>.<listcomp>C  s    ">">">18">">">r"   zEmbedding texts failed: N)r'   NotImplementedErrorrd   listri   rQ   batchifyr.   re   extendrR   r2   )r   rm   rn   r   
embeddingsbatchresponserU   s           r!   _embed_manyzVertexAIVectorizer._embed_many   s   ,  	%W   (D)) 	KIJJJ 	KJx{C88 	KIJJJ	=!Jx<< @ @64<6uGGGG!!">">X">">">???? 	= 	= 	=;;;<<<	=s   (AB7 7
CCCc                     ddl m}m} t          ||          r|j        S t          ||          r|j        S t                                          |          S )z&Convert content to a cacheable format.r   r_   )r+   r1   r9   rd   _image_bytes_video_bytesr   _serialize_for_cache)r   r]   r1   r9   r    s       r!   r   z'VertexAIVectorizer._serialize_for_cacheH  si    77777777gu%% 	('''' 	(''ww++G444r"   c                     dS )NrI   rc   r&   s    r!   typezVertexAIVectorizer.typeR  s    zr"   )r   Nr   N)rl   )%__name__
__module____qualname____doc__r
   model_configrQ   r   r   r   propertyboolr'   r   r.   r	   r   floatbytesr5   r:   r   r<   intr=   r   r   r   r   ri   r2   r   rM   r{   r   r   __classcell__)r    s   @r!   r   r      s       B BH :d;;;L +%)-1* ** TN* 	*
 )** * * * * *< *t * * * X* 	> 	> _	>Fc Fd5k5>P8Q F F F FFc Fd5k5>P8Q F F F F+$ + + + +1Xd^ 1 1 1 1fT T T T T( U$$333""))9j*ABB  
4=c 4=U 4= 4= 4= 
4=l U$$333""))9j*ABB   68!= !=S	!=/2!=	d5k	!= != != 
!=F5C 5E%*4E 5 5 5 5 5 5 c    X    r"   r   )rG   	functoolsr   typingr   r   r   r   r   r	   pydanticr
   tenacityr   r   r   tenacity.retryr   .redisvl.extensions.cache.embeddings.embeddingsr   redisvl.utils.vectorize.baser   r   rc   r"   r!   <module>r      s    				 % % % % % % B B B B B B B B B B B B B B B B       G G G G G G G G G G 6 6 6 6 6 6 ONNNNNN 7 7 7 7 7 7E E E E E E E E E Er"   