
    Q
i|0                         d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZmZmZ d dlmZ d dlmZmZmZ d dlmZ erd dlmZ d dlmZ 	 d d	lmZ d
Zn# e$ r dZY nw xY w G d de          ZdS )    N)Path)TYPE_CHECKINGAnyDictListLiteralOptionalUnion)
ConfigDict)retrystop_after_attemptwait_random_exponential)retry_if_not_exception_type)EmbeddingsCache)BaseVectorizer)ImageTFc                   n    e Zd ZdZ ed          Z	 	 	 	 d#dedeeeef                  d	ed
ed         ddf
 fdZ	dede
ee         ef         fdZdee         fdZdee         fdZdefdZ e edd           ed           ee                    dedee         fd            Z e edd           ed           ee                    	 d$dee         dedeee                  fd            Zdedeed         ef         fdZed edefd!            Ze defd"            Z! xZ"S )%BedrockVectorizera 	  The BedrockVectorizer class utilizes Amazon Bedrock's API to generate
    embeddings for text or image data.

    This vectorizer is designed to interact with Amazon Bedrock API,
    requiring AWS credentials for authentication. The credentials can be provided
    directly in the `api_config` dictionary or through environment variables:
    - AWS_ACCESS_KEY_ID
    - AWS_SECRET_ACCESS_KEY
    - AWS_REGION (defaults to us-east-1)

    The vectorizer supports synchronous operations with batch processing and
    preprocessing capabilities.

    You can optionally enable caching to improve performance when generating
    embeddings for repeated inputs.

    .. code-block:: python

        # Basic usage with explicit credentials
        vectorizer = BedrockVectorizer(
            model="amazon.titan-embed-text-v2:0",
            api_config={
                "aws_access_key_id": "your_access_key",
                "aws_secret_access_key": "your_secret_key",
                "aws_region": "us-east-1"
            }
        )

        # With environment variables and caching
        from redisvl.extensions.cache.embeddings import EmbeddingsCache
        cache = EmbeddingsCache(name="bedrock_embeddings_cache")

        vectorizer = BedrockVectorizer(
            model="amazon.titan-embed-text-v2:0",
            cache=cache
        )

        # First call will compute and cache the embedding
        embedding1 = vectorizer.embed("Hello, world!")

        # Second call will retrieve from cache
        embedding2 = vectorizer.embed("Hello, world!")

        # Generate batch embeddings
        embeddings = vectorizer.embed_many(["Hello", "World"], batch_size=2)

        # Multimodal usage
        from pathlib import Path
        vectorizer = BedrockVectorizer(
            model="amazon.titan-embed-image-v1:0",
            api_config={
                "aws_access_key_id": "your_access_key",
                "aws_secret_access_key": "your_secret_key",
                "aws_region": "us-east-1"
            }
        )
        image_embedding = vectorizer.embed(Path("path/to/your/image.jpg"))

        # Embedding a list of mixed modalities
        embeddings = vectorizer.embed_many(
            ["Hello", "world!", Path("path/to/your/image.jpg")],
            batch_size=2
        )

    T)arbitrary_types_allowedamazon.titan-embed-text-v2:0Nfloat32model
api_configdtypecacher   returnc                 l    t                                          |||            | j        |fi | dS )a  Initialize the AWS Bedrock Vectorizer.

        Args:
            model (str): The Bedrock model ID to use. Defaults to amazon.titan-embed-text-v2:0
            api_config (Optional[Dict[str, str]]): AWS credentials and config.
                Can include: aws_access_key_id, aws_secret_access_key, aws_region
                If not provided, will use environment variables.
            dtype (str): the default datatype to use when embedding text as byte arrays.
                Used when setting `as_buffer=True` in calls to embed() and embed_many().
                Defaults to 'float32'.
            cache (Optional[EmbeddingsCache]): Optional EmbeddingsCache instance to cache embeddings for
                better performance with repeated texts. Defaults to None.

        Raises:
            ValueError: If credentials are not provided in config or environment.
            ImportError: If boto3 is not installed.
            ValueError: If an invalid dtype is provided.
        )r   r   r   N)super__init___setup)selfr   r   r   r   kwargs	__class__s         C:\Users\Dell Inspiron 16\Desktop\tws\AgrotaPowerBi\back-agrota-powerbi\mcp-client-agrota\venv\Lib\site-packages\redisvl/utils/vectorize/bedrock.pyr   zBedrockVectorizer.__init__^   sD    4 	uE???J))&)))))    
image_pathc                 8     | j         t          |          fi |S )zHEmbed an image (from its path on disk) using a Bedrock multimodal model.)embedr   )r!   r&   r"   s      r$   embed_imagezBedrockVectorizer.embed_image|   s$    tz$z**55f555r%   c                 T     | j         |fi | |                                 | _        dS )zASet up the Bedrock client and determine the embedding dimensions.N)_initialize_client_set_model_dimsdims)r!   r   r"   s      r$   r    zBedrockVectorizer._setup   s6     	 
55f555((**			r%   c                    	 ddl }n# t          $ r t          d          w xY w|i }|                    dt          j        d                    }|                    dt          j        d                    }|                    dt          j        d	d
                    }|r|st          d           |j        	 d|||d|| _        dS )a  
        Setup the Bedrock client using the provided API keys or
        environment variables.

        Args:
            api_config: Dictionary with AWS credentials and configuration
            **kwargs: Additional arguments to pass to boto3 client

        Raises:
            ImportError: If boto3 is not installed
            ValueError: If AWS credentials are not provided
        r   NzQAmazon Bedrock vectorizer requires boto3. Please install with `pip install boto3`aws_access_key_idAWS_ACCESS_KEY_IDaws_secret_access_keyAWS_SECRET_ACCESS_KEY
aws_region
AWS_REGIONz	us-east-1zuAWS credentials required. Provide via api_config or environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEYbedrock-runtime)r/   r1   region_name)r5   )boto3ImportErrorgetosgetenv
ValueErrorclient_client)r!   r   r"   r7   r/   r1   r3   s          r$   r+   z$BedrockVectorizer._initialize_client   s   	LLLL 	 	 	:  	 J&NN+>!?!?
 
 !+#RY/F%G%G!
 !
  ^^L")L+2V2VWW
  	(= 	>   $u|
/"7"	
 

 
 
s    !c                    	 |                      d          }t          |          S # t          t          f$ r$}t	          dt          |                     d}~wt          $ r$}t	          dt          |                     d}~ww xY w)z
        Determine the dimensionality of the embedding model by making a test call.

        Returns:
            int: Dimensionality of the embedding model

        Raises:
            ValueError: If embedding dimensions cannot be determined
        zdimension checkz*Unexpected response from the Bedrock API: Nz*Error setting embedding model dimensions: )_embedlenKeyError
IndexErrorr<   str	Exception)r!   	embeddingkees       r$   r,   z!BedrockVectorizer._set_model_dims   s    	T$566Iy>>!*% 	U 	U 	US#b''SSTTT 	T 	T 	TR#a&&RRSSS	Ts    #& BAB#BB   <   )minmax   )waitstopr   contentc                    ddl m} |                     |          }	  | j        j        d| j        t          j        |          d|}t          j        |d         	                                          }|d         S # |$ r<}dt          |          v rd|v rt          d          |t          d	|           d
}~wt          $ r}t          d	|           d
}~ww xY w)a#  
        Generate a vector embedding for a single input using the AWS Bedrock API.

        Args:
            content: Text or PIL.Image.Image or Path to image-file to embed
            **kwargs: Additional parameters to pass to the AWS Bedrock API

        Returns:
            List[float]: Vector embedding as a list of floats

        Raises:
            TypeError: If content is not a string, Path, or PIL.Image.Image
            ValueError: If attempting to embed an image with a text model
            ValueError: If embedding fails
        r   ValidationErrormodelIdbodyrV   rF   Malformed input request
inputImage)Attempted to embed image with text model.zEmbedding text failed: N )botocore.exceptionsrS   _serialize_request_bodyr>   invoke_modelr   jsondumpsloadsreadrD   r<   rE   )r!   rP   r"   rS   rV   responseresponse_bodyrH   s           r$   r@   zBedrockVectorizer._embed   s$   * 	877777++G44	<0t|0 
D)9)9 =C H !Jx'7'<'<'>'>??M -- 	< 	< 	<(CFF22|t7K7K !LMMSTT:q::;;; 	< 	< 	<:q::;;;	<s$   AA= =C7B99CCC
   contents
batch_sizec           	         ddl m} t          |t                    st	          d          	 g }|                     ||          D ]}g }|D ]}|                     |          }		  | j        j        d| j	        t          j        |	          d|}
n4# |$ r,}dt          |          v rd|	v rt          d          ||d}~ww xY wt          j        |
d	                                                   }|                    |d
                    |                    |           |S # t$          $ r}t          d|           d}~ww xY w)a  
        Generate vector embeddings for a batch of inputs using the AWS Bedrock API.

        Args:
            contents: List of text/images to embed. Images must be Paths to image-file or PIL.Image.Image
            batch_size: Number of inputs to process in each API call
            **kwargs: Additional parameters to pass to the AWS Bedrock API

        Returns:
            List[List[float]]: List of vector embeddings as lists of floats

        Raises:
            TypeError: If `contents` is not a list
            TypeError: If each item in `contents` is not a string, Path, or PIL.Image.Image
            ValueError: If attempting to embed an image with a text model
            ValueError: If embedding fails
        r   rR   z`contents` must be a listrT   rW   rX   rY   NrV   rF   zEmbedding texts failed: rZ   )r[   rS   
isinstancelist	TypeErrorbatchifyr\   r>   r]   r   r^   r_   rD   r<   r`   ra   appendextendrE   )r!   re   rf   r"   rS   
embeddingsbatchbatch_embeddingsrP   rV   rb   rH   rc   s                r$   _embed_manyzBedrockVectorizer._embed_many   s   2 	877777(D)) 	97888	=,.Jx<< 4 4 $& $ H HG77@@D #<4<#< $$(J!%D!1!1$ $ %$ $
 +      4A>><SWCWCW", K# ##$%    %)Jx/?/D/D/F/F$G$GM$++M+,FGGGG!!"23333 	= 	= 	=;;;<<<	=s<   5D# ",BD# C 'B;;C  A"D# #
E-D??E)	inputTextrX   c                    t          |t                    rd|iS t          |t                    r)d|                     |                                          iS t
          rmt          |t          j                  rSt          j                    }|	                    |d           d|                     |
                                          iS t          d          )z/Serialize the request body for the Bedrock API.rr   rX   PNG)formatz@Content must be a string, Path to image-file, or PIL.Image.Image)rh   rD   r   _b64encode_image
read_bytes_PILLOW_INSTALLEDr   ioBytesIOsavegetvaluerj   )r!   rP   
bytes_datas      r$   r\   z)BedrockVectorizer._serialize_request_body.  s     gs## 	P))&& 	P $"7"78J8J8L8L"M"MNN 	P:gu{#C#C 	PJLLEL222 $"7"7
8K8K8M8M"N"NOON
 
 	
r%   r}   c                 P    t          j        |                               d          S )z#Encode an image as a base64 string.zutf-8)base64	b64encodedecode)r}   s    r$   rv   z"BedrockVectorizer._b64encode_image>  s#     
++227;;;r%   c                     dS )NbedrockrZ   )r!   s    r$   typezBedrockVectorizer.typeC  s    yr%   )r   Nr   N)rd   )#__name__
__module____qualname____doc__r   model_configrD   r	   r   r   r
   r   floatbytesr)   r    r+   intr,   r   r   r   r   rj   r   r@   rq   dictr   r\   staticmethodrv   propertyr   __classcell__)r#   s   @r$   r   r      s       @ @D :d;;;L 4/3-1* ** T#s(^,* 	*
 )** 
* * * * * *<6c 6d5k5>P8Q 6 6 6 6+$ + + + +-
Xd^ -
 -
 -
 -
^T T T T T( U$$333"")))44  
<c <U < < < 
<B U$$333"")))44   687= 7=S	7=/27=	d5k	7= 7= 7= 
7=r

	g/0#5	6
 
 
 
  <U <s < < < \< c    X    r%   r   )r   ry   r^   r:   pathlibr   typingr   r   r   r   r   r	   r
   pydanticr   tenacityr   r   r   tenacity.retryr   .redisvl.extensions.cache.embeddings.embeddingsr   redisvl.utils.vectorize.baser   PILr   rx   r8   r   rZ   r%   r$   <module>r      si    				  				       K K K K K K K K K K K K K K K K K K       G G G G G G G G G G 6 6 6 6 6 6 ONNNNNN 7 7 7 7 7 7     l l l l l l l l l ls   A A! A!