
    k
iA                        d Z ddlZddlZddlZddlmZmZmZmZm	Z	 ddl
mZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ  ej        e          Z e            ZdedefdZdedefdZdedefdZ G d de          Z dS )zSemantic cache middleware for LLM responses.

This module provides a middleware that caches LLM responses based on
semantic similarity using Redis and vector embeddings. Compatible with
LangChain's AgentMiddleware protocol for use with create_agent.
    N)Any	AwaitableCallableListUnion)ModelCallResultModelRequestModelResponse)	AIMessage)SemanticCache)JsonPlusRedisSerializer   )AsyncRedisMiddleware)SemanticCacheConfigcontentreturnc                    t          | t                    s| S g }| D ]b}t          |t                    r6d|v r2|                    d |                                D                        M|                    |           c|S )zStrip provider-specific IDs from content blocks.

    When using the OpenAI Responses API, content is a list of blocks with
    embedded item IDs (rs_, msg_ prefixes). These must be removed from cached
    messages to prevent duplicate ID errors.
    idc                 &    i | ]\  }}|d k    ||S )r    ).0kvs      C:\Users\Dell Inspiron 16\Desktop\tws\AgrotaPowerBi\back-agrota-powerbi\mcp-client-agrota\venv\Lib\site-packages\langgraph/middleware/redis/semantic_cache.py
<dictcomp>z&_strip_content_ids.<locals>.<dictcomp>,   s#    IIIdaqDyyQyyy    )
isinstancelistdictappenditems)r   strippedblocks      r   _strip_content_idsr$       s     gt$$ H # #eT"" 	#tu}}OOIIekkmmIIIJJJJOOE""""Or   responsec           	         t          | d          rt          | j        t                    rt	          | j                  D ]q}t          |d          r(t          j        |                                          c S t          |d          r't          j        dt          |dd          i          c S rt          j        ddi          S t          | d          r&t          j        |                                           S t          | t                    rt          j        |           S t          | dt          |                     }t          j        d|i          S )zSerialize a model response for cache storage.

    Uses LangChain's to_json() for proper serialization of LangChain objects.

    Args:
        response: The response to serialize.

    Returns:
        JSON string representation of the response.
    resultto_jsonr    )hasattrr   r'   r   reversedjsondumpsr(   getattrr   str)r%   msgr   s      r   _serialize_responser1   2   sM    x"" 	+z(/4'H'H 	+HO,, 	L 	LCsI&& Lz#++--00000i(( Lz9gc9b.I.I"JKKKKKL z9b/*** x## .z(**,,--- (D!! $z(### h	3x==99G:y'*+++r   
cached_strc           	         t          t          j                              }	 t          j        |           }t          |t                    r|                    d          dv r|                    d          dk    rt          	                    |          }t          |t                    rA|                    |t          |j                  ddii d          }t          |gd	
          S t          |dt          |                    }t          t          ||ddi          gd	
          S |                    dd          }t          t          ||ddi          gd	
          S t          t          t          |          |ddi          gd	
          S # t          j        $ r' t          t          | |ddi          gd	
          cY S w xY w)a  Deserialize a cached response into a ModelResponse.

    Uses the project's JsonPlusRedisSerializer for proper LangChain object revival.
    Always returns a ModelResponse to maintain compatibility with agent routing.

    IMPORTANT: Each cache hit generates a NEW message ID (UUID). This is critical
    for frontend streaming compatibility - without unique IDs, the frontend
    deduplicates messages and cached responses don't appear.

    The cached response is also marked with additional_kwargs={"cached": True}
    to allow consumers to identify cached responses.

    Args:
        cached_str: The cached JSON string.

    Returns:
        A ModelResponse containing the cached message with a unique ID.
    lc)r      typeconstructorcachedT)r   r   additional_kwargsresponse_metadata)updateN)r'   structured_responser   )r   r   r9   r)   )r/   uuiduuid4r,   loadsr   r   get_serializer_revive_if_neededr   
model_copyr$   r   r
   r.   JSONDecodeError)r2   new_message_iddatarevivedcached_messager   s         r   _deserialize_responserI   V   sT   * &&NC
z*%%dD!! *	xx~~''DHHV,<,<,M,M%77==gy11  &-%7%7"0'9'/'J'J2:D1A13	    &8 & &N ) ./T    "'9c'llCC$!$+-/7.>   )-	 	 	 	 hhy"--G  ')+3T*:   %)	 	 	 	 II%'/&6   !%	
 	
 	
 		
  
 
 
&%'/&6   !%	
 	
 	
 		
 		
 		

s%   C	F ,AF .9F (0F 3GGc                   L    e Zd ZU dZeed<   eed<   deddf fdZddZd	e	e
eeef         ef                  defd
ZdedefdZd	e	e
eeef         ef                  de	e         fdZd	e	e
eeef         ef                  defdZdedeegee         f         defdZ xZS )SemanticCacheMiddlewarea,  Middleware that caches LLM responses based on semantic similarity.

    Uses redisvl.extensions.llmcache.SemanticCache to store and retrieve
    cached responses. When a request is semantically similar to a previous
    request (within the distance threshold), the cached response is returned
    without calling the LLM.

    By default, only "final" responses (those without tool_calls) are cached.
    This prevents caching intermediate responses that require tool execution.

    Example:
        ```python
        from langgraph.middleware.redis import (
            SemanticCacheMiddleware,
            SemanticCacheConfig,
        )

        config = SemanticCacheConfig(
            redis_url="redis://localhost:6379",
            distance_threshold=0.1,
            ttl_seconds=3600,
        )

        middleware = SemanticCacheMiddleware(config)

        async def call_llm(request):
            # Your LLM call here
            return response

        # Use middleware
        result = await middleware.awrap_model_call(request, call_llm)
        ```
    _cache_configconfigr   Nc                 X    t                                          |           || _        dS )z{Initialize the semantic cache middleware.

        Args:
            config: Configuration for the semantic cache.
        N)super__init__rM   )selfrN   	__class__s     r   rQ   z SemanticCacheMiddleware.__init__   s(     	   r   c                 8  K   | j         j        | j         j        d}| j         j        r| j         j        |d<   n| j         j        r| j         j        |d<   | j         j        | j         j        |d<   | j         j        | j         j        |d<   t          di || _        dS )a   Set up the SemanticCache instance.

        Note: SemanticCache from redisvl uses synchronous Redis operations
        internally, so we must provide redis_url and let it manage its own
        sync connection rather than passing our async client.
        )namedistance_threshold	redis_urlconnection_kwargsN
vectorizerttlr   )	rM   rU   rV   rW   connection_argsrY   ttl_secondsr   rL   )rR   cache_kwargss     r   _setup_asyncz$SemanticCacheMiddleware._setup_async   s       L%"&,"A(
 (
 <! 	M(,(>L%%\) 	M040LL,-<".)-)@L&<#/"&,":L#33l33r   messagesc                 8   |sdS t          |          D ]}t          |t                    r5|                    dd          }|dk    r|                    dd          c S Lt	          |dd          pt	          |dd          }|dv rt	          |dd          c S dS )aG  Extract the prompt to use for cache lookup.

        Extracts the last user message content from the messages list.
        Handles both dict-style messages and LangChain message objects.

        Args:
            messages: List of messages from the request.

        Returns:
            The extracted prompt string.
        r)   roleuserr   r6   N)rb   human)r+   r   r   r@   r.   )rR   r_   messagera   msg_types        r   _extract_promptz'SemanticCacheMiddleware._extract_prompt   s      	2  )) 	; 	;G'4(( 
;{{62..6>>";;y"55555 " #7FD99 WVT> > 000"7Ir::::: 1 rr   r%   c                 
   t          |t                    r|                    d          }nVt          |dd          }|Ct	          |d          r3|j        }|r*t          |          dk    rt          |d         dd          }| S )zCheck if the response is a final response (no tool calls).

        Args:
            response: The model response to check (dict or LangChain type).

        Returns:
            True if the response is final (should be cached), False otherwise.
        
tool_callsNr'   r   )r   r   r@   r.   r*   r'   len)rR   r%   rh   r'   s       r   _is_final_responsez*SemanticCacheMiddleware._is_final_response  s     h%% 		H!l33JJ !<>>J!gh&A&A!! Hc&kkAoo!(L$!G!GJ~r   c                    g }|D ]}t          |t                    r\|                    dd          p|                    dd          }|dk    r)|                    |                    dd                     st	          |dd          pt	          |dd          }|dk    s|j        j        dk    r$|                    t	          |dd                     |S )zExtract tool names from tool result messages.

        Args:
            messages: List of messages from the request.

        Returns:
            List of tool names that have results in the messages.
        ra   r)   r6   toolrU   NToolMessage)r   r   r@   r    r.   rS   __name__)rR   r_   
tool_namesrd   ra   re   s         r   _get_tool_names_from_resultsz4SemanticCacheMiddleware._get_tool_names_from_results4  s     
 
	D 
	DG'4(( 	D{{62..I'++fb2I2I6>>%%gkk&"&=&=>>>"7FD99 WVT> > v%%):)C})T)T%%ggvr&B&BCCCr   c                     |                      |          }|sdS | j        j        dS |D ]}|r|| j        j        vr dS dS )a  Check if cache should be skipped due to tool results.

        When tool results are present, we check if ALL tools are in the
        deterministic_tools list. If so, caching is safe. Otherwise,
        we skip the cache to avoid returning stale responses.

        Args:
            messages: List of messages from the request.

        Returns:
            True if cache should be skipped, False if caching is OK.
        FNT)rp   rM   deterministic_tools)rR   r_   ro   	tool_names       r   #_should_skip_cache_for_tool_resultsz;SemanticCacheMiddleware._should_skip_cache_for_tool_resultsM  sm     66x@@
 	5 <+34 $ 	 	I Ydl.NNNtt ur   requesthandlerc                   K   |                                   d{V  t          |t                    r|                    dg           }nt	          |dg           }|                     |          }|s ||           d{V S |                     |          rt                              d            ||           d{V }| j	        j
        r|                     |          ro	 t          |          }| j                            ||           d{V  n<# t          $ r/}| j        s t                              d|            Y d}~nd}~ww xY w|S 	 | j                            |           d{V }|rR|d                             d          }	|	r5t                              d	|dd
          d           t'          |	          S n<# t          $ r/}| j        s t                              d|            Y d}~nd}~ww xY w ||           d{V }| j	        j
         p|                     |          }
|
r	 t          |          }| j                            ||           d{V  t                              d|dd
          d           n<# t          $ r/}| j        s t                              d|            Y d}~nd}~ww xY w|S )a  Wrap a model call with semantic caching.

        Checks the cache for a semantically similar request. If found,
        returns the cached response. Otherwise, calls the handler and
        optionally caches the result.

        Args:
            request: The model request containing messages.
            handler: The async function to call the model.

        Returns:
            The model response (from cache or handler).

        Raises:
            Exception: If graceful_degradation is False and cache operations fail.
        Nr_   z@Skipping cache - request contains non-deterministic tool results)promptr%   zCache store failed: )rx   r   r%   zCache hit for prompt: 2   z...z%Cache check failed, calling handler: zCached response for prompt: )_ensure_initialized_asyncr   r   r@   r.   rf   rt   loggerdebugrM   cache_final_onlyrj   r1   rL   astore	Exception_graceful_degradationwarningacheckrI   )rR   ru   rv   r_   rx   r%   response_strer8   cached_responseshould_caches              r   awrap_model_callz(SemanticCacheMiddleware.awrap_model_callp  s     * ,,......... gt$$ 	8{{:r22HHw
B77H%%h// 	* ))))))))) 33H== 	LLR   %WW--------H<0 ?D4K4KH4U4U ??#6x#@#@L+,,F\,RRRRRRRRRR  ? ? ?5 NN#=!#=#=>>>>>>>>? O
	H;--V-<<<<<<<<F B"()--
";";" BLL!J&"+!J!J!JKKK0AAA 	H 	H 	H- NNF1FFGGGGGGGG	H !))))))))  <88 
D<S<S=
 =
  		;;28<<k(((NNNNNNNNNLF3B3KLLLMMMM ; ; ;1 9a99::::::::;
 sJ   $1D 
E %E

EA4G 
H%G??H=AJ 
K%K		K)r   N)rn   
__module____qualname____doc__r   __annotations__r   rQ   r^   r   r   r   r/   r   rf   boolrj   rp   rt   r	   r   r   r
   r   r   __classcell__)rS   s   @r   rK   rK      s           D     2 t      4 4 4 46U4S>33F-G(H S    @3 4    .U4S>3#678	c   2!U4S>3#678!	! ! ! !FTT <.)M*BBCT 
	T T T T T T T Tr   rK   )!r   r,   loggingr=   typingr   r   r   r   r   !langchain.agents.middleware.typesr   r	   r
   langchain_core.messagesr   redisvl.extensions.cache.llmr   )langgraph.checkpoint.redis.jsonplus_redisr   aior   typesr   	getLoggerrn   r{   rA   r$   r/   r1   rI   rK   r   r   r   <module>r      s       8 8 8 8 8 8 8 8 8 8 8 8 8 8         
 . - - - - - 6 6 6 6 6 6 M M M M M M % % % % % % & & & & & &		8	$	$ &%''     $!,# !,# !, !, !, !,HZ
c Z
m Z
 Z
 Z
 Z
zQ Q Q Q Q2 Q Q Q Q Qr   