
    a
i.                         d Z ddlmZmZmZmZmZ ddlmZ ddl	m
Z
 ddlmZmZmZ ddlmZmZ  G d de          Z G d	 d
e          Zdeeef         defdZ G d de          ZdS )zIContains the `LLMEvaluator` class for building LLM-as-a-judge evaluators.    )AnyCallableOptionalUnioncast)	BaseModel)	warn_beta)EvaluationResultEvaluationResultsRunEvaluator)ExampleRunc                   f    e Zd ZU dZeed<   ee         ed<   eed<   dZeed<   dZ	e
e         ed<   dS )	CategoricalScoreConfigz&Configuration for a categorical score.keychoicesdescriptionFinclude_explanationNexplanation_description)__name__
__module____qualname____doc__str__annotations__listr   boolr   r        C:\Users\Dell Inspiron 16\Desktop\tws\AgrotaPowerBi\back-agrota-powerbi\mcp-client-agrota\venv\Lib\site-packages\langsmith/evaluation/llm_evaluator.pyr   r      sc         00	HHH#Y %%%%-1Xc]11111r   r   c                   l    e Zd ZU dZeed<   dZeed<   dZeed<   eed<   dZ	e
ed	<   d
Zee         ed<   d
S )ContinuousScoreConfigz%Configuration for a continuous score.r   r   min   maxr   Fr   Nr   )r   r   r   r   r   r   r#   floatr%   r   r   r   r   r   r   r    r"   r"      sm         //	HHHCNNNCNNN %%%%-1Xc]11111r   r"   score_configreturnc                    i }t          | t                    r+d| j        dd                    | j                   dd|d<   nJt          | t                    r&d| j        | j        d| j         d	| j         d
d|d<   nt          d          | j        rd| j	        dn| j	        d|d<   | j
        | j        d|| j        rddgndgdS )Nstringz%The score for the evaluation, one of z, .)typeenumr   scorenumberz&The score for the evaluation, between z and z, inclusive.)r,   minimummaximumr   z9Invalid score type. Must be 'categorical' or 'continuous'zThe explanation for the score.)r,   r   explanationobject)titler   r,   
propertiesrequired)
isinstancer   r   joinr"   r#   r%   
ValueErrorr   r   r   r   )r'   r5   s     r    _create_score_json_schemar:   !   sL    "$J, 677 V (2yy-..2 2 2
 

7 
L"7	8	8 	V#'#'EE E&2&6E E E	
 

7 TUUU' 
  7? 10!9%
 %

=! !#/ (4(HWWm$$wi  r   c                   ~   e Zd ZdZdddddeeeeeef                  f         deee	f         de
eee
e         gef                  d	ed
ef
dZedddedeeeeeef                  f         deee	f         de
eee
e         gef                  fd            Zdeeeeeef                  f         deee	f         de
eee
e         gef                  defdZe	 ddede
e         deeef         fd            Ze	 ddede
e         deeef         fd            Zdede
e         defdZdedeeef         fdZdS )LLMEvaluatorzA class for building LLM-as-a-judge evaluators.

    .. deprecated:: 0.5.0

       LLMEvaluator is deprecated. Use openevals instead: https://github.com/langchain-ai/openevals
    Nzgpt-4oopenai)map_variables
model_namemodel_providerprompt_templater'   r>   r?   r@   c                    	 ddl m} n"# t          $ r}t          d          |d}~ww xY w |d||d|}	|                     ||||	           dS )a  Initialize the `LLMEvaluator`.

        Args:
            prompt_template (Union[str, List[Tuple[str, str]]): The prompt
                template to use for the evaluation. If a string is provided, it is
                assumed to be a human / user message.
            score_config (Union[CategoricalScoreConfig, ContinuousScoreConfig]):
                The configuration for the score, either categorical or continuous.
            map_variables (Optional[Callable[[Run, Example], dict]], optional):
                A function that maps the run and example to the variables in the
                prompt.

                If `None`, it is assumed that the prompt only requires 'input',
                'output', and 'expected'.
            model_name (Optional[str], optional): The model to use for the evaluation.
            model_provider (Optional[str], optional): The model provider to use
                for the evaluation.
        r   )init_chat_modelzmLLMEvaluator requires langchain to be installed. Please install langchain by running `pip install langchain`.N)modelr@   r   )langchain.chat_modelsrC   ImportError_initialize)
selfrA   r'   r>   r?   r@   kwargsrC   e
chat_models
             r    __init__zLLMEvaluator.__init__T   s    8	        	 	 	O  	 %_ 
^
 
?E
 

 	,zRRRRRs   	 
(#()r>   rD   c                `    |                      |           }|                    ||||           |S )a  Create an `LLMEvaluator` instance from a `BaseChatModel` instance.

        Args:
            model (BaseChatModel): The chat model instance to use for the evaluation.
            prompt_template (Union[str, List[Tuple[str, str]]): The prompt
                template to use for the evaluation. If a string is provided, it is
                assumed to be a system message.
            score_config (Union[CategoricalScoreConfig, ContinuousScoreConfig]):
                The configuration for the score, either categorical or continuous.
            map_variables (Optional[Callable[[Run, Example]], dict]], optional):
                A function that maps the run and example to the variables in the
                prompt.

                If `None`, it is assumed that the prompt only requires 'input',
                'output', and 'expected'.

        Returns:
            LLMEvaluator: An instance of `LLMEvaluator`.
        )__new__rG   )clsrD   rA   r'   r>   instances         r    
from_modelzLLMEvaluator.from_model   s4    8 ;;s##_lM5QQQr   rK   c                 \   	 ddl m} ddlm} n"# t          $ r}t	          d          |d}~ww xY wt          ||          rt          |d          st          d          t          |t                    r|	                    d|fg          | _
        n|	                    |          | _
        t          | j
        j                  h d	z
  r|st          d
          || _        || _        t          | j                  | _        |                    | j                  }| j
        |z  | _        dS )a  Shared initialization code for `__init__` and `from_model`.

        Args:
            prompt_template (Union[str, List[Tuple[str, str]]): The prompt template.
            score_config (Union[CategoricalScoreConfig, ContinuousScoreConfig]):
                The score configuration.
            map_variables (Optional[Callable[[Run, Example]], dict]]):
                Function to map variables.
            chat_model (BaseChatModel): The chat model instance.
        r   )BaseChatModel)ChatPromptTemplatez|LLMEvaluator requires langchain-core to be installed. Please install langchain-core by running `pip install langchain-core`.Nwith_structured_outputzRchat_model must be an instance of BaseLanguageModel and support structured output.human>   inputoutputexpectedzrmap_inputs must be provided if the prompt template contains variables other than 'input', 'output', and 'expected')*langchain_core.language_models.chat_modelsrS   langchain_core.promptsrT   rF   r7   hasattrr9   r   from_messagespromptsetinput_variablesr>   r'   r:   score_schemarU   runnable)rH   rA   r'   r>   rK   rS   rT   rJ   s           r    rG   zLLMEvaluator._initialize   s   "	PPPPPPAAAAAAA 	 	 	Y  	 z=11	
$<==	 C  
 os++ 	L,::Wo<V;WXXDKK,::?KKDKt{*++.M.M.MM 	   M   +(5d6GHH66t7HII
j0s    
.).runexampler(   c                     |                      ||          }t          t          | j                            |                    }|                     |          S )zEvaluate a run.)_prepare_variablesr   dictrb   invoke_parse_outputrH   rc   rd   	variablesrX   s        r    evaluate_runzLLMEvaluator.evaluate_run   sL    
 ++C99	D$-"6"6y"A"ABB!!&)))r   c                    K   |                      ||          }t          t          | j                            |           d{V           }|                     |          S )zAsynchronously evaluate a run.N)rf   r   rg   rb   ainvokeri   rj   s        r    aevaluate_runzLLMEvaluator.aevaluate_run   sb      
 ++C99	D(=(=i(H(H"H"H"H"H"H"HII!!&)))r   c                    | j         r|                      ||          S i }d| j        j        v r}t          |j                  dk    rt          d          t          |j                  dk    rt          d          t          |j                                                  d         |d<   d| j        j        v r|j        st          d          t          |j                  dk    rt          d          t          |j                  dk    rt          d          t          |j                                                  d         |d<   d	| j        j        v r|r|j        st          d
          t          |j                  dk    rt          d          t          |j                  dk    rt          d          t          |j                                                  d         |d	<   |S )z'Prepare variables for model invocation.rW   r   zHNo input keys are present in run.inputs but the prompt requires 'input'.r$   zWMultiple input keys are present in run.inputs. Please provide a map_variables function.rX   zKNo output keys are present in run.outputs but the prompt requires 'output'.zYMultiple output keys are present in run.outputs. Please provide a map_variables function.rY   zMNo example or example outputs is provided but the prompt requires 'expected'.zQNo output keys are present in example.outputs but the prompt requires 'expected'.z]Multiple output keys are present in example.outputs. Please provide a map_variables function.)	r>   r^   r`   leninputsr9   r   valuesoutputs)rH   rc   rd   rk   s       r    rf   zLLMEvaluator._prepare_variables   s*    	4%%c7333	dk1113:!## (   3:!## 0   "&cj&7&7&9&9!:!:1!=Igt{222;  )   3;1$$ )   3;1$$ 8   #'s{'9'9';';"<"<Q"?Ih444 '/  +   7?##q(( +   7?##q(( 8   %))?)?)A)A$B$B1$EIj!r   rX   c                 V   t          | j        t                    r:|d         }|                    dd          }t	          | j        j        ||          S t          | j        t                    r:|d         }|                    dd          }t	          | j        j        ||          S dS )z1Parse the model output into an evaluation result.r.   r2   N)r   valuecomment)r   r.   rw   )r7   r'   r   getr
   r   r"   )rH   rX   rv   r2   r.   s        r    ri   zLLMEvaluator._parse_output!  s    d')?@@ 	7OE **]D99K#%)    )+@AA 	7OE **]D99K#%)   	 	r   )N)r   r   r   r   r   r   r   tupler   r"   r   r   r   r   rg   rL   classmethodr   rQ   rG   r	   r
   r   rl   ro   rf   ri   r   r   r    r<   r<   L   s         MQ"&*S *S *S sDsCx$99:*S 24IIJ	*S
  #x/@)A4)G HI*S *S *S *S *S *SX  MQ   sDsCx$99:	
 24IIJ  #x/@)A4)G HI   [>41sDsCx$99:41 24IIJ41  #x/@)A4)G HI	41
 41 41 41 41l 59* **!)'!2*	!22	3* * * Y* 59* **!)'!2*	!22	3* * * Y*7c 7HW4E 7$ 7 7 7 7rD U3CEV3V-W      r   r<   N)r   typingr   r   r   r   r   pydanticr   #langsmith._internal._beta_decoratorr	   langsmith.evaluationr
   r   r   langsmith.schemasr   r   r   r"   rg   r:   r<   r   r   r    <module>r      sR   O O 7 7 7 7 7 7 7 7 7 7 7 7 7 7       9 9 9 9 9 9 R R R R R R R R R R * * * * * * * *2 2 2 2 2Y 2 2 22 2 2 2 2I 2 2 2(.0EEF(	( ( ( (Vb b b b b< b b b b br   