
    m
iQE                    \   d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ  ej        e          Z G d dee
          Z G d de          Z G d d          Z G d dee          Z G d dee          Z G d dee          Z dS )z3Interfaces to be implemented by general evaluators.    )annotationsN)ABCabstractmethod)Sequence)Enum)Any)warn)AgentAction)BaseLanguageModel)run_in_executor)Chainc                      e Zd ZdZdZ	 dZ	 dZ	 dZ	 dZ	 dZ		 dZ
	 d	Z	 d
Z	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZdS )EvaluatorTypezThe types of the evaluators.qacot_qa
context_qapairwise_stringscore_stringlabeled_pairwise_stringlabeled_score_string
trajectorycriterialabeled_criteriastring_distanceexact_matchregex_matchpairwise_string_distanceembedding_distancepairwise_embedding_distancejson_validityjson_equalityjson_edit_distancejson_schema_validationN)__name__
__module____qualname____doc__QACOT_QA
CONTEXT_QAPAIRWISE_STRINGSCORE_STRINGLABELED_PAIRWISE_STRINGLABELED_SCORE_STRINGAGENT_TRAJECTORYCRITERIALABELED_CRITERIASTRING_DISTANCEEXACT_MATCHREGEX_MATCHPAIRWISE_STRING_DISTANCEEMBEDDING_DISTANCEPAIRWISE_EMBEDDING_DISTANCEJSON_VALIDITYJSON_EQUALITYJSON_EDIT_DISTANCEJSON_SCHEMA_VALIDATION     C:\Users\Dell Inspiron 16\Desktop\tws\AgrotaPowerBi\back-agrota-powerbi\mcp-client-agrota\venv\Lib\site-packages\langchain_classic/evaluation/schema.pyr   r      s        &&	BF% JS'O!L7H1@#VH<)7'OPKIKN9=-M"?;#M.#M=-T5IIr=   r   c                  :    e Zd ZdZeed	d                        ZdS )
LLMEvalChainz,A base class for evaluators that use an LLM.llmr   kwargsr   returnc                    dS )z#Create a new evaluator from an LLM.Nr<   )clsrA   rB   s      r>   from_llmzLLMEvalChain.from_llmN         r=   N)rA   r   rB   r   rC   r@   )r$   r%   r&   r'   classmethodr   rF   r<   r=   r>   r@   r@   K   sB        662 2 2 ^ [2 2 2r=   r@   c                      e Zd ZdZedd            Zedd            Zedd            Zedd            Z	 	 dddZ	d	S )_EvalArgsMixinz(Mixin for checking evaluation arguments.rC   boolc                    dS z2Whether this evaluator requires a reference label.Fr<   selfs    r>   requires_referencez!_EvalArgsMixin.requires_referenceW   	     ur=   c                    dS )0Whether this evaluator requires an input string.Fr<   rN   s    r>   requires_inputz_EvalArgsMixin.requires_input\   rQ   r=   strc                "    d| j         j         dS )z&Warning to show when input is ignored.zIgnoring input in , as it is not expected.	__class__r$   rN   s    r>   _skip_input_warningz"_EvalArgsMixin._skip_input_warninga   s     VDN$;UUUUr=   c                "    d| j         j         dS )z*Warning to show when reference is ignored.zIgnoring reference in rW   rX   rN   s    r>   _skip_reference_warningz&_EvalArgsMixin._skip_reference_warningf   s     WT^%<VVV	
r=   N	reference
str | Noneinput_Nonec                &   | j         r || j        j         d}t          |          || j         st	          | j        d           | j        r || j        j         d}t          |          || j        st	          | j        d           dS dS dS )aT  Check if the evaluation arguments are valid.

        Args:
            reference: The reference label.
            input_: The input string.

        Raises:
            ValueError: If the evaluator requires an input string but none is provided,
                or if the evaluator requires a reference label but none is provided.
        Nz requires an input string.   )
stacklevelz requires a reference string.)rT   rY   r$   
ValueErrorr	   rZ   rP   r\   )rO   r]   r_   msgs       r>   _check_evaluation_argsz%_EvalArgsMixin._check_evaluation_argsm   s      	"6>^,HHHCS//!d&9)a8888" 	"y'8^,KKKCS//! )@ -!<<<<<< !   r=   rC   rK   rC   rU   )NN)r]   r^   r_   r^   rC   r`   )
r$   r%   r&   r'   propertyrP   rT   rZ   r\   rf   r<   r=   r>   rJ   rJ   T   s        22   X    X V V V XV 
 
 
 X
 !%!= = = = = = =r=   rJ   c                      e Zd ZdZedd            Zedd            Zeddddd            ZdddddZ	dddddZ
dddddZdS )StringEvaluatorzString evaluator interface.

    Grade, tag, or otherwise evaluate predictions relative to their inputs
    and/or reference labels.
    rC   rU   c                    | j         j        S )zThe name of the evaluation.rX   rN   s    r>   evaluation_namezStringEvaluator.evaluation_name   s     ~&&r=   rK   c                    dS rM   r<   rN   s    r>   rP   z"StringEvaluator.requires_reference   rQ   r=   Nr]   input
prediction	str | Anyr]   str | Any | Nonerp   rB   r   dictc                   dS )a  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction: The LLM or chain prediction to evaluate.
            reference: The reference label to evaluate against.
            input: The input to consider during evaluation.
            **kwargs: Additional keyword arguments, including callbacks, tags, etc.

        Returns:
            The evaluation results containing the score or value.
            It is recommended that the dictionary contain the following keys:
                 - score: the score of the evaluation, if applicable.
                 - value: the string value of the evaluation, if applicable.
                 - reasoning: the reasoning for the evaluation, if applicable.
        Nr<   rO   rq   r]   rp   rB   s        r>   _evaluate_stringsz!StringEvaluator._evaluate_strings   rG   r=   c               @   K   t          d| j        f|||d| d{V S )a  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction: The LLM or chain prediction to evaluate.
            reference: The reference label to evaluate against.
            input: The input to consider during evaluation.
            **kwargs: Additional keyword arguments, including callbacks, tags, etc.

        Returns:
            The evaluation results containing the score or value.
            It is recommended that the dictionary contain the following keys:
                 - score: the score of the evaluation, if applicable.
                 - value: the string value of the evaluation, if applicable.
                 - reasoning: the reasoning for the evaluation, if applicable.
        Nrq   r]   rp   )r   rw   rv   s        r>   _aevaluate_stringsz"StringEvaluator._aevaluate_strings   s`      . %"
 "
 
 
 
 
 
 
 
 
 
 	
r=   r^   c               R    |                      ||            | j        d|||d|S )a  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction: The LLM or chain prediction to evaluate.
            reference: The reference label to evaluate against.
            input: The input to consider during evaluation.
            **kwargs: Additional keyword arguments, including callbacks, tags, etc.

        Returns:
            The evaluation results containing the score or value.
        r]   r_   ry   r<   )rf   rw   rv   s        r>   evaluate_stringsz StringEvaluator.evaluate_strings   sQ    & 	##i#FFF%t% 
!
 
 	
 
 	
r=   c               b   K   |                      ||            | j        d|||d| d{V S )a  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction: The LLM or chain prediction to evaluate.
            reference: The reference label to evaluate against.
            input: The input to consider during evaluation.
            **kwargs: Additional keyword arguments, including callbacks, tags, etc.

        Returns:
            The evaluation results containing the score or value.
        r|   ry   Nr<   )rf   rz   rv   s        r>   aevaluate_stringsz!StringEvaluator.aevaluate_strings   ss      & 	##i#FFF,T, 
!
 
 	
 
 
 
 
 
 
 
 	
r=   rh   rg   )
rq   rr   r]   rs   rp   rs   rB   r   rC   rt   )
rq   rU   r]   r^   rp   r^   rB   r   rC   rt   )r$   r%   r&   r'   ri   rm   rP   r   rw   rz   r}   r   r<   r=   r>   rk   rk      s         ' ' ' X'    X 
 '+"&     ^8 '+"&
 
 
 
 
 
H !% 
 
 
 
 
 
> !% 
 
 
 
 
 
 
 
r=   rk   c                  b    e Zd ZdZeddddd            ZdddddZdddddZdddddZdS )PairwiseStringEvaluatorzDCompare the output of two models (or two outputs of the same model).Nro   rq   rU   prediction_br]   r^   rp   rB   r   rC   rt   c                   dS )  Evaluate the output string pairs.

        Args:
            prediction: The output string from the first model.
            prediction_b: The output string from the second model.
            reference: The expected output / reference string.
            input: The input string.
            **kwargs: Additional keyword arguments, such as callbacks and optional reference strings.

        Returns:
            `dict` containing the preference, scores, and/or other information.
        Nr<   rO   rq   r   r]   rp   rB   s         r>   _evaluate_string_pairsz.PairwiseStringEvaluator._evaluate_string_pairs  rG   r=   c               B   K   t          d| j        f||||d| d{V S )  Asynchronously evaluate the output string pairs.

        Args:
            prediction: The output string from the first model.
            prediction_b: The output string from the second model.
            reference: The expected output / reference string.
            input: The input string.
            **kwargs: Additional keyword arguments, such as callbacks and optional reference strings.

        Returns:
            `dict` containing the preference, scores, and/or other information.
        Nrq   r   r]   rp   )r   r   r   s         r>   _aevaluate_string_pairsz/PairwiseStringEvaluator._aevaluate_string_pairs#  sc      * %'
 "%
 
 
 
 
 
 
 
 
 
 	
r=   c               T    |                      ||            | j        d||||d|S )r   r|   r   r<   )rf   r   r   s         r>   evaluate_string_pairsz-PairwiseStringEvaluator.evaluate_string_pairsB  sT    * 	##i#FFF*t* 
!%	
 

 
 
 	
r=   c               d   K   |                      ||            | j        d||||d| d{V S )r   r|   r   Nr<   )rf   r   r   s         r>   aevaluate_string_pairsz.PairwiseStringEvaluator.aevaluate_string_pairs`  sv      * 	##i#FFF1T1 
!%	
 

 
 
 
 
 
 
 
 
 	
r=   )rq   rU   r   rU   r]   r^   rp   r^   rB   r   rC   rt   )	r$   r%   r&   r'   r   r   r   r   r   r<   r=   r>   r   r   	  s        NN !%      ^6 !% 
 
 
 
 
 
H !% 
 
 
 
 
 
F !% 
 
 
 
 
 
 
 
r=   r   c                  r    e Zd ZdZedd            Zedddd            ZddddZddddZ	ddddZ
dS )AgentTrajectoryEvaluatorz,Interface for evaluating agent trajectories.rC   rK   c                    dS )rS   Tr<   rN   s    r>   rT   z'AgentTrajectoryEvaluator.requires_input  s	     tr=   N)r]   rq   rU   agent_trajectory!Sequence[tuple[AgentAction, str]]rp   r]   r^   rB   r   rt   c                   dS )  Evaluate a trajectory.

        Args:
            prediction: The final predicted response.
            agent_trajectory:
                The intermediate steps forming the agent trajectory.
            input: The input to the agent.
            reference: The reference answer.
            **kwargs: Additional keyword arguments.

        Returns:
            The evaluation result.
        Nr<   rO   rq   r   rp   r]   rB   s         r>   _evaluate_agent_trajectoryz3AgentTrajectoryEvaluator._evaluate_agent_trajectory  rG   r=   c               B   K   t          d| j        f||||d| d{V S )  Asynchronously evaluate a trajectory.

        Args:
            prediction: The final predicted response.
            agent_trajectory:
                The intermediate steps forming the agent trajectory.
            input: The input to the agent.
            reference: The reference answer.
            **kwargs: Additional keyword arguments.

        Returns:
            The evaluation result.
        N)rq   r   r]   rp   )r   r   r   s         r>   _aevaluate_agent_trajectoryz4AgentTrajectoryEvaluator._aevaluate_agent_trajectory  sc      , %+
 "-
 
 
 
 
 
 
 
 
 
 	
r=   c               T    |                      ||            | j        d||||d|S )r   r|   rq   rp   r   r]   r<   )rf   r   r   s         r>   evaluate_agent_trajectoryz2AgentTrajectoryEvaluator.evaluate_agent_trajectory  sT    , 	##i#FFF.t. 
!-	
 

 
 
 	
r=   c               d   K   |                      ||            | j        d||||d| d{V S )r   r|   r   Nr<   )rf   r   r   s         r>   aevaluate_agent_trajectoryz3AgentTrajectoryEvaluator.aevaluate_agent_trajectory  sv      , 	##i#FFF5T5 
!-	
 

 
 
 
 
 
 
 
 
 	
r=   rg   )rq   rU   r   r   rp   rU   r]   r^   rB   r   rC   rt   )r$   r%   r&   r'   ri   rT   r   r   r   r   r   r<   r=   r>   r   r     s        66   X  !%     ^: !%
 
 
 
 
 
L !%
 
 
 
 
 
J !%
 
 
 
 
 
 
 
r=   r   )!r'   
__future__r   loggingabcr   r   collections.abcr   enumr   typingr   warningsr	   langchain_core.agentsr
   langchain_core.language_modelsr   langchain_core.runnables.configr   langchain_classic.chains.baser   	getLoggerr$   loggerrU   r   r@   rJ   rk   r   r   r<   r=   r>   <module>r      s   9 9 " " " " " "  # # # # # # # # $ $ $ $ $ $                   - - - - - - < < < < < < ; ; ; ; ; ; / / / / / /		8	$	$3J 3J 3J 3J 3JC 3J 3J 3Jl2 2 2 2 25 2 2 21= 1= 1= 1= 1= 1= 1= 1=h~
 ~
 ~
 ~
 ~
nc ~
 ~
 ~
Bs
 s
 s
 s
 s
nc s
 s
 s
l|
 |
 |
 |
 |
~s |
 |
 |
 |
 |
r=   