
    a
i                       d Z ddlmZ ddlZddlZddlZddlZddlmZ ddl	m
Z
mZ ddlmZ ddlmZmZmZmZmZmZ ddlmZmZmZmZmZ dd	lmZ dd
lmZ ddlm Z  ddl!m"Z"m#Z#m$Z$m%Z%  ej&        e'          Z( G d de          Z) G d ded          Z* G d de          Z+ G d ded          Z, G d d          Z-ee+e,e.f         Z/ G d de          Z0ee0e.f         Z1 G d de-          Z2d4dZ3d Z4d5d#Z5 G d$ d%          Z6d6d(Z7d7d+Z8d8d-Z9d9d1Z:eeee j%                 ee j$                 gee+e,f         f         ee;e j%                 e;e j$                 gee+e,f         f         f         Z<d:d3Z=dS );z?This module contains the evaluator classes for evaluating runs.    )annotationsN)abstractmethod)	AwaitableSequence)wraps)AnyCallableLiteralOptionalUnioncast)	BaseModel
ConfigDictFieldValidationErrormodel_validator)	TypedDictrun_helpers)schemas)
SCORE_TYPE
VALUE_TYPEExampleRunc                  *    e Zd ZU dZded<   	 ded<   dS )Categoryz$A category for categorical feedback.Optional[Union[float, int]]valuestrlabelN__name__
__module____qualname____doc____annotations__     C:\Users\Dell Inspiron 16\Desktop\tws\AgrotaPowerBi\back-agrota-powerbi\mcp-client-agrota\venv\Lib\site-packages\langsmith/evaluation/evaluator.pyr   r      s0         ..&&&&CJJJ&&r(   r   c                  B    e Zd ZU dZded<   	 ded<   	 ded<   	 ded<   d	S )
FeedbackConfigzkConfiguration to define a type of feedback.

    Applied on on the first creation of a `feedback_key`.
    z0Literal['continuous', 'categorical', 'freeform']typer   minmaxz%Optional[list[Union[Category, dict]]]
categoriesNr!   r'   r(   r)   r+   r+   (   sW          
 ;:::$$$$;$$$$A555555r(   r+   F)totalc                     e Zd ZU dZded<   	 dZded<   	 dZded<   	 dZd	ed
<   	 dZded<   	 dZ	d	ed<   	  e
e          Zded<   	 dZded<   	 dZded<   	 dZded<   	 dZd	ed<   	  ed          Z ed          dd            ZdS )EvaluationResultzEvaluation result.r   keyNr   scorer   r   zOptional[dict]metadatazOptional[str]comment
correction)default_factorydictevaluator_infoz%Optional[Union[FeedbackConfig, dict]]feedback_configOptional[Union[uuid.UUID, str]]source_run_idtarget_run_idextraforbid)r?   after)modereturnc                    | j         Ct          | j        t          t          f          r"t
                              d| j                    | S )z:Warn when numeric values are passed via the `value` field.NzJNumeric values should be provided in the 'score' field, not 'value'. Got: )r4   
isinstancer   intfloatloggerwarningselfs    r)   check_value_non_numericz(EvaluationResult.check_value_non_numericV   sR     :*TZ#u"F"FNN&& &   r(   )rC   r2   )r"   r#   r$   r%   r&   r4   r   r5   r6   r7   r   r9   r:   r;   r=   r>   r?   r   model_configr   rL   r'   r(   r)   r2   r2   7   s?        HHH@E0E8#H####8!G!!!!2!%J%%%%: 5666N66665=AOAAAA;59M9999659M9999 !E    ):H---L_'"""   #"  r(   r2   c                      e Zd ZU dZded<   dS )EvaluationResultszqBatch evaluation results.

    This makes it easy for your evaluator to return multiple
    metrics at once.
    zlist[EvaluationResult]resultsNr!   r'   r(   r)   rO   rO   a   s*           $###!!r(   rO   c                  >    e Zd ZdZe	 	 ddd            Z	 	 dddZdS )RunEvaluatorzEvaluator interface class.Nrunr   exampleOptional[Example]evaluator_run_idOptional[uuid.UUID]rC   *Union[EvaluationResult, EvaluationResults]c                    dS )zEvaluate an example.Nr'   )rK   rS   rT   rV   s       r)   evaluate_runzRunEvaluator.evaluate_runo   s      r(   c                    K   t          j                     fd}t          j                                        d|           d{V S )z#Evaluate an example asynchronously.c                     t          j        di  5                                cd d d            S # 1 swxY w Y   d S )Nr'   )rhtracing_contextrZ   )current_contextrV   rT   rS   rK   s   r)   _run_with_contextz5RunEvaluator.aevaluate_run.<locals>._run_with_context   s    #66o66 I I((g7GHHI I I I I I I I I I I I I I I I I Is   7;;N)r]   get_tracing_contextasyncioget_running_looprun_in_executor)rK   rS   rT   rV   r`   r_   s   ```` @r)   aevaluate_runzRunEvaluator.aevaluate_runx   s       022	I 	I 	I 	I 	I 	I 	I 	I 	I -//??FWXXXXXXXXXr(   NNrS   r   rT   rU   rV   rW   rC   rX   )r"   r#   r$   r%   r   rZ   re   r'   r(   r)   rR   rR   l   sm        $$ &*04	# # # # ^# &*04	Y Y Y Y Y Y Yr(   rR   c                  J    e Zd ZU dZded<   	 ded<   	 dZded<   	 dZd	ed
<   dS )ComparisonEvaluationResultzFeedback scores for the results of comparative evaluations.

    These are generated by functions that compare two or more runs,
    returning a ranking or other feedback.
    r   r3   z'dict[Union[uuid.UUID, str], SCORE_TYPE]scoresNr<   r=   z6Optional[Union[str, dict[Union[uuid.UUID, str], str]]]r6   )r"   r#   r$   r%   r&   r=   r6   r'   r(   r)   ri   ri      sb           HHH@3333459M99996FJGJJJJ r(   ri   c                       e Zd ZdZ	 d$d%dZ	 d&d'dZd(dZd)dZed*d            Z		 	 d+d,dZ
	 	 d+d- fd Z	 d$d.d!Zd/d#Z xZS )0DynamicRunEvaluatora  A dynamic evaluator that wraps a function and transforms it into a `RunEvaluator`.

    This class is designed to be used with the `@run_evaluator` decorator, allowing
    functions that take a `Run` and an optional `Example` as arguments, and return
    an `EvaluationResult` or `EvaluationResults`, to be used as instances of `RunEvaluator`.

    Attributes:
        func (Callable): The function that is wrapped by this evaluator.
    NfuncXCallable[[Run, Optional[Example]], Union[_RUNNABLE_OUTPUT, Awaitable[_RUNNABLE_OUTPUT]]]afuncIOptional[Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]]c                   t          |          \  }|rt          |          \  }dfd} t          |          |            ddlm} |2|                    ||          | _        t          |d	d
          | _        t          j	        |          rE|t          d          |                    ||          | _        t          |d	d
          | _        dS |                    t          t          t          t          t                   gt           f         |          |          | _        t          |d	d
          | _        dS )zInitialize the `DynamicRunEvaluator` with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns a dict or `ComparisonEvaluationResult`.
        inputsr9   rC   c                |    | S  |                      d          |                      d                    \  }}}|S )NrS   rT   getrr   _traced_inputsprepare_inputss      r)   process_inputsz4DynamicRunEvaluator.__init__.<locals>.process_inputs   sK    %$2N

5!!6::i#8#8% %!Q= ! r(   r   r   Nrz   r"   rl   Func was provided as a coroutine function, but afunc was also provided. If providing both, func should be a regular function to avoid ambiguity.rr   r9   rC   r9   )_normalize_evaluator_funcr   	langsmithr   ensure_traceablero   getattr_nameinspectiscoroutinefunction	TypeErrorr   r	   r   r   r   _RUNNABLE_OUTPUTrm   rK   rm   ro   rz   r   ry   s        @r)   __init__zDynamicRunEvaluator.__init__   s   ( ";4!@!@~ 	G&?&F&F#UN	! 	! 	! 	! 	! 	! 	dD))))))$55n 6  DJ !
4IJJDJ&t,, 	J 3  
 %55^ 6  DJ !z3HIIDJJJ#44XsHW$568HHI4PP- 5  DI !z3HIIDJJJr(   FresultUnion[EvaluationResult, dict]r=   	uuid.UUIDallow_no_keyboolrC   r2   c                \   t          t                    rj        s|_        S 	 st          d           dvr|r
| j        d<   t          fddD                       rt          d           t          di d|iS # t          $ r}t          d           |d }~ww xY w)	NziExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got empty result: r3   c              3      K   | ]}|vV  	d S Nr'   ).0kr   s     r)   	<genexpr>z@DynamicRunEvaluator._coerce_evaluation_result.<locals>.<genexpr>   s'      JJq1F?JJJJJJr(   )r4   r   r6   zrExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score' or categorical 'value'; got r=   z[Expected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got r'   )rE   r2   r=   
ValueErrorr   allr   )rK   r   r=   r   es    `   r)   _coerce_evaluation_resultz-DynamicRunEvaluator._coerce_evaluation_result   s6    f.// 	' 5'4$M	  OFLO O   F""|" $
uJJJJ,IJJJJJ  XOUX X   $QQ&P&PQQQ 	 	 	=4:= =  	s   A B	 	
B+B&&B+rP   Union[dict, EvaluationResults]rX   c                     d|v r8|                                 } fd|d         D             |d<   t          di |S                      t          t          |          d          S )NrP   c                >    g | ]}                     |           S ))r=   )r   )r   rrK   r=   s     r)   
<listcomp>zBDynamicRunEvaluator._coerce_evaluation_results.<locals>.<listcomp>  s<        ..q.NN  r(   T)r=   r   r'   )copyrO   r   r   r9   )rK   rP   r=   cps   ` ` r)   _coerce_evaluation_resultsz.DynamicRunEvaluator._coerce_evaluation_results  s    
 B     +  ByM %**r***--w}4 . 
 
 	
r(   MUnion[EvaluationResult, EvaluationResults, dict, str, int, bool, float, list]c                    t          |t                    r|j        s||_        |S t          |          }|                     ||          S r   )rE   r2   r=   _format_evaluator_resultr   )rK   r   r=   s      r)   _format_resultz"DynamicRunEvaluator._format_result  sR     f.// 	' 5'4$M)&11..v}EEEr(   c                "    t          | d          S zCheck if the evaluator function is asynchronous.

        Returns:
            bool: `True` if the evaluator function is asynchronous, `False` otherwise.
        ro   hasattrrJ   s    r)   is_asynczDynamicRunEvaluator.is_async$       tW%%%r(   rS   r   rT   rU   rV   rW   c                   t          | d          s_t          j                    }|                                rt	          d          |                    |                     ||                    S |t          j                    }d|j	        i}t          |dd          rt          |j                  |d<   |                     ||||d          }|                     ||          S )	a  Evaluate a run using the wrapped function.

        This method directly invokes the wrapped function with the provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        rm   tCannot call `evaluate_run` on an async run evaluator from within an running event loop. Use `aevaluate_run` instead.Nr>   
session_id
experimentrun_idr5   langsmith_extra)r   rb   get_event_loop
is_runningRuntimeErrorrun_until_completere   uuiduuid4idr   r   r   rm   r   )rK   rS   rT   rV   running_loopr5   r   s          r)   rZ   z DynamicRunEvaluator.evaluate_run-  s    " tV$$ 	Y"133L&&(( Y"R  
 $66t7I7I#w7W7WXXX##z||$3SV#<3d++ 	9%(%8%8H\"'7XNN  
 

 ""6+;<<<r(   c                r  K   t          | d          s(t                                          ||           d{V S |t          j                    }d|j        i}t          |dd          rt          |j                  |d<   | 	                    ||||d           d{V }| 
                    ||          S )a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        ro   Nr>   r   r   r   r   )r   superre   r   r   r   r   r   r   ro   r   )rK   rS   rT   rV   r5   r   	__class__s         r)   re   z!DynamicRunEvaluator.aevaluate_runS  s      & tW%% 	=..sG<<<<<<<<<##z||$3SV#<3d++ 	9%(%8%8H\"zz'7XNN " 
 
 
 
 
 
 
 

 ""6+;<<<r(   c                .    |                      ||          S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        )rZ   )rK   rS   rT   s      r)   __call__zDynamicRunEvaluator.__call__t  s       g...r(   r   c                    d| j          dS ))Represent the DynamicRunEvaluator object.z<DynamicRunEvaluator >r   rJ   s    r)   __repr__zDynamicRunEvaluator.__repr__  s    4tz4444r(   r   )rm   rn   ro   rp   )F)r   r   r=   r   r   r   rC   r2   )rP   r   r=   r   rC   rX   )r   r   r=   r   rC   rX   rC   r   rf   rg   )rS   r   rT   rU   rV   rW   )rS   r   rT   rU   rC   rX   rC   r   )r"   r#   r$   r%   r   r   r   r   propertyr   rZ   re   r   r   __classcell__)r   s   @r)   rl   rl      s7        , 8J 8J 8J 8J 8J| #	    <
 
 
 
"F F F F & & & X& &*04	$= $= $= $= $=R &*04	= = = = = = =D 6:/ / / / /"5 5 5 5 5 5 5 5r(   rl   rm   rn   c                     t          |           S )zmCreate a run evaluator from a function.

    Decorator that transforms a function into a `RunEvaluator`.
    )rl   rm   s    r)   run_evaluatorr     s     t$$$r(   i'  objr   c                ~    t          |           }t          |          t          k    r|d t          dz
           dz   }|S )N   z...))reprlen_MAXSIZE)r   ss     r)   _maxsize_reprr     s:    S		A
1vvn1n&Hr(   c                      e Zd ZdZ	 dddZedd
            Z	 dddZ	 dddZ	 dddZ	d dZ
ed!d            Zd"dZdS )#DynamicComparisonRunEvaluatorz4Compare predictions (as traces) from 2 or more runs.Nrm   fCallable[[Sequence[Run], Optional[Example]], Union[_COMPARISON_OUTPUT, Awaitable[_COMPARISON_OUTPUT]]]ro   UOptional[Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]]c                   t          |          \  }|rt          |          \  }dfd} t          |          |            ddlm} |2|                    ||          | _        t          |d	d
          | _        t          j	        |          rE|t          d          |                    ||          | _        t          |d	d
          | _        dS |                    t          t          t          t                   t          t                    gt"          f         |          |          | _        t          |d	d
          | _        dS )zInitialize the `DynamicRunEvaluator` with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns an `EvaluationResult` or `EvaluationResults`.
        rr   r9   rC   c                |    | S  |                      d          |                      d                    \  }}}|S )NrunsrT   rt   rv   s      r)   rz   z>DynamicComparisonRunEvaluator.__init__.<locals>.process_inputs  sK    %$2N

6""FJJy$9$9% %!Q= ! r(   r   r   Nr{   r"   rl   r|   r}   )$_normalize_comparison_evaluator_funcr   r   r   r   ro   r   r   r   r   r   r   r	   r   r   r   r   _COMPARISON_OUTPUTrm   r   s        @r)   r   z&DynamicComparisonRunEvaluator.__init__  s   ( "Fd!K!K~ 	R&J5&Q&Q#UN	! 	! 	! 	! 	! 	! 	dD))))))$55n 6  DJ !
4IJJDJ&t,, 	J 3  
 %55^ 6  DJ !z3HIIDJJJ#44!#(9:*,    . 5 	 	DI !z3HIIDJJJr(   rC   r   c                "    t          | d          S r   r   rJ   s    r)   r   z&DynamicComparisonRunEvaluator.is_async  r   r(   r   Sequence[Run]rT   rU   ri   c                   t          | d          s_t          j                    }|                                rt	          d          |                    |                     ||                    S t          j                    }| 	                    |          }| 
                    ||||d          }|                     |||          S )zCompare runs to score preferences.

        Args:
            runs: A list of runs to compare.
            example: An optional example to be used in the evaluation.

        rm   r   r   tagsr   )r   rb   r   r   r   r   acompare_runsr   r   	_get_tagsrm   _format_results)rK   r   rT   r   r=   r   r   s          r)   compare_runsz*DynamicComparisonRunEvaluator.compare_runs  s     tV$$ 
	"133L&&(( "R  
 $66&&tW55   
~~d##'4dCC  
 

 ##FM4@@@r(   c                  K   t          | d          s|                     ||          S t          j                    }|                     |          }|                     ||||d           d{V }|                     |||          S )a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            runs (Run): The runs to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        ro   r   r   N)r   r   r   r   r   ro   r   )rK   r   rT   r=   r   r   s         r)   r   z+DynamicComparisonRunEvaluator.acompare_runs  s        tW%% 	4$$T7333
~~d##zz'4dCC " 
 
 
 
 
 
 
 

 ##FM4@@@r(   c                .    |                      ||          S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        )r   )rK   r   rT   s      r)   r   z&DynamicComparisonRunEvaluator.__call__'  s       w///r(   r   c                    d| j          dS )r   z<DynamicComparisonRunEvaluator r   r   rJ   s    r)   r   z&DynamicComparisonRunEvaluator.__repr__8  s    >>>>>r(   	list[str]c                    g }| D ]g}|                     dt          |j                  z              t          |dd          r*|                     dt          |j                  z              h|S )zExtract tags from runs.zrun:r   Nzexperiment:)appendr   r   r   r   )r   r   rS   s      r)   r   z'DynamicComparisonRunEvaluator._get_tags<  su      	A 	ACKKSV,---sL$// AMC,?,??@@@r(   r   -Union[dict, list, ComparisonEvaluationResult]r=   r   c                   t          |t                    r|j        s||_        |S t          |t                    r$d t	          ||          D             | j        |d}n8t          |t                    rd|vr
| j        |d<   nd|}t          |          	 t          di d|i|S # t          $ r}t          d|           |d }~ww xY w)Nc                $    i | ]\  }}|j         |S r'   )r   )r   rS   r4   s      r)   
<dictcomp>zADynamicComparisonRunEvaluator._format_results.<locals>.<dictcomp>S  s     MMMZS%365MMMr(   )rj   r3   r=   r3   zXExpected 'dict', 'list' or 'ComparisonEvaluationResult' result object. Received: result=r=   zExpected a dictionary with a 'key' and dictionary of scores mappingrun IDs to numeric scores, or ComparisonEvaluationResult object, got r'   )	rE   ri   r=   listzipr   r9   r   r   )rK   r   r=   r   msgr   s         r)   r   z-DynamicComparisonRunEvaluator._format_resultsG  s:    f899 	"' 5'4$M%% 	"MM3tV;L;LMMMz!. FF
 %% 	"F"" $
u/%+/ /  S//!		-  "M<V<    	 	 	!! !  		s   B( (
C
2CC
r   )rm   r   ro   r   r   )r   r   rT   rU   rC   ri   r   )r   r   rC   r   )r   r   r=   r   r   r   rC   ri   )r"   r#   r$   r%   r   r   r   r   r   r   r   staticmethodr   r   r'   r(   r)   r   r     s       >> >J >J >J >J >J@ & & & X& AEA A A A A@ AEA A A A A: AE0 0 0 0 0"? ? ? ?    \" " " " " "r(   r   r   rC   c                     t          |           S )z.Create a comaprison evaluator from a function.)r   r   s    r)   comparison_evaluatorr   l  s     )...r(   r	   tuple[Union[Callable[[Run, Optional[Example]], _RUNNABLE_OUTPUT], Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]], Optional[Callable[..., dict]]]c                    dt          j                   d j                                        D             }d j                                        D             |r;t	          fd|D                       s4t          fd|D                       dk    rd d}t          |          t	          fd	|D                       r|d
dgk    r d fS t          j                   r=dfdd fd}t           d          rt           d          n|j
        |_
        |fS dfdd fd}t           d          rt           d          n|j
        |_
        |fS )N)rS   rT   rr   outputsreference_outputsattachmentsc                8    g | ]\  }}|j         |j        k    |S r'   kindVAR_KEYWORDr   pnameps      r)   r   z-_normalize_evaluator_func.<locals>.<listcomp>  *    XXX(%!-@W@W@W@W@Wr(   c                H    g | ]\  }}|j         t          j        j        u| S r'   defaultr   	Parameteremptyr   s      r)   r   z-_normalize_evaluator_func.<locals>.<listcomp>  9       E19G-333 	333r(   c              3  (   K   | ]}|v p|v V  d S r   r'   r   r   args_with_defaultssupported_argss     r)   r   z,_normalize_evaluator_func.<locals>.<genexpr>  D       
 
GLE^#Bu0B'B
 
 
 
 
 
r(   c                    g | ]}|v|	S r'   r'   r   ar
  s     r)   r   z-_normalize_evaluator_func.<locals>.<listcomp>  $    DDDq1C(C(C(C(C(Cr(      UInvalid evaluator function. Must have at least one argument. Supported arguments are . Please see https://docs.smith.langchain.com/evaluation/how_to_guides/evaluation/evaluate_llm_application#use-custom-evaluatorsc              3  (   K   | ]}|v p|v V  d S r   r'   r	  s     r)   r   z,_normalize_evaluator_func.<locals>.<genexpr>  D        CH>5,>#>     r(   rS   rT   r   rU   rC   tuple[list, dict, dict]c                ~   | ||r|j         ni | j        pi |r	|j        pi ni |r	|j        pi ni d}i }g }i }j                                        D ]i\  }}||v r`|j        |j        |j        fv r|                    ||                    n||         ||<   |dv rt          ||                   n||         ||<   j|||fS N)rS   rT   rr   r   r   r   )rS   rT   
rr   r   r   
parametersitemsr   POSITIONAL_OR_KEYWORDPOSITIONAL_ONLYr   r   	rS   rT   arg_mapkwargsargsrx   
param_nameparamsigs	           r)   _prepare_inputsz2_normalize_evaluator_func.<locals>._prepare_inputs  "    &07?gnnR"{0b@G#O7#6#<"RBI)Q)>Br   "),)=)=)?)?  %J!W,, :!7!1*   !KK
(;<<<<181DF:.  *-??? *'**=>>>!(!4 &j1
 V]22r(   r   c                D   K    | |          \  }}} |i | d {V S r   r'   rS   rT   r!  r   rw   r%  rm   s        r)   awrapperz+_normalize_evaluator_func.<locals>.awrapper  sH       %4OC$A$A!vq!T426222222222r(   r"   c                ~   | ||r|j         ni | j        pi |r	|j        pi ni |r	|j        pi ni d}i }g }i }j                                        D ]i\  }}||v r`|j        |j        |j        fv r|                    ||                    n||         ||<   |dv rt          ||                   n||         ||<   j|||fS r  r  r  s	           r)   r%  z2_normalize_evaluator_func.<locals>._prepare_inputs  r&  r(   c                4     | |          \  }}} |i |S r   r'   r(  s        r)   wrapperz*_normalize_evaluator_func.<locals>.wrapper  s0    $3OC$A$A!vqtT,V,,,r(   )rS   r   rT   rU   rC   r  )rS   r   rT   rU   rC   r   r   	signaturer  r  r   r   r   r   r   r   r"   	rm   all_argsr   r)  r,  r%  r
  r$  r  s	   `    @@@@r)   r~   r~   v  s   N 
D
!
!CXXcn&:&:&<&<XXXH ,,..  
  o. 
 
 
 
 
PX
 
 
 
 
o. DDDDHDDDEEJJG1?G G G 	 oo      LT     `.	 
 
 Tz&t,, X	.3 3 3 3 3 3>3 3 3 3 3 3 3 4,,'j)))& 
 o..3 3 3 3 3 3>- - - - - - - 4,,&j)))% 
 _--r(   tuple[Union[Callable[[Sequence[Run], Optional[Example]], _COMPARISON_OUTPUT], Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]], Optional[Callable[..., dict]]]c                    dt          j                   d j                                        D             }d j                                        D             |r;t	          fd|D                       s4t          fd|D                       dk    rd d}t          |          t	          fd	|D                       r|d
dgk    r d fS t          j                   r=dfdd fd}t           d          rt           d          n|j
        |_
        |fS dfdd fd}t           d          rt           d          n|j
        |_
        |fS )Nr   rT   rr   r   r   c                8    g | ]\  }}|j         |j        k    |S r'   r   r   s      r)   r   z8_normalize_comparison_evaluator_func.<locals>.<listcomp>  r  r(   c                H    g | ]\  }}|j         t          j        j        u| S r'   r  r   s      r)   r   z8_normalize_comparison_evaluator_func.<locals>.<listcomp>  r  r(   c              3  (   K   | ]}|v p|v V  d S r   r'   r	  s     r)   r   z7_normalize_comparison_evaluator_func.<locals>.<genexpr>  r  r(   c                    g | ]}|v|	S r'   r'   r  s     r)   r   z8_normalize_comparison_evaluator_func.<locals>.<listcomp>  r  r(   r  r  r  c              3  (   K   | ]}|v p|v V  d S r   r'   r	  s     r)   r   z7_normalize_comparison_evaluator_func.<locals>.<genexpr>   r  r(   r   rT   r   rU   rC   r  c                l   | ||r|j         ni d | D             |r	|j        pi ni d}i }g }i }j                                        D ]i\  }}||v r`|j        |j        |j        fv r|                    ||                    n||         ||<   |dv rt          ||                   n||         ||<   j|||fS )Nc                     g | ]}|j         pi S r'   r   r   rS   s     r)   r   zQ_normalize_comparison_evaluator_func.<locals>._prepare_inputs.<locals>.<listcomp>1      BBBc 1rBBBr(   r3  r   rT   	rr   r   r  r  r   r  r  r   r   	r   rT   r  r   r!  rx   r"  r#  r$  s	           r)   r%  z=_normalize_comparison_evaluator_func.<locals>._prepare_inputs*      !&07?gnnRBBTBBBBI)Q)>Br   "),)=)=)?)?  %J!W,, :!7!1*   !KK
(;<<<<181DF:.  *-@@@ *'**=>>>!(!4 &j1
 V]22r(   r   c                D   K    | |          \  }}} |i | d {V S r   r'   r   rT   r!  r   rw   r%  rm   s        r)   r)  z6_normalize_comparison_evaluator_func.<locals>.awrapperH  sH       %4OD'$B$B!vq!T426222222222r(   r"   c                l   | ||r|j         ni d | D             |r	|j        pi ni d}i }g }i }j                                        D ]i\  }}||v r`|j        |j        |j        fv r|                    ||                    n||         ||<   |dv rt          ||                   n||         ||<   j|||fS )Nc                     g | ]}|j         pi S r'   r;  r<  s     r)   r   zQ_normalize_comparison_evaluator_func.<locals>._prepare_inputs.<locals>.<listcomp>^  r=  r(   r3  r>  r?  r@  s	           r)   r%  z=_normalize_comparison_evaluator_func.<locals>._prepare_inputsW  rA  r(   c                4     | |          \  }}} |i |S r   r'   rC  s        r)   r,  z5_normalize_comparison_evaluator_func.<locals>.wrapperu  s2     %4OD'$B$B!vqtT,V,,,r(   )r   r   rT   rU   rC   r  )r   r   rT   rU   rC   r   r-  r/  s	   `    @@@@r)   r   r      s    SN

D
!
!CXXcn&:&:&<&<XXXH ,,..  
  o, 
 
 
 
 
PX
 
 
 
 
o, DDDDHDDDEEJJG1?G G G 	 oo      LT     `,	 
 
 Tz&t,, X	,3 3 3 3 3 3<3 3 3 3 3 3 3 4,,'j)))& 
 _,,3 3 3 3 3 3<- - - - - - - 4,,&j)))% 
 O++r(   r   ;Union[EvaluationResults, dict, str, int, bool, float, list]Union[EvaluationResults, dict]c                   t          | t          t          t          f          rd| i} n| st	          d|            t          | t
                    r1t          d | D                       st	          d|  d          d| i} nBt          | t                    rd| i} n(t          | t                    rnt	          d|            | S )	Nr4   zdExpected a non-empty dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got c              3  @   K   | ]}t          |t                    V  d S r   )rE   r9   )r   xs     r)   r   z+_format_evaluator_result.<locals>.<genexpr>  s,      771:a&&777777r(   z8Expected a list of dicts or EvaluationResults. Received .rP   r   zZExpected a dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got )	rE   r   rG   rF   r   r   r   r   r9   )r   s    r)   r   r     s    &4,-- 
6" 
D;AD D
 
 	
 
FD	!	! 
7777777 	T6TTT   V$	FC	 	  
6"	FD	!	! 
/&,/ /
 
 	
 Mr(   SUMMARY_EVALUATOR_Tc                0    dt          j                   d j                                        D             }d j                                        D             |r;t	          fd|D                       s?t          fd|D                       dk    r d d}|r	|d	| dz  }t          |          t	          fd
|D                       r|ddgk    r S d fd}t           d          rt           d          n|j	        |_	        |S )Nr   examplesrr   r   r   c                    g | ]\  }}|S r'   r'   r   s      r)   r   z0_normalize_summary_evaluator.<locals>.<listcomp>  s    ===(%===r(   c                H    g | ]\  }}|j         t          j        j        u| S r'   r  r   s      r)   r   z0_normalize_summary_evaluator.<locals>.<listcomp>  r  r(   c              3  (   K   | ]}|v p|v V  d S r   r'   r	  s     r)   r   z/_normalize_summary_evaluator.<locals>.<genexpr>  r  r(   c                    g | ]}|v|	S r'   r'   r  s     r)   r   z0_normalize_summary_evaluator.<locals>.<listcomp>  r  r(   r  r  rL  z Received arguments c              3      K   | ]}|v V  	d S r   r'   )r   r   r  s     r)   r   z/_normalize_summary_evaluator.<locals>.<genexpr>  s(      ??U.(??????r(   r   rP  Sequence[schemas.Run]Sequence[schemas.Example]rC   rX   c                r   | |d |D             d | D             d |D             d}i }g }	j                                         D ]E\  }}||v r<|j        |j        |j        fv r|                    ||                    :||         ||<   F |i |}t          |t                    r|S t          |          S )Nc                    g | ]	}|j         
S r'   )rr   r   rT   s     r)   r   zA_normalize_summary_evaluator.<locals>.wrapper.<locals>.<listcomp>  s    BBBg7>BBBr(   c                     g | ]}|j         pi S r'   r;  r<  s     r)   r   zA_normalize_summary_evaluator.<locals>.wrapper.<locals>.<listcomp>  s    >>>#CK-2>>>r(   c                     g | ]}|j         pi S r'   r;  rZ  s     r)   r   zA_normalize_summary_evaluator.<locals>.wrapper.<locals>.<listcomp>  s    %T%T%Tgo&;%T%T%Tr(   rO  )	r  r  r   r  r  r   rE   r2   r   )
r   rP  r  r   r!  r"  r#  r   rm   r$  s
           r)   r,  z-_normalize_summary_evaluator.<locals>.wrapper  s    $BBBBB>>>>>%T%T8%T%T%T G FD%(^%9%9%;%; 	A 	A!
E((z3-&   GJ$78888-4Z-@z*T4*6**F&"233 +F333r(   r"   )r   rV  rP  rW  rC   rX   )
r   r.  r  r  r   r   r   r   r   r"   )rm   r0  r   r,  r
  r$  r  s   `   @@@r)   _normalize_summary_evaluatorr]    s   SN

D
!
!C==cn&:&:&<&<===H ,,..  
  5 
 
 
 
 
PX
 
 
 
 
5 DDDDHDDDEEJJC1?C C C 	  	65(5555Coo ????h????? &8P D D 	4 	4 	4 	4 	4 	4 	4: *1z)B)BXGD*%%%HX 	 r(   )rm   rn   )r   r   )rm   r   rC   r   )rm   r	   rC   r   )rm   r	   rC   r1  )r   rG  rC   rH  )rm   r	   rC   rM  )>r%   
__future__r   rb   r   loggingr   abcr   collections.abcr   r   	functoolsr   typingr   r	   r
   r   r   r   pydanticr   r   r   r   r   typing_extensionsr   r   r   r]   r   langsmith.schemasr   r   r   r   	getLoggerr"   rH   r   r+   r2   rO   rR   r9   r   ri   r   rl   r   r   r   r   r   r~   r   r   r   rM  r]  r'   r(   r)   <module>rh     s#   E E " " " " " "           / / / / / / / /                      T S S S S S S S S S S S S S ' ' ' ' ' ' ' ' ' ' ' '       B B B B B B B B B B B B		8	$	$' ' ' ' 'y ' ' '6 6 6 6 6Ye 6 6 6 6' ' ' ' 'y ' ' 'T" " " " "	 " " " "Y Y Y Y Y Y Y Y8 )+<dBC        ( 5t;< e5 e5 e5 e5 e5, e5 e5 e5P	% 	% 	% 	%    I I I I I I I IX/ / / /G. G. G. G.T@, @, @, @,F   8 	'+	 9: 112	4 	gk	D12 112	4		 > > > > > >r(   