
    
iv6                       U d Z ddlmZ ddlZddlZddlZddlZddlmZm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ ed	         Z	  G d
 de          Z G d de          Zeegee         f         Z	 d3dZd3dZd3dZd3dZd3dZeeeeedZded<   	 dZ dZ!d4dZ"d5d Z#d!Z$d!Z%d5d"Z&d5d#Z'd6d&Z(d7d+Z) ed,-           G d. d/                      Z* ed,-           G d0 d1                      Z+g d2Z,dS )8z5Shared redaction utilities for middleware components.    )annotationsN)CallableSequence)	dataclass)Literal)urlparse)	TypedDict)blockredactmaskhashc                  <    e Zd ZU dZded<   ded<   ded<   ded<   dS )	PIIMatchz1Represents an individual match of sensitive data.strtypevalueintstartendN)__name__
__module____qualname____doc____annotations__     C:\Users\Dell Inspiron 16\Desktop\tws\AgrotaPowerBi\back-agrota-powerbi\mcp-client-agrota\venv\Lib\site-packages\langchain/agents/middleware/_redaction.pyr   r      s:         ;;IIIJJJJJJHHHHHr   r   c                  $     e Zd ZdZd	 fdZ xZS )
PIIDetectionErrorz=Raised when configured to block on detected sensitive values.pii_typer   matchesSequence[PIIMatch]returnNonec                    || _         t          |          | _        t          |          }d| d| d}t	                                          |           dS )zInitialize the exception with match context.

        Args:
            pii_type: Name of the detected sensitive type.
            matches: All matches that were detected for that type.
        z	Detected z instance(s) of z in text contentN)r    listr!   lensuper__init__)selfr    r!   countmsg	__class__s        r   r)   zPIIDetectionError.__init__    sZ     !G}}GK%KKKKKr   )r    r   r!   r"   r#   r$   )r   r   r   r   r)   __classcell__)r-   s   @r   r   r      sC        GG         r   r   contentr   r#   list[PIIMatch]c                D    d}d t          j        ||           D             S )zDetect email addresses in content.

    Args:
        content: The text content to scan for email addresses.

    Returns:
        A list of detected email matches.
    z3\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\bc           	         g | ]K}t          d |                                |                                |                                          LS )emailr   r   r   r   r   groupr   r   .0matchs     r   
<listcomp>z detect_email.<locals>.<listcomp><   s]         	++--++--				
 	
 	
  r   refinditerr/   patterns     r   detect_emailr@   2   s9     EG  ['22   r   c           
        d}g }t          j        ||           D ]o}|                                }t          |          rJ|                    t          d||                                |                                                     p|S )zDetect credit card numbers in content using Luhn validation.

    Args:
        content: The text content to scan for credit card numbers.

    Returns:
        A list of detected credit card matches.
    z*\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\bcredit_cardr4   )r<   r=   r6   _passes_luhnappendr   r   r   )r/   r?   r!   r9   card_numbers        r   detect_credit_cardrF   G   s     <GGWg.. 
 
kkmm$$ 	NN&%++--			     Nr   c           
     F   g }d}t          j        ||           D ]}|                                }	 t          j        |           n# t
          $ r Y 8w xY w|                    t          d||                                |	                                                     |S )zDetect IPv4 or IPv6 addresses in content.

    Args:
        content: The text content to scan for IP addresses.

    Returns:
        A list of detected IP address matches.
    z!\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\bipr4   )
r<   r=   r6   	ipaddress
ip_address
ValueErrorrD   r   r   r   )r/   r!   ipv4_patternr9   ip_candidates        r   	detect_iprN   b   s     !G7L\733 
 
{{}}	 .... 	 	 	H	"kkmmIIKK	  	
 	
 	
 	
 Ns   A
AAc                D    d}d t          j        ||           D             S )zDetect MAC addresses in content.

    Args:
        content: The text content to scan for MAC addresses.

    Returns:
        A list of detected MAC address matches.
    z)\b([0-9A-Fa-f]{2}[:-]){5}[0-9A-Fa-f]{2}\bc           	         g | ]K}t          d |                                |                                |                                          LS )mac_addressr4   r5   r7   s     r   r:   z&detect_mac_address.<locals>.<listcomp>   s]         	++--++--				
 	
 	
  r   r;   r>   s     r   detect_mac_addressrR      s8     ;G  ['22   r   c           
       	 g }d}t          j        ||           D ]}|                                }t          |          }|j        dv rQ|j        rJ|                    t          d||                                |	                                                     d}t          j        ||           D ]}|                                |	                                c	t          	fd|D                       rH|                                }d|v s|                    d          rJd	| }t          |          }|j        r/d
|j        v r&|                    t          d|	                     |S )zDetect URLs in content using regex and stdlib validation.

    Args:
        content: The text content to scan for URLs.

    Returns:
        A list of detected URL matches.
    zhttps?://[^\s<>\"{}|\\^`\[\]]+>   httphttpsurlr4   zy\b(?:www\.)?[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?:/[^\s]*)?c              3     K   | ]<}|d          cxk    o|d         k     nc p|d          cxk     o|d         k    nc V  =dS )r   r   Nr   )r8   mr   r   s     r   	<genexpr>zdetect_url.<locals>.<genexpr>   sq      __RSqzU----QuX----M7c1M1M1M1MQuX1M1M1M1M______r   /zwww.zhttp://.)r<   r=   r6   r   schemenetlocrD   r   r   r   any
startswith)
r/   r!   scheme_patternr9   rV   resultbare_patterntest_urlr   r   s
           @@r   
detect_urlrd      s    !G 7N^W55  kkmm#=---&--NN++--			    	L 
 \733  [[]]EIIKK
s_____W^_____ 	kkmm #:://:&Hh''F} !5!5"!#	     Nr   )r3   rB   rH   rQ   rV   zdict[str, Detector]BUILTIN_DETECTORS      rE   boolc                   d | D             }t           t          |          cxk    rt          k    sn dS d}t          t	          |                    D ]%\  }}|}|dz  dk    r|dz  }|dk    r|dz  }||z  }&|dz  dk    S )z4Validate credit card number using the Luhn checksum.c                T    g | ]%}|                                 t          |          &S r   )isdigitr   )r8   ds     r   r:   z _passes_luhn.<locals>.<listcomp>   s+    999QYY[[9c!ff999r   Fr         	   
   )_CARD_NUMBER_MIN_DIGITSr'   _CARD_NUMBER_MAX_DIGITS	enumeratereversed)rE   digitschecksumindexdigitr   s         r   rC   rC      s    99k999F"c&kkLLLL5LLLLLuH!(6"2"233  u19>>QJEqyy
Eb=Ar   r!   c                    | }t          |t          j        d          d          D ]D}d|d                                          d}|d |d                  |z   ||d         d          z   }E|S )Nr   Tkeyreversez
[REDACTED_r   ]r   )sortedoperator
itemgetterupper)r/   r!   ra   r9   replacements        r   _apply_redact_strategyr      s    FX%8%A%A4PPP Q Q;5=#6#6#8#8;;;(%.()K7&u:PPMr      c                   | }t          |t          j        d          d          D ]}|d         }|d         }|dk    rz|                    d          }t	          |          dk    rN|d	                             d
          }t	          |          d	k    r|d          d|d          n
|d          d}n
d}n|dk    r_d                    d |D                       }	d|v rdnd|v rdnd}
|
rd|
 d|
 d|
 |	t           d           }nd|	t           d           }n|dk    r;|                    d
          }t	          |          t          k    rd|d          nd}n`|dk    r$d|v rdnd}
d|
 d|
 d|
 d|
 d|
 |dd           }n6|dk    rd}n-t	          |          t          k    rd|t           d           nd}|d |d                  |z   ||d         d          z   }|S )Nr   Trz   r   r   r3   @rm   rn   r[   r   z@****.z@****z****rB    c              3  B   K   | ]}|                                 |V  d S )N)rk   )r8   cs     r   rY   z'_apply_mask_strategy.<locals>.<genexpr>  s/      !B!Baiikk!B!!B!B!B!B!B!Br   - z************rH   z*.*.*.rQ   :z**rV   z[MASKED_URL]r   )r~   r   r   splitr'   join_UNMASKED_CHAR_NUMBER_IPV4_PARTS_NUMBER)r/   r!   ra   r9   r   r    partsdomain_partsmaskeddigits_only	separatoroctetss               r   _apply_mask_strategyr      s   FX%8%A%A4PPP (L (Lg=wKK$$E5zzQ$Qx~~c22 <((1,, Qx99|B'7999!!H+++   &&''!B!BU!B!B!BBBK"e||u"I O>9 > >) > > >"$9#9#:#:;> > 
 O5J4J4K4K(LNN[[%%F.1&kk=O.O.O*fRj***U[FF&&"e||I`Y``)``y``I```TYZ\Z]Z]T^`` F #FF u:: 555 8u33445777 
 (%.()F2VE%LNN5KKMr   c                J   | }t          |t          j        d          d          D ]{}t          j        |d                                                                                   d d         }d|d          d| d	}|d |d                  |z   ||d
         d          z   }||S )Nr   Trz   r      <r   z_hash:>r   )r~   r   r   hashlibsha256encode	hexdigest)r/   r!   ra   r9   digestr   s         r   _apply_hash_strategyr   )  s    FX%8%A%A4PPP Q Qg 5 5 7 788BBDDRaRH8%-88v888(%.()K7&u:PPMr   strategyRedactionStrategyc                    |s| S |dk    rt          | |          S |dk    rt          | |          S |dk    rt          | |          S |dk    rt          |d         d         |          d| }t	          |          )a  Apply the configured strategy to matches within content.

    Args:
        content: The content to apply strategy to.
        matches: List of detected PII matches.
        strategy: The redaction strategy to apply.

    Returns:
        The content with the strategy applied.

    Raises:
        PIIDetectionError: If the strategy is `'block'` and matches are found.
        ValueError: If the strategy is unknown.
    r   r   r   r
   r   r   zUnknown redaction strategy: )r   r   r   r   rK   )r/   r!   r   r,   s       r   apply_strategyr   2  s    &  8%gw7776#GW5556#GW5557
6 2G<<<
3
3
3C
S//r   r    detectorDetector | str | NoneDetectorc                *    |R t           vr<d  dt          t                                                      d}t          |          t                     S t	          |t
                    rt          j        |          d fd	}|S |d fd
}|S )a  Return a callable detector for the given configuration.

    Args:
        pii_type: The PII type name.
        detector: Optional custom detector or regex pattern. If `None`, a built-in detector
            for the given PII type will be used.

    Returns:
        The resolved detector.

    Raises:
        ValueError: If an unknown PII type is specified without a custom detector or regex.
    NzUnknown PII type: z. Must be one of z or provide a custom detector.r/   r   r#   r0   c                F    fd                     |           D             S )Nc           	         g | ]K}t          |                                |                                |                                           LS )r4   r5   )r8   r9   r    s     r   r:   z<resolve_detector.<locals>.regex_detector.<locals>.<listcomp>m  s^         !++--++--			    r   )r=   )r/   r?   r    s    r   regex_detectorz(resolve_detector.<locals>.regex_detectorl  s>        %--g66   r   c                2    fd |           D             S )Nc                    g | ]]}t          |                    d           |                    d|                    dd                    |d         |d                   ^S )r   r   textr   r   r   r4   )r   get)r8   rX   r    s     r   r:   zCresolve_detector.<locals>._normalizing_detector.<locals>.<listcomp>  sx     
 
 
  UU68,,eeGQUU62%6%677jeH	  
 
 
r   r   )r/   r    raw_detectors    r   _normalizing_detectorz/resolve_detector.<locals>._normalizing_detector  s:    
 
 
 
 "\'**
 
 
 	
r   r/   r   r#   r0   )re   r&   keysrK   
isinstancer   r<   compile)r    r   r,   r   r   r?   r   s   `    @@r   resolve_detectorr   S  s     ,,,aX a a"&'8'='='?'?"@"@a a a  S//! **(C   *X&&		 		 		 		 		 		 		  L	
 	
 	
 	
 	
 	
 	
 ! r   T)frozenc                  B    e Zd ZU dZded<   dZded<   dZded	<   ddZdS )RedactionRulez-Configuration for handling a single PII type.r   r    r   r   r   Nr   r   r#   ResolvedRedactionRulec                n    t          | j        | j                  }t          | j        | j        |          S )zzResolve runtime detector and return an immutable rule.

        Returns:
            The resolved redaction rule.
        )r    r   r   )r   r    r   r   r   )r*   resolved_detectors     r   resolvezRedactionRule.resolve  s<     -T]DMJJ$]]&
 
 
 	
r   )r#   r   )r   r   r   r   r   r   r   r   r   r   r   r   r     s[         77MMM"*H****&*H****
 
 
 
 
 
r   r   c                  :    e Zd ZU dZded<   ded<   ded<   ddZdS )r   z,Resolved redaction rule ready for execution.r   r    r   r   r   r   r/   r#   tuple[str, list[PIIMatch]]c                l    |                      |          }|s|g fS t          ||| j                  }||fS )zApply this rule to content, returning new content and matches.

        Args:
            content: The text content to scan and redact.

        Returns:
            A tuple of (updated content, list of detected matches).
        )r   r   r   )r*   r/   r!   updateds       r   applyzResolvedRedactionRule.apply  sE     --(( 	B; '4=AAr   N)r/   r   r#   r   )r   r   r   r   r   r   r   r   r   r   r     sQ         66MMM           r   r   )
r   r   r   r   r   rF   r@   rN   rR   rd   r   )rE   r   r#   rh   )r/   r   r!   r0   r#   r   )r/   r   r!   r0   r   r   r#   r   )r    r   r   r   r#   r   )-r   
__future__r   r   rI   r   r<   collections.abcr   r   dataclassesr   typingr   urllib.parser   typing_extensionsr	   r   r   	Exceptionr   r   r&   r   r@   rF   rN   rR   rd   re   r   rq   rr   rC   r   r   r   r   r   r   r   r   r   __all__r   r   r   <module>r      s   ; ; ; " " " " " "       				 . . . . . . . . ! ! ! ! ! !       ! ! ! ! ! ! ' ' ' ' ' '=>  B    y       	   " SE4>)* D   *   6   <   *9 9 9 9z %
%* *      9     "      + + + +\      B7! 7! 7! 7!t $
 
 
 
 
 
 
 
* $               .  r   