
    >
i                       U d dl mZ d dlZd dlmZ d dlmZ ddlmZm	Z	m
Z
mZ ddlmZmZmZmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZmZmZmZ  ej        d          Z ej                     Z!e!"                     ej#        d                     g Z$de%d<   g Z&de%d<   eD ]YZ'	  ee'          re$(                    e'           ne&(                    e'           :# e)$ r e&(                    e'           Y Vw xY we$e&z   Z*de%d<   	 	 	 	 	 	 	 	 	 d2d3d(Z+	 	 	 	 	 	 	 	 	 d2d4d+Z,	 	 	 	 	 	 	 	 	 d2d5d.Z-	 	 	 	 	 	 	 	 	 d6d7d1Z.dS )8    )annotationsN)PathLike)BinaryIO   )coherence_ratioencoding_languagesmb_encoding_languagesmerge_coherence_ratios)IANA_SUPPORTEDIANA_SUPPORTED_SIMILARTOO_BIG_SEQUENCETOO_SMALL_SEQUENCETRACE)
mess_ratio)CharsetMatchCharsetMatches)any_specified_encodingcut_sequence_chunks	iana_nameidentify_sig_or_bomis_multi_byte_encodingshould_strip_sig_or_bomcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)sz	list[str]_mb_supported_sb_supportedIANA_SUPPORTED_MB_FIRST      皙?TF皙?	sequencesbytes | bytearraystepsint
chunk_size	thresholdfloatcp_isolationlist[str] | Nonecp_exclusionpreemptive_behaviourboolexplainlanguage_thresholdenable_fallbackreturnr   c
                #   t          | t          t          f          s/t          d                    t          |                               |rJt          j        }
t                              t                     t          
                    t                     t          |           }|dk    rwt                              d           |r9t                              t                     t          
                    |
           t          t!          | dddg d          g          S |At                              t          d	d
                    |                     d |D             }ng }|At                              t          dd
                    |                     d |D             }ng }|||z  k    r't                              t          d|||           d}|}|dk    r||z  |k     rt'          ||z            }t          |           t(          k     }t          |           t*          k    }|r4t                              t          d                    |                     n5|r3t                              t          d                    |                     g }|rt-          |           nd}|6|                    |           t                              t          d|           t1                      }g }g }t1                      }t1                      }i }d}t1                      }d}d}d}d}d}d}t                      }t                      }t3          |           \  } }!| D|                    |            t                              t          dt          |!          |            |                    d           d|vr|                    d           |t4          z   D ]}"|r|"|vr
|r|"|v r|"|v r|                    |"           d}#| |"k    }$|$ot9          |"          }%|"dv r$|$s"t                              t          d|"           l|"dv r$|$s"t                              t          d|"           |"|v r"t                              t          d|"           |"|v r"t                              t          d|"           	 t;          |"          }&n9# t<          t>          f$ r% t                              t          d|"           Y &w xY w|ru|&st1          tA          |"                    }'nt1          tC          |"                    }'|'"                    |          s%t                              t          d|"|'|           |r-|&s+||k    r%t                              t          d|"||           |r%|&s#t                              t          d|"           	 |rS|&du rOtG          |%du r| dt'          d                    n#| t          |!          t'          d                    |"!           n,tG          |%du r| n| t          |!          d         |"!          }#nx# tH          tJ          f$ rd}(t          |(tJ                    s/t                              t          d"|"tG          |(                     |                    |"           Y d}(~(d}(~(ww xY wtM          |$sdnt          |!          |t'          ||z                      })|&o|#duot          |#          |k     }*|*r!t                              t          d#|"           |#V|&sStO          |#          }+|(                    |+          },|,,|,\  }-}.}/|/rt!          | |"|-|$|.|du s|"|ddfv r|#nd|$          }0|                    |0           |                    |"           t                              t          d%|"tS          |-d&z  d'(                     |"|ddfv r|-d)k     r|-dk    rmt                              d*|0j*                   |r9t                              t                     t          
                    |
           t          |0g          c S |                    |0           t          |          r|||v rd|v rd|v r|+                                }1t                              d*|1j*                   |r9t                              t                     t          
                    |
           t          |1g          c S L|                    |"           t                              t          d+|"           |	r3|"dd|d,d-fv r*t!          | |"||$g |#|$          }2|"|k    r|2}n|"dk    r|2}n|2}t'          t          |)          d.z            }3tY          |3d/          }3d}4d}5g }6g }7	 t[          | |"|)||$|%|!|&|#	  	        D ]y}8|6                    |8           |7                    t]          |8||d0u odt          |          cxk    od/k    nc                      |7d1         |k    r|4dz  }4|4|3k    s|$r|%du r nznJ# tH          $ r=}(t                              t          d2|"tG          |(                     |3}4d0}5Y d}(~(nd}(~(ww xY w|5s|r|&s	 | t'          d3          d         /                    |"d45           n\# tH          $ rO}(t                              t          d6|"tG          |(                     |                    |"           Y d}(~(Ud}(~(ww xY w|7rta          |7          t          |7          z  nd}9|9|k    s|4|3k    r|                    |"           |"tb          v r |2                    tb          |"                    |#(|&s&|3                    tO          |#          |9g df           t                              t          d7|"|4tS          |9d&z  d'(                     |	r5|"dd|d,d-fv r,|5s*t!          | |"||$g |#|$          }2|"|k    r|2}n|"dk    r|2}n|2}bt                              t          d8|"tS          |9d&z  d'(                     |&stA          |"          }:ntC          |"          }:|:rAt                              t          d9                    |"tG          |:                               g };|"dk    rR|6D ]?}8ti          |8||:rd:                    |:          nd          }<|;                    |<           @tk          |;          }=ntk          |;          }=|=r4t                              t          d;                    |=|"                     t!          | |"|9|$|=|du s|"|ddfv r|#nd|$          }>|                    |>           |#(|&s&|3                    tO          |#          |9|=d0f           |r|&s|9d<k     r|dz  }|"|ddfv r|9d)k     r|9dk    rmt                              d*|>j*                   |r9t                              t                     t          
                    |
           t          |>g          c S |                    |>           t          |          r|||v rd|v rd|v r|+                                }1t                              d*|1j*                   |r9t                              t                     t          
                    |
           t          |1g          c S |si|&sg|=rtY          d= |=D             d>          nd}?|?d?k    rBd|v r>d|v r:d0}|2                    |:           t                              t          d@|"|9|?           |so|&rm|*rk|#it          |#          |dAz  k     rS|"dBvrOd|v rKd|v rGd0}t                              t          dC|"|9t          |#          |t          |#          |z  d&z             |"| k    rnt                              dD|"           |r9t                              t                     t          
                    |
           t          ||"         g          c S t          |          dk    r|s|s|r t                              t          dE           |r6t                              dF|j*                   |                    |           n{|r||r|r|j6        |j6        k    s|0t                              dG           |                    |           n1|r/t                              dH           |                    |           |rDt                              dI|+                                j*        t          |          dz
             nt                              dJ           |r9t                              t                     t          
                    |
           |S )Kaf  
    Given a raw bytes sequence, return the best possibles charset usable to render str objects.
    If there is no results, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
    And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.

    The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
    but never take it for granted. Can improve the performance.

    You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
    purpose.

    This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
    By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
    toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
    Custom logging format and handler can be set manually.
    z3Expected object of type bytes or bytearray, got: {}r   z<Encoding detection on empty bytes, assuming utf_8 intention.utf_8g        F Nz`cp_isolation is set. use this flag for debugging purpose. limited list of encoding allowed : %s.z, c                .    g | ]}t          |d           S Fr   .0cps     C:\Users\Dell Inspiron 16\Desktop\tws\AgrotaPowerBi\back-agrota-powerbi\mcp-client-agrota\venv\Lib\site-packages\charset_normalizer/api.py
<listcomp>zfrom_bytes.<locals>.<listcomp>s   "    DDD	"e,,DDD    zacp_exclusion is set. use this flag for debugging purpose. limited list of encoding excluded : %s.c                .    g | ]}t          |d           S r5   r6   r7   s     r:   r;   zfrom_bytes.<locals>.<listcomp>~   r<   r=   z^override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.r   z>Trying to detect encoding from a tiny portion of ({}) byte(s).zIUsing lazy str decoding because the payload is quite large, ({}) byte(s).z@Detected declarative mark in sequence. Priority +1 given for %s.   zIDetected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.ascii>   utf_16utf_32z\Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.>   utf_7zREncoding %s won't be tested as-is because detection is unreliable without BOM/SIG.zY%s is deemed too similar to a code page that was already considered unsuited. Continuing!zESkipping %s: already fast-tracked from a similar successful encoding.z2Encoding %s does not provide an IncrementalDecoderzbSkipping %s: definitive match already found, this encoding targets different languages (%s vs %s).zXSkipping %s: already accumulated %d same-family results after definitive match (cap=%d).zCSkipping single-byte %s: multi-byte definitive match already found.g    A)encodingz9Code page %s does not fit given bytes sequence at ALL. %szpCode page %s is a multi byte encoding table and it appear that at least one character was encoded using n-bytes.)preemptive_declarationzM%s fast-tracked (identical decoded payload to a prior encoding, chaos=%f %%).d      )ndigitsr    z.Encoding detection: %s is most likely the one.zZ%s fast-skipped (identical decoded payload to a prior encoding that failed chaos probing).rA   rB         TzaLazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %sg     j@strict)errorsz^LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %szc%s was excluded because of initial chaos probing. Gave up %i time(s). Computed mean chaos is %f %%.z=%s passed initial chaos probing. Mean measured chaos is %f %%z&{} should target any language(s) of {},z We detected language {} using {}g{Gz?c              3      K   | ]	\  }}|V  
d S )N )r8   _vs      r:   	<genexpr>zfrom_bytes.<locals>.<genexpr>  s&      4441aQ444444r=   )defaultg      ?zyDefinitive match found: %s (chaos=%.3f, coherence=%.2f). Encodings targeting different language families will be skipped.g\(\?>	   rC   r2   rA   rB   	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sigzjMulti-byte definitive match: %s (chaos=%.3f, decoded=%d/%d=%.1f%%). Single-byte encodings will be skipped.zoEncoding detection: %s is most likely the one as we detected a BOM or SIG within the beginning of the sequence.zONothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.z7Encoding detection: %s will be used as a fallback matchz:Encoding detection: utf_8 will be used as a fallback matchz:Encoding detection: ascii will be used as a fallback matchz]Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.z=Encoding detection: Unable to determine any suitable charset.)7
isinstance	bytearraybytes	TypeErrorformattypeloggerlevel
addHandlerexplain_handlersetLevelr   lendebugremoveHandlerr   r   logjoinr$   r   r   r   appendsetr   r   addr   r   ModuleNotFoundErrorImportErrorr   r	   intersectionstrUnicodeDecodeErrorLookupErrorrangehashgetroundrD   bestmaxr   r   decodesumr   update
setdefaultr   r
   fingerprint)@r!   r#   r%   r&   r(   r*   r+   r-   r.   r/   previous_logger_levellengthis_too_small_sequenceis_too_large_sequenceprioritized_encodingsspecified_encodingtestedtested_but_hard_failuretested_but_soft_failuresoft_failure_skipsuccess_fast_trackedpayload_result_cachedefinitive_match_founddefinitive_target_languages post_definitive_sb_success_countPOST_DEFINITIVE_SB_CAPmb_definitive_match_foundfallback_asciifallback_u8fallback_specifiedresultsearly_stop_resultssig_encodingsig_payloadencoding_ianadecoded_payloadbom_or_sig_availablestrip_sig_or_bomis_multi_byte_decoderenc_languageser_multi_byte_bonuspayload_hashcachedcached_mess	cached_cdcached_passed
fast_matchprobable_resultfallback_entrymax_chunk_gave_upearly_stop_countlazy_str_hard_failure	md_chunks	md_ratioschunkmean_mess_ratiotarget_languages	cd_ratioschunk_languagescd_ratios_mergedcurrent_matchbest_coherences@                                                                   r:   
from_bytesr   9   sj   < i)U!344 
AHHY 
 
 	
  %+\/***i..F{{STTT 	3  111OO1222|IwUBPRSSTUUU

5IIl##		
 	
 	
 ED|DDD

6IIl##		
 	
 	
 ED|DDD*u$%%

l	
 	
 	
 
qyyVe^j00%((
"%i..3E"E"%i..4D"D 


LSS 	
 	
 	
 	
 
 


W^^ 	
 	
 	
 (* .BKy)))t  %$$%7888

N	
 	
 	
 uuF)+)+"%%%%(UU
 TV $),/EE -.$"# ',*.N'+K.2,..G)7)9)9 3I > >L+$$\222

W		
 	
 	
   )))+++$$W---.1HH v< v< 	M== 	M\99F""

=!!!&*%1]%B!5 "
:Q;
 ;
 0009M0JJn  
 I%%.B%JJd  
  ---JJk  
  000JJW  
 	*@*O*O!!#[1 	 	 	JJD  
 H	 " 	( J #$6}$E$E F F #$9-$H$H I I --.IJJ 

x!!/    #	)	 14JJJJJj0&   
 % 	-B 	JJU  
 	$ )>%)G)G ,u44 "+CII+..&s;'7'7#d))'CD*     #& ,u44 "	&s;'7'7'9'9:*# # # #K0 		 		 		a-- 

O!FF	   $**=999HHHH		 )?AAs;/?/?
 
 " .t+.O$$v- 	  	JJ-	   &/D& $_ 5 5L)--l;;F!8>5Y  [!-!%#,! !6 > >#0$6#I$J $J ,O "&/A" " "J" NN:...(,,];;;JJg%kC/;;;	   &*<gw)OOO'#--&#--"LL P * 3    ' G & 4 4_ E E E &0E F F F#1:,#?#????*11*=== .//A/7;MQW;W;W#v--#v--8J8O8O8Q8QL+4   # C"00AAA"OO,ABBB-.?@@@@@ ,22=AAAJJt%   ' 9=*  = , , *6%)%0+3E* * * ),>>>1?..*g55-;NN*8K!$SWWq[!1!1 1155 ! %!		'	),$ %
 
     '''  !4GA\1B1B,G,G,G,Ga,G,G,G,G    R=I--$)$$(999( :-=-F-FE
	) 
	) 
	) JJsA	    1$(!!!!!!
	) &	%	 *	

#d))++&--mH-MMMM%   

t!FF	   (..}=== EN!VY#i..!@!@SVi''+;?P+P+P#**=999 666!(()?)NOOO *3H*$//))OR+G   JJ0 o+Q777    1!W&8(HMN N-N ".!(#+=" " " !$666)7&&"g--%3NN"0K

K/C'333		
 	
 	
 % 	D*<]*K*K4]CC 	JJ8??!3'7#8#8    	 G## # 2 2"1&2BLCHH-...# #   11115i@@5i@@ 	JJ299$m    %  *U22$);Wg(NNN  
 #5
 
 
" 	}%%% &/D& ++_%% "2D9   #	2)	2  $&&,1, 0'7CCC#%% #%%D!*    ;((999OO$9:::%}o66666%%m444 "##	5#+/AV/K/K6!!6!!05577OLL@(    7$$_555 5666!?"344444 & 	.C 	 $44#3444cBBBB 
 $$F):):w&?P?P)-&+223CDDD

 P!#"   *	%	 !	  +O$$v}44
  6!!6!!(,%JJ|O$$O$$v-3   L((LL1  
  7$$_555 5666!7=#9":;;;;; ) 7||q 	. 	,> 	JJa  
  	+LLI"+   NN-....	++3 4 # 4  +~/III'LLUVVVNN;'''' 	+LLUVVVNN>*** VkLLNN#LL1	
 	
 	
 	
 	TUUU /_----...Ns^    S2TTBY[*A[		[Bi""
j),3j$$j)3,k  
l9*Al44l9fpr   c
                V    t          |                                 |||||||||	
  
        S )z
    Same thing than the function from_bytes but using a file pointer that is already ready.
    Will not close the file pointer.
    )r   read)
r   r#   r%   r&   r(   r*   r+   r-   r.   r/   s
             r:   from_fpr   R  s<      
		  r=   pathstr | bytes | PathLikec
                    t          | d          5 }
t          |
|||||||||	
  
        cddd           S # 1 swxY w Y   dS )z
    Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
    Can raise IOError.
    rbN)openr   )r   r#   r%   r&   r(   r*   r+   r-   r.   r/   r   s              r:   	from_pathr   p  s      
dD		 
R 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   6::fp_or_path_or_payload!PathLike | str | BinaryIO | bytesc
                   t          | t          t          f          rt          | |||||||||	
  
        }
nOt          | t          t
          f          rt          | |||||||||	
  
        }
nt          | |||||||||	
  
        }
|
 S )a)  
    Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
    Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
    are disabled to be stricter around ASCII-compatible but unlikely to be a string.
    )	r#   r%   r&   r(   r*   r+   r-   r.   r/   )rZ   rp   r   r   r\   r[   r   r   )r   r#   r%   r&   r(   r*   r+   r-   r.   r/   guessess              r:   	is_binaryr     s    " '#x99 ,
!!%%!51+
 
 
 
	

 
 
 !!%%!51+
 
 
 !!%%!51+
 
 
 ;r=   )	r   r   r   NNTFr    T)r!   r"   r#   r$   r%   r$   r&   r'   r(   r)   r*   r)   r+   r,   r-   r,   r.   r'   r/   r,   r0   r   )r   r   r#   r$   r%   r$   r&   r'   r(   r)   r*   r)   r+   r,   r-   r,   r.   r'   r/   r,   r0   r   )r   r   r#   r$   r%   r$   r&   r'   r(   r)   r*   r)   r+   r,   r-   r,   r.   r'   r/   r,   r0   r   )	r   r   r   NNTFr    F)r   r   r#   r$   r%   r$   r&   r'   r(   r)   r*   r)   r+   r,   r-   r,   r.   r'   r/   r,   r0   r,   )/
__future__r   loggingosr   typingr   cdr   r   r	   r
   constantr   r   r   r   r   mdr   modelsr   r   utilsr   r   r   r   r   r   	getLoggerr`   StreamHandlerrc   setFormatter	Formatterr   __annotations__r   _supported_encrj   rn   r   r   r   r   r   rP   r=   r:   <module>r      sB   " " " " " " "                                            0 0 0 0 0 0 0 0                
	/	0	0'''))   GABB           $ - -N-!!.11 	1  0000  000 - - -^,,,,,- &3]%B  B B B B
 %)%)!% # V V V V Vv %)%)!% #     @ %)%)!% # 
 
 
 
 
B %)%)!% #!? ? ? ? ? ? ?s   %6CC98C9