
    Rǻi                         d dl mZmZmZmZmZmZmZmZm	Z	 ddl
mZmZ ddlmZ ddlmZ e	eeef   ee   eeeedf   Z G d d      Zy)	    )	AnyClassVarDictListLiteralOptionalTextIOTupleUnion   )ContextValuesJsonContext)ObjectComparer)StringFileWrapperNc                   |   e Zd ZU g dZeee      ed<   	 	 ddeee	f   de
e   de
e   deded	d
fdZd	eeeeeeeef      f   f   fdZd	efdZd	eeef   fdZd	ee   fdZd	eeed
f   fdZd	eeeeef   fdZd	eeed
f   fdZd	efdZdded	eeed   f   fdZdded	efdZddeded	efdZded	d
fdZ y
)
JSONParser)"'   “   ”STRING_DELIMITERSFjson_strjson_fdloggingjson_fd_chunk_lengthstream_stablereturnNc                     || _         |rt        ||      | _         d| _        t               | _        || _        |r g | _        | j                  | _        || _	        y d | _        || _	        y )Nr   c                       y N )argskwargss     U/opt/lhia/marcimex/agent/venv/lib/python3.12/site-packages/json_repair/json_parser.py<lambda>z%JSONParser.__init__.<locals>.<lambda>+   s        )
r   r   indexr   contextr   logger_loglogr   )selfr   r   r   r   r   s         r$   __init__zJSONParser.__init__   si     8@-g7KLDM
"} 02DKyyDH + 4DH +r&   c                    | j                         }| j                  t        | j                        k  r| j	                  d       |g}| j                  }| j                  t        | j                        k  r| j                         }|dk7  r:t        j                  |d   |      r|j                          |j                  |       | j                  |k(  r| xj                  dz  c_        | j                  }| j                  t        | j                        k  rt        |      dk(  r| j	                  d       |d   }| j                  r|| j                  fS |S )NzAThe parser returned early, checking if there's more json elements r   zDThere were no more elements, returning the element without the arrayr   )
parse_jsonr'   lenr   r+   r   is_same_objectpopappendr   r)   )r,   json
last_indexjs       r$   parsezJSONParser.parse5   s     ::DMM**HHS 6DJ**s4==11OO%7%44T"XqA
KKN::+JJ!OJ!ZZ
 **s4==11 4yA~Z Aw<<$$Kr&   c                    	 | j                         }|du ry|dk(  r%| xj                  dz  c_        | j                         S |dk(  r%| xj                  dz  c_        | j                         S | j                  j
                  t        j                  k(  r|dk(  r| j                  d       y| j                  j                  s.|| j                  v s|j                         r| j                         S | j                  j                  s*|j                         s
|dk(  s|d	k(  r| j                         S |d
v r| j                         S | xj                  dz  c_        V)NFr/   {r   [}zCAt the end of an object we found a key with missing value, skipping-.#/)get_char_atr'   parse_objectparse_arrayr(   currentr   OBJECT_VALUEr+   emptyr   isalphaparse_stringisdigitparse_numberparse_comment)r,   chars     r$   r1   zJSONParser.parse_jsonT   s)    ##%Du}

a
((**

a
'')) %%)C)CCPSY \\''...$,,.((**\\''$#+((**#))++ 

a
E r&   c                 *   i }| j                         xs ddk7  r| j                          | j                         xs ddk(  r&| j                  d       | xj                  dz  c_        | j                  j                  t        j                         | j                  }d}| j                         r| j                  }| j                         dk(  r|dk(  r|rt        |j                               d   nd }|rt        ||   t              r| xj                  dz  c_        | j                         }t        |t              r||   }t        |t              r7|j                  t        |      dk(  rt        |d   t              r|d   n|       | j                          | j                         d	k(  r| xj                  dz  c_        | j                          7|| _        t        | j                               }|dk(  r| j                          |dk7  s|dk(  r| j                         d
v rn| j                         rt        j                   | j                  j                  v rb||v r^| j                  d       |dz
  | _        | j"                  d | j                  dz    dz   | j"                  | j                  dz   d  z   | _        n<| j                          | j                         xs ddk(  r| j                          | j                         xs ddk7  r| j                  d       | xj                  dz  c_        | j                  j%                          | j                  j                  t        j&                         | j)                         }| j                  j%                          |||<   | j                         xs ddv r| xj                  dz  c_        | j                          | j                         xs ddk7  r| xj                  dz  c_        |S )Nr=   r/   :z;While parsing an object we found a : before a key, ignoringr   r<   r0   r   ,rP   r=   zdWhile parsing an object we found a duplicate key, closing the object here and rolling back the indexr;   z1While parsing an object we missed a : after a key)rQ   r   r   )rC   skip_whitespaces_atr+   r'   r(   setr   
OBJECT_KEYlistkeys
isinstancerE   extendr2   strrJ   ARRAYr   resetrG   r1   )r,   objrollback_indexkeyprev_key	new_array
prev_valuevalues           r$   rD   zJSONParser.parse_object{   s   )+!(SS0
 $$&   "(bS0Q 

a
 LL]556 "ZZN C""$!%##%, 8;tCHHJ/3HJs8}d$C

a
$($4$4$6	%i6),XJ)*d; * 1 1'*9~':(29Q<(F %.aL *3	!" !446#//1S8 $

a
 446$)7DJ$++-."9,,."9t/?/?/AZ/OC ""$D ""dll&:&::sczz ,a/
 MM"2DJJN3mmDJJN$456 
  $$&   ")cc1$$&   "(bS0G JJ!OJLL LL]778OO%E LL CH  "(b_<

a
 $$&G !(SS0J 	

a

r&   c                    g }| j                   j                  t        j                         | j	                         }|r|dvr| j                          | j                         }|dk(  r| xj                  dz  c_        n<|dk(  r&| j	                  d      dk(  r| j                  d       n|j                  |       | j	                         }|r]|dk7  rX|j                         s|d	k(  rC| xj                  dz  c_        | j	                         }|r|dk7  r|j                         r=|d	k(  rC|r|dvr|r|dk7  r| j                  d
       | xj                  dz  c_        | j                   j                          |S )N)]r=   r/   r   z...r0   r?   z8While parsing an array, found a stray '...'; ignoring itre   rQ   z;While parsing an array we missed the closing ], ignoring it)r(   rT   r   r[   rC   rS   r1   r'   r+   r5   isspacer\   )r,   arrrN   rc   s       r$   rE   zJSONParser.parse_array   s@   ,,-!t:-$$&OO%E {

a
%D$4$4R$8C$?N 

5! ##%D43;DLLNdck

a
'') 43;DLLNdck! t:-* DCKHHM 	

a

r&   c                     d}d}dx}} j                         }|dv r j                         S |rd| j                  vrV|j                         sF xj                  dz  c_         j                         }|r| j                  vr|j                         sF|sy|dk(  rdx}}n}|dk(  rd}d}ns|j                         rc|j                         d	v r> j                  j                  t        j                  k7  r j                         }|dk7  r|S  j                  d
       d}|s xj                  dz  c_         j                          j                  v r j                         |k(  rk j                  j                  t        j                  k(  r* j                  d      dk(  r xj                  dz  c_        y j                  d      |k(  r j                  d       y j                  |d      } j                  |      }|rD j                  |dz         xs d|k(  r) j                  d       d} xj                  dz  c_        n j                  dd      } j                  |      }|g  j                  ddv r' j                  d        xj                  dz  c_        y|dvr_ j                  d        xj                  dz  c_        n8 j                  |d      } j                  |      }|s j                  d       yd}	 j                         }d}
|	r ||k7  r|rO j                  j                  t        j                  k(  r(|dk(  s|j                         r j                  d       n|s j                  s j                  j                  t        j                   k(  rm|dv rhd} j                  |d      } j                  |      }|r|dz  } j                  |d      } j                  |      }|r|dv rd}n j                  ||      }|r j                  ||      } j                  |      }|sd}n j                  |dz   d      } j                  |      }|r|dk7  rd}n j                  dd      } j                  |      }|rn| j                  dd      } j                  d|      }||z
  dkD  rd}n1 j                  |      r t#        |	      D ]  }|dk(  rd} n	|dk(  s n |r j                  d       n|s j                  sO|dk(  rJt        j$                   j                  j                  v r$ j                  |      } j                  |      sn|	|z  }	 xj                  dz  c_         j                         } j                  r|s|	d   dk(  r|	d d }	|rm|	d   dk(  re j                  d       ||dd d!d"dfv rJ|	d d }	d#d$d%d&d'}|	|j'                  ||      xs |z  }	 xj                  dz  c_         j                         }|dk(  r|s͉ j                  j                  t        j                  k(  r j                  |d      } j                  |      }|rm|dz  } j                  ||      } j                  |      }|rU|dz  } j                  |d      } j                  |      }|r*|dv r& j                  d(       n3 j                  d(       n ||k(  r|r< j                  d      |k(  r( j                  d)        xj                  dz  c_        n|rΉ j                  j                  t        j                   k(  rd} j                  |      }|r%|||fvr|dz  } j                  |      }|r|||fvr|rm|dz  } j                  |d      } j                  |      }|rA|dk(  r; xj                  dz  c_         j                         } j                  d*       n|
r7d}
|	t)        |      z  }	 xj                  dz  c_         j                         }nd} j                  |      }d}|r|||fvr|r|j+                         rd}t        j                   j                  j                  v r|d+v st        j                    j                  j                  v r|dk(  sXt        j$                   j                  j                  v r|d,v s.|r- j                  j                  t        j                   k(  r|d-k(  rn|dz  } j                  |      }|r|||fvr|d-k(  rȉ j                  j                  t        j                   k(  r|dz  } j                  ||      } j                  |      }|dz  } j                  |d      } j                  |      }|dk(  rH j                  d.       |
 }
|	t)        |      z  }	 xj                  dz  c_         j                         }n||k(  r j                  |dz
        dk7  rt-         fd/t/        d|      D              rnʉ j                  j                  t        j                   k(  r j                  ||dz         }|dz  } j                  |      }|rD|dk7  r?|dv s||k(  r j                  |dz
        dk7  rn|dz  } j                  |      }|r|dk7  r?|dk7  r" j                  d0       |
 }
|	t)        |      z  }	 xj                  dz  c_         j                         }nډ j                  j                  t        j$                  k(  rH j                  d1       |
 }
|	t)        |      z  }	 xj                  dz  c_         j                         }nk j                  j                  t        j                  k(  rD j                  d2       |	t)        |      z  }	 xj                  dz  c_         j                         }|r||k7  r|rm|rk j                  j                  t        j                  k(  rD|j                         r4 j                  d3        j                           j                         d4vry||k7  r. j                  s7 j                  d5       |	j1                         }	n xj                  dz  c_         j                  s|s
|	r|	d   d$k(  r|	j1                         }	|	S )6NFr   r@   r   r/   r   r   r   )tfnz=While parsing a string, we found a literal instead of a quoteTrP   zTWhile parsing a string, we found a doubled quote and then a quote again, ignoring it	characteridxz?While parsing a string, we found a valid starting doubled quote)rn   move_main_indexr;   r<   z_While parsing a string, we found a doubled quote but also another quote afterwards, ignoring it)rQ   re   r=   zYWhile parsing a string, we found a doubled quote but it was a mistake, removing one quotezJWhile parsing a string, we found a quote but it was a mistake, ignoring itzdWhile parsing a string missing the left delimiter in object key context, we found a :, stopping here)rQ   r=   r=   zWhile parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping herere   r0   \z-Found a stray escape sequence, normalizing itri   rk   rb	
)ri   rk   rq   rr   zeWhile parsing a string missing the right delimiter in object key context, we found a :, stopping herez=While parsing a string, we found a doubled quote, ignoring itzIn a string with missing quotes and object value context, I found a delimeter but it turns out it was the beginning on the next key. Stopping here.rR   )re   rQ   rQ   zWhile parsing a string, we misplaced a quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring itc              3      K   | ]=  }j                  |      r*t        j                  |            j                          ? y wr    )rC   rZ   rf   ).0r8   r,   s     r$   	<genexpr>z*JSONParser.parse_string.<locals>.<genexpr>r  s@       !#//2   0 0 34<<>s   AAz|While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring itzWhile parsing a string in Array context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring itzWhile parsing a string in Object Key context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring itzWhile parsing a string, handling an extreme corner case in which the LLM added a comment instead of valid string, invalidate the string and return an empty value)rP   rQ   z=While parsing a string, we missed the closing quote, ignoring)rC   rM   r   isalnumr'   lowerr(   rF   r   rU   parse_boolean_or_nullr+   skip_to_characterrS   rf   r   rG   reversedr[   getrZ   rI   allrangerstrip)r,   missing_quotesdoubled_quoteslstring_delimiterrstring_delimiterrN   rc   inext_c
string_accunmatched_delimiterrstring_delimiter_missingr8   cescape_seqscheck_comma_in_object_values   `               r$   rJ   zJSONParser.parse_string  sB    033-!:%%''t4#9#99$,,.JJ!OJ##%D t4#9#99$,,.  3;477 1U] % %\\^ 

/LL((M,D,DD224B; LHHO "NJJ!OJ !7!77!%66 LL((M,D,DD((+s2JJ!OJ##A&*;;HHn **5FA*N))!, t//A6<"ARRHHY &*NJJ!OJ 00Q0NA!--a0F!D4#9#9!D3!D!DD} 

a
!6w 

a
 **5FA*N))!,HHd  
 !#t00LL((M,D,DDS[DLLNz t'9'9LL((M,F,FF -1)**5FA*N))!,FA 00Q0NA!--a0F!Vz%9491
 !22=NTU2V) $ 6 6*; !7 !A "&!1!1!!4%8=5 !% 8 8$%E5 !9 !A &*%5%5a%8F%&C-<A 9
 ..!.DA!--a0F !44E4R !22Sa2Hq519 9>5!--a0%-j%9 *#$8@E$=$)%&#X$)* -HH C t'9'9CK!''4<<+?+?? **+<=''*$JJJ!OJ##%D!!$:b>T3I'_

2$.HI-sCc4HH!+CRJ(,4d"NK+//$"="EEJJJ!OJ++-D &LL((M,D,DD **5FA*N))!,FA..9JPQ.RA!--a0FQ 44E4R!%!1!1!!4!f
&: HH !H " HH ((!d&6&6q&9=N&NHHW JJ!OJ",,0J0JJ A!--a0F V))4 & Q!%!1!1!!4 !V))4 & Q 44E4R!%!1!1!!4!fm JJ!OJ#'#3#3#5D HH !v "(*/'#d)+JJJ!OJ++-D A!--a0F26/ V))4 & 76>>;K:?7 !. 8 8DLL<P<P P$*j$8 !. : :dll>R>R R$*cM !. 3 3t||7K7K K$*j$8 !<$(LL$8$8M<V<V$V$*cM "Q!%!1!1!!4= !V))4 &B # LL00M4N4NNQ 22=NTU2V!%!1!1!!4Q 44E4R!%!1!1!!4!S= HH !L 7J2I/&#d)3J JJ!OJ#'#3#3#5D"338H8HQ8OSW8W  %*1a[ 
 "<<//=3M3MM !% 6 6*;Q !7 !A FA%)%5%5a%8F"(Vs]#)_#<$*.?$?(,(8(8Q(?4(G$) !Q)-)9)9!)< #)Vs]  &} $ %c!" ;N6M 3 *c$i 7
 $

a
'+'7'7'9!\\11]5H5HH !HH !x 7J2I/&#d)3J JJ!OJ#'#3#3#5D!\\11]5M5MM HH !} '#d)3J JJ!OJ#'#3#3#5DQ	 t00T	 $$(@(@@HH t $$&!3 $$%%S (..0
JJ!OJ!!zjn.D $**,Jr&   c                    d}| j                         }| j                  j                  t        j                  k(  }t        d      }|rD||v r@|r|dk7  r9||z  }| xj                  dz  c_        | j                         }|r||v r	|s3|dk7  r9|r"|d   dv r|d d }| xj                  dz  c_        nP| j                         xs dj                         r.| xj                  t        |      z  c_        | j                         S 	 d|v rt        |      S d|v sd|v sd	|v rt        |      S |d
k(  r| j                         S t        |      S # t        $ r |cY S w xY w)Nr/   z0123456789-.eE/,rQ   r   r0   z-eE/,r?   eEr>   )rC   r(   rF   r   r[   rT   r'   rI   r2   rJ   rZ   floatr1   int
ValueError)r,   
number_strrN   is_arrayNUMBER_CHARSs        r$   rL   zJSONParser.parse_number  sT   
!<<''=+>+>>-.t|+X$JJJ!OJ##%D t|+X *R.G3#CRJJJ!OJ &B//1JJ#j/)J$$&&	j :&j C:$5
9JZ((s"((:& 		s$    E E &E ;
E EEc                    | j                   }| j                         xs dj                         }|dk(  rd}n|dk(  rd}n|dk(  rd}rd}|rw|t        |d         k  rf||d   |   k(  r[|d	z  }| xj                   d	z  c_         | j                         xs dj                         }|r|t        |d         k  r||d   |   k(  r[|t        |d         k(  r|d	   S || _         y)
Nr/   ri   )trueTrj   )falseFrk   )nullNr   r   )r'   rC   r{   r2   )r,   starting_indexrN   rc   r   s        r$   r|   z JSONParser.parse_boolean_or_null  s      "(b//13;"ES[$ES["EA1s58},q!1DQ

a
((*0b779 1s58},q!1D CaM!Qx $
r&   c                    | j                         }ddg}t        j                  | j                  j                  v r|j	                  d       t        j
                  | j                  j                  v r|j	                  d       t        j                  | j                  j                  v r|j	                  d       |dk(  rNd}|r5||vr1||z  }| xj                  dz  c_        | j                         }|r||vr1| j                  d	|        y|d
k(  r"| j                  d      }|d
k(  rsd}| xj                  dz  c_        | j                         }|r5||vr1||z  }| xj                  dz  c_        | j                         }|r||vr1| j                  d	|        y|dk(  r~d}| xj                  dz  c_        	 | j                         }|s| j                  d       n-||z  }| xj                  dz  c_        |j                  d      rnQ| j                  d|        y| xj                  dz  c_        y| xj                  dz  c_        y)a  
        Parse code-like comments:

        - "# comment": A line comment that continues until a newline.
        - "// comment": A line comment that continues until a newline.
        - "/* comment */": A block comment that continues until the closing delimiter "*/".

        The comment is skipped over and an empty string is returned so that comments do not interfere
        with the actual JSON elements.
        rt   ru   re   r=   rP   rA   r/   r   zFound line comment: rB   z//   *z/*zJReached end-of-string while parsing block comment; unclosed block comment.z*/zFound block comment: )
rC   r   r[   r(   r5   rG   rU   r'   r+   endswith)r,   rN   termination_characterscomment	next_chars        r$   rM   zJSONParser.parse_comment  s0    !"&$,,"6"66"))#.%%)=)=="))#.##t||';';;"))#.3;G4'==4

a
'') 4'== HH+G956 S[((+IC

a
'')t+AAtOGJJ!OJ++-D t+AA /y9:c!

a
++-Dh tOGJJ!OJ''-  0	:; 

a
 JJ!OJr&   countc                 Z    	 | j                   | j                  |z      S # t        $ r Y yw xY w)NF)r   r'   
IndexError)r,   r   s     r$   rC   zJSONParser.get_char_atD  s1    	==e!344 		s    	**rn   c                 :   	 | j                   | j                  |z      }|j                         rK|r| xj                  dz  c_        n|dz  }	 | j                   | j                  |z      }|j                         rK|S # t        $ r |cY S w xY w# t        $ r |cY S w xY w)zn
        This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
        r   )r   r'   r   rf   )r,   rn   ro   rN   s       r$   rS   zJSONParser.skip_whitespaces_atK  s    	==c!12D lln

a
q}}TZZ#%56 lln 
  	J	  
s#   A; B ;B	B	BBrm   c                 r   	 | j                   | j                  |z      }||k7  r(|dz  }	 | j                   | j                  |z      }||k7  r(| j                  |z   dkD  r8| j                   | j                  |z   dz
     dk(  r| j                  ||dz         S |S # t        $ r |cY S w xY w# t        $ r |cY S w xY w)zs
        This function quickly iterates to find a character, syntactic sugar to make the code more concise
        r   r   rp   rl   )r   r'   r   r}   )r,   rm   rn   rN   s       r$   r}   zJSONParser.skip_to_character^  s    	==c!12D i1HC}}TZZ#%56 i ::aDMM$**s2BQ2F$G4$O))I37)KK
  	J	  
s"   B B( B%$B%(B65B6textc                     d}t        | j                  |z
  d      }t        | j                  |z   t        | j                              }| j                  || }| j
                  j                  ||d       y )N
   r   )r   r(   )maxr'   minr2   r   r)   r5   )r,   r   windowstartendr(   s         r$   r*   zJSONParser._logq  sg    f,a0tzzF*C,>?}}U3/"	
r&   )r   F)r   )r   T)!__name__
__module____qualname__r   r   rV   rZ   __annotations__r   r   r   r	   boolr   r-   JSONReturnTyper
   r   r   r9   r1   rD   rE   rJ   r   rL   r|   rM   r   rC   rS   r}   r*   r!   r&   r$   r   r   
   s   -ExS	*E %&#%+../%+ &!%+ $	%+
 "%+ %+ 
%+N	~u^T$sCx.5I%IJJ	K>% 	% Njd3#67 jX#T.1 #JxeCtO4 xteE3^$CD >uT3_'= 2Ds DL U33F-G s  &3 S  &

 

 

r&   r   )typingr   r   r   r   r   r   r	   r
   r   json_contextr   r   object_comparerr   string_file_wrapperr   rZ   r   r   r   r   r   r!   r&   r$   <module>r      sH    U U U 4 + 2tCH~tCy#uc4MNq
 q
r&   