
    Zǻi$                       d dl mZ d dlZd dlZd dlmZmZmZmZ d dl	Z	d dl
mZmZmZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZm Z m!Z!m"Z"m#Z#m$Z$  ejJ                  e&      Z'ddZ( G d de      Z)y)    )annotationsN)AnyCallableDictOptional)CypherSyntaxErrorDriverError
Neo4jError)ValidationError)RetrieverInitializationErrorSchemaFetchErrorSearchValidationErrorText2CypherRetrievalError)Text2CypherTemplate)LLMInterface)	Retriever)
get_schema)LLMModelNeo4jDriverModelNeo4jSchemaModelRawSearchResultRetrieverResultItemText2CypherRetrieverModelText2CypherSearchModelc                    d}t        j                  || t         j                        }|r|d   n| }t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }|S )a  Extract and format Cypher query from text, handling code blocks and special characters.

    This function performs two main operations:
    1. Extracts Cypher code from within triple backticks (```), if present
    2. Automatically adds backtick quotes around multi-word identifiers:
       - Node labels (e.g., ":Data Science" becomes ":`Data Science`")
       - Property keys (e.g., "first name:" becomes "`first name`:")
       - Relationship types (e.g., "[:WORKS WITH]" becomes "[:`WORKS WITH`]")

    Args:
        text (str): Raw text that may contain Cypher code, either within triple
                   backticks or as plain text.

    Returns:
        str: Properly formatted Cypher query with correct backtick quoting.
    z```(.*?)```r   zB:\s*(?!`\s*)(\s*)([a-zA-Z0-9_]+(?:\s+[a-zA-Z0-9_]+)+)(?!\s*`)(\s*)z:`\2`z=([,{]\s*)(?!`)([a-zA-Z0-9_]+(?:\s+[a-zA-Z0-9_]+)+)(?!`)(\s*:)z\1`\2`\3zV(\[\s*[a-zA-Z0-9_]*\s*:\s*)(?!`)([a-zA-Z0-9_]+(?:\s+[a-zA-Z0-9_]+)+)(?!`)(\s*(?:\]|-)))refindallDOTALLsub)textpatternmatchescypher_querys       c/opt/lhia/marcimex/agent/venv/lib/python3.12/site-packages/neo4j_graphrag/retrievers/text2cypher.pyextract_cypherr%   0   s}    $ Gjj$		2G!(71:dL66ML 66HL 66aL
     c                  d     e Zd ZdZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 d	 	 	 	 	 ddZ xZS )Text2CypherRetrievera  
    Allows for the retrieval of records from a Neo4j database using natural language.
    Converts a user's natural language query to a Cypher query using an LLM,
    then retrieves records from a Neo4j database using the generated Cypher query.

    Args:
        driver (neo4j.Driver): The Neo4j Python driver.
        llm (neo4j_graphrag.generation.llm.LLMInterface): LLM object to generate the Cypher query.
        neo4j_schema (Optional[str]): Neo4j schema used to generate the Cypher query.
        examples (Optional[list[str], optional): Optional user input/query pairs for the LLM to use as examples.
        custom_prompt (Optional[str]): Optional custom prompt to use instead of auto generated prompt. Will include the neo4j_schema for schema and examples for examples prompt parameters, if they are provided.

    Raises:
        RetrieverInitializationError: If validation of the input arguments fail.
    c           	        	 t        |      }t        |      }	|rt        |      nd }
t        ||	|
||||      }t        | !  |j                  j                  |j                         |j                  j                  | _        |j                  | _        |j                  | _        |j                   | _        |j                   rJ|j"                  r4|j"                  j$                  r|j"                  j$                  }|| _        y d}|| _        y |j"                  r4|j"                  j$                  r|j"                  j$                  }|| _        y 	 t'        |j                  j                        }|| _        y # t        $ r}t        |j                               |d }~ww xY w# t(        t*        f$ r*}t-        |dt/        |            }t1        d|       |d }~ww xY w)N)driver)llm)neo4j_schema)driver_model	llm_modelneo4j_schema_modelexamplesresult_formattercustom_promptneo4j_database messagez1Failed to fetch schema for Text2CypherRetriever: )r   r   r   r   r   r   errorssuper__init__r-   r*   r3   r.   r+   r0   r1   r2   r/   r,   r   r
   r	   getattrstrr   )selfr*   r+   r,   r0   r1   r2   r3   r-   r.   r/   validated_dataeerror_message	__class__s                 r$   r8   zText2CypherRetriever.__init__k   s   	B+6:L S)I?K l;QU  7)##5!!1+-N 	''..0M0M	
 "++//&// . ? ?+99''11"55BB-@@MM" )  " ) 11"55BB-@@MM )#-n.I.I.P.P#QL )A  	B.qxxz:A	B6 #K0 $+Ay#a&$AM*KM?[s/   :E7 F" 7	F FF"G1%GGc                   	 t        |      }t	        | j
                        }|^|j                  dd      xs) | j                  rdj                  | j                        nd}|j                  dd      xs | j                  }n?| j                  rdj                  | j                        nd}| j                  }t               } |j                  d|||j                  d|}t        j                  d	|       	 | j                  j!                  |      }	t#        |	j$                        }
t        j                  d
|
       | j&                  j)                  |
| j*                  t,        j.                  j0                        \  }}}t9        |d|
i      S # t        $ r}t        |j                               |d}~ww xY w# t2        $ r}t5        d|j6                         |d}~ww xY w)aZ  Converts query_text to a Cypher query using an LLM.
           Retrieve records from a Neo4j database using the generated Cypher query.

        Args:
            query_text (str): The natural language query used to search the Neo4j database.
            prompt_params (Dict[str, Any]): additional values to inject into the custom prompt, if it is provided. If the schema or examples parameter is specified, it will overwrite the corresponding value passed during initialization. Example: {'schema': 'this is the graph schema'}

        Raises:
            SearchValidationError: If validation of the input arguments fail.
            Text2CypherRetrievalError: If the LLM fails to generate a correct Cypher query.

        Returns:
            RawSearchResult: The results of the search query as a list of neo4j.Record and an optional metadata dict
        )
query_textN)templater0   
r4   schema)rD   r0   rA   zText2CypherRetriever prompt: %sz%Text2CypherRetriever Cypher query: %s)query_	database_routing_zFailed to get search result: cypher)recordsmetadata )r   r   r   r6   r   r2   popr0   joinr,   dictformatrA   loggerdebugr+   invoker%   contentr*   execute_queryr3   neo4jRoutingControlREADr   r   r5   r   )r;   rA   prompt_paramsr<   r=   prompt_templateexamples_to_useschema_to_useprompt
llm_result	t2c_queryrI   _s                r$   get_search_resultsz'Text2CypherRetriever.get_search_results   s   "	;3zJN .t7I7IJ$+//
DA ,0MM		$--(r  *--h=RARARM:>--dii6RO --M FM''' 
 $%00
 	
 	6?	0J&z'9'9:ILL@)L KK55 ----22 6 MGQ )
 	
O  	;'
3:	;D ! 	+/		{;	s0   F :B
F> 	F;F66F;>	G%G  G%)NNNNN)r*   zneo4j.Driverr+   r   r,   Optional[str]r0   zOptional[list[str]]r1   z7Optional[Callable[[neo4j.Record], RetrieverResultItem]]r2   ra   r3   ra   returnNone)N)rA   r:   rX   zOptional[Dict[str, Any]]rb   r   )__name__
__module____qualname____doc__r8   r`   __classcell__)r?   s   @r$   r(   r(   Z   s    ( '+(, '+(,;);) ;) $	;)
 &;)
;) %;) &;) 
;)| JN?
?
.F?
	?
r&   r(   )r    r:   rb   r:   )*
__future__r   loggingr   typingr   r   r   r   rU   neo4j.exceptionsr   r	   r
   pydanticr   neo4j_graphrag.exceptionsr   r   r   r   !neo4j_graphrag.generation.promptsr   neo4j_graphrag.llmr   neo4j_graphrag.retrievers.baser   neo4j_graphrag.schemar   neo4j_graphrag.typesr   r   r   r   r   r   r   	getLoggerrd   rP   r%   r(   rK   r&   r$   <module>ru      sr    #  	 0 0  G G $  B + 4 ,   
		8	$'TM
9 M
r&   