
    ZǻiF                       d dl mZ d dlZd dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZmZmZ d dlmZ d dlmZ d d	lmZmZmZmZmZmZmZmZmZm Z  d d
l!m"Z"  ejF                  e$      Z% G d de      Z& G d de      Z'y)    )annotationsN)AnyCallableOptionalUnion)ValidationError)Embedder)EmbeddingRequiredErrorRetrieverInitializationErrorSearchValidationErrorSearchQueryParseError)get_search_query)	Retriever)
EmbedderModelHybridCypherRetrieverModelHybridCypherSearchModelHybridRetrieverModelHybridSearchModelNeo4jDriverModelRawSearchResultRetrieverResultItem
SearchTypeHybridSearchRanker)prettifyc                       e Zd ZdZ	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	 fdZd
dZdddej                  df	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ xZ	S )HybridRetrievera  
    Provides retrieval method using combination of vector search over embeddings and
    fulltext search.
    If an embedder is provided, it needs to have the required Embedder type.

    Example:

    .. code-block:: python

      import neo4j
      from neo4j_graphrag.retrievers import HybridRetriever

      driver = neo4j.GraphDatabase.driver(URI, auth=AUTH)

      retriever = HybridRetriever(
          driver, "vector-index-name", "fulltext-index-name", custom_embedder
      )
      retriever.search(query_text="Find me a book about Fremen", top_k=5)

    Args:
        driver (neo4j.Driver): The Neo4j Python driver.
        vector_index_name (str): Vector index name.
        fulltext_index_name (str): Fulltext index name.
        embedder (Optional[Embedder]): Embedder object to embed query text.
        return_properties (Optional[list[str]]): List of node properties to return.
        result_formatter (Optional[Callable[[neo4j.Record], RetrieverResultItem]]): Provided custom function to transform a neo4j.Record to a RetrieverResultItem.
        neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to the server's default database ("neo4j" by default) (`see reference to documentation <https://neo4j.com/docs/operations-manual/current/database-administration/#manage-databases-default>`_).

            Two variables are provided in the neo4j.Record:

            -   node: Represents the node retrieved from the vector index search.
            -   score: Denotes the similarity score.
    Nc           	     B   	 t        |      }|rt        |      nd }	t        ||||	|||      }
t        |   |
j                  j                  |
j                         |
j                  | _        |
j                  | _        |
j                  | _        |
j                  r|
j                  j                  nd | _        |
j                   | _        d | _        d | _        | j'                  | j                         y # t        $ r}t	        |j                               |d }~ww xY w)Ndriverembedder)driver_modelvector_index_namefulltext_index_nameembedder_modelreturn_propertiesresult_formatterneo4j_database)r   r   r   r   r   errorssuper__init__r"   r   r(   r#   r$   r&   r%   r!   r'   _embedding_node_property_embedding_dimension_fetch_index_infos)selfr   r#   r$   r!   r&   r'   r(   r"   r%   validated_datae	__class__s               ^/opt/lhia/marcimex/agent/venv/lib/python3.12/site-packages/neo4j_graphrag/retrievers/hybrid.pyr+   zHybridRetriever.__init__T   s   	B+6:LAI]H=tN1)"3$7-"3!1-N 	''..0M0M	
 "0!A!A#1#E#E !/!A!A ,, ))22 	
 !/ ? ?(,%$(! 6 67#  	B.qxxz:A	Bs   .C6 6	D?DDc                v    d|j                  d      i}|j                  d      }t        t        |      |      S )z
        Best effort to guess the node-to-text method. Inherited classes
        can override this method to implement custom text formatting.
        scorenode)contentmetadata)getr   str)r/   recordr8   r6   s       r3   default_record_formatterz(HybridRetriever.default_record_formatter   s@     VZZ(
 zz&!"I
 	
          c                   	 t        ||||||      }|j	                  d      }	| j
                  |	d<   | j                  |	d<   |r9|s7| j                  st        d      | j                  j                  |      }||	d<   t        t        j                  | j                  | j                  | j                  |j                   |j"                  	      \  }
}d
|	v r|	d
= t$        j'                  dt)        |	             t$        j'                  d|
       	 | j*                  j-                  |
|	| j.                  t0        j2                  j4                        \  }}}t?        |d|i      S # t        $ r}t        |j                               |d}~ww xY w# t0        j6                  j8                  $ r"}dt;        |      v rt=        d|       | d}~ww xY w)a  Get the top_k nearest neighbor embeddings for either provided query_vector or query_text.
        Both query_vector and query_text can be provided.
        If query_vector is provided, then it will be preferred over the embedded query_text
        for the vector search.

        See the following documentation for more details:

        - `Query a vector index <https://neo4j.com/docs/cypher-manual/current/indexes-for-vector-search/#indexes-vector-query>`_
        - `db.index.vector.queryNodes() <https://neo4j.com/docs/operations-manual/5/reference/procedures/#procedure_db_index_vector_queryNodes>`_
        - `db.index.fulltext.queryNodes() <https://neo4j.com/docs/operations-manual/5/reference/procedures/#procedure_db_index_fulltext_querynodes>`_

        To query by text, an embedder must be provided when the class is instantiated.

        Args:
            query_text (str): The text to get the closest neighbors of.
            query_vector (Optional[list[float]], optional): The vector embeddings to get the closest neighbors of. Defaults to None.
            top_k (int, optional): The number of neighbors to return. Defaults to 5.
            effective_search_ratio (int): Controls the candidate pool size for the vector index by multiplying top_k to balance query
                accuracy and performance. Defaults to 1.
            ranker (str, HybridSearchRanker): Type of ranker to order the results from retrieval.
            alpha (Optional[float]): Weight for the vector score when using the linear ranker.
                The fulltext index score is multiplied by (1 - alpha).
                **Required** when using the linear ranker; must be between 0 and 1.

        Raises:
            SearchValidationError: If validation of the input arguments fail.
            EmbeddingRequiredError: If no embedder is provided.

        Returns:
            RawSearchResult: The results of the search query as a list of neo4j.Record and an optional metadata dict
        )query_vector
query_texttop_keffective_search_ratiorankeralphaNTexclude_noner#   r$   )Embedding method required for text query.rA   )search_typer&   embedding_node_propertyneo4j_version_is_5_23_or_aboverE   rF   rE   %HybridRetriever Cypher parameters: %s HybridRetriever Cypher query: %s	database_routing_4org.apache.lucene.queryparser.classic.ParseException0Invalid Lucene query generated from query_text: recordsr8   ) r   r   r   r)   
model_dumpr#   r$   r!   r
   embed_queryr   r   HYBRIDr&   r,   rL   rE   rF   loggerdebugr   r   execute_queryr(   neo4jRoutingControlREAD
exceptionsClientErrorr:   r   r   )r/   rB   rA   rC   rD   rE   rF   r0   r1   
parameterssearch_query_rU   s                r3   get_search_resultsz"HybridRetriever.get_search_results   s   P
	;.)%'=N $..D.A
*.*@*@
&',0,D,D
()l==,?   ==44Z@L)5J~&*"))"44$($A$A+/+N+N!(( &&
a z!8$<hz>RS7F	 KK55----22	 6 MGQ $l3
 	
U  	;'
3:	;H ++ 	EQO+FzlS 	s0   E# AF #	F,FFG+GG)NNNN)r   neo4j.Driverr#   r:   r$   r:   r!   Optional[Embedder]r&   zOptional[list[str]]r'   7Optional[Callable[[neo4j.Record], RetrieverResultItem]]r(   Optional[str]returnNone)r;   zneo4j.Recordri   r   )rB   r:   rA   Optional[list[float]]rC   intrD   rl   rE   Union[str, HybridSearchRanker]rF   Optional[float]ri   r   )
__name__
__module____qualname____doc__r+   r<   r   NAIVErd   __classcell__r2   s   @r3   r   r   1   s     N (,15 (,)8)8 )8 !	)8
 %)8 /)8
)8 &)8 
)8V
" /3&'1C1I1I!%^
^
 ,^
 	^

 !$^
 /^
 ^
 
^
r=   r   c                       e Zd ZdZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddddej                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	dZ xZS )
HybridCypherRetrievera(  
    Provides retrieval method using combination of vector search over embeddings and
    fulltext search, augmented by a Cypher query.
    This retriever builds on HybridRetriever.
    If an embedder is provided, it needs to have the required Embedder type.

    Note: `node` is a variable from the base query that can be used in `retrieval_query` as seen in the example below.

    Example:

    .. code-block:: python

      import neo4j
      from neo4j_graphrag.retrievers import HybridCypherRetriever

      driver = neo4j.GraphDatabase.driver(URI, auth=AUTH)

      retrieval_query = "MATCH (node)-[:AUTHORED_BY]->(author:Author)" "RETURN author.name"
      retriever = HybridCypherRetriever(
          driver, "vector-index-name", "fulltext-index-name", retrieval_query, custom_embedder
      )
      retriever.search(query_text="Find me a book about Fremen", top_k=5)

    To query by text, an embedder must be provided when the class is instantiated.

    Args:
        driver (neo4j.Driver): The Neo4j Python driver.
        vector_index_name (str): Vector index name.
        fulltext_index_name (str): Fulltext index name.
        retrieval_query (str): Cypher query that gets appended.
        embedder (Optional[Embedder]): Embedder object to embed query text.
        result_formatter (Optional[Callable[[neo4j.Record], RetrieverResultItem]]): Provided custom function to transform a neo4j.Record to a RetrieverResultItem.
        neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to the server's default database ("neo4j" by default) (`see reference to documentation <https://neo4j.com/docs/operations-manual/current/database-administration/#manage-databases-default>`_).

    Raises:
        RetrieverInitializationError: If validation of the input arguments fail.
    Nc           	        	 t        |      }|rt        |      nd }	t        |||||	||      }
t        |   |
j                  j                  |
j                         |
j                  | _        |
j                  | _        |
j                  | _        |
j                  r|
j                  j                  nd | _        |
j                   | _        y # t        $ r}t	        |j                               |d }~ww xY w)Nr   r    )r"   r#   r$   retrieval_queryr%   r'   r(   )r   r   r   r   r   r)   r*   r+   r"   r   r(   r#   r$   ry   r%   r!   r'   )r/   r   r#   r$   ry   r!   r'   r(   r"   r%   r0   r1   r2   s               r3   r+   zHybridCypherRetriever.__init__  s    	B+6:LAI]H=tN7)"3$7 /-!1-N 	''..0M0M	
 "0!A!A#1#E#E -== ,, ))22 	
 !/ ? ?  	B.qxxz:A	Bs   .C 	C5C00C5r>   r?   c           	        	 t        |||||||      }|j	                  d      }
| j
                  |
d<   | j                  |
d<   |r9|s7| j                  st        d      | j                  j                  |      }||
d<   |r%|j                         D ]  \  }}||
vs||
|<    |
d	= t        t        j                  | j                  | j                  |j                   |j"                  
      \  }}d|
v r|
d= t$        j'                  dt)        |
             t$        j'                  d|       	 | j*                  j-                  ||
| j.                  t0        j2                  j4                        \  }}}t?        |d|i      S # t        $ r}	t        |	j                               |	d}	~	ww xY w# t0        j6                  j8                  $ r"}	dt;        |	      v rt=        d|       |	 d}	~	ww xY w)a  Get the top_k nearest neighbor embeddings for either provided query_vector or query_text.
        Both query_vector and query_text can be provided.
        If query_vector is provided, then it will be preferred over the embedded query_text
        for the vector search.

        See the following documentation for more details:

        - `Query a vector index <https://neo4j.com/docs/cypher-manual/current/indexes-for-vector-search/#indexes-vector-query>`_
        - `db.index.vector.queryNodes() <https://neo4j.com/docs/operations-manual/5/reference/procedures/#procedure_db_index_vector_queryNodes>`_
        - `db.index.fulltext.queryNodes() <https://neo4j.com/docs/operations-manual/5/reference/procedures/#procedure_db_index_fulltext_querynodes>`_

        Args:
            query_text (str): The text to get the closest neighbors of.
            query_vector (Optional[list[float]]): The vector embeddings to get the closest neighbors of. Defaults to None.
            top_k (int): The number of neighbors to return. Defaults to 5.
            effective_search_ratio (int): Controls the candidate pool size for the vector index by multiplying top_k to balance query
                accuracy and performance. Defaults to 1.
            query_params (Optional[dict[str, Any]]): Parameters for the Cypher query. Defaults to None.
            ranker (str, HybridSearchRanker): Type of ranker to order the results from retrieval.
            alpha (Optional[float]): Weight for the vector score when using the linear ranker.
                The fulltext index score is multiplied by (1 - alpha).
                **Required** when using the linear ranker; must be between 0 and 1.
        Raises:
            SearchValidationError: If validation of the input arguments fail.
            EmbeddingRequiredError: If no embedder is provided.

        Returns:
            RawSearchResult: The results of the search query as a list of neo4j.Record and an optional metadata dict
        )rA   rB   rC   rD   rE   rF   query_paramsNTrG   r#   r$   rI   rA   r{   )rJ   ry   rL   rE   rF   rE   rM   rN   rO   rR   rS   rT   ) r   r   r   r)   rV   r#   r$   r!   r
   rW   itemsr   r   rX   ry   rL   rE   rF   rY   rZ   r   r   r[   r(   r\   r]   r^   r_   r`   r:   r   r   )r/   rB   rA   rC   rD   r{   rE   rF   r0   r1   ra   keyvaluerb   rc   rU   s                   r3   rd   z(HybridCypherRetriever.get_search_results=  s   N	;4)%'=)N $..D.A
*.*@*@
&',0,D,D
()l==,?   ==44Z@L)5J~&*002 ,
Uj(&+JsO, >**")) 00+/+N+N!(( &&
a z!8$<hz>RS7F	 KK55----22	 6 MGQ $l3
 	
a  	;'
3:	;T ++ 	EQO+FzlS 	s0   F  ,AF+  	F(	F##F(+G*G%%G*)NNN)r   re   r#   r:   r$   r:   ry   r:   r!   rf   r'   rg   r(   rh   ri   rj   )rB   r:   rA   rk   rC   rl   rD   rl   r{   zOptional[dict[str, Any]]rE   rm   rF   rn   ri   r   )	ro   rp   rq   rr   r+   r   rs   rd   rt   ru   s   @r3   rw   rw      s    $X (, (,&@&@ &@ !	&@
 &@ %&@
&@ &&@ 
&@V /3&'151C1I1I!%d
d
 ,d
 	d

 !$d
 /d
 /d
 d
 
d
r=   rw   )(
__future__r   loggingtypingr   r   r   r   r\   pydanticr   neo4j_graphrag.embeddings.baser	   neo4j_graphrag.exceptionsr
   r   r   r   neo4j_graphrag.neo4j_queriesr   neo4j_graphrag.retrievers.baser   neo4j_graphrag.typesr   r   r   r   r   r   r   r   r   r   neo4j_graphrag.utils.loggingr   	getLoggerro   rY   r   rw    r=   r3   <module>r      sq    #  1 1  $ 3  : 4   2			8	$z
i z
zs
I s
r=   