
    ZǻiE                       d dl mZ d dlZd dlmZmZmZ d dlmZ d dl	m
Z
 d dlmZmZmZ dZdZd	Zd
ZdZ	 	 	 	 	 	 ddZ	 d	 	 	 	 	 ddZddZddZdZdZdZdZddZd dZ	 	 	 	 	 	 	 	 	 	 	 	 d!dZej>                  ddddddddej@                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d"dZ!dddej>                  f	 	 	 	 	 	 	 	 	 d#dZ"y)$    )annotationsN)AnyOptionalUnion)InvalidHybridSearchRankerError)get_metadata_filter)
EntityType
SearchTypeHybridSearchRankerzCALL db.index.vector.queryNodes($vector_index_name, $top_k * $effective_search_ratio, $query_vector) YIELD node, score WITH node, score LIMIT $top_kzCALL db.index.vector.queryRelationships($vector_index_name, $top_k * $effective_search_ratio, $query_vector) YIELD relationship, score WITH relationship, score LIMIT $top_kz~WITH node, vector.similarity.cosine(node.`{embedding_node_property}`, $query_vector) AS score ORDER BY score DESC LIMIT $top_kzMATCH (node:`{node_label}`) WHERE node.`{embedding_node_property}` IS NOT NULL AND size(node.`{embedding_node_property}`) = toInteger($embedding_dimension)zgCALL db.index.fulltext.queryNodes($fulltext_index_name, $query_text, {limit: $top_k}) YIELD node, scorec                F    dj                  |      }| rd| dS |rd| dS y)z{A helper function to return the CALL subquery syntax:
    - Either CALL { WITH <variables>
    - or CALL (variables) {
    ,zCALL (z) { zCALL { WITH  CALL { )join)support_variable_scope_clausevariable_list	variabless      Z/opt/lhia/marcimex/agent/venv/lib/python3.12/site-packages/neo4j_graphrag/neo4j_queries.py_call_subquery_syntaxr   8   s9     'I$	{%((yk++    Fc                @    t        | ddg      }|rd}nd}d| d| dS )	zBuild the Cypher query to upsert a batch of nodes:
    - Create the new node
    - Set its label(s) and properties
    - Set its embedding properties if any
    - Return the node elementId
    nrowr   zSET n:$(row.labels) zUWITH n, row CALL apoc.create.addLabels(n, row.labels) YIELD node WITH node as n, row zaUNWIND $rows AS row CREATE (n:__KGBuilder__ {__tmp_internal_id: row.id}) SET n += row.properties zWITH n, row z WITH n, row WHERE row.embedding_properties IS NOT NULL UNWIND keys(row.embedding_properties) as emb CALL db.create.setNodeVectorProperty(n, emb, row.embedding_properties[emb]) RETURN count(*) as nbEmb } RETURN elementId(n) as element_idr   )r   support_dynamic_labelscall_prefix
set_labelss       r   upsert_node_queryr   G   sI     (%c5\K +
l
	# ,- ,	,r   c                ,    t        | ddg      }d| dS )a  Build the Cypher query to upsert a batch of relationships:
    - Create the new relationship:
        only one relationship of a specific type is allowed between the same two nodes
    - Set its properties
    - Set its embedding properties if any
    - Return the node elementId
    relr   r   a  UNWIND $rows as row MATCH (start:__KGBuilder__ {__tmp_internal_id: row.start_node_id}),       (end:__KGBuilder__ {__tmp_internal_id: row.end_node_id}) WITH start, end, row CALL apoc.merge.relationship(start, row.type, {}, row.properties, end, row.properties) YIELD rel  WITH rel, row z WITH rel, row WHERE row.embedding_properties IS NOT NULL UNWIND keys(row.embedding_properties) as emb CALL db.create.setRelationshipVectorProperty(rel, emb, row.embedding_properties[emb]) } RETURN elementId(rel)r   )r   r   s     r   upsert_relationship_queryr"   g   s2     (%eU^K	 -  	 r   c                0    t        | dg      }d| d| dS )z@Removes the temporary __tmp_internal_id property from all nodes.r   r   z>MATCH (n:__KGBuilder__) WHERE n.__tmp_internal_id IS NOT NULL z9     SET n.__tmp_internal_id = NULL } IN TRANSACTIONS OF z ROWS ON ERROR CONTINUEr   )r   
batch_sizer   s      r   db_cleaning_queryr%      s8    '%cUK	1-  )\ *	r   zMATCH (n) WHERE elementId(n) = $node_element_id WITH n CALL db.create.setNodeVectorProperty(n, $embedding_property, $vector) RETURN nzUNWIND $rows AS row MATCH (n) WHERE elementId(n) = row.id WITH n, row CALL db.create.setNodeVectorProperty(n, $embedding_property, row.embedding) RETURN nzMATCH ()-[r]->() WHERE elementId(r) = $rel_element_id WITH r CALL db.create.setRelationshipVectorProperty(r, $embedding_property, $vector) RETURN rzUNWIND $rows AS row MATCH ()-[r]->() WHERE elementId(r) = row.id WITH r, row CALL db.create.setRelationshipVectorProperty(r, $embedding_property, row.embedding) RETURN rc                F    t        | g       }t         dt         d}||z   S )a  
    Construct a cypher query for hybrid search.

    Args:
        neo4j_version_is_5_23_or_above (bool): Whether the Neo4j version is 5.23 or above;
            determines which call syntax is used.

    Returns:
        str: The constructed Cypher query string.
    r   z WITH collect({node:node, score:score}) AS nodes, max(score) AS vector_index_max_score UNWIND nodes AS n RETURN n.node AS node, (n.score / vector_index_max_score) AS score UNION z WITH collect({node:node, score:score}) AS nodes, max(score) AS ft_index_max_score UNWIND nodes AS n RETURN n.node AS node, (n.score / ft_index_max_score) AS score } WITH node, max(score) AS score ORDER BY score DESC LIMIT $top_k)r   NODE_VECTOR_INDEX_QUERYFULL_TEXT_SEARCH_QUERY)neo4j_version_is_5_23_or_abover   
query_bodys      r   _get_hybrid_queryr+      sF     (&bK #
# $
 "
" #J		J  ##r   c                8    | rdnd}t          dt         d}||z   S )a  
    Construct a Cypher query for hybrid search using a linear combination approach with an alpha parameter.

    This query retrieves normalized scores from both the vector index and full-text index. It then
    computes the final score as a weighted sum:

    ```
    final_score = alpha * (vector normalized score) + (1 - alpha) * (fulltext normalized score)
    ```

    If a node appears in only one index, the missing score is treated as 0.

    Args:
        neo4j_version_is_5_23_or_above (bool): Whether the Neo4j version is 5.23 or above; determines the call syntax.
        alpha (float): Weight for the vector index normalized score. The full-text score is weighted as (1 - alpha).

    Returns:
        str: The constructed Cypher query string.
    z
CALL () { r   z WITH collect({node: node, score: score}) AS nodes, max(score) AS vector_index_max_score UNWIND nodes AS n WITH n.node AS node, (n.score / vector_index_max_score) AS rawScore RETURN node, rawScore * $alpha AS score UNION a   WITH collect({node: node, score: score}) AS nodes, max(score) AS ft_index_max_score UNWIND nodes AS n WITH n.node AS node, (n.score / ft_index_max_score) AS rawScore RETURN node, rawScore * (1 - $alpha) AS score } WITH node, sum(score) AS score ORDER BY score DESC LIMIT $top_k)r'   r(   )r)   alphar   r*   s       r   _get_hybrid_query_linearr.      sA    ( #A,iK #
# $ "
" #J	J  ##r   c                    |rdnd}t        | d      \  }}t        j                  ||      }t        j                  |      }	||d<   || d| d	|	 z   |fS )
aK  Build Cypher query for vector search with filters
    Uses exact KNN.

    Args:
        filters (dict[str, Any]): filters used to pre-filter the nodes before vector search
        node_label (str): node label we want to search for
        embedding_node_property (str): the name of the property holding the embeddings
        embedding_dimension (int): the dimension of the embeddings
        use_parallel_runtime (bool): Whether or not use the parallel runtime to run the query.
            Defaults to False.

    Returns:
        tuple[str, dict[str, Any]]: query and parameters
    z5CYPHER runtime = parallel parallelRuntimeSupport=all  node)
node_alias)
node_labelembedding_node_property)r4   embedding_dimensionz AND (z) )r   BASE_VECTOR_EXACT_QUERYformatVECTOR_EXACT_QUERY)
filtersr3   r4   r5   use_parallel_runtimeparallel_querywhere_filtersquery_params
base_queryvector_querys
             r   _get_filtered_vector_queryr@      s    .   	@ 
 #6g&"QM<(// 7 0 J &,, 7 - L +>L&'J<vm_B|nMM r   c                   t        j                  dt        d       |t        j                  k(  r| t
        j                  k(  r\|rt        d      |
t        j                  k(  rt        |      }n-|
t        j                  k(  r|rt        ||      }n
t               i }nP| t
        j                  k(  r/|r$|||t        |||||	      \  }}n"t        d      t         i }}nt#        d|        d| d	}n||t        j$                  k(  r[|rt        d
      | t
        j                  k(  rt        d      | t
        j                  k(  rt&        i }}d| d}nt#        d|        t#        d|       t)        ||||      }| d| |fS )a  
    Constructs a search query for vector or hybrid search, including optional pre-filtering
    and return clause.

    Args:
        search_type (SearchType): Specifies whether to perform a vector or hybrid search.
        entity_type (Optional[EntityType]): Specifies whether to search over node or
            relationship indexes. Defaults to 'node'.
        return_properties (Optional[list[str]]): List of property names to return.
            Cannot be provided alongside `retrieval_query`.
        retrieval_query (Optional[str]): Query used to retrieve search results.
            Cannot be provided alongside `return_properties`.
        node_label (Optional[str]): Label of the nodes to search.
        embedding_node_property (Optional[str])): Name of the property containing the embeddings.
        embedding_dimension (Optional[int]): Dimension of the embeddings.
        filters (Optional[dict[str, Any]]): Filters to pre-filter nodes before vector search.
        neo4j_version_is_5_23_or_above (Optional[bool]): Whether the Neo4j version is 5.23 or above.
        use_parallel_runtime (bool): Whether or not use the parallel runtime to run the query.
            Defaults to False.
        ranker (HybridSearchRanker): Type of ranker to order the results from retrieval.
        alpha (Optional[float]): Weight for the vector score when using the linear ranker. Only used when ranker is 'linear'. Defaults to 0.5 if not provided.

    Returns:
        tuple[str, dict[str, Any]]: A tuple containing the constructed query string and
        a dictionary of query parameters.

     Raises:
        Exception: If filters are used with Hybrid Search.
        Exception: If Vector Search with filters is missing required parameters.
        ValueError: If an unsupported search type is provided.
    zrThe default returned 'id' field in the search results will be removed. Please switch to using 'elementId' instead.   )
stacklevelz,Filters are not supported with hybrid search)r-   z]Vector Search with filters requires: node_label, embedding_node_property, embedding_dimensionzSearch type is not supported: zRETURN node { .*, `zi`: null } AS node, labels(node) AS nodeLabels, elementId(node) AS elementId, elementId(node) AS id, scorez2Filters are not supported for relationship indexesz7Hybrid search is not supported for relationship indexeszRETURN relationship { .*, `z`: null } AS relationship, type(relationship) as relationshipType, elementId(relationship) AS elementId, elementId(relationship) AS id, scoreEntity type is not supported: )fallback_returnentity_typer   )warningswarnDeprecationWarningr	   NODEr
   HYBRID	Exceptionr   NAIVEr+   LINEARr.   r   VECTORr@   r'   
ValueErrorRELATIONSHIPREL_VECTOR_INDEX_QUERYget_query_tail)search_typerF   return_propertiesretrieval_queryr3   r4   r5   r9   r)   r:   rankerr-   queryparamsrE   
query_tails                   r   get_search_queryr[   $  s   Z MM|
 joo%*+++ NOO+111)*HI-44402% 566%'FJ---*/;+7$>"/+,%ME6 $w  !8v=k]KLL"#:"; <  	 

//	/PQQ*+++UVVJ---2B6E./F.G H   =k]KLL9+GHH'	J WAj\"F**r   c                    | r| S |rbdj                  |D cg c]  }d| 	 c}      }|t        j                  k(  rd| dS |t        j                  k(  rd| dS t	        d|       |r|S dS c c}w )	a  Build the RETURN statement after the search is performed

    Args
        return_properties (list[str]): list of property names to return.
            It can't be provided together with retrieval_query.
        retrieval_query (str): the query to use to retrieve the search results
            It can't be provided together with return_properties.
        fallback_return (str): the fallback return statement to use to retrieve the search results

    Returns:
       str: the RETURN statement
    z, .zRETURN node {za} AS node, labels(node) AS nodeLabels, elementId(node) AS elementId, elementId(node) AS id, scorezRETURN relationship {z} AS relationship, type(relationship) as relationshipType, elementId(relationship) AS elementId, elementId(relationship) AS id, scorerD   r0   )r   r	   rJ   rQ   rP   )rV   rU   rE   rF   propreturn_properties_cyphers         r   rS   rS     s    $ #'99EV-WT$j-W#X *//) !9 : ;  J333()A(B C  =k]KLL-?525' .Xs   A/)r   boolr   z	list[str]returnstr)F)r   r`   r   r`   ra   rb   )r   r`   ra   rb   )r   r`   r$   intra   rb   )r)   r`   ra   rb   )r)   r`   r-   floatra   rb   )r9   zdict[str, Any]r3   rb   r4   rb   r5   rc   r:   r`   ra   tuple[str, dict[str, Any]])rT   r
   rF   r	   rU   Optional[list[str]]rV   Optional[str]r3   rg   r4   rg   r5   zOptional[int]r9   zOptional[dict[str, Any]]r)   r`   r:   r`   rW   zUnion[str, HybridSearchRanker]r-   zOptional[float]ra   re   )
rV   rg   rU   rf   rE   rg   rF   r	   ra   rb   )#
__future__r   rG   typingr   r   r   neo4j_graphrag.exceptionsr   neo4j_graphrag.filtersr   neo4j_graphrag.typesr	   r
   r   r'   rR   r8   r6   r(   r   r   r"   r%   UPSERT_VECTOR_ON_NODE_QUERYUPSERT_VECTORS_ON_NODE_QUERY#UPSERT_VECTOR_ON_RELATIONSHIP_QUERY$UPSERT_VECTORS_ON_RELATIONSHIP_QUERYr+   r.   r@   rJ   rM   r[   rS    r   r   <module>rr      s   #  ' ' D 6 K K$ , ' S  #'8A  IN#'AE@6$   $ %$:$$N&&& !& 	&
 &  &V )oo-1%) $-1)-(,+0!&-?-E-E!t+t+t+ +t+ #	t+
 t+ +t+ 't+ &t+ %)t+ t+ +t+ t+  t+p &*-1%)(oo	(6"(6*(6 #(6 	(6
 	(6r   