
    ZǻiN                    ,   d dl mZ d dlmZmZmZmZmZ d dlZd dlm	Z	 d dl
mZmZ dZdZdd	gZd
gZdZdZdZdZdZdZdZdZd#dZd$dZi i dddf	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d%dZ	 	 	 	 	 d&	 	 	 	 	 	 	 	 	 	 	 	 	 d'dZ	 	 	 	 	 d&	 	 	 	 	 	 	 	 	 	 	 	 	 d(dZd)dZd*dZd+dZ d,dZ!	 	 	 	 d-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d.dZ"d/dZ#	 d0	 	 	 	 	 	 	 d1dZ$	 	 	 	 	 d2	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d3d Z%	 	 	 d4	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d5d!Z&	 	 	 d4	 	 	 	 	 	 	 	 	 	 	 d6d"Z'y)7    )annotations)AnyDictListOptionalTupleN)Query)ClientErrorCypherTypeError__KGBuilder__
__Entity___Bloom_Perspective__Bloom_Scene__Bloom_HAS_SCENE_i'     
   a<  CALL apoc.meta.data({sample: $SAMPLE}) YIELD label, other, elementType, type, property WHERE NOT type = 'RELATIONSHIP' AND elementType = 'node' AND NOT label IN $EXCLUDED_LABELS WITH label AS nodeLabel, collect({property:property, type:type}) AS properties RETURN {label: nodeLabel, properties: properties} AS outputa?  CALL apoc.meta.data({sample: $SAMPLE}) YIELD label, other, elementType, type, property WHERE NOT type = 'RELATIONSHIP' AND elementType = 'relationship' AND NOT label in $EXCLUDED_LABELS WITH label AS relType, collect({property:property, type:type}) AS properties RETURN {type: relType, properties: properties} AS outputaC  CALL apoc.meta.data({sample: $SAMPLE}) YIELD label, other, elementType, type, property WHERE type = 'RELATIONSHIP' AND elementType = 'node' UNWIND other AS other_node WITH * WHERE NOT label IN $EXCLUDED_LABELS AND NOT other_node IN $EXCLUDED_LABELS RETURN {start: label, type: property, end: toString(other_node)} AS outputzCALL apoc.schema.nodes() YIELD label, properties, type, size, valuesSelectivity WHERE type = 'RANGE' RETURN *, size * valuesSelectivity as distinctValueszCALL apoc.meta.graph({sample: 1000, maxRels: 100}) YIELD nodes, relationships RETURN nodes, [rel in relationships | {name:apoc.any.property(rel, 'type'), count: apoc.any.property(rel, 'count')}] AS relationshipsc                F    | j                  dd      j                  dd      S )zClean string values for schema.

    Cleans the input text by replacing newline and carriage return characters.

    Args:
        text (str): The input text to clean.

    Returns:
        str: The cleaned text.
    
 )replace)texts    S/opt/lhia/marcimex/agent/venv/lib/python3.12/site-packages/neo4j_graphrag/schema.py_clean_string_valuesr   J   s"     <<c"**455    c                   t        | t              r|i }| j                         D ]e  \  }}t        |t              rt        |      }|$|||<   *t        |t              r't        |      t        k  sMt        |      }|[|||<   a|||<   g |S t        | t              r8t        |       t        k  r%| D cg c]  }t        |      t        |       c}S y| S c c}w )a  Sanitize the input dictionary or list.

    Sanitizes the input by removing embedding-like values,
    lists with more than 128 elements, that are mostly irrelevant for
    generating answers in a LLM context. These properties, if left in
    results, can occupy significant context space and detract from
    the LLM's performance by introducing unnecessary noise and cost.

    Args:
        d (Any): The input dictionary or list to sanitize.

    Returns:
        Any: The sanitized dictionary or list.
    N)
isinstancedictitems_value_sanitizelistlen
LIST_LIMIT)dnew_dictkeyvaluesanitized_valueitems         r   r    r    X   s     !T'') 	&JC%&"1%"8#/$3HSME4(u:
*&5e&<O'3(7 !&!	&" 	At	q6J23*.t7L7X%  s   3CCFc                   |sb| j                  t        ||      ||      }|j                  D cg c]  }|j                          }	}|r|	D 
cg c]  }
t	        |
       }	}
|	S |j                  d|        | j                  di |5 }|j                  t        ||      |      }|D cg c]  }|j                          }	}|r|	D 
cg c]  }
t	        |
       }	}
|	cddd       S c c}w c c}
w c c}w c c}
w # 1 sw Y   yxY w)a  
    Queries the database.

    Args:
        driver (neo4j.Driver):  Neo4j Python driver instance.
        query (str): The cypher query.
        params (Optional[dict[str, Any]]): The query parameters. Defaults to None.
        session_params (Optional[dict[str, Any]]): Parameters to pass to the
            session used for executing the query. Defaults to None.
        database (Optional[str]): The name of the database to connect to. Default is 'neo4j'.
        timeout (Optional[float]): The timeout for transactions in seconds.
                Useful for terminating long-running queries.
                By default, there is no timeout set.
        sanitize (bool): A flag to indicate whether to remove lists with
                more than 128 elements from results. Useful for removing
                embedding-like properties from database responses. Default is False.

    Returns:
        list[dict[str, Any]]: the result of the query in json format.
    )r   timeout)	database_parameters_databaseN )execute_queryr	   recordsdatar    
setdefaultsessionrun)driverqueryparamssession_paramsr.   r+   sanitizer2   r	json_dataelr4   results                r   query_databaser?      s   : ##ug. $ 

 (,||4!QVVX4	47@A,AIAj(3		).	) WUw?H'-.!QVVX.	.7@A,AIA  5A /A	 s5   C)C.	"C=+C3C=
C8C=3
C==Dc                <    t        | |||||      }t        ||      S )a  
    Returns the schema of the graph as a string with following format:

    .. code-block:: text

        Node properties:
        Person {id: INTEGER, name: STRING}
        Relationship properties:
        KNOWS {fromDate: DATE}
        The relationships:
        (:Person)-[:KNOWS]->(:Person)

    Args:
        driver (neo4j.Driver): Neo4j Python driver instance.
        is_enhanced (bool): Flag indicating whether to format the schema with
            detailed statistics (True) or in a simpler overview format (False).
        database (Optional[str]): The name of the database to connect to. Default is 'neo4j'.
        timeout (Optional[float]): The timeout for transactions in seconds.
                Useful for terminating long-running queries.
                By default, there is no timeout set.
        sanitize (bool): A flag to indicate whether to remove lists with
                more than 128 elements from results. Useful for removing
                embedding-like properties from database responses. Default is False.
        sample (int): Number of nodes to sample for the apoc.meta.data procedure. Setting sample to -1 will remove sampling.
                Defaults to 1000.


    Returns:
        str: the graph schema information in a serialized format.
    )r6   is_enhancedr.   r+   r:   sample)get_structured_schemaformat_schema)r6   rA   r.   r+   r:   rB   structured_schemas          r   
get_schemarF      s2    L . *K88r   c                p   t        | t        t        t        t        gz   |d|||      D cg c]  }|d   	 }}t        | t
        t        |d|||      D cg c]  }|d   	 }}t        | t        t        t        t        gz   |d|||      D cg c]  }|d   	 }	}	 t        | d|||      }
t        | t        |||      }|D ci c]  }|d   |d    c}|D ci c]  }|d   |d    c}|	|
|d	d
}|rt        | ||||       |S c c}w c c}w c c}w # t        $ r g }
g }Y fw xY wc c}w c c}w )a(	  
    Returns the structured schema of the graph.

    Returns a dict with following format:

    .. code:: python

        {
            'node_props': {
                'Person': [{'property': 'id', 'type': 'INTEGER'}, {'property': 'name', 'type': 'STRING'}]
            },
            'rel_props': {
                'KNOWS': [{'property': 'fromDate', 'type': 'DATE'}]
            },
            'relationships': [
                {'start': 'Person', 'type': 'KNOWS', 'end': 'Person'}
            ],
            'metadata': {
                'constraint': [
                    {'id': 7, 'name': 'person_id', 'type': 'UNIQUENESS', 'entityType': 'NODE', 'labelsOrTypes': ['Person'], 'properties': ['id'], 'ownedIndex': 'person_id', 'propertyType': None},
                ],
                'index': [
                    {'label': 'Person', 'properties': ['name'], 'size': 2, 'type': 'RANGE', 'valuesSelectivity': 1.0, 'distinctValues': 2.0},
                ]
            }
        }

    Note:
        The internal structure of the returned dict depends on the apoc.meta.data
        and apoc.schema.nodes procedures.

    Warning:
        Some labels are excluded from the output schema:

        - The `__Entity__` and `__KGBuilder__` node labels which are created by the KG Builder pipeline within this package
        - Some labels related to Bloom internals.

    Args:
        driver (neo4j.Driver): Neo4j Python driver instance.
        is_enhanced (bool): Flag indicating whether to format the schema with
            detailed statistics (True) or in a simpler overview format (False).
        database (Optional[str]): The name of the database to connect to. Default is 'neo4j'.
        timeout (Optional[float]): The timeout for transactions in seconds.
            Useful for terminating long-running queries.
            By default, there is no timeout set.
        sanitize (bool): A flag to indicate whether to remove lists with
            more than 128 elements from results. Useful for removing
            embedding-like properties from database responses. Default is False.
        sample (int): Number of nodes to sample for the apoc.meta.data procedure. Setting sample to -1 will remove sampling.
            Defaults to 1000.

    Returns:
        dict[str, Any]: the graph schema information in a structured format.
    )EXCLUDED_LABELSSAMPLE)r6   r7   r8   r.   r+   r:   outputzSHOW CONSTRAINTSr6   r7   r.   r+   r:   label
propertiestype)
constraintindex)
node_props	rel_propsrelationshipsmetadata)r6   rE   r.   r+   r:   )r?   NODE_PROPERTIES_QUERYrH   BASE_ENTITY_LABELBASE_KG_BUILDER_LABELREL_PROPERTIES_QUERYEXCLUDED_RELS	REL_QUERYINDEX_QUERYr
   enhance_schema)r6   rA   r.   r+   r:   rB   r2   node_propertiesrel_propertiesrS   rO   rP   r=   rE   s                 r   rC   rC      s   @ #'#2$&;<$= 
 
 	XO $ #&'4G

 	X
N 
 ##2$&;<$= 
 
 	XM "#$

 
 @OOr'{B|$44O=KLrbj"\"22L&#->	 /	
 ] 
@  

 PLs/   DDD $D D.D3D+*D+c                   | d   dk(  rf| j                  d      rU| j                  dt        dz         t        kD  rdt        | d   d          dS d	| d   D cg c]  }t        |       c} z   S | d   d
v rO| j                  d      r| j                  d      rd| d    d| d    S | j                  d      rd| d   d    dS dS | d   dk(  r,| j                  d      r| d   t        kD  ryd| d    d| d    S yc c}w )a  
    Format a single property based on its type and available metadata.

    Depending on the property type, this function provides either an example value,
    a range (for numerical and date types), or a list of available options (for strings).
    If the property is a list that exceeds a defined size limit, it is omitted.

    Args:
        prop (Dict[str, Any]): A dictionary containing details of the property,
            including type, values, min/max, and other metadata.

    Returns:
        Optional[str]: A formatted string representing the property details,
        or None if the property should be skipped (e.g., large lists).
    rN   STRINGvaluesdistinct_count   z
Example: "r   "zAvailable options: INTEGERFLOATDATE	DATE_TIMELOCAL_DATE_TIMEminmaxzMin: z, Max:  LISTmin_sizeNz
Min Size: z, Max Size: max_size)getDISTINCT_VALUE_LIMITr   r#   )propr=   s     r   _format_propertyrt   w  s>     F|xDHHX$688$&:Q&>?BVV 4T(^A5F GHJJ &9=hH2*2.HIK 
f  
 88E?txx4;-wtE{m<<8<8JZXq 12!4RPRR	f	xx
#tJ'7*'DZ 01d:>N=OPP% Is   C:c           
        g }|rd| j                         D ]O  \  }}|j                  d| d       |D ]0  }t        |      }||j                  d|d    d|d    d|        2 Q |S | j                         D ]F  \  }}dj                  |D cg c]  }|d    d	|d     c}      }|j                  | d
| d       H |S c c}w )aM  
    Format a collection of properties for nodes or relationships.

    If `is_enhanced` is True, properties are formatted with additional metadata,
    such as example values or min/max statistics. Otherwise, they are presented in
    a more compact form.

    Args:
        property_dict (Dict[str, Any]): A dictionary mapping labels (for nodes or relationships)
            to lists of property definitions.
        is_enhanced (bool): Flag indicating whether to format properties with additional details.

    Returns:
        List[str]: A list of formatted property descriptions.
    z- **z**z  - `property`: rN   r   , z: z {})r   appendrt   join)property_dictrA   formatted_propsrL   propsrs   example	props_strs           r   _format_propertiesr     s     O)//1 	LE5""T%#34 *40&#**Z 01T&\N!G9M	  *//1 	?LE5		CHI4D$%RV~6II ""eWC	{"#=>		?
  Js   C
c           
     R    | D cg c]  }d|d    d|d    d|d    d c}S c c}w )a8  
    Format relationships into a structured string representation.

    Args:
        rels (List[dict]): A list of dictionaries, each containing `start`, `type`, and `end`
            to describe a relationship between two entities.

    Returns:
        List[str]: A list of formatted relationship strings.
    z(:startz)-[:rN   z]->(:end)r/   )relsr=   s     r   _format_relationshipsr     s;     KOOBbGT"V*U2e9+Q?OOOs   $c           
         t        | d   |      }t        | d   |      }t        | d         }dj                  ddj                  |      ddj                  |      ddj                  |      g      S )a  
    Format the structured schema into a human-readable string.

    Depending on the `is_enhanced` flag, this function either creates a concise
    listing of node labels and relationship types alongside their properties or
    generates an enhanced, more verbose representation with additional details like
    example or available values and min/max statistics. It also includes a formatted
    list of existing relationships.

    Args:
        schema (Dict[str, Any]): The structured schema dictionary, containing
            properties for nodes and relationships as well as relationship definitions.
        is_enhanced (bool): Flag indicating whether to format the schema with
            detailed statistics (True) or in a simpler overview format (False).

    Returns:
        str: A formatted string representation of the graph schema, including
        node properties, relationship properties, and relationship patterns.
    rQ   rR   rS   r   zNode properties:zRelationship properties:zThe relationships:)r   r   r{   )schemarA   formatted_node_propsformatted_rel_propsformatted_relss        r   rD   rD     s|    ( .f\.BKP,VK-@+N*6/+BCN99II*+&II)* IIn%	
	 	r   c           	        g }g }	|su|rs|d   j                  d      dkD  r\|d   j                  d      t        k  rAt        |d| d|  d|||      d   d   }
|	j                  d	|
 d
t	        |
              ||	fS |j                  d|  d|  d       |s|	j                  d|  d       ||	fS |	j                  d|  dt         d|  d       ||	fS )a  
    Build Cypher clauses for string property statistics.

    Constructs and returns the parts of a Cypher query (`WITH` and `RETURN` clauses)
    required to gather statistical information about a string property. Depending on
    property index metadata and whether the query is exhaustive, this function may
    retrieve a distinct set of values directly from an index or a truncated list of
    distinct values from the actual nodes or relationships.

    Args:
        prop_name (str): The name of the string property.
        driver (neo4j.Driver): Neo4j Python driver instance.
        label_or_type (str): The node label or relationship type to query.
        exhaustive (bool): Whether to perform an exhaustive search or a
            sampled query approach.
        prop_index (Optional[List[Any]]): Optional metadata about the property's
            index. If provided, certain optimizations are applied based on
            distinct value limits and index availability.
        database (Optional[str]): The name of the database to connect to. Default is 'neo4j'.
        timeout (Optional[float]): The timeout for transactions in seconds.
            Useful for terminating long-running queries.
            By default, there is no timeout set.
        sanitize (bool): A flag to indicate whether to remove lists with
            more than 128 elements from results. Useful for removing
            embedding-like properties from database responses. Default is False.

    Returns:
        Tuple[List[str], List[str]]:
            A tuple of two lists. The first list contains the `WITH` clauses, and
            the second list contains the corresponding `RETURN` clauses for the
            string property.
    r   sizedistinctValuesz&CALL apoc.schema.properties.distinct('z', 'z') YIELD valuerK   r'   zvalues: z, distinct_count: z'collect(distinct substring(toString(n.`z`), 0, 50)) AS `_values`	values: `z_values`[..z], distinct_count: size(`z	_values`))rq   rr   r?   rz   r"   )	prop_namer6   label_or_type
exhaustive
prop_indexr.   r+   r:   with_clausesreturn_clausesdistinct_valuess              r   _build_str_clausesr     sO   T LNqMf%)qM./3GG(!?$ykA 	
 	 	 	((:3;O:PQ	
& '' 	9) E!!*85	
 !!Ii["AB '' !!	{+6J5K L..7[	C ''r   c           	     :    d|  d|  d|  d|  d	}d|  d|  d}||fS )a]  
    Build Cypher clauses for list property size statistics.

    Constructs and returns the parts of a Cypher query (`WITH` and `RETURN` clauses)
    that gather minimum and maximum size information for properties that are lists.
    These clauses compute the smallest and largest list lengths across the matched
    entities.

    Args:
        prop_name (str): The name of the list property.

    Returns:
        Tuple[str, str]:
            A tuple consisting of a single `WITH` clause (calculating min and max
            sizes) and a corresponding `RETURN` clause that references these values.
    zmin(size(n.``)) AS `z_size_min`, max(size(n.`z
_size_max`zmin_size: `z_size_min`, max_size: `r/   )r   with_clausereturn_clauses      r   _build_list_clausesr   H  sU    $ yk) 5 k)J	@  i[ 7	{*M  %%r   c           	     B   g }g }|s3|s1|j                  d|  d|  d       |j                  d|  d       ||fS |j                  d|  d|  d       |j                  d|  d|  d	       |j                  d
|  d|  d       |j                  d|  d|  d|  d       ||fS )aa  
    Build Cypher clauses for numeric and date/datetime property statistics.

    Constructs and returns the parts of a Cypher query (`WITH` and `RETURN` clauses)
    needed to gather statistical information about numeric or date/datetime
    properties. Depending on whether there is an available index or an exhaustive
    approach is required, this may collect a distinct set of values or compute
    minimum, maximum, and distinct counts.

    Args:
        prop_name (str): The name of the numeric or date/datetime property.
        exhaustive (bool): Whether to perform an exhaustive search or a
            sampled query approach.
        prop_index (Optional[List[Any]]): Optional metadata about the property's
            index. If provided and the search is not exhaustive, it can be used
            to optimize the retrieval of distinct values.

    Returns:
        Tuple[List[str], List[str]]:
            A tuple of two lists. The first list contains the `WITH` clauses, and
            the second list contains the corresponding `RETURN` clauses for the
            numeric or date/datetime property.
    zcollect(distinct toString(n.`r   r   r   zmin(n.`z`) AS `z_min`zmax(n.`z_max`zcount(distinct n.`z
_distinct`zmin: toString(`z_min`), max: toString(`z_max`), distinct_count: `)rz   )r   r   r   r   r   s        r   _build_num_date_clausesr   d  s    4 LNj+I;hykR	
 		)H=> '' 	gi[	{%HIgi[	{%HI 79+ZH	
 	!) -""+ -$$-;j:	
 ''r   c
                   |rd| d}
nd| d}
g }g }i }|s|
d| z  }
|D ]  }|d   }|d   }|s/|d   d	   D cg c]  }|d
   |k(  r|d   |gk(  r
|d   dk(  r| c}nd}|dk(  r!t        || ||||||	      \  }}||z  }||z  }n\|dv rt        |||      \  }}||z  }||z  }n<|dk(  r2t        |      \  }}|j                  |       |j                  |       n|dv rd|j	                         z   dz   ||<    |s|
 dS |rddj                  |      z   nd}ddj                  d |j                         D              z   dz   }dj                  |
||g      }|S c c}w ) a+  
    Build a Cypher query for enhanced schema information.

    Constructs and returns a Cypher query string to gather detailed property
    statistics for either nodes or relationships. Depending on whether the target
    entities are below a certain threshold, it may collect exhaustive information
    or simply sample a few records. This query retrieves data such as minimum and
    maximum values, distinct value counts, and sample values.

    Args:
        driver (neo4j.Driver): Neo4j Python driver instance.
        structured_schema (Dict[str, Any]): The current schema information
            including metadata, indexes, and constraints.
        label_or_type (str): The node label or relationship type to query.
        properties (List[Dict[str, Any]]): A list of property definitions for
            the node label or relationship type.
        exhaustive (bool): Whether to perform an exhaustive search or a
            sampled query approach.
        sample_size (int): The number of nodes or relationships to sample when
            exhaustive is False. Defaults to 5.
        is_relationship (bool, optional): Indicates if the query is for
            a relationship type (True) or a node label (False). Defaults to False.
        database (Optional[str]): The name of the database to connect to. Default is 'neo4j'.
        timeout (Optional[float]): The timeout for transactions in seconds.
            Useful for terminating long-running queries.
            By default, there is no timeout set.
        sanitize (bool): A flag to indicate whether to remove lists with
            more than 128 elements from results. Useful for removing
            embedding-like properties from database responses. Default is False.

    Returns:
        str: A Cypher query string that gathers enhanced property metadata.
    zMATCH ()-[n:`z`]->()z
MATCH (n:`z`)z WITH n LIMIT rv   rN   rT   rP   rL   rM   RANGENr`   )r   r6   r   r   r   r.   r+   r:   re   )r   r   r   rn   )r   )BOOLEANPOINTDURATION{ry   z
RETURN {} AS outputzWITH z,
     rm   zRETURN {rx   c              3  2   K   | ]  \  }}d | d|   yw)`rw   Nr/   ).0kvs      r   	<genexpr>z-get_enhanced_schema_cypher.<locals>.<genexpr>  s      Bdaas#aSMBs   z} AS outputr   )r   r   r   rz   popr{   r   )r6   rE   r   rM   r   sample_sizeis_relationshipr.   r+   r:   match_clauser   r   output_dictrs   r   	prop_typer=   r   str_w_clausesstr_r_clausesnum_date_w_clausesnum_date_r_clauseslist_w_clauselist_r_clauser   r   cypher_querys                               r   get_enhanced_schema_cypherr     s&   Z &}oV<#M?"5LNK.66 .B$	L	  ,J7@g;-/|$3vJ')	   	  +=#+%%!!	,(M= M)Lm+N 
 
 6M#
z62 2 ..L00N& +>+S(M=.!!-0::!$~'9'9';!;c!AI].B^ 677=I'JOOL99rK
))Bk.?.?.AB
B	C
	  99lKGHLks   "E c                V   |d   }|d   }|rt         nt        }	||	v ry||rdnd   j                  |      }
|
syt        | |||
|t        k  ||||	      }	 |sddgini }t        | |||||	      d
   d   }|
D ]!  }|d   |v s|j                  ||d             # y# t        $ r Y yw xY w)a   
    Enhance the structured schema with detailed statistics for a single node label or relationship type.

    For the specified node label or relationship type, this function queries the database to gather
    property statistics such as minimum and maximum values, distinct value counts, and sample values.
    These statistics are then integrated into the provided structured schema, enriching the schema with
    more in-depth information about each property.

    Args:
        driver (neo4j.Driver): A Neo4j Python driver instance used to run queries against the database.
        structured_schema (Dict[str, Any]): A dictionary representing the current structured schema,
            which will be updated with enhanced property statistics.
        prop_dict (Dict[str, Any]): A dictionary containing the name and count of the node label or
            relationship type to be enhanced.
        is_relationship (bool): Indicates whether the properties to be enhanced belong to a relationship
            (True) or a node (False).
        database (Optional[str]): The name of the database to connect to. Default is 'neo4j'.
        timeout (Optional[float]): The timeout for transactions in seconds.
            Useful for terminating long-running queries.
            By default, there is no timeout set.
        sanitize (bool): A flag to indicate whether to remove lists with
            more than 128 elements from results. Useful for removing
            embedding-like properties from database responses. Default is False.

    Returns:
        None
    namecountNrR   rQ   )	r6   rE   r   rM   r   r   r.   r+   r:   !notifications_disabled_categoriesUNRECOGNIZED)r6   r7   r9   r.   r+   r:   r   rJ   rv   )rY   rH   rq   r   EXHAUSTIVE_SEARCH_LIMITr?   updater   )r6   rE   	prop_dictr   r.   r+   r:   r   r   excludedr~   enhanced_cypherr9   enhanced_infors   s                  r   enhance_propertiesr     s   H VDgE /}_Hx_k,OSSE 0+22'
O # 1>2BC 	
 '!)
    	=DJ=0M$z*:;<	=  s   ,B B 	B('B(c           
         t        | t        |||      }|d   d   D ]  }t        | ||d|||        |d   d   D ]  }t        | ||d|||        y)	a  
    Enhance the structured schema with detailed property statistics.

    For each node label and relationship type in the structured schema, this
    function queries the database to gather additional property statistics such
    as minimum and maximum values, distinct value counts, and sample values.
    These statistics are then merged into the provided structured schema
    dictionary.

    Args:
        driver (neo4j.Driver): Neo4j Python driver instance.
        structured_schema (Dict[str, Any]): The initial structured schema
            containing node and relationship properties, which will be updated
            with enhanced statistics.
        database (Optional[str]): The name of the database to connect to. Default is 'neo4j'.
        timeout (Optional[float]): The timeout for transactions in seconds.
            Useful for terminating long-running queries.
            By default, there is no timeout set.
        sanitize (bool): A flag to indicate whether to remove lists with
            more than 128 elements from results. Useful for removing
            embedding-like properties from database responses. Default is False.

    Returns:
        None
    rK   r   nodesF)r6   rE   r   r   r.   r+   r:   rS   TN)r?   SCHEMA_COUNTS_QUERYr   )r6   rE   r.   r+   r:   schema_countsnoderels           r   r\   r\   ^  s    @ #!M a ) 	
/!	
	
 Q0 	
/ 	
	
r   )r   strreturnr   )r$   r   r   r   )r6   neo4j.Driverr7   r   r8   Dict[str, Any]r9   r   r.   Optional[str]r+   Optional[float]r:   boolr   List[Dict[str, Any]])FNNFi  )r6   r   rA   r   r.   r   r+   r   r:   r   rB   intr   r   )r6   r   rA   r   r.   r   r+   r   r:   r   rB   r   r   zdict[str, Any])rs   r   r   r   )r|   r   rA   r   r   	List[str])r   r   r   r   )r   r   rA   r   r   r   )NNNF)r   r   r6   r   r   r   r   r   r   Optional[List[Any]]r.   r   r+   r   r:   r   r   Tuple[List[str], List[str]])r   r   r   zTuple[str, str])N)r   r   r   r   r   r   r   r   )   FNNF)r6   r   rE   r   r   r   rM   r   r   r   r   r   r   r   r.   r   r+   r   r:   r   r   r   )NNF)r6   r   rE   r   r   r   r   r   r.   r   r+   r   r:   r   r   None)r6   r   rE   r   r.   r   r+   r   r:   r   r   r   )(
__future__r   typingr   r   r   r   r   neo4jr	   neo4j.exceptionsr
   r   rW   rV   rH   rY   r   r#   rr   rU   rX   rZ   r[   r   r   r    r?   rF   rC   rt   r   r   rD   r   r   r   r   r   r\   r/   r   r   <module>r      s   # 3 3   9'   (/:$% 
 B ? Q 
1  6+b  %'"#... . #	.
 . . . .f "#.9.9.9 .9 	.9
 .9 .9 	.9f "#LLL L 	L
 L L L^(V FP P '+"#O(O(O( O( 	O(
 $O( O( O( O( !O(d&: IM.(.( $.(2E.( .(n !"#ss%s s %	s
 s s s s s s 	sv ##PP%P P 	P
 P P P 
Pl ##<
<
%<
 <
 	<

 <
 
<
r   