
    j
i>	                        d Z ddlmZ ddlmZ ddlmZ ddlmZ 	 ddl	Z	dZ
n# e$ r dZ
Y nw xY w G d	 d
e          ZdS )zNLTK text splitter.    )annotations)Any)override)TextSplitterNTFc                  H     e Zd ZdZ	 	 dddd fdZedd            Z xZS )NLTKTextSplitterz"Splitting text using NLTK package.

englishF)use_span_tokenize	separatorstrlanguager   boolkwargsr   returnNonec               b    t                      j        di | || _        || _        || _        | j        r| j        rd}t          |          t          sd}t          |          | j        r+t          j	        
                    | j                  | _        dS t          j	        j        | _        dS )a  Initialize the NLTK splitter.

        Args:
            separator: The separator to use when combining splits.
            language: The language to use.
            use_span_tokenize: Whether to use `span_tokenize` instead of
                `sent_tokenize`.

        Raises:
            ImportError: If NLTK is not installed.
            ValueError: If `use_span_tokenize` is `True` and separator is not `''`.
        z6When use_span_tokenize is True, separator should be ''zANLTK is not installed, please install it with `pip install nltk`.N )super__init__
_separator	_language_use_span_tokenize
ValueError	_HAS_NLTKImportErrornltktokenize_get_punkt_tokenizer
_tokenizersent_tokenize)selfr   r   r   r   msg	__class__s         C:\Users\Dell Inspiron 16\Desktop\tws\AgrotaPowerBi\back-agrota-powerbi\mcp-client-agrota\venv\Lib\site-packages\langchain_text_splitters/nltk.pyr   zNLTKTextSplitter.__init__   s    ( 	""6"""#!"3" 	"t 	"JCS//! 	#UCc"""" 	:"m@@PPDOOO"m9DOOO    text	list[str]c                   | j         rt          | j                            |                    }g }t	          |          D ]T\  }\  }}|dk    r'||dz
           d         }|||         |||         z   }n
|||         }|                    |           Un|                     || j                  }|                     || j                  S )Nr      )r   )	r   listr    span_tokenize	enumerateappendr   _merge_splitsr   )	r"   r'   spanssplitsistartendprev_endsentences	            r%   
split_textzNLTKTextSplitter.split_text9   s     " 	D66t<<==EF#,U#3#3 ( (<E3q55$QU|AH#HUN3d59oEHH#E#IHh''''( __TDN_CCF!!&$/:::r&   )r	   r
   )
r   r   r   r   r   r   r   r   r   r   )r'   r   r   r(   )__name__
__module____qualname____doc__r   r   r7   __classcell__)r$   s   @r%   r   r      s        ,,  !!:
 #(!: !: !: !: !: !: !: !:F ; ; ; X; ; ; ; ;r&   r   )r;   
__future__r   typingr   typing_extensionsr   langchain_text_splitters.baser   r   r   r   r   r   r&   r%   <module>rA      s      " " " " " "       & & & & & & 6 6 6 6 6 6KKKII   III5; 5; 5; 5; 5;| 5; 5; 5; 5; 5;s   # --