From c5d8b2f6f8d6fc0a8f3da4a06bb7b8cb030bbcba Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Fri, 10 Jan 2025 17:02:03 +0100 Subject: [PATCH] cleaning --- haystack/components/preprocessors/recursive_splitter.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/haystack/components/preprocessors/recursive_splitter.py b/haystack/components/preprocessors/recursive_splitter.py index 6c7b2d86d5..3286a80d72 100644 --- a/haystack/components/preprocessors/recursive_splitter.py +++ b/haystack/components/preprocessors/recursive_splitter.py @@ -89,13 +89,13 @@ def __init__( {"keep_white_spaces": True} if sentence_splitter_params is None else sentence_splitter_params ) - def warm_up(self): + def warm_up(self) -> None: """ Warm up the sentence tokenizer. """ self.nltk_tokenizer = self._get_custom_sentence_tokenizer(self.sentence_splitter_params) - def _check_params(self): + def _check_params(self) -> None: if self.split_length < 1: raise ValueError("Split length must be at least 1 character.") if self.split_overlap < 0: @@ -314,6 +314,9 @@ def _fall_back_to_fixed_chunking(self, text: str, split_units: Literal["word", " """ Fall back to a fixed chunking approach if no separator works for the text. + Splits the text into smaller chunks based on the split_length attribute and the split_units parameter, either by words or + characters. It splits into words using whitespace as a separator. + :param text: The text to be split into chunks. :param split_units: The unit of the split_length parameter. It can be either "word" or "char". :returns: