diff --git a/khiops/utils/helpers.py b/khiops/utils/helpers.py index 39fb6a28..3551bc99 100644 --- a/khiops/utils/helpers.py +++ b/khiops/utils/helpers.py @@ -2,6 +2,8 @@ import os +from sklearn.model_selection import train_test_split + from khiops import core as kh from khiops.core.internals.common import is_dict_like, type_error_message from khiops.utils.dataset import Dataset, FileTable, PandasTable @@ -62,7 +64,6 @@ def _sort_df_table(table): def _sort_file_table(table, sep, header, output_dir): assert isinstance(table, FileTable), type_error_message("table", table, FileTable) - domain = kh.DictionaryDomain() dictionary = table.create_khiops_dictionary() domain.add_dictionary(dictionary) @@ -79,3 +80,189 @@ def _sort_file_table(table, sep, header, output_dir): ) return out_data_source + + +def train_test_split_dataset( + ds_spec, target_column=None, test_size=0.25, output_dir=None, **kwargs +): + # Check the types + if not is_dict_like(ds_spec): + raise TypeError(type_error_message("ds_spec", ds_spec, "dict-like")) + + # Build the dataset for the feature table + ds = Dataset(ds_spec) + + # Check the parameter coherence + if not ds.is_in_memory(): + if target_column is not None: + raise ValueError("'target_column' cannot be used with file path datasets") + if output_dir is None: + raise ValueError("'output_dir' must be specified for file path datasets") + if not isinstance(output_dir, str): + raise TypeError(type_error_message("output_dir", output_dir, str)) + + # Perform the split for each type of dataset + if ds.is_in_memory(): + # Obtain the keys for the other test_train_split function + sklearn_split_params = {} + for param in ("train_size", "random_state", "shuffle", "stratify"): + if param in kwargs: + sklearn_split_params[param] = kwargs[param] + + if target_column is None: + train_ds, test_ds = _train_test_split_in_memory_dataset( + ds, + target_column, + test_size=test_size, + split_params=sklearn_split_params, + ) + else: + train_ds, test_ds, train_target_column, test_target_column = ( + _train_test_split_in_memory_dataset( + ds, + target_column, + test_size=test_size, + split_params=sklearn_split_params, + ) + ) + else: + train_ds, test_ds = _train_test_split_file_dataset(ds, test_size, output_dir) + + # Create the return tuple + # Note: We use `tuple` to avoid pylint warning about unbalanced-tuple-unpacking + if target_column is None: + split = tuple([train_ds.to_spec(), test_ds.to_spec()]) + else: + split = tuple( + [ + train_ds.to_spec(), + test_ds.to_spec(), + train_target_column, + test_target_column, + ] + ) + + return split + + +def _train_test_split_in_memory_dataset( + ds, target_column, test_size, split_params=None +): + # Create shallow copies of the feature dataset + train_ds = ds.copy() + test_ds = ds.copy() + + # Split the main table and the target (if any) + if target_column is None: + train_ds.main_table.data_source, test_ds.main_table.data_source = ( + train_test_split( + ds.main_table.data_source, test_size=test_size, **split_params + ) + ) + else: + ( + train_ds.main_table.data_source, + test_ds.main_table.data_source, + train_target_column, + test_target_column, + ) = train_test_split( + ds.main_table.data_source, + target_column, + test_size=test_size, + **split_params, + ) + + # Split the secondary tables tables + # Note: The tables are traversed in BFS + todo_relations = [ + relation for relation in ds.relations if relation[0] == ds.main_table.name + ] + while todo_relations: + current_parent_table_name, current_child_table_name, _ = 
todo_relations.pop(0) + for relation in ds.relations: + parent_table_name, _, _ = relation + if parent_table_name == current_child_table_name: + todo_relations.append(relation) + + for new_ds in (train_ds, test_ds): + origin_child_table = ds.get_table(current_child_table_name) + new_child_table = new_ds.get_table(current_child_table_name) + new_parent_table = new_ds.get_table(current_parent_table_name) + new_parent_key_cols_df = new_parent_table.data_source[new_parent_table.key] + new_child_table.data_source = new_parent_key_cols_df.merge( + origin_child_table.data_source, on=new_parent_table.key + ) + + # Build the return value + # Note: We use `tuple` to avoid pylint warning about unbalanced-tuple-unpacking + if target_column is None: + return_tuple = tuple([train_ds, test_ds]) + else: + return_tuple = tuple( + [train_ds, test_ds, train_target_column, test_target_column] + ) + + return return_tuple + + +def _train_test_split_file_dataset(ds, test_size, output_dir): + domain = ds.create_khiops_dictionary_domain() + secondary_data_paths = domain.extract_data_paths(ds.main_table.name) + additional_data_tables = {} + output_additional_data_tables = { + "train": {}, + "test": {}, + } + # Initialize the split datasets as copies of the original one + split_dss = { + "train": ds.copy(), + "test": ds.copy(), + } + for split, split_ds in split_dss.items(): + split_ds.main_table.data_source = os.path.join( + output_dir, split, f"{split_ds.main_table.name}.txt" + ) + + for data_path in secondary_data_paths: + dictionary = domain.get_dictionary_at_data_path(data_path) + table = ds.get_table(dictionary.name) + additional_data_tables[data_path] = table.data_source + for ( + split, + split_output_additional_data_tables, + ) in output_additional_data_tables.items(): + data_table_path = os.path.join(output_dir, split, f"{table.name}.txt") + split_output_additional_data_tables[data_path] = data_table_path + split_dss[split].get_table(table.name).data_source = data_table_path + + kh.deploy_model( + domain, + ds.main_table.name, + ds.main_table.data_source, + split_dss["train"].main_table.data_source, + additional_data_tables=additional_data_tables, + output_additional_data_tables=output_additional_data_tables["train"], + header_line=ds.header, + field_separator=ds.sep, + output_header_line=ds.header, + output_field_separator=ds.sep, + sample_percentage=100.0 * (1 - test_size), + sampling_mode="Include sample", + ) + kh.deploy_model( + domain, + ds.main_table.name, + ds.main_table.data_source, + split_dss["test"].main_table.data_source, + additional_data_tables=additional_data_tables, + output_additional_data_tables=output_additional_data_tables["test"], + header_line=ds.header, + field_separator=ds.sep, + output_header_line=ds.header, + output_field_separator=ds.sep, + sample_percentage=100.0 * (1 - test_size), + sampling_mode="Exclude sample", + ) + + # Note: We use `tuple` to avoid pylint warning about unbalanced-tuple-unpacking + return tuple([split_dss["train"], split_dss["test"]]) diff --git a/tests/test_helper_functions.py b/tests/test_helper_functions.py index b69d8331..233d720f 100644 --- a/tests/test_helper_functions.py +++ b/tests/test_helper_functions.py @@ -14,7 +14,7 @@ from khiops.core.dictionary import DictionaryDomain from khiops.core.helpers import build_multi_table_dictionary_domain -from khiops.utils.helpers import sort_dataset +from khiops.utils.helpers import sort_dataset, train_test_split_dataset class KhiopsHelperFunctions(unittest.TestCase): @@ -101,26 +101,29 @@ def 
test_build_multi_table_dictionary_domain(self): def test_sort_dataset_dataframe(self): """Tests that the sort_dataset function works for dataframe datasets""" # Create the fixture dataset - clients_df = pd.read_csv(io.StringIO(UNSORTED_TEST_CLIENTS_CSV)) - calls_df = pd.read_csv(io.StringIO(UNSORTED_TEST_CALLS_CSV)) + clients_df = pd.read_csv(io.StringIO(UNSORTED_CLIENTS_CSV)) + calls_df = pd.read_csv(io.StringIO(UNSORTED_CALLS_CSV)) + connections_df = pd.read_csv(io.StringIO(UNSORTED_CONNECTIONS_CSV)) ds_spec = { "main_table": "clients", "tables": { "clients": (clients_df, ["id"]), "calls": (calls_df, ["id", "call_id"]), + "connections": (connections_df, ["id", "call_id"]), }, - "relations": [("clients", "calls", False)], + "relations": [("clients", "calls", False), ("calls", "connections", False)], } # Call the sort_dataset function sorted_ds_spec = sort_dataset(ds_spec) ref_sorted_table_dfs = { - "clients": pd.read_csv(io.StringIO(TEST_CLIENTS_CSV)), - "calls": pd.read_csv(io.StringIO(TEST_CALLS_CSV)), + "clients": pd.read_csv(io.StringIO(CLIENTS_CSV)), + "calls": pd.read_csv(io.StringIO(CALLS_CSV)), + "connections": pd.read_csv(io.StringIO(CONNECTIONS_CSV)), } # Check that the structure of the sorted dataset - self._assert_sorted_dataset_keeps_structure(ds_spec, sorted_ds_spec) + self._assert_dataset_keeps_structure(ds_spec, sorted_ds_spec) # Check that the table specs are the equivalent and the tables are sorted for table_name in ds_spec["tables"]: @@ -132,25 +135,36 @@ def test_sort_dataset_dataframe(self): def test_sort_dataset_file(self): """Tests that the sort_dataset function works for file datasets""" - # Create a execution context with temporary files and directories + # Create a execution context for temporary files and directories with contextlib.ExitStack() as exit_stack: # Create temporary files and a temporary directory clients_csv_file = exit_stack.enter_context(tempfile.NamedTemporaryFile()) calls_csv_file = exit_stack.enter_context(tempfile.NamedTemporaryFile()) + connections_csv_file = exit_stack.enter_context( + tempfile.NamedTemporaryFile() + ) tmp_dir = exit_stack.enter_context(tempfile.TemporaryDirectory()) # Create the fixture dataset - clients_csv_file.write(bytes(UNSORTED_TEST_CLIENTS_CSV, encoding="utf8")) - calls_csv_file.write(bytes(UNSORTED_TEST_CALLS_CSV, encoding="utf8")) + clients_csv_file.write(bytes(UNSORTED_CLIENTS_CSV, encoding="ascii")) + calls_csv_file.write(bytes(UNSORTED_CALLS_CSV, encoding="ascii")) + connections_csv_file.write( + bytes(UNSORTED_CONNECTIONS_CSV, encoding="ascii") + ) clients_csv_file.flush() calls_csv_file.flush() + connections_csv_file.flush() ds_spec = { "main_table": "clients", "tables": { "clients": (clients_csv_file.name, ["id"]), "calls": (calls_csv_file.name, ["id", "call_id"]), + "connections": (connections_csv_file.name, ["id", "call_id"]), }, - "relations": [("clients", "calls", False)], + "relations": [ + ("clients", "calls", False), + ("calls", "connections", False), + ], "format": (",", True), } @@ -158,10 +172,14 @@ def test_sort_dataset_file(self): sorted_ds_spec = sort_dataset(ds_spec, output_dir=tmp_dir) # Check that the structure of the sorted dataset - self._assert_sorted_dataset_keeps_structure(ds_spec, sorted_ds_spec) + self._assert_dataset_keeps_structure(ds_spec, sorted_ds_spec) # Check that the table specs are the equivalent and the tables are sorted - ref_sorted_tables = {"clients": TEST_CLIENTS_CSV, "calls": TEST_CALLS_CSV} + ref_sorted_tables = { + "clients": CLIENTS_CSV, + "calls": CALLS_CSV, + 
"connections": CONNECTIONS_CSV, + } for table_name, _ in ds_spec["tables"].items(): # Read the contents of the sorted table to a list of strings sorted_table_spec = sorted_ds_spec["tables"][table_name] @@ -178,22 +196,157 @@ def test_sort_dataset_file(self): # Check that the sorted table is equal to the reference self.assertEqual(ref_sorted_table, sorted_table) - def _assert_sorted_dataset_keeps_structure(self, ds_spec, sorted_ds_spec): - """Asserts that the sorted dataset keeps the structure of the input dataset + def test_traint_test_split_dataset_dataframe(self): + """Tests that the train_test_split_dataset function works for df datasets""" + # Create the fixture dataset + clients_df = pd.read_csv(io.StringIO(CLIENTS_CSV)) + calls_df = pd.read_csv(io.StringIO(CALLS_CSV)) + connections_df = pd.read_csv(io.StringIO(CONNECTIONS_CSV)) + ds_spec = { + "main_table": "clients", + "tables": { + "clients": (clients_df.drop("class", axis=1), ["id"]), + "calls": (calls_df, ["id", "call_id"]), + "connections": (connections_df, ["id", "call_id"]), + }, + "relations": [("clients", "calls", False), ("calls", "connections", False)], + } + y = clients_df["class"] + + # Execute the train/test split function + ds_spec_train, ds_spec_test, y_train, y_test = train_test_split_dataset( + ds_spec, y, test_size=0.5, random_state=31614 + ) + + # Check that the target are the same as the reference + ref_y_train = pd.read_csv(io.StringIO(TRAIN_DF_TARGET_CSV))["class"] + ref_y_test = pd.read_csv(io.StringIO(TEST_DF_TARGET_CSV))["class"] + self._assert_series_equal(ref_y_train, y_train.reset_index()["class"]) + self._assert_series_equal(ref_y_test, y_test.reset_index()["class"]) + + # Check that the dataset spec structure is the same + self._assert_dataset_keeps_structure(ds_spec_train, ds_spec) + self._assert_dataset_keeps_structure(ds_spec_test, ds_spec) + + # Check that the table contents match those of the references + split_ds_specs = { + "train": ds_spec_train, + "test": ds_spec_test, + } + ref_table_dfs = { + "train": { + "clients": pd.read_csv(io.StringIO(TRAIN_DF_CLIENTS_CSV)), + "calls": pd.read_csv(io.StringIO(TRAIN_DF_CALLS_CSV)), + "connections": pd.read_csv(io.StringIO(TRAIN_DF_CONNECTIONS_CSV)), + }, + "test": { + "clients": pd.read_csv(io.StringIO(TEST_DF_CLIENTS_CSV)), + "calls": pd.read_csv(io.StringIO(TEST_DF_CALLS_CSV)), + "connections": pd.read_csv(io.StringIO(TEST_DF_CONNECTIONS_CSV)), + }, + } + for split, ref_tables in ref_table_dfs.items(): + for table_name in ds_spec["tables"]: + with self.subTest(split=split, table_name=table_name): + self._assert_frame_equal( + split_ds_specs[split]["tables"][table_name][0].reset_index( + drop=True + ), + ref_tables[table_name].reset_index(drop=True), + ) + + def test_train_test_split_dataset_file(self): + """Tests that the train_test_split_dataset function works for file datasets""" + # Create a execution context for temporary files and directories + with contextlib.ExitStack() as exit_stack: + # Create temporary files and a temporary directory + clients_csv_file = exit_stack.enter_context(tempfile.NamedTemporaryFile()) + calls_csv_file = exit_stack.enter_context(tempfile.NamedTemporaryFile()) + connections_csv_file = exit_stack.enter_context( + tempfile.NamedTemporaryFile() + ) + tmp_dir = exit_stack.enter_context(tempfile.TemporaryDirectory()) + + # Create the fixture dataset + clients_csv_file.write(bytes(CLIENTS_CSV, encoding="ascii")) + calls_csv_file.write(bytes(CALLS_CSV, encoding="ascii")) + connections_csv_file.write(bytes(CONNECTIONS_CSV, 
encoding="ascii"))
+            clients_csv_file.flush()
+            calls_csv_file.flush()
+            connections_csv_file.flush()
+            ds_spec = {
+                "main_table": "clients",
+                "tables": {
+                    "clients": (clients_csv_file.name, ["id"]),
+                    "calls": (calls_csv_file.name, ["id", "call_id"]),
+                    "connections": (connections_csv_file.name, ["id", "call_id"]),
+                },
+                "relations": [
+                    ("clients", "calls", False),
+                    ("calls", "connections", False),
+                ],
+                "format": (",", True),
+            }
+
+            # Call the train_test_split_dataset function
+            train_ds_spec, test_ds_spec = train_test_split_dataset(
+                ds_spec, test_size=0.5, output_dir=tmp_dir
+            )
+            split_ds_specs = {"train": train_ds_spec, "test": test_ds_spec}
+
+            # Check the structure of the split datasets
+            self._assert_dataset_keeps_structure(ds_spec, train_ds_spec)
+            self._assert_dataset_keeps_structure(ds_spec, test_ds_spec)
+
+            # Check that the split tables are equal to the references
+            ref_split_tables = {
+                "train": {
+                    "clients": TRAIN_FILE_CLIENTS_CSV,
+                    "calls": TRAIN_FILE_CALLS_CSV,
+                    "connections": TRAIN_FILE_CONNECTIONS_CSV,
+                },
+                "test": {
+                    "clients": TEST_FILE_CLIENTS_CSV,
+                    "calls": TEST_FILE_CALLS_CSV,
+                    "connections": TEST_FILE_CONNECTIONS_CSV,
+                },
+            }
+            for split, split_ds_spec in split_ds_specs.items():
+                for table_name, _ in ds_spec["tables"].items():
+                    # Read the contents of the split table to a list of strings
+                    split_table_spec = split_ds_spec["tables"][table_name]
+                    split_table_file = exit_stack.enter_context(
+                        open(split_table_spec[0], encoding="ascii")
+                    )
+                    split_table = split_table_file.readlines()
+
+                    # Transform the reference table string to a list of strings
+                    ref_split_table = ref_split_tables[split][table_name].splitlines(
+                        keepends=True
+                    )
+
+                    # Check that the split table is equal to the reference
+                    self.assertEqual(split_table, ref_split_table)
+
+    def _assert_dataset_keeps_structure(self, ds_spec, ref_ds_spec):
+        """Asserts that the input dataset has the same structure as the reference

         It does not check the contents of the tables.
         """
         # Check that the spec dictionary is the same excluding the tables
-        self.assertIn("main_table", sorted_ds_spec)
-        self.assertIn("tables", sorted_ds_spec)
-        self.assertIn("relations", sorted_ds_spec)
-        self.assertEqual(ds_spec["main_table"], sorted_ds_spec["main_table"])
-        self.assertEqual(ds_spec["relations"], sorted_ds_spec["relations"])
-        self.assertEqual(ds_spec["tables"].keys(), sorted_ds_spec["tables"].keys())
+        self.assertIn("main_table", ref_ds_spec)
+        self.assertIn("tables", ref_ds_spec)
+        self.assertIn("relations", ref_ds_spec)
+        self.assertEqual(ds_spec["main_table"], ref_ds_spec["main_table"])
+        self.assertEqual(ds_spec["relations"], ref_ds_spec["relations"])
+        self.assertEqual(ds_spec["tables"].keys(), ref_ds_spec["tables"].keys())
+        if "format" in ref_ds_spec:
+            self.assertIn("format", ds_spec)
+            self.assertEqual(ds_spec["format"], ref_ds_spec["format"])

         # Check that the table keys are equal
         for table_name, table_spec in ds_spec["tables"].items():
-            self.assertEqual(table_spec[1], sorted_ds_spec["tables"][table_name][1])
+            self.assertEqual(table_spec[1], ref_ds_spec["tables"][table_name][1])

     def _assert_frame_equal(self, ref_df, out_df):
         """Wrapper for the assert_frame_equal pandas function
@@ -210,19 +363,37 @@ def _assert_frame_equal(self, ref_df, out_df):
         if failure_error is not None:
             self.fail(failure_error)

+    def _assert_series_equal(self, ref_series, out_series):
+        """Wrapper for the assert_series_equal pandas function
+
+        In case of failure of assert_series_equal we capture the AssertionError
+        thrown by it and make a unittest call to fail. This reports the error found
+        by assert_series_equal while avoiding a double thrown exception.
+        """
+        failure_error = None
+        try:
+            pd.testing.assert_series_equal(ref_series, out_series)
+        except AssertionError as error:
+            failure_error = error
+        if failure_error is not None:
+            self.fail(failure_error)
+
 # pylint: disable=line-too-long
 # fmt: off
-TEST_CLIENTS_CSV = """
-id,name,phone,email,address,numberrange,time,date
-1,Hakeem Wilkinson,1-352-535-7028,at.pede@outlook.org,247-2921 Elit. Rd.,2,3:02 PM,"May 1, 2024"
-10,Axel Holman,1-340-743-8860,est@google.com,Ap #737-7185 Donec St.,9,1:17 PM,"Jan 8, 2025"
-13,Armando Cleveland,(520) 285-3188,amet.consectetuer@icloud.edu,Ap #167-1519 Tempus Avenue,8,1:50 PM,"Jul 24, 2024"
-4,Edward Miles,(959) 886-5744,in.nec@outlook.edu,2184 Gravida Road,6,10:02 PM,"Mar 30, 2025"
-7,Aurora Valentine,1-838-806-6257,etiam.gravida.molestie@yahoo.com,Ap #923-3118 Ante Ave,8,4:02 AM,"Dec 12, 2023"
+
+# Test data
+
+CLIENTS_CSV = """
+id,name,phone,email,address,numberrange,time,date,class
+1,Hakeem Wilkinson,1-352-535-7028,at.pete@outlook.org,247-2921 Elit.
Rd.,2,3:02 PM,"May 1, 2024",1 +10,Axel Holman,1-340-743-8860,est@google.com,Ap #737-7185 Donec St.,9,1:17 PM,"Jan 8, 2025",0 +13,Armando Cleveland,(520) 285-3188,amet.consectetuer@icloud.edu,Ap #167-1519 Tempus Avenue,8,1:50 PM,"Jul 24, 2024",0 +4,Edward Miles,(959) 886-5744,in.nec@outlook.edu,2184 Gravida Road,6,10:02 PM,"Mar 30, 2025",1 +7,Aurora Valentine,1-838-806-6257,etiam.gravida.molestie@yahoo.com,Ap #923-3118 Ante Ave,8,4:02 AM,"Dec 12, 2023",1 """.lstrip() -TEST_CALLS_CSV = """ +CALLS_CSV = """ id,call_id,duration 1,1,38 1,20,29 @@ -235,24 +406,204 @@ def _assert_frame_equal(self, ref_df, out_df): 7,4,339 """.lstrip() -UNSORTED_TEST_CLIENTS_CSV = """ +CONNECTIONS_CSV = """ +id,call_id,connection_ip +1,1,277.1.56.30 +1,1,147.43.67.35 +1,1,164.27.26.50 +1,20,199.44.70.12 +1,20,169.51.97.96 +10,2,170.05.79.41 +10,2,118.45.57.51 +13,25,193.23.02.67 +13,25,146.74.18.88 +13,25,118.41.87.47 +13,25,161.51.79.60 +13,3,115.45.02.58 +13,30,12.115.90.93 +4,14,16.56.66.16 +4,14,19.30.36.57 +4,14,15.16.40.67 +4,2,10.189.71.73 +4,2,10.6.76.93 +7,4,16.66.64.13 +7,4,15.13.69.18 +""".lstrip() + +UNSORTED_CLIENTS_CSV = """ +id,name,phone,email,address,numberrange,time,date,class +13,Armando Cleveland,(520) 285-3188,amet.consectetuer@icloud.edu,Ap #167-1519 Tempus Avenue,8,1:50 PM,"Jul 24, 2024",0 +10,Axel Holman,1-340-743-8860,est@google.com,Ap #737-7185 Donec St.,9,1:17 PM,"Jan 8, 2025",0 +1,Hakeem Wilkinson,1-352-535-7028,at.pete@outlook.org,247-2921 Elit. Rd.,2,3:02 PM,"May 1, 2024",1 +7,Aurora Valentine,1-838-806-6257,etiam.gravida.molestie@yahoo.com,Ap #923-3118 Ante Ave,8,4:02 AM,"Dec 12, 2023",1 +4,Edward Miles,(959) 886-5744,in.nec@outlook.edu,2184 Gravida Road,6,10:02 PM,"Mar 30, 2025",1 +""".lstrip() + +UNSORTED_CALLS_CSV = """ +id,call_id,duration +1,1,38 +10,2,7 +13,25,329 +4,2,543 +13,30,8 +13,3,1 +4,14,48 +1,20,29 +7,4,339 +""".lstrip() + +UNSORTED_CONNECTIONS_CSV = """ +id,call_id,connection_ip +13,25,193.23.02.67 +1,1,277.1.56.30 +4,14,16.56.66.16 +13,25,146.74.18.88 +13,25,118.41.87.47 +1,1,147.43.67.35 +4,14,19.30.36.57 +1,20,199.44.70.12 +10,2,170.05.79.41 +1,20,169.51.97.96 +10,2,118.45.57.51 +13,25,161.51.79.60 +13,3,115.45.02.58 +4,14,15.16.40.67 +1,1,164.27.26.50 +7,4,16.66.64.13 +13,30,12.115.90.93 +7,4,15.13.69.18 +4,2,10.189.71.73 +4,2,10.6.76.93 +""".lstrip() + +TRAIN_DF_CLIENTS_CSV = """ id,name,phone,email,address,numberrange,time,date -1,Hakeem Wilkinson,1-352-535-7028,at.pede@outlook.org,247-2921 Elit. 
Rd.,2,3:02 PM,"May 1, 2024" -13,Armando Cleveland,(520) 285-3188,amet.consectetuer@icloud.edu,Ap #167-1519 Tempus Avenue,8,1:50 PM,"Jul 24, 2024" 7,Aurora Valentine,1-838-806-6257,etiam.gravida.molestie@yahoo.com,Ap #923-3118 Ante Ave,8,4:02 AM,"Dec 12, 2023" +13,Armando Cleveland,(520) 285-3188,amet.consectetuer@icloud.edu,Ap #167-1519 Tempus Avenue,8,1:50 PM,"Jul 24, 2024" +""".lstrip() + +TRAIN_DF_CALLS_CSV = """ +id,call_id,duration +7,4,339 +13,25,329 +13,3,1 +13,30,8 +""".lstrip() + +TRAIN_DF_TARGET_CSV = """ +class +1 +0 +""".lstrip() + +TRAIN_DF_CONNECTIONS_CSV = """ +id,call_id,connection_ip +7,4,16.66.64.13 +7,4,15.13.69.18 +13,25,193.23.02.67 +13,25,146.74.18.88 +13,25,118.41.87.47 +13,25,161.51.79.60 +13,3,115.45.02.58 +13,30,12.115.90.93 +""".lstrip() + + +TEST_DF_CLIENTS_CSV = """ +id,name,phone,email,address,numberrange,time,date 4,Edward Miles,(959) 886-5744,in.nec@outlook.edu,2184 Gravida Road,6,10:02 PM,"Mar 30, 2025" 10,Axel Holman,1-340-743-8860,est@google.com,Ap #737-7185 Donec St.,9,1:17 PM,"Jan 8, 2025" +1,Hakeem Wilkinson,1-352-535-7028,at.pete@outlook.org,247-2921 Elit. Rd.,2,3:02 PM,"May 1, 2024" """.lstrip() -UNSORTED_TEST_CALLS_CSV = """ +TEST_DF_TARGET_CSV = """ +class +1 +0 +1 +""".lstrip() + + +TEST_DF_CALLS_CSV = """ id,call_id,duration +4,14,48 +4,2,543 +10,2,7 1,1,38 +1,20,29 +""".lstrip() + +TEST_DF_CONNECTIONS_CSV = """ +id,call_id,connection_ip +4,14,16.56.66.16 +4,14,19.30.36.57 +4,14,15.16.40.67 +4,2,10.189.71.73 +4,2,10.6.76.93 +10,2,170.05.79.41 +10,2,118.45.57.51 +1,1,277.1.56.30 +1,1,147.43.67.35 +1,1,164.27.26.50 +1,20,199.44.70.12 +1,20,169.51.97.96 +""".lstrip() + +TRAIN_FILE_CLIENTS_CSV = """ +id,name,phone,email,address,numberrange,time,date,class +10,Axel Holman,1-340-743-8860,est@google.com,Ap #737-7185 Donec St.,9,1:17 PM,"Jan 8, 2025",0 +13,Armando Cleveland,(520) 285-3188,amet.consectetuer@icloud.edu,Ap #167-1519 Tempus Avenue,8,1:50 PM,"Jul 24, 2024",0 +4,Edward Miles,(959) 886-5744,in.nec@outlook.edu,2184 Gravida Road,6,10:02 PM,"Mar 30, 2025",1 +""".lstrip() + +TRAIN_FILE_CALLS_CSV = """ +id,call_id,duration 10,2,7 13,25,329 -4,2,543 -13,30,8 13,3,1 +13,30,8 4,14,48 +4,2,543 +""".lstrip() + +TRAIN_FILE_CONNECTIONS_CSV = """ +id,call_id,connection_ip +10,2,170.05.79.41 +10,2,118.45.57.51 +13,25,193.23.02.67 +13,25,146.74.18.88 +13,25,118.41.87.47 +13,25,161.51.79.60 +13,3,115.45.02.58 +13,30,12.115.90.93 +4,14,16.56.66.16 +4,14,19.30.36.57 +4,14,15.16.40.67 +4,2,10.189.71.73 +4,2,10.6.76.93 +""".lstrip() + + +TEST_FILE_CLIENTS_CSV = """ +id,name,phone,email,address,numberrange,time,date,class +1,Hakeem Wilkinson,1-352-535-7028,at.pete@outlook.org,247-2921 Elit. Rd.,2,3:02 PM,"May 1, 2024",1 +7,Aurora Valentine,1-838-806-6257,etiam.gravida.molestie@yahoo.com,Ap #923-3118 Ante Ave,8,4:02 AM,"Dec 12, 2023",1 +""".lstrip() + +TEST_FILE_CALLS_CSV = """ +id,call_id,duration +1,1,38 1,20,29 7,4,339 """.lstrip() + +TEST_FILE_CONNECTIONS_CSV = """ +id,call_id,connection_ip +1,1,277.1.56.30 +1,1,147.43.67.35 +1,1,164.27.26.50 +1,20,199.44.70.12 +1,20,169.51.97.96 +7,4,16.66.64.13 +7,4,15.13.69.18 +""".lstrip()
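Usage note: the tests above exercise the new helper end to end; as a quick orientation, here is a minimal sketch of how train_test_split_dataset can be called on a dataframe dataset. The table contents, column names and parameter values below are illustrative only; the spec format follows the one exercised in the tests.

    import pandas as pd

    from khiops.utils.helpers import train_test_split_dataset

    # Toy multi-table data following the clients/calls layout used in the tests above
    clients_df = pd.DataFrame(
        {"id": [1, 2, 3, 4], "name": ["a", "b", "c", "d"], "class": [0, 1, 0, 1]}
    )
    calls_df = pd.DataFrame(
        {"id": [1, 1, 2, 3, 4], "call_id": [1, 2, 1, 1, 1], "duration": [10, 5, 7, 3, 8]}
    )

    # Dataset spec in the dict format accepted by the khiops dataset helpers
    ds_spec = {
        "main_table": "clients",
        "tables": {
            "clients": (clients_df.drop("class", axis=1), ["id"]),
            "calls": (calls_df, ["id", "call_id"]),
        },
        "relations": [("clients", "calls", False)],
    }
    y = clients_df["class"]

    # The main table is split with sklearn's train_test_split (extra keyword
    # arguments such as random_state are forwarded to it); the secondary tables
    # are then filtered down to the keys present in each split's main table
    train_ds_spec, test_ds_spec, y_train, y_test = train_test_split_dataset(
        ds_spec, y, test_size=0.25, random_state=42
    )

For file path datasets, target_column is not supported and output_dir must be given; in that case the call returns only the train and test dataset specs, whose tables point to the files written under output_dir.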