From b28ca78596f5e4b68dbd40c353e0fa97c541667e Mon Sep 17 00:00:00 2001
From: Thomas Moreau
Date: Mon, 2 Dec 2024 23:52:40 +0100
Subject: [PATCH] FIX compat with sklearn/keras/pandas/ipykernels/circleci
 (#338)

---
 .circleci/config.yml                   |  5 +--
 .github/workflows/main.yml             |  5 ++-
 .gitignore                             |  1 +
 ci_tools/circle/build_doc.sh           |  2 +
 doc/conf.py                            |  4 +-
 doc/sphinxext/github_link.py           |  2 +-
 rampwf/hyperopt/hyperopt.py            |  6 ++-
 rampwf/prediction_types/multiclass.py  |  2 +-
 rampwf/score_types/detection/ospa.py   |  7 +++-
 .../starting_kit/batch_classifier.py   | 38 ++-----------------
 .../parallel_load/image_classifier.py  |  2 +-
 .../starting_kit/image_classifier.py   |  2 +-
 12 files changed, 26 insertions(+), 50 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5dc16866..4ba331b8 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -3,9 +3,6 @@ jobs:
   build:
     docker:
       - image: cimg/python:3.11
-        environment:
-          # miniconda path install
-          MINICONDA_PATH: ~/miniconda
       - image: cimg/postgres:9.6
         environment:
           POSTGRES_USER: mrramp
@@ -54,6 +51,6 @@ jobs:
           fingerprints:
            - "36:72:da:c4:54:0d:ab:57:84:80:a7:65:59:1f:02:f9"
-      - deploy:
+      - run:
          name: Deploy documentation
          command: ./ci_tools/circle/push_doc.sh doc/_build/html/stable
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8d081c26..e3e1aabd 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -10,7 +10,7 @@ jobs:
   test:
     strategy:
       matrix:
-        python-version: ['3.7', '3.11']
+        python-version: ['3.10', '3.12']
         os: [ubuntu-latest, windows-latest]

     runs-on: ${{ matrix.os }}
@@ -29,7 +29,8 @@
          python --version
          which python
          # Otherwise we get errors on Python 3.6 due to https://stackoverflow.com/questions/38257138/jupyter-no-such-kernel-named-python3
-          conda install -y nb_conda_kernels
+          conda install -y nb_conda_kernels jupyter_server ipykernel
+          ipython kernel install --name "python3" --user

     - name: Install dependencies
       shell: bash -l {0}
diff --git a/.gitignore b/.gitignore
index a3c81aed..e222c3fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ rampwf.egg-info
 coverage.xml
 ramp_workflow.egg-info
 .cache
+**/data/

 # included test starting kits
 training_output/
diff --git a/ci_tools/circle/build_doc.sh b/ci_tools/circle/build_doc.sh
index 94e4e73f..e7e12064 100755
--- a/ci_tools/circle/build_doc.sh
+++ b/ci_tools/circle/build_doc.sh
@@ -2,6 +2,8 @@
 set -x
 set -e

+MINICONDA_PATH=$HOME/miniconda
+
 # Install dependencies with miniconda
 wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
     -O miniconda.sh
diff --git a/doc/conf.py b/doc/conf.py
index 797712c8..70603cf8 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -309,8 +309,8 @@
 # In an ideal world, this would get fixed in this PR:
 # https://github.com/readthedocs/sphinx_rtd_theme/pull/747/files
 def setup(app):
-    app.add_javascript('js/copybutton.js')
-    app.add_stylesheet("basic.css")
+    app.add_js_file('js/copybutton.js')
+    app.add_css_file("basic.css")

 # app.connect('autodoc-process-docstring', generate_example_rst)
diff --git a/doc/sphinxext/github_link.py b/doc/sphinxext/github_link.py
index 38d04868..84bdee4c 100644
--- a/doc/sphinxext/github_link.py
+++ b/doc/sphinxext/github_link.py
@@ -40,7 +40,7 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision):
         return

     class_name = info['fullname'].split('.')[0]
-    if type(class_name) != str:
+    if not isinstance(class_name, str):
         # Python 2 only
         class_name = class_name.encode('utf-8')
     module = __import__(info['module'], fromlist=[class_name])
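Note on the `doc/conf.py` hunk above: Sphinx renamed `add_javascript`/`add_stylesheet` to `add_js_file`/`add_css_file` (the old names were removed in Sphinx 4.0), which is why the docs build broke. If the docs ever needed to build on both old and new Sphinx, a minimal, hypothetical sketch of a version-tolerant `setup()` (not part of this patch) could look like:

```python
# Hypothetical fallback variant of the setup() patched above: prefer the
# modern Sphinx methods, fall back to the pre-1.8 names if they are absent.
def setup(app):
    add_js = getattr(app, 'add_js_file', None) or app.add_javascript
    add_css = getattr(app, 'add_css_file', None) or app.add_stylesheet
    add_js('js/copybutton.js')
    add_css('basic.css')
```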
diff --git a/rampwf/hyperopt/hyperopt.py b/rampwf/hyperopt/hyperopt.py
index a6353844..923835db 100644
--- a/rampwf/hyperopt/hyperopt.py
+++ b/rampwf/hyperopt/hyperopt.py
@@ -241,7 +241,7 @@ def parse_hyperparameters(module_path, workflow_element_name):
     )
     for object_name in dir(workflow_element):
         o = getattr(workflow_element, object_name)
-        if type(o) == Hyperparameter:
+        if isinstance(o, Hyperparameter):
             o.set_names(object_name, workflow_element_name)
             hyperparameters.append(o)
     return hyperparameters
@@ -439,7 +439,9 @@ def _update_df_scores(self, df_scores, fold_i):
         row['valid_time'] = float(df_scores.loc['valid']['time'])
         row['n_train'] = len(self.cv[fold_i][0])
         row['n_valid'] = len(self.cv[fold_i][1])
-        self.df_scores_ = self.df_scores_.append(row, ignore_index=True)
+        self.df_scores_ = pd.concat(
+            [self.df_scores_, pd.DataFrame([row])], ignore_index=True
+        )

     def _make_and_save_summary(self, hyperopt_output_path):
         summary_groupby = self.df_scores_.groupby(
diff --git a/rampwf/prediction_types/multiclass.py b/rampwf/prediction_types/multiclass.py
index f9384e5a..46cd2c74 100644
--- a/rampwf/prediction_types/multiclass.py
+++ b/rampwf/prediction_types/multiclass.py
@@ -69,7 +69,7 @@ def _init_from_pred_labels(self, y_pred_labels):
             (len(y_pred_labels), len(self.label_names)), dtype=np.float64)
         for ps_i, label_list in zip(self.y_pred, y_pred_labels):
             # converting single labels to list of labels, assumed below
-            if type(label_list) != np.ndarray and type(label_list) != list:
+            if not isinstance(label_list, (np.ndarray, list)):
                 label_list = [label_list]
             label_list = list(map(type_of_label, label_list))
             for label in label_list:
diff --git a/rampwf/score_types/detection/ospa.py b/rampwf/score_types/detection/ospa.py
index 0758f0bf..5dafc12d 100644
--- a/rampwf/score_types/detection/ospa.py
+++ b/rampwf/score_types/detection/ospa.py
@@ -1,5 +1,10 @@
 import numpy as np
-from sklearn.utils import indices_to_mask
+
+try:
+    from sklearn.utils._mask import indices_to_mask
+except ImportError:
+    # Legacy for scikit-learn < 1.4
+    from sklearn.utils import indices_to_mask

 from .base import DetectionBaseScoreType
 from .util import _select_minipatch_tuples, _match_tuples
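The `hyperopt.py` hunk above replaces `DataFrame.append`, which was deprecated in pandas 1.4 and removed in pandas 2.0: the one-row dict is wrapped in a `DataFrame` and concatenated instead. A self-contained sketch of the same pattern, using illustrative column names that are not taken from the codebase:

```python
import pandas as pd

# pandas >= 2.0: df.append(row, ignore_index=True) no longer exists, so
# wrap the single row dict in a one-row DataFrame and concatenate it.
df_scores = pd.DataFrame({'fold_i': [0], 'train_time': [1.2]})
row = {'fold_i': 1, 'train_time': 1.5}
df_scores = pd.concat([df_scores, pd.DataFrame([row])], ignore_index=True)
print(df_scores)  # two rows, fresh integer index
```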
diff --git a/rampwf/tests/kits/digits/submissions/starting_kit/batch_classifier.py b/rampwf/tests/kits/digits/submissions/starting_kit/batch_classifier.py
index 496e4fed..c0054f8a 100644
--- a/rampwf/tests/kits/digits/submissions/starting_kit/batch_classifier.py
+++ b/rampwf/tests/kits/digits/submissions/starting_kit/batch_classifier.py
@@ -23,39 +23,6 @@ def fit(self, gen_builder):
             # samples of your dataset divided by the batch size.
             steps_per_epoch=get_nb_minibatches(nb_train, batch_size),
             epochs=1,
-            # In parallel to training, a CPU process loads and preprocesses
-            # data from disk and put it into a queue in the form of
-            # mini-batches of size `batch_size`.`max_queue_size` controls the
-            # maximum size of that queue. The size of the queue should be big
-            # enough so that the training process (GPU) never
-            # waits for data (the queue should be never be empty).
-            # The CPU process loads chunks of 1024 images each time, and
-            # 1024/batch_size mini-batches from that chunk are put into the
-            # queue. Assuming training the model on those 1024/batch_size
-            # mini-batches is slower than loading a single chunk of 1024
-            # images, a good lower bound for `max_queue_size` would be
-            # (1024/batch_size). if `batch_size` is 16, you can put
-            # `max_queue_size` to 64.
-            max_queue_size=64,
-            # WARNING : It is obligatory to set `workers` to 1.
-            # This in principle controls the number of workers used
-            # by keras to load mini-batches from disk to memory in parallel
-            # to GPU training. But I don't like the way it works and their
-            # code is not very commented/used, so I dont trust it that much
-            # (we might have surprises).
-            # The way it works in keras is by launching in parallel `workers`
-            # threads or processes which will all use a copy of the generator
-            # passed to `fit_generator`. So if nothing is done and `workers`
-            # is set to some number > 1, the neural net will be trained with
-            # repetitions of the same data, because the workers are independent
-            # and they got through the same generator.
-            # Hence it is necessary to introduce a shared lock between the
-            # processes so that they load different data, this can become a bit
-            # complicated, so I choose to rather load exactly one chunk at a
-            # time using 1 worker (so `workers` have to be equal to 1), but
-            # do this single chunk loading in parallel with joblib.
-            workers=1,
-            use_multiprocessing=False,
             validation_data=gen_valid,
             validation_steps=get_nb_minibatches(nb_valid, batch_size),
             verbose=1)
@@ -71,6 +38,7 @@ def _build_model(self):
         model = Model(inp, out)
         model.compile(
             loss='categorical_crossentropy',
-            optimizer=SGD(lr=1e-4),
-            metrics=['accuracy'])
+            optimizer=SGD(learning_rate=1e-4),
+            metrics=['accuracy']
+        )
         return model
diff --git a/rampwf/tests/kits/digits_simplified/submissions/parallel_load/image_classifier.py b/rampwf/tests/kits/digits_simplified/submissions/parallel_load/image_classifier.py
index bb48a715..2520a84a 100644
--- a/rampwf/tests/kits/digits_simplified/submissions/parallel_load/image_classifier.py
+++ b/rampwf/tests/kits/digits_simplified/submissions/parallel_load/image_classifier.py
@@ -18,7 +18,7 @@ def __init__(self):
         self.model = Model(inp, out)
         self.model.compile(
             loss='categorical_crossentropy',
-            optimizer=SGD(lr=1e-4),
+            optimizer=SGD(learning_rate=1e-4),
             metrics=['accuracy'])

     def _transform(self, x):
diff --git a/rampwf/tests/kits/digits_simplified/submissions/starting_kit/image_classifier.py b/rampwf/tests/kits/digits_simplified/submissions/starting_kit/image_classifier.py
index 0fe3c239..62b57c9c 100644
--- a/rampwf/tests/kits/digits_simplified/submissions/starting_kit/image_classifier.py
+++ b/rampwf/tests/kits/digits_simplified/submissions/starting_kit/image_classifier.py
@@ -17,7 +17,7 @@ def __init__(self):
         self.model = Model(inp, out)
         self.model.compile(
             loss='categorical_crossentropy',
-            optimizer=SGD(lr=1e-4),
+            optimizer=SGD(learning_rate=1e-4),
             metrics=['accuracy'])

     def _transform(self, x):
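The three submission files above all apply the same Keras rename: the `lr` keyword became `learning_rate` in TF2-era Keras, and recent releases reject the legacy spelling. A minimal sketch of the change in isolation, assuming the `tensorflow.keras` import path (the kits' own import lines are not shown in this diff):

```python
from tensorflow.keras.optimizers import SGD

# SGD(lr=1e-4) fails on recent Keras, which no longer accepts the legacy
# keyword; `learning_rate` is the supported spelling.
optimizer = SGD(learning_rate=1e-4)
```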