From b28ca78596f5e4b68dbd40c353e0fa97c541667e Mon Sep 17 00:00:00 2001
From: Thomas Moreau
Date: Mon, 2 Dec 2024 23:52:40 +0100
Subject: [PATCH] FIX compat with sklearn/keras/pandas/ipykernels/circleci
 (#338)

---
 .circleci/config.yml                   |  5 +--
 .github/workflows/main.yml             |  5 ++-
 .gitignore                             |  1 +
 ci_tools/circle/build_doc.sh           |  2 +
 doc/conf.py                            |  4 +-
 doc/sphinxext/github_link.py           |  2 +-
 rampwf/hyperopt/hyperopt.py            |  6 ++-
 rampwf/prediction_types/multiclass.py  |  2 +-
 rampwf/score_types/detection/ospa.py   |  7 +++-
 .../starting_kit/batch_classifier.py   | 38 ++-----------------
 .../parallel_load/image_classifier.py  |  2 +-
 .../starting_kit/image_classifier.py   |  2 +-
 12 files changed, 26 insertions(+), 50 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5dc16866..4ba331b8 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -3,9 +3,6 @@ jobs:
   build:
     docker:
       - image: cimg/python:3.11
-        environment:
-          # miniconda path install
-          MINICONDA_PATH: ~/miniconda
       - image: cimg/postgres:9.6
         environment:
           POSTGRES_USER: mrramp
@@ -54,6 +51,6 @@ jobs:
           fingerprints:
            - "36:72:da:c4:54:0d:ab:57:84:80:a7:65:59:1f:02:f9"
-      - deploy:
+      - run:
          name: Deploy documentation
          command: ./ci_tools/circle/push_doc.sh doc/_build/html/stable
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8d081c26..e3e1aabd 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -10,7 +10,7 @@ jobs:
   test:
     strategy:
       matrix:
-        python-version: ['3.7', '3.11']
+        python-version: ['3.10', '3.12']
         os: [ubuntu-latest, windows-latest]

     runs-on: ${{ matrix.os }}
@@ -29,7 +29,8 @@
          python --version
          which python
          # Otherwise we get errors on Python 3.6 due to https://stackoverflow.com/questions/38257138/jupyter-no-such-kernel-named-python3
-          conda install -y nb_conda_kernels
+          conda install -y nb_conda_kernels jupyter_server ipykernel
+          ipython kernel install --name "python3" --user

     - name: Install dependencies
       shell: bash -l {0}
diff --git a/.gitignore b/.gitignore
index a3c81aed..e222c3fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ rampwf.egg-info
 coverage.xml
 ramp_workflow.egg-info
 .cache
+**/data/

 # included test starting kits
 training_output/
diff --git a/ci_tools/circle/build_doc.sh b/ci_tools/circle/build_doc.sh
index 94e4e73f..e7e12064 100755
--- a/ci_tools/circle/build_doc.sh
+++ b/ci_tools/circle/build_doc.sh
@@ -2,6 +2,8 @@
 set -x
 set -e

+MINICONDA_PATH=$HOME/miniconda
+
 # Install dependencies with miniconda
 wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
     -O miniconda.sh
diff --git a/doc/conf.py b/doc/conf.py
index 797712c8..70603cf8 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -309,8 +309,8 @@
 # In an ideal world, this would get fixed in this PR:
 # https://github.com/readthedocs/sphinx_rtd_theme/pull/747/files
 def setup(app):
-    app.add_javascript('js/copybutton.js')
-    app.add_stylesheet("basic.css")
+    app.add_js_file('js/copybutton.js')
+    app.add_css_file("basic.css")

 # app.connect('autodoc-process-docstring', generate_example_rst)
diff --git a/doc/sphinxext/github_link.py b/doc/sphinxext/github_link.py
index 38d04868..84bdee4c 100644
--- a/doc/sphinxext/github_link.py
+++ b/doc/sphinxext/github_link.py
@@ -40,7 +40,7 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision):
         return

     class_name = info['fullname'].split('.')[0]
-    if type(class_name) != str:
+    if not isinstance(class_name, str):
         # Python 2 only
         class_name = class_name.encode('utf-8')
     module = __import__(info['module'], fromlist=[class_name])
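Note on the `doc/conf.py` hunk above: Sphinx renamed `add_javascript`/`add_stylesheet` to `add_js_file`/`add_css_file` (the old names were removed in Sphinx 4.0), which is why the docs build broke. If the docs ever needed to build on both old and new Sphinx, a minimal, hypothetical sketch of a version-tolerant `setup()` (not part of this patch) could look like:

```python
# Hypothetical fallback variant of the setup() patched above: prefer the
# modern Sphinx methods, fall back to the pre-1.8 names if they are absent.
def setup(app):
    add_js = getattr(app, 'add_js_file', None) or app.add_javascript
    add_css = getattr(app, 'add_css_file', None) or app.add_stylesheet
    add_js('js/copybutton.js')
    add_css('basic.css')
```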
diff --git a/rampwf/hyperopt/hyperopt.py b/rampwf/hyperopt/hyperopt.py
index a6353844..923835db 100644
--- a/rampwf/hyperopt/hyperopt.py
+++ b/rampwf/hyperopt/hyperopt.py
@@ -241,7 +241,7 @@ def parse_hyperparameters(module_path, workflow_element_name):
     )
     for object_name in dir(workflow_element):
         o = getattr(workflow_element, object_name)
-        if type(o) == Hyperparameter:
+        if isinstance(o, Hyperparameter):
             o.set_names(object_name, workflow_element_name)
             hyperparameters.append(o)
     return hyperparameters
@@ -439,7 +439,9 @@ def _update_df_scores(self, df_scores, fold_i):
         row['valid_time'] = float(df_scores.loc['valid']['time'])
         row['n_train'] = len(self.cv[fold_i][0])
         row['n_valid'] = len(self.cv[fold_i][1])
-        self.df_scores_ = self.df_scores_.append(row, ignore_index=True)
+        self.df_scores_ = pd.concat(
+            [self.df_scores_, pd.DataFrame([row])], ignore_index=True
+        )

     def _make_and_save_summary(self, hyperopt_output_path):
         summary_groupby = self.df_scores_.groupby(
diff --git a/rampwf/prediction_types/multiclass.py b/rampwf/prediction_types/multiclass.py
index f9384e5a..46cd2c74 100644
--- a/rampwf/prediction_types/multiclass.py
+++ b/rampwf/prediction_types/multiclass.py
@@ -69,7 +69,7 @@ def _init_from_pred_labels(self, y_pred_labels):
             (len(y_pred_labels), len(self.label_names)), dtype=np.float64)
         for ps_i, label_list in zip(self.y_pred, y_pred_labels):
             # converting single labels to list of labels, assumed below
-            if type(label_list) != np.ndarray and type(label_list) != list:
+            if not isinstance(label_list, (np.ndarray, list)):
                 label_list = [label_list]
             label_list = list(map(type_of_label, label_list))
             for label in label_list:
diff --git a/rampwf/score_types/detection/ospa.py b/rampwf/score_types/detection/ospa.py
index 0758f0bf..5dafc12d 100644
--- a/rampwf/score_types/detection/ospa.py
+++ b/rampwf/score_types/detection/ospa.py
@@ -1,5 +1,10 @@
 import numpy as np
-from sklearn.utils import indices_to_mask
+
+try:
+    from sklearn.utils._mask import indices_to_mask
+except ImportError:
+    # Legacy for scikit-learn < 1.4
+    from sklearn.utils import indices_to_mask

 from .base import DetectionBaseScoreType
 from .util import _select_minipatch_tuples, _match_tuples
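The `hyperopt.py` hunk above replaces `DataFrame.append`, which was deprecated in pandas 1.4 and removed in pandas 2.0: the one-row dict is wrapped in a `DataFrame` and concatenated instead. A self-contained sketch of the same pattern, using illustrative column names that are not taken from the codebase:

```python
import pandas as pd

# pandas >= 2.0: df.append(row, ignore_index=True) no longer exists, so
# wrap the single row dict in a one-row DataFrame and concatenate it.
df_scores = pd.DataFrame({'fold_i': [0], 'train_time': [1.2]})
row = {'fold_i': 1, 'train_time': 1.5}
df_scores = pd.concat([df_scores, pd.DataFrame([row])], ignore_index=True)
print(df_scores)  # two rows, fresh integer index
```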
diff --git a/rampwf/tests/kits/digits/submissions/starting_kit/batch_classifier.py b/rampwf/tests/kits/digits/submissions/starting_kit/batch_classifier.py
index 496e4fed..c0054f8a 100644
--- a/rampwf/tests/kits/digits/submissions/starting_kit/batch_classifier.py
+++ b/rampwf/tests/kits/digits/submissions/starting_kit/batch_classifier.py
@@ -23,39 +23,6 @@ def fit(self, gen_builder):
             # samples of your dataset divided by the batch size.
             steps_per_epoch=get_nb_minibatches(nb_train, batch_size),
             epochs=1,
-            # In parallel to training, a CPU process loads and preprocesses
-            # data from disk and put it into a queue in the form of
-            # mini-batches of size `batch_size`.`max_queue_size` controls the
-            # maximum size of that queue. The size of the queue should be big
-            # enough so that the training process (GPU) never
-            # waits for data (the queue should be never be empty).
-            # The CPU process loads chunks of 1024 images each time, and
-            # 1024/batch_size mini-batches from that chunk are put into the
-            # queue. Assuming training the model on those 1024/batch_size
-            # mini-batches is slower than loading a single chunk of 1024
-            # images, a good lower bound for `max_queue_size` would be
-            # (1024/batch_size). if `batch_size` is 16, you can put
-            # `max_queue_size` to 64.
-            max_queue_size=64,
-            # WARNING : It is obligatory to set `workers` to 1.
-            # This in principle controls the number of workers used
-            # by keras to load mini-batches from disk to memory in parallel
-            # to GPU training. But I don't like the way it works and their
-            # code is not very commented/used, so I dont trust it that much
-            # (we might have surprises).
-            # The way it works in keras is by launching in parallel `workers`
-            # threads or processes which will all use a copy of the generator
-            # passed to `fit_generator`. So if nothing is done and `workers`
-            # is set to some number > 1, the neural net will be trained with
-            # repetitions of the same data, because the workers are independent
-            # and they got through the same generator.
-            # Hence it is necessary to introduce a shared lock between the
-            # processes so that they load different data, this can become a bit
-            # complicated, so I choose to rather load exactly one chunk at a
-            # time using 1 worker (so `workers` have to be equal to 1), but
-            # do this single chunk loading in parallel with joblib.
-            workers=1,
-            use_multiprocessing=False,
             validation_data=gen_valid,
             validation_steps=get_nb_minibatches(nb_valid, batch_size),
             verbose=1)
@@ -71,6 +38,7 @@ def _build_model(self):
         model = Model(inp, out)
         model.compile(
             loss='categorical_crossentropy',
-            optimizer=SGD(lr=1e-4),
-            metrics=['accuracy'])
+            optimizer=SGD(learning_rate=1e-4),
+            metrics=['accuracy']
+        )
         return model
diff --git a/rampwf/tests/kits/digits_simplified/submissions/parallel_load/image_classifier.py b/rampwf/tests/kits/digits_simplified/submissions/parallel_load/image_classifier.py
index bb48a715..2520a84a 100644
--- a/rampwf/tests/kits/digits_simplified/submissions/parallel_load/image_classifier.py
+++ b/rampwf/tests/kits/digits_simplified/submissions/parallel_load/image_classifier.py
@@ -18,7 +18,7 @@ def __init__(self):
         self.model = Model(inp, out)
         self.model.compile(
             loss='categorical_crossentropy',
-            optimizer=SGD(lr=1e-4),
+            optimizer=SGD(learning_rate=1e-4),
             metrics=['accuracy'])

     def _transform(self, x):
diff --git a/rampwf/tests/kits/digits_simplified/submissions/starting_kit/image_classifier.py b/rampwf/tests/kits/digits_simplified/submissions/starting_kit/image_classifier.py
index 0fe3c239..62b57c9c 100644
--- a/rampwf/tests/kits/digits_simplified/submissions/starting_kit/image_classifier.py
+++ b/rampwf/tests/kits/digits_simplified/submissions/starting_kit/image_classifier.py
@@ -17,7 +17,7 @@ def __init__(self):
         self.model = Model(inp, out)
         self.model.compile(
             loss='categorical_crossentropy',
-            optimizer=SGD(lr=1e-4),
+            optimizer=SGD(learning_rate=1e-4),
             metrics=['accuracy'])

     def _transform(self, x):
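The three submission files above all apply the same Keras rename: the `lr` keyword became `learning_rate` in TF2-era Keras, and recent releases reject the legacy spelling. A minimal sketch of the change in isolation, assuming the `tensorflow.keras` import path (the kits' own import lines are not shown in this diff):

```python
from tensorflow.keras.optimizers import SGD

# SGD(lr=1e-4) fails on recent Keras, which no longer accepts the legacy
# keyword; `learning_rate` is the supported spelling.
optimizer = SGD(learning_rate=1e-4)
```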