Merge pull request #111 from mwalmsley/dependencies

Update to latest dependencies

mwalmsley authored Nov 23, 2023
2 parents 1b169ab + 9d9c991 commit e9c5604

Showing 9 changed files with 113 additions and 77 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -32,16 +32,16 @@ Download the code using git:

And then pick one of the commands below to install Zoobot and either PyTorch (recommended) or TensorFlow:

-# Zoobot with PyTorch and a GPU. Requires CUDA 11.3.
-pip install -e "zoobot[pytorch_cu113]" --extra-index-url https://download.pytorch.org/whl/cu113
+# Zoobot with PyTorch and a GPU. Requires CUDA 12.1 (or CUDA 11.8, if you use `pytorch-cu118` instead)
+pip install -e "zoobot[pytorch-cu121]" --extra-index-url https://download.pytorch.org/whl/cu121

# OR Zoobot with PyTorch and no GPU
pip install -e "zoobot[pytorch_cpu]" --extra-index-url https://download.pytorch.org/whl/cpu
pip install -e "zoobot[pytorch-cpu]" --extra-index-url https://download.pytorch.org/whl/cpu

# OR Zoobot with PyTorch on Mac with M1 chip
pip install -e "zoobot[pytorch_m1]"
pip install -e "zoobot[pytorch-m1]"

# OR Zoobot with TensorFlow. Works with and without a GPU, but if you have a GPU, you need CUDA 11.2.
pip install -e "zoobot[tensorflow]"

This installs the downloaded Zoobot code using pip [editable mode](https://pip.pypa.io/en/stable/topics/local-project-installs/#editable-installs) so you can easily change the code locally. Zoobot is also available directly from pip (`pip install zoobot[option]`). Only use this if you are sure you won't be making changes to Zoobot itself. For Google Colab, use `pip install zoobot[pytorch_colab]`
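To sanity-check an install (not part of the diff), you can confirm that pip resolved the expected builds:

# should print e.g. "2.1.0+cu121 True" for the pytorch-cu121 option, or "2.1.0+cpu False" for pytorch-cpu
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
python -c "import zoobot"  # no output means the import worked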
21 changes: 12 additions & 9 deletions benchmarks/pytorch/run_benchmarks.sh
@@ -13,31 +13,34 @@ SEED=$RANDOM


# GZ Evo i.e. all galaxies
-# effnet, greyscale and color
-# sbatch --job-name=evo_py_gr_eff_224_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
-# sbatch --job-name=evo_py_gr_eff_300_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=300,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
+# effnet, greyscale and color, 224 and 300px
+sbatch --job-name=evo_py_gr_eff_224_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
+sbatch --job-name=evo_py_gr_eff_300_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=300,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
sbatch --job-name=evo_py_co_eff_224_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,GPUS=2,SEED=$SEED $TRAIN_JOB
-# sbatch --job-name=evo_py_co_eff_300_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=128,RESIZE_AFTER_CROP=300,DATASET=gz_evo,COLOR_STRING=--color,GPUS=2,SEED=$SEED $TRAIN_JOB
+sbatch --job-name=evo_py_co_eff_300_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=128,RESIZE_AFTER_CROP=300,DATASET=gz_evo,COLOR_STRING=--color,GPUS=2,SEED=$SEED $TRAIN_JOB

# and resnet18
# sbatch --job-name=evo_py_gr_res18_224_$SEED --export=ARCHITECTURE=resnet18,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# sbatch --job-name=evo_py_gr_res18_300_$SEED --export=ARCHITECTURE=resnet18,BATCH_SIZE=256,RESIZE_AFTER_CROP=300,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# and resnet50
# sbatch --job-name=evo_py_gr_res50_224_$SEED --export=ARCHITECTURE=resnet50,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# sbatch --job-name=evo_py_gr_res50_300_$SEED --export=ARCHITECTURE=resnet50,BATCH_SIZE=256,RESIZE_AFTER_CROP=300,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
-# and with max-vit tiny because hey transformers are cool
+# color 224 version
+# sbatch --job-name=evo_py_co_res50_224_$SEED --export=ARCHITECTURE=resnet50,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB

-sbatch --job-name=evo_py_gr_vittiny_224_$SEED --export=ARCHITECTURE=maxvit_tiny_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
-sbatch --job-name=evo_py_co_vittiny_224_$SEED --export=ARCHITECTURE=maxvit_tiny_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
+# and with max-vit tiny because hey transformers are cool
+# smaller batch size due to memory
+# sbatch --job-name=evo_py_gr_vittiny_224_$SEED --export=ARCHITECTURE=maxvit_tiny_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
+# sbatch --job-name=evo_py_co_vittiny_224_$SEED --export=ARCHITECTURE=maxvit_tiny_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB

# and max-vit small (works badly)
# sbatch --job-name=evo_py_gr_vitsmall_224_$SEED --export=ARCHITECTURE=maxvit_small_224,BATCH_SIZE=64,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
-# and convnext (works badly)
+# and convnext (works badly, would really like to try again but bigger)
# sbatch --job-name=evo_py_gr_$SEED --export=ARCHITECTURE=convnext_nano,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# and vit
# sbatch --job-name=evo_py_gr_vittinyp16_224_$SEED --export=ARCHITECTURE=vit_tiny_patch16_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB

# and swinv2
# TODO

# and in color with no mixed precision, for specific project
# sbatch --job-name=evo_py_co_res50_224_fullprec_$SEED --export=ARCHITECTURE=resnet50,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,GPUS=2,SEED=$SEED $TRAIN_JOB
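# For context: each sbatch call above passes its --export variables to the $TRAIN_JOB Slurm script,
# which is not shown in this diff. A minimal sketch of how such a script might consume them
# (all paths and flag names below are assumptions, not taken from the repo):

#!/bin/bash
#SBATCH --gres=gpu:2
# hypothetical train_model.slurm - the real $TRAIN_JOB script is not part of this diff
# $COLOR_STRING and $MIXED_PRECISION_STRING expand to --color / --mixed-precision, or to nothing
python benchmarks/pytorch/train.py \
    --architecture $ARCHITECTURE \
    --batch-size $BATCH_SIZE \
    --resize-after-crop $RESIZE_AFTER_CROP \
    --dataset $DATASET \
    --gpus $GPUS \
    --seed $SEED \
    $COLOR_STRING \
    $MIXED_PRECISION_STRING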
19 changes: 7 additions & 12 deletions docs/data_notes.rst
@@ -24,11 +24,6 @@ Zoobot includes weights for the following pretrained models.
- 1
- Yes
- `Link <https://www.dropbox.com/s/izvqagd6rkhi4lq/effnetb0_greyscale_300px.ckpt?dl=0>`__
-* - EfficientNetB0
-  - 300px
-  - 3
-  - Yes
-  - WIP
* - EfficientNetB0
- 224px
- 3
@@ -57,12 +52,12 @@ Zoobot includes weights for the following pretrained models.
* - Max-ViT Tiny
- 224px
- 1
-  - Not yet
+  - Yes
- `Link <https://www.dropbox.com/s/pndcgi6wxh9wuqb/maxvittiny_greyscale_224px.ckpt?dl=0>`__
* - Max-ViT Tiny
- 224px
- 3
-  - Not yet
+  - Yes
- `Link <https://www.dropbox.com/s/ibuo5n1tcaphvn3/maxvittiny_color_224px.ckpt?dl=0>`__


@@ -108,19 +103,19 @@ We also include a few additional ad-hoc models `on Dropbox <https://www.dropbox.
Which model should I use?
--------------------------

-We suggest the PyTorch EfficientNetB0 single-channel 300-pixel model for most users.
+We suggest the PyTorch EfficientNetB0 224-pixel model for most users.

Zoobot will prioritise PyTorch going forward. For more, see here.
The TensorFlow models currently perform just as well as the PyTorch equivalents but will not benefit from any future updates.

EfficientNetB0 is a small yet capable modern architecture.
The ResNet50 models perform slightly worse than EfficientNet, but are a very common architecture and may be useful as benchmarks or as part of other frameworks (like detectron2, for segmentation).

-Color information does not improve overall performance at predicting GZ votes.
-This is a little surprising, but we're confident it's true for our datasets (see the benchmarks folder for our tests).
-However, it might be useful to include for other tasks where color is critical, such as hunting certain anomalous galaxies.
+It's unclear if color information improves overall performance at predicting GZ votes.
+For CNNs, the change in performance is not significant. For ViT, it is measurable but small.
+We suggest including color if it is expected to be important to your specific task, such as hunting green peas.

-Larger input images (300px vs 224px) provide a small boost in performance at predicting GZ votes.
+Larger input images (300px vs 224px) may provide a small boost in performance at predicting GZ votes.
However, the models require more memory and train/finetune slightly more slowly.
You may want to start with a 224px model and experiment with "upgrading" once you're happy everything works.
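As a concrete starting point, here is a minimal sketch of loading the recommended checkpoint for finetuning (assuming effnetb0_greyscale_224px.ckpt has been downloaded from the table above; see the finetuning examples for the full workflow):

from zoobot.pytorch.training import finetune

model = finetune.FinetuneableZoobotClassifier(
    checkpoint_loc='effnetb0_greyscale_224px.ckpt',  # path to the downloaded checkpoint
    num_classes=2,  # set for your own task
    n_blocks=0  # 0 trains only the new head; increase to finetune deeper blocks
)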

61 changes: 36 additions & 25 deletions setup.py
@@ -5,7 +5,7 @@

setuptools.setup(
name="zoobot",
version="1.0.4",
version="1.0.6",
author="Mike Walmsley",
author_email="[email protected]",
description="Galaxy morphology classifiers",
@@ -22,51 +22,63 @@
packages=setuptools.find_packages(),
python_requires=">=3.8", # recommend 3.9 for new users. TF needs >=3.7.2, torchvision>=3.8
extras_require={
-'pytorch_cpu': [
+'pytorch-cpu': [
-# A100 GPU currently only seems to support cuda 11.3 on manchester cluster, let's stick with this version for now
-# very latest version wants cuda 11.6
-'torch == 1.12.1+cpu',
-'torchvision == 0.13.1+cpu',
-'torchaudio == 0.12.1',
+'torch == 2.1.0+cpu',
+'torchvision == 0.16.0+cpu',
+'torchaudio >= 2.1.0',
'pytorch-lightning >= 2.0.0',
# 'simplejpeg',
'albumentations',
-'pyro-ppl == 1.8.0',
+'pyro-ppl >= 1.8.6',
'torchmetrics == 0.11.0',
-'timm == 0.6.12'
+'timm == 0.9.10'
],
-'pytorch_m1': [
+'pytorch-m1': [
# as above but without the +cpu (and the extra-index-url in readme has no effect)
# all matching pytorch versions for an m1 system will be cpu
-'torch == 1.12.1',
-'torchvision == 0.13.1',
-'torchaudio == 0.12.1',
+'torch == 2.1.0',
+'torchvision == 0.16.0',
+'torchaudio >= 2.1.0',
'pytorch-lightning >= 2.0.0',
'albumentations',
-'pyro-ppl == 1.8.0',
+'pyro-ppl >= 1.8.6',
'torchmetrics == 0.11.0',
-'timm == 0.6.12'
+'timm >= 0.9.10'
],
-# as above but without pytorch itself
-# for GPU, you will also need e.g. cudatoolkit=11.3, 11.6
-# https://pytorch.org/get-started/previous-versions/#v1121
-'pytorch_cu113': [
-'torch == 1.12.1+cu113',
-'torchvision == 0.13.1+cu113',
-'torchaudio == 0.12.1',
+'pytorch-cu118': [
+'torch == 2.1.0+cu118',
+'torchvision == 0.16.0+cu118',
+'torchaudio >= 2.1.0',
'pytorch-lightning >= 2.0.0',
'albumentations',
-'pyro-ppl == 1.8.0',
+'pyro-ppl >= 1.8.6',
'torchmetrics == 0.11.0',
-'timm == 0.6.12'
-],
-'pytorch_colab': [
+'timm >= 0.9.10'
+], # exactly as above, but _cu121 for cuda 12.1 (the current default)
+'pytorch-cu121': [
+'torch == 2.1.0+cu121',
+'torchvision == 0.16.0+cu121',
+'torchaudio >= 2.1.0',
+'pytorch-lightning >= 2.0.0',
+'albumentations',
+'pyro-ppl >= 1.8.6',
+'torchmetrics == 0.11.0',
+'timm >= 0.9.10'
+],
+'pytorch-colab': [
# colab includes pytorch already
'pytorch-lightning >= 2.0.0',
'albumentations',
'pyro-ppl>=1.8.0',
'torchmetrics==0.11.0',
-'timm == 0.6.12'
+'timm == 0.9.10'
],
# TODO may add narval/Digital Research Canada config
'tensorflow': [
'tensorflow == 2.10.0', # 2.11.0 turns on XLA somewhere which then fails on multi-GPU...TODO
'keras_applications',
@@ -95,13 +107,12 @@
'pandas',
'scipy',
'astropy', # for reading fits
-# 'scikit-image >= 0.19.2', # TODO remove
'scikit-learn >= 1.0.2',
'matplotlib',
'pyarrow', # to read parquet, which is very handy for big datasets
# for saving metrics to weights&biases (cloud service, free within limits)
'wandb',
-'setuptools==59.5.0', # wandb logger incompatibility
-'galaxy-datasets==0.0.15' # for dataset loading in both TF and Torch (renamed from pytorch-galaxy-datasets)
+'setuptools', # no longer pinned
+'galaxy-datasets>=0.0.15' # for dataset loading in both TF and Torch (see github/mwalmsley/galaxy-datasets)
]
)
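After installing any of the PyTorch extras, a quick check (not part of the diff) that the environment resolved to these pins:

python -c "import torch, torchvision, timm; print(torch.__version__, torchvision.__version__, timm.__version__)"
# expect something like: 2.1.0+cu121 0.16.0+cu121 0.9.10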
@@ -27,9 +27,10 @@
# To support more complicated labels, Zoobot expects a list of columns. A list with one element works fine.

# load a pretrained checkpoint saved here
-# checkpoint_loc = os.path.join(zoobot_dir, 'data/pretrained_models/temp/dr5_py_gr_2270/checkpoints/epoch=360-step=231762.ckpt')
-checkpoint_loc = '/Users/user/repos/gz-decals-classifiers/results/benchmarks/pytorch/dr5/dr5_py_gr_15366/checkpoints/epoch=58-step=18939.ckpt'
+# https://www.dropbox.com/s/7ixwo59imjfz4ay/effnetb0_greyscale_224px.ckpt?dl=0
+# see https://zoobot.readthedocs.io/en/latest/data_notes.html for more options
+checkpoint_loc = os.path.join(zoobot_dir, 'data/pretrained_models/pytorch/effnetb0_greyscale_224px.ckpt')

# save the finetuning results here
save_dir = os.path.join(zoobot_dir, 'results/pytorch/finetune/finetune_binary_classification')

@@ -70,8 +71,9 @@
finetuned_model,
n_samples=1,
label_cols=label_cols,
-save_loc=os.path.join(save_dir, 'finetuned_predictions.csv')
-# trainer_kwargs={'accelerator': 'gpu'}
+save_loc=os.path.join(save_dir, 'finetuned_predictions.csv'),
+datamodule_kwargs={'batch_size': 32}, # we also need to set batch size here, or you may run out of memory
+trainer_kwargs={'accelerator': 'gpu'}
)
"""
Under the hood, this is essentially doing:
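(the rest of this docstring is truncated in the diff view; roughly, and as a sketch rather than the verbatim file contents, it continues:)

# sketch only - assumes galaxy-datasets' GalaxyDataModule, used elsewhere in Zoobot
import pytorch_lightning as pl
from galaxy_datasets.pytorch.galaxy_datamodule import GalaxyDataModule

datamodule = GalaxyDataModule(predict_catalog=test_catalog, batch_size=32)
trainer = pl.Trainer(accelerator='gpu', devices=1)
batched_preds = trainer.predict(finetuned_model, datamodule)  # list of per-batch prediction tensors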
@@ -17,7 +17,7 @@
batch_size = 32
num_workers= 8
n_blocks = 1 # EffnetB0 is divided into 7 blocks. set 0 to only fit the head weights. Set 1, 2, etc to finetune deeper.
-max_epochs = 6 # 6 epochs should get you ~93% accuracy. Set much higher (e.g. 1000) for harder problems, to use Zoobot's default early stopping.
+max_epochs = 1 # 6 epochs should get you ~93% accuracy. Set much higher (e.g. 1000) for harder problems, to use Zoobot's default early stopping.
# the remaining key parameters for high accuracy are weight_decay, learning_rate, and lr_decay. You might like to tinker with these.

# load in catalogs of images and labels to finetune on
@@ -83,7 +83,7 @@
test_catalog,
finetuned_model,
n_samples=1,
-label_cols=label_cols,
+label_cols=['class_{}'.format(n) for n in range(num_classes)], # TODO feel free to rename, it's just for the csv header
save_loc=predictions_save_loc,
trainer_kwargs={'accelerator': 'auto'},
datamodule_kwargs={'batch_size': batch_size, 'num_workers': num_workers}
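As an illustration (not part of the diff), the new label_cols expression just builds generic column names for the predictions csv:

num_classes = 4
label_cols = ['class_{}'.format(n) for n in range(num_classes)]
print(label_cols)  # ['class_0', 'class_1', 'class_2', 'class_3'] - one column of predicted scores per class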
11 changes: 9 additions & 2 deletions zoobot/pytorch/predictions/predict_on_catalog.py
@@ -55,12 +55,19 @@ def predict(catalog: pd.DataFrame, model: pl.LightningModule, n_samples: int, la
start = datetime.datetime.fromtimestamp(time.time())
logging.info('Starting at: {}'.format(start.strftime('%Y-%m-%d %H:%M:%S')))

-logging.info(len(trainer.predict(model, predict_datamodule)))
+# logging.info(len(trainer.predict(model, predict_datamodule)))

-# trainer.predict gives list of tensors, each tensor being predictions for a batch. Concat on axis 0.
-# range(n_samples) list comprehension repeats this, for dropout-permuted predictions. Stack to create new last axis.
-# final shape (n_galaxies, n_answers, n_samples)
-predictions = torch.stack([torch.concat(trainer.predict(model, predict_datamodule), dim=0) for n in range(n_samples)], dim=-1).numpy()
+predictions = torch.stack(
+    [
+        # trainer.predict gives [(galaxy, answer), ...] list, batchwise
+        # concat batches
+        torch.concat(trainer.predict(model, predict_datamodule), dim=0)
+        for n in range(n_samples)
+    ],
+    dim=-1).numpy()  # now stack on final dim for (galaxy, answer, dropout) shape
logging.info('Predictions complete - {}'.format(predictions.shape))

logging.info(f'Saving predictions to {save_loc}')
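A toy illustration (not from the repo) of the concat-then-stack shape logic above:

import torch

n_samples = 5  # dropout forward passes
batches = [torch.zeros(32, 4), torch.zeros(32, 4), torch.zeros(8, 4)]  # 72 galaxies, 4 answers, in 3 batches
one_pass = torch.concat(batches, dim=0)  # (72, 4): every galaxy, one forward pass
stacked = torch.stack([one_pass for _ in range(n_samples)], dim=-1)  # (72, 4, 5): galaxy, answer, sample
print(stacked.shape)  # torch.Size([72, 4, 5])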