Skip to content

Commit

Permalink
1 gpu
Browse files Browse the repository at this point in the history
  • Loading branch information
mwalmsley committed Nov 7, 2023
1 parent a49904c commit 0f67ab6
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 4 deletions.
5 changes: 4 additions & 1 deletion only_for_me/narval/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from zoobot.pytorch.training import train_with_pytorch_lightning
from zoobot.shared import benchmark_datasets, schemas

import pytorch_lightning as pl


if __name__ == '__main__':

Expand Down Expand Up @@ -47,6 +49,7 @@
logging.basicConfig(level=logging.INFO)

random_state = args.random_state
pl.seed_everything(random_state)

# if args.nodes > 1:
# # at Manchester, our slurm cluster sets TASKS not NTASKS, which then confuses lightning
Expand Down Expand Up @@ -118,7 +121,7 @@
resize_after_crop=args.resize_after_crop,
# hardware parameters
# gpus=args.gpus,
- gpus=2,
+ gpus=1,
nodes=args.nodes,
mixed_precision=args.mixed_precision,
wandb_logger=wandb_logger,
Expand Down
6 changes: 3 additions & 3 deletions only_for_me/narval/train.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/bin/bash
#SBATCH --time=2:50:0
#SBATCH --nodes=1
- #SBATCH --ntasks=2
- #SBATCH --ntasks-per-node=2
+ #SBATCH --ntasks=1
+ #SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=10
#SBATCH --mem-per-cpu 4G
- #SBATCH --gres=gpu:v100:2
+ #SBATCH --gres=gpu:v100:1

nvidia-smi

Expand Down
26 changes: 26 additions & 0 deletions zoobot/pytorch/training/debug_split.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,32 @@
"wds.utils.pytorch_worker_info()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['5', '2']"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from itertools import islice\n",
"\n",
"def get_per_worker(urls={str(x) for x in range(10)}, worker_n=1, num_workers=5):\n",
" for s in islice(urls, worker_n, None, num_workers):\n",
" yield s\n",
"\n",
"list(get_per_worker(worker_n=0))"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down

0 comments on commit 0f67ab6

Please sign in to comment.