diff --git a/.gitignore b/.gitignore index 7dc1495d..7ca6b17d 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ lib64/ parts/ sdist/ var/ +tmp/ wheels/ share/python-wheels/ *.egg-info/ diff --git a/configs/vision/dino_vit/offline/bach.yaml b/configs/vision/dino_vit/offline/bach.yaml index 7bcb7e3b..610338ed 100644 --- a/configs/vision/dino_vit/offline/bach.yaml +++ b/configs/vision/dino_vit/offline/bach.yaml @@ -2,7 +2,8 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &LIGHTNING_ROOT ${oc.env:LIGHTNING_ROOT, logs/dino_vits16/offline/bach} + n_runs: 5 + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:DINO_BACKBONE, dino_vits16}/offline/bach} max_steps: &MAX_STEPS 12500 callbacks: - class_path: pytorch_lightning.callbacks.LearningRateMonitor @@ -23,7 +24,7 @@ trainer: mode: *MONITOR_METRIC_MODE - class_path: eva.callbacks.EmbeddingsWriter init_args: - output_dir: &EMBEDDINGS_DIR ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/bach + output_dir: &EMBEDDINGS_DIR ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/${oc.env:DINO_BACKBONE, dino_vits16}/bach dataloader_idx_map: 0: train 1: val @@ -35,10 +36,11 @@ trainer: repo_or_dir: facebookresearch/dino:main model: ${oc.env:DINO_BACKBONE, dino_vits16} pretrained: ${oc.env:PRETRAINED, true} + checkpoint_path: ${oc.env:CHECKPOINT_PATH, null} logger: - class_path: pytorch_lightning.loggers.TensorBoardLogger init_args: - save_dir: *LIGHTNING_ROOT + save_dir: *OUTPUT_ROOT name: "" model: class_path: eva.HeadModule diff --git a/configs/vision/dino_vit/offline/crc.yaml b/configs/vision/dino_vit/offline/crc.yaml index 4d12c6e3..465f28ce 100644 --- a/configs/vision/dino_vit/offline/crc.yaml +++ b/configs/vision/dino_vit/offline/crc.yaml @@ -2,7 +2,8 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &LIGHTNING_ROOT ${oc.env:LIGHTNING_ROOT, logs/dino_vits16/offline/crc} + n_runs: 5 + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:DINO_BACKBONE, dino_vits16}/offline/crc} max_steps: &MAX_STEPS 12500 callbacks: - class_path: pytorch_lightning.callbacks.LearningRateMonitor @@ -23,7 +24,7 @@ trainer: mode: *MONITOR_METRIC_MODE - class_path: eva.callbacks.EmbeddingsWriter init_args: - output_dir: &EMBEDDINGS_DIR ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/crc + output_dir: &EMBEDDINGS_DIR ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/${oc.env:DINO_BACKBONE, dino_vits16}/crc dataloader_idx_map: 0: train 1: val @@ -35,10 +36,11 @@ trainer: repo_or_dir: facebookresearch/dino:main model: ${oc.env:DINO_BACKBONE, dino_vits16} pretrained: ${oc.env:PRETRAINED, true} + checkpoint_path: ${oc.env:CHECKPOINT_PATH, null} logger: - class_path: pytorch_lightning.loggers.TensorBoardLogger init_args: - save_dir: *LIGHTNING_ROOT + save_dir: *OUTPUT_ROOT name: "" model: class_path: eva.HeadModule diff --git a/configs/vision/dino_vit/offline/crc_nonorm.yaml b/configs/vision/dino_vit/offline/crc_nonorm.yaml index c04bd753..5d9a41eb 100644 --- a/configs/vision/dino_vit/offline/crc_nonorm.yaml +++ b/configs/vision/dino_vit/offline/crc_nonorm.yaml @@ -2,7 +2,8 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &LIGHTNING_ROOT ${oc.env:LIGHTNING_ROOT, logs/dino_vits16/offline/crc_nonorm} + n_runs: 5 + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:DINO_BACKBONE, dino_vits16}/offline/crc_nonorm} max_steps: &MAX_STEPS 12500 callbacks: - class_path: pytorch_lightning.callbacks.LearningRateMonitor @@ -23,7 +24,7 @@ trainer: mode: *MONITOR_METRIC_MODE - class_path: 
eva.callbacks.EmbeddingsWriter init_args: - output_dir: &EMBEDDINGS_DIR ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/crc_nonorm + output_dir: &EMBEDDINGS_DIR ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/${oc.env:DINO_BACKBONE, dino_vits16}/crc_nonorm dataloader_idx_map: 0: train 1: val @@ -35,10 +36,11 @@ trainer: repo_or_dir: facebookresearch/dino:main model: ${oc.env:DINO_BACKBONE, dino_vits16} pretrained: ${oc.env:PRETRAINED, true} + checkpoint_path: ${oc.env:CHECKPOINT_PATH, null} logger: - class_path: pytorch_lightning.loggers.TensorBoardLogger init_args: - save_dir: *LIGHTNING_ROOT + save_dir: *OUTPUT_ROOT name: "" model: class_path: eva.HeadModule diff --git a/configs/vision/dino_vit/offline/patch_camelyon.yaml b/configs/vision/dino_vit/offline/patch_camelyon.yaml index d4813dd3..b695a1b2 100644 --- a/configs/vision/dino_vit/offline/patch_camelyon.yaml +++ b/configs/vision/dino_vit/offline/patch_camelyon.yaml @@ -2,7 +2,8 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &LIGHTNING_ROOT ${oc.env:LIGHTNING_ROOT, logs/dino_vits16/offline/patch_camelyon} + n_runs: 5 + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:DINO_BACKBONE, dino_vits16}/offline/patch_camelyon} max_steps: &MAX_STEPS 12500 callbacks: - class_path: pytorch_lightning.callbacks.LearningRateMonitor @@ -23,7 +24,7 @@ trainer: mode: *MONITOR_METRIC_MODE - class_path: eva.callbacks.EmbeddingsWriter init_args: - output_dir: &EMBEDDINGS_DIR ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/patch_camelyon + output_dir: &EMBEDDINGS_DIR ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/${oc.env:DINO_BACKBONE, dino_vits16}/patch_camelyon dataloader_idx_map: 0: train 1: val @@ -36,10 +37,11 @@ trainer: repo_or_dir: facebookresearch/dino:main model: ${oc.env:DINO_BACKBONE, dino_vits16} pretrained: ${oc.env:PRETRAINED, true} + checkpoint_path: ${oc.env:CHECKPOINT_PATH, null} logger: - class_path: pytorch_lightning.loggers.TensorBoardLogger init_args: - save_dir: *LIGHTNING_ROOT + save_dir: *OUTPUT_ROOT name: "" model: class_path: eva.HeadModule diff --git a/configs/vision/dino_vit/online/bach.yaml b/configs/vision/dino_vit/online/bach.yaml index 3a1fb3d8..32648c55 100644 --- a/configs/vision/dino_vit/online/bach.yaml +++ b/configs/vision/dino_vit/online/bach.yaml @@ -2,7 +2,7 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &LIGHTNING_ROOT ${oc.env:LIGHTNING_ROOT, logs/dino_vits16/online/bach} + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:DINO_BACKBONE, dino_vits16}/online/bach} max_steps: &MAX_STEPS 12500 callbacks: - class_path: pytorch_lightning.callbacks.LearningRateMonitor @@ -24,7 +24,7 @@ trainer: logger: - class_path: pytorch_lightning.loggers.TensorBoardLogger init_args: - save_dir: *LIGHTNING_ROOT + save_dir: *OUTPUT_ROOT name: "" model: class_path: eva.HeadModule diff --git a/configs/vision/dino_vit/online/crc.yaml b/configs/vision/dino_vit/online/crc.yaml index 3f4dc333..b94a272c 100644 --- a/configs/vision/dino_vit/online/crc.yaml +++ b/configs/vision/dino_vit/online/crc.yaml @@ -2,7 +2,7 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &LIGHTNING_ROOT ${oc.env:LIGHTNING_ROOT, logs/dino_vits16/online/crc} + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:DINO_BACKBONE, dino_vits16}/online/crc} max_steps: &MAX_STEPS 12500 callbacks: - class_path: pytorch_lightning.callbacks.LearningRateMonitor @@ -24,7 +24,7 @@ trainer: logger: - class_path: pytorch_lightning.loggers.TensorBoardLogger init_args: - save_dir: 
*LIGHTNING_ROOT + save_dir: *OUTPUT_ROOT name: "" model: class_path: eva.HeadModule @@ -37,6 +37,7 @@ model: repo_or_dir: facebookresearch/dino:main model: ${oc.env:DINO_BACKBONE, dino_vits16} pretrained: ${oc.env:PRETRAINED, true} + checkpoint_path: &CHECKPOINT_PATH ${oc.env:CHECKPOINT_PATH, null} head: class_path: torch.nn.Linear init_args: diff --git a/configs/vision/dino_vit/online/crc_nonorm.yaml b/configs/vision/dino_vit/online/crc_nonorm.yaml index 21524ff5..ed79e45e 100644 --- a/configs/vision/dino_vit/online/crc_nonorm.yaml +++ b/configs/vision/dino_vit/online/crc_nonorm.yaml @@ -2,7 +2,7 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &LIGHTNING_ROOT ${oc.env:LIGHTNING_ROOT, logs/dino_vits16/online/crc_nonorm} + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:DINO_BACKBONE, dino_vits16}/online/crc_nonorm} max_steps: &MAX_STEPS 12500 callbacks: - class_path: pytorch_lightning.callbacks.LearningRateMonitor @@ -24,7 +24,7 @@ trainer: logger: - class_path: pytorch_lightning.loggers.TensorBoardLogger init_args: - save_dir: *LIGHTNING_ROOT + save_dir: *OUTPUT_ROOT name: "" model: class_path: eva.HeadModule @@ -37,6 +37,7 @@ model: repo_or_dir: facebookresearch/dino:main model: ${oc.env:DINO_BACKBONE, dino_vits16} pretrained: ${oc.env:PRETRAINED, true} + checkpoint_path: &CHECKPOINT_PATH ${oc.env:CHECKPOINT_PATH, null} head: class_path: torch.nn.Linear init_args: diff --git a/configs/vision/dino_vit/online/patch_camelyon.yaml b/configs/vision/dino_vit/online/patch_camelyon.yaml index 24900383..ed88b82f 100644 --- a/configs/vision/dino_vit/online/patch_camelyon.yaml +++ b/configs/vision/dino_vit/online/patch_camelyon.yaml @@ -2,7 +2,7 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &LIGHTNING_ROOT ${oc.env:LIGHTNING_ROOT, logs/dino_vits16/online/patch_camelyon} + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:DINO_BACKBONE, dino_vits16}/online/patch_camelyon} max_steps: &MAX_STEPS 12500 callbacks: - class_path: pytorch_lightning.callbacks.LearningRateMonitor @@ -24,7 +24,7 @@ trainer: logger: - class_path: pytorch_lightning.loggers.TensorBoardLogger init_args: - save_dir: *LIGHTNING_ROOT + save_dir: *OUTPUT_ROOT name: "" model: class_path: eva.HeadModule @@ -37,7 +37,7 @@ model: repo_or_dir: facebookresearch/dino:main model: ${oc.env:DINO_BACKBONE, dino_vits16} pretrained: ${oc.env:PRETRAINED, true} - checkpoint_path: &CHECKPOINT_PATH ${oc.env:CHECKPOINT_PATH, null} + checkpoint_path: ${oc.env:CHECKPOINT_PATH, null} head: class_path: torch.nn.Linear init_args: diff --git a/configs/vision/owkin/phikon/offline/bach.yaml b/configs/vision/owkin/phikon/offline/bach.yaml new file mode 100644 index 00000000..14badfc0 --- /dev/null +++ b/configs/vision/owkin/phikon/offline/bach.yaml @@ -0,0 +1,107 @@ +--- +trainer: + class_path: eva.Trainer + init_args: + n_runs: 5 + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:DINO_BACKBONE, owkin/phikon}/offline/bach} + max_steps: &MAX_STEPS 12500 + callbacks: + - class_path: pytorch_lightning.callbacks.LearningRateMonitor + init_args: + logging_interval: epoch + - class_path: pytorch_lightning.callbacks.ModelCheckpoint + init_args: + filename: best + save_last: true + save_top_k: 1 + monitor: &MONITOR_METRIC ${oc.env:MONITOR_METRIC, val/MulticlassAccuracy} + mode: &MONITOR_METRIC_MODE ${oc.env:MONITOR_METRIC_MODE, max} + - class_path: pytorch_lightning.callbacks.EarlyStopping + init_args: + min_delta: 0 + patience: 800 + monitor: *MONITOR_METRIC + mode: 
*MONITOR_METRIC_MODE + - class_path: eva.callbacks.EmbeddingsWriter + init_args: + output_dir: &EMBEDDINGS_DIR ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/${oc.env:DINO_BACKBONE, owkin/phikon}/bach + dataloader_idx_map: + 0: train + 1: val + backbone: + class_path: eva.models.wrappers.HuggingFaceModel + init_args: + model_name_or_path: owkin/phikon + tensor_transforms: + class_path: eva.vision.data.transforms.model_output.ExtractCLSFeatures + logger: + - class_path: pytorch_lightning.loggers.TensorBoardLogger + init_args: + save_dir: *OUTPUT_ROOT + name: "" +model: + class_path: eva.HeadModule + init_args: + head: + class_path: torch.nn.Linear + init_args: + in_features: ${oc.env:IN_FEATURES, 768} + out_features: &NUM_CLASSES 4 + criterion: torch.nn.CrossEntropyLoss + optimizer: + class_path: torch.optim.SGD + init_args: + lr: &LR_VALUE 0.00064 + momentum: 0.9 + weight_decay: 0.0 + lr_scheduler: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: *MAX_STEPS + eta_min: 0.0 + metrics: + common: + - class_path: eva.metrics.AverageLoss + - class_path: eva.metrics.MulticlassClassificationMetrics + init_args: + num_classes: *NUM_CLASSES +data: + class_path: eva.DataModule + init_args: + datasets: + train: + class_path: eva.vision.data.datasets.embeddings.PatchEmbeddingDataset + init_args: &DATASET_ARGS + root: *EMBEDDINGS_DIR + split: train + column_mapping: + path: embedding + val: + class_path: eva.vision.data.datasets.embeddings.PatchEmbeddingDataset + init_args: + <<: *DATASET_ARGS + split: val + predict: + - class_path: eva.vision.datasets.BACH + init_args: &PREDICT_DATASET_ARGS + root: ${oc.env:DATA_ROOT, ./data}/bach + split: train + download: ${oc.env:DOWNLOAD_DATA, true} + image_transforms: + class_path: eva.vision.data.transforms.common.ResizeAndCrop + init_args: + size: ${oc.env:RESIZE_DIM, 224} + mean: ${oc.env:NORMALIZE_MEAN, [0.485, 0.456, 0.406]} + std: ${oc.env:NORMALIZE_STD, [0.229, 0.224, 0.225]} + - class_path: eva.vision.datasets.BACH + init_args: + <<: *PREDICT_DATASET_ARGS + split: val + dataloaders: + train: + batch_size: &BATCH_SIZE 256 + shuffle: true + val: + batch_size: *BATCH_SIZE + predict: + batch_size: &PREDICT_BATCH_SIZE ${oc.env:PREDICT_BATCH_SIZE, 128} diff --git a/configs/vision/owkin/phikon/offline/crc.yaml b/configs/vision/owkin/phikon/offline/crc.yaml new file mode 100644 index 00000000..80b41400 --- /dev/null +++ b/configs/vision/owkin/phikon/offline/crc.yaml @@ -0,0 +1,108 @@ +--- +trainer: + class_path: eva.Trainer + init_args: + n_runs: 5 + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:DINO_BACKBONE, owkin/phikon}/offline/crc} + max_steps: &MAX_STEPS 12500 + callbacks: + - class_path: pytorch_lightning.callbacks.LearningRateMonitor + init_args: + logging_interval: epoch + - class_path: pytorch_lightning.callbacks.ModelCheckpoint + init_args: + filename: best + save_last: true + save_top_k: 1 + monitor: &MONITOR_METRIC ${oc.env:MONITOR_METRIC, val/MulticlassAccuracy} + mode: &MONITOR_METRIC_MODE ${oc.env:MONITOR_METRIC_MODE, max} + - class_path: pytorch_lightning.callbacks.EarlyStopping + init_args: + min_delta: 0 + patience: 48 + monitor: *MONITOR_METRIC + mode: *MONITOR_METRIC_MODE + - class_path: eva.callbacks.EmbeddingsWriter + init_args: + output_dir: &EMBEDDINGS_DIR ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/${oc.env:DINO_BACKBONE, owkin/phikon}/crc + dataloader_idx_map: + 0: train + 1: val + backbone: + class_path: eva.models.wrappers.HuggingFaceModel + init_args: + model_name_or_path: owkin/phikon 
+ tensor_transforms: + class_path: eva.vision.data.transforms.model_output.ExtractCLSFeatures + logger: + - class_path: pytorch_lightning.loggers.TensorBoardLogger + init_args: + save_dir: *OUTPUT_ROOT + name: "" +model: + class_path: eva.HeadModule + init_args: + head: + class_path: torch.nn.Linear + init_args: + in_features: ${oc.env:IN_FEATURES, 768} + out_features: &NUM_CLASSES 9 + criterion: torch.nn.CrossEntropyLoss + optimizer: + class_path: torch.optim.SGD + init_args: + lr: &LR_VALUE 0.01 + momentum: 0.9 + weight_decay: 0.0 + lr_scheduler: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: *MAX_STEPS + eta_min: 0.0 + metrics: + common: + - class_path: eva.metrics.AverageLoss + - class_path: eva.metrics.MulticlassClassificationMetrics + init_args: + num_classes: *NUM_CLASSES +data: + class_path: eva.DataModule + init_args: + datasets: + train: + class_path: eva.vision.data.datasets.embeddings.PatchEmbeddingDataset + init_args: &DATASET_ARGS + root: *EMBEDDINGS_DIR + split: train + column_mapping: + path: embedding + val: + class_path: eva.vision.data.datasets.embeddings.PatchEmbeddingDataset + init_args: + <<: *DATASET_ARGS + split: val + predict: + - class_path: eva.vision.datasets.CRC + init_args: &PREDICT_DATASET_ARGS + root: ${oc.env:DATA_ROOT, ./data}/crc + split: train + download: ${oc.env:DOWNLOAD_DATA, true} + image_transforms: + class_path: eva.vision.data.transforms.common.ResizeAndCrop + init_args: + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + - class_path: eva.vision.datasets.CRC + init_args: + <<: *PREDICT_DATASET_ARGS + split: val + dataloaders: + train: + batch_size: &BATCH_SIZE ${oc.env:BATCH_SIZE, 4096} + shuffle: true + val: + batch_size: *BATCH_SIZE + test: + batch_size: *BATCH_SIZE + predict: + batch_size: &PREDICT_BATCH_SIZE ${oc.env:PREDICT_BATCH_SIZE, 128} diff --git a/configs/vision/owkin/phikon/offline/patch_camelyon.yaml b/configs/vision/owkin/phikon/offline/patch_camelyon.yaml new file mode 100644 index 00000000..53dc5358 --- /dev/null +++ b/configs/vision/owkin/phikon/offline/patch_camelyon.yaml @@ -0,0 +1,119 @@ +--- +trainer: + class_path: eva.Trainer + init_args: + n_runs: 5 + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:DINO_BACKBONE, owkin/phikon}/offline/patch_camelyon} + max_steps: &MAX_STEPS 12500 + callbacks: + - class_path: pytorch_lightning.callbacks.LearningRateMonitor + init_args: + logging_interval: epoch + - class_path: pytorch_lightning.callbacks.ModelCheckpoint + init_args: + filename: best + save_last: true + save_top_k: 1 + monitor: &MONITOR_METRIC ${oc.env:MONITOR_METRIC, val/BinaryAccuracy} + mode: &MONITOR_METRIC_MODE ${oc.env:MONITOR_METRIC_MODE, max} + - class_path: pytorch_lightning.callbacks.EarlyStopping + init_args: + min_delta: 0 + patience: 17 + monitor: *MONITOR_METRIC + mode: *MONITOR_METRIC_MODE + - class_path: eva.callbacks.EmbeddingsWriter + init_args: + output_dir: &EMBEDDINGS_DIR ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/${oc.env:DINO_BACKBONE, owkin/phikon}/patch_camelyon + dataloader_idx_map: + 0: train + 1: val + 2: test + backbone: + class_path: eva.models.wrappers.HuggingFaceModel + init_args: + model_name_or_path: owkin/phikon + tensor_transforms: + class_path: eva.vision.data.transforms.model_output.ExtractCLSFeatures + logger: + - class_path: pytorch_lightning.loggers.TensorBoardLogger + init_args: + save_dir: *OUTPUT_ROOT + name: "" +model: + class_path: eva.HeadModule + init_args: + head: + class_path: torch.nn.Linear + init_args: + 
in_features: ${oc.env:IN_FEATURES, 768} + out_features: 1 + criterion: torch.nn.BCEWithLogitsLoss + optimizer: + class_path: torch.optim.SGD + init_args: + lr: &LR_VALUE 0.01 + momentum: 0.9 + weight_decay: 0.0 + lr_scheduler: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: *MAX_STEPS + eta_min: 0.0 + metrics: + common: + - class_path: eva.metrics.AverageLoss + - class_path: eva.metrics.BinaryClassificationMetrics +data: + class_path: eva.DataModule + init_args: + datasets: + train: + class_path: eva.vision.data.datasets.embeddings.PatchEmbeddingDataset + init_args: &DATASET_ARGS + root: *EMBEDDINGS_DIR + split: train + column_mapping: + path: embedding + target_transforms: + class_path: eva.vision.data.transforms.common.ArrayToFloatTensor + val: + class_path: eva.vision.data.datasets.embeddings.PatchEmbeddingDataset + init_args: + <<: *DATASET_ARGS + split: val + test: + class_path: eva.vision.data.datasets.embeddings.PatchEmbeddingDataset + init_args: + <<: *DATASET_ARGS + split: test + predict: + - class_path: eva.vision.datasets.PatchCamelyon + init_args: &PREDICT_DATASET_ARGS + root: ${oc.env:DATA_ROOT, ./data}/patch_camelyon + split: train + download: ${oc.env:DOWNLOAD_DATA, true} + image_transforms: + class_path: eva.vision.data.transforms.common.ResizeAndCrop + init_args: + size: ${oc.env:RESIZE_DIM, 224} + mean: ${oc.env:NORMALIZE_MEAN, [0.485, 0.456, 0.406]} + std: ${oc.env:NORMALIZE_STD, [0.229, 0.224, 0.225]} + - class_path: eva.vision.datasets.PatchCamelyon + init_args: + <<: *PREDICT_DATASET_ARGS + split: val + - class_path: eva.vision.datasets.PatchCamelyon + init_args: + <<: *PREDICT_DATASET_ARGS + split: test + dataloaders: + train: + batch_size: &BATCH_SIZE ${oc.env:BATCH_SIZE, 4096} + shuffle: true + val: + batch_size: *BATCH_SIZE + test: + batch_size: *BATCH_SIZE + predict: + batch_size: &PREDICT_BATCH_SIZE ${oc.env:PREDICT_BATCH_SIZE, 64} diff --git a/configs/vision/tests/offline/patch_camelyon.yaml b/configs/vision/tests/offline/patch_camelyon.yaml index 903f158c..013ebc79 100644 --- a/configs/vision/tests/offline/patch_camelyon.yaml +++ b/configs/vision/tests/offline/patch_camelyon.yaml @@ -2,8 +2,10 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &LIGHTNING_ROOT ${oc.env:LIGHTNING_ROOT, logs/tests/patch_camelyon} + default_root_dir: &LIGHTNING_ROOT ${oc.env:LIGHTNING_ROOT, logs/test/offline/patch_camelyon} max_epochs: &MAX_EPOCHS 20 + limit_train_batches: 2 + limit_val_batches: 2 callbacks: - class_path: eva.callbacks.EmbeddingsWriter init_args: diff --git a/configs/vision/tests/offline/patches.yaml b/configs/vision/tests/offline/patches.yaml index 332f8873..16d91bec 100644 --- a/configs/vision/tests/offline/patches.yaml +++ b/configs/vision/tests/offline/patches.yaml @@ -2,7 +2,7 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &OUTPUT_DIR ${oc.env:OUTPUT_DIR, logs/offline/test_patch} + default_root_dir: &OUTPUT_DIR ${oc.env:OUTPUT_DIR, logs/test/offline/patches} max_epochs: &MAX_EPOCHS 5 callbacks: - class_path: pytorch_lightning.callbacks.LearningRateMonitor diff --git a/configs/vision/tests/offline/slides.yaml b/configs/vision/tests/offline/slides.yaml index f633c82b..34bd9355 100644 --- a/configs/vision/tests/offline/slides.yaml +++ b/configs/vision/tests/offline/slides.yaml @@ -2,7 +2,7 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &OUTPUT_DIR ${oc.env:OUTPUT_DIR, logs/offline/test_patch} + default_root_dir: &OUTPUT_DIR ${oc.env:OUTPUT_DIR, logs/test/offline/slides} 
max_epochs: &MAX_EPOCHS 5 callbacks: - class_path: pytorch_lightning.callbacks.LearningRateMonitor diff --git a/configs/vision/tests/online/patch_camelyon.yaml b/configs/vision/tests/online/patch_camelyon.yaml index a5ec249c..dfc9c5e8 100644 --- a/configs/vision/tests/online/patch_camelyon.yaml +++ b/configs/vision/tests/online/patch_camelyon.yaml @@ -2,7 +2,7 @@ trainer: class_path: eva.Trainer init_args: - default_root_dir: &OUTPUT_DIR ${oc.env:OUTPUT_DIR, logs/dino_vits16/patch_camelyon} + default_root_dir: &LIGHTNING_ROOT ${oc.env:LIGHTNING_ROOT, logs/test/online/patch_camelyon} max_epochs: &MAX_EPOCHS 1 limit_train_batches: 2 limit_val_batches: 2 diff --git a/pdm.lock b/pdm.lock index 4159f9f6..6abb2916 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "all", "dev", "docs", "lint", "test", "vision"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:df00870347f9d9b0548e82038d07dab33e4d3c1e2fe86d0e6eb1cf83ba3fc8ac" +content_hash = "sha256:b16db3ac95e93ef0be3291f1f4984de5d8373c4e853247e4ea39cc90ca1f6aaa" [[package]] name = "absl-py" @@ -290,6 +290,20 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "coloredlogs" +version = "15.0.1" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +summary = "Colored terminal output for Python's logging module" +groups = ["all", "default", "dev"] +dependencies = [ + "humanfriendly>=9.1", +] +files = [ + {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, + {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, +] + [[package]] name = "colorlog" version = "6.8.2" @@ -424,6 +438,16 @@ files = [ {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"}, ] +[[package]] +name = "flatbuffers" +version = "23.5.26" +summary = "The FlatBuffers serialization format for Python" +groups = ["all", "default", "dev"] +files = [ + {file = "flatbuffers-23.5.26-py2.py3-none-any.whl", hash = "sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1"}, + {file = "flatbuffers-23.5.26.tar.gz", hash = "sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89"}, +] + [[package]] name = "frozenlist" version = "1.4.1" @@ -618,6 +642,20 @@ files = [ {file = "huggingface_hub-0.20.3.tar.gz", hash = "sha256:94e7f8e074475fbc67d6a71957b678e1b4a74ff1b64a644fd6cbb83da962d05d"}, ] +[[package]] +name = "humanfriendly" +version = "10.0" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +summary = "Human friendly output for text interfaces using Python" +groups = ["all", "default", "dev"] +dependencies = [ + "pyreadline3; sys_platform == \"win32\" and python_version >= \"3.8\"", +] +files = [ + {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, + {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, +] + [[package]] name = "idna" version = "3.6" @@ -1280,6 +1318,63 @@ files = [ {file = "omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7"}, ] +[[package]] +name = "onnx" +version = "1.15.0" +requires_python = ">=3.8" +summary = "Open Neural Network 
Exchange" +groups = ["default"] +dependencies = [ + "numpy", + "protobuf>=3.20.2", +] +files = [ + {file = "onnx-1.15.0-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:51cacb6aafba308aaf462252ced562111f6991cdc7bc57a6c554c3519453a8ff"}, + {file = "onnx-1.15.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:0aee26b6f7f7da7e840de75ad9195a77a147d0662c94eaa6483be13ba468ffc1"}, + {file = "onnx-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baf6ef6c93b3b843edb97a8d5b3d229a1301984f3f8dee859c29634d2083e6f9"}, + {file = "onnx-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ed899fe6000edc05bb2828863d3841cfddd5a7cf04c1a771f112e94de75d9f"}, + {file = "onnx-1.15.0-cp310-cp310-win32.whl", hash = "sha256:f1ad3d77fc2f4b4296f0ac2c8cadd8c1dcf765fc586b737462d3a0fe8f7c696a"}, + {file = "onnx-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:ca4ebc4f47109bfb12c8c9e83dd99ec5c9f07d2e5f05976356c6ccdce3552010"}, + {file = "onnx-1.15.0-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:233ffdb5ca8cc2d960b10965a763910c0830b64b450376da59207f454701f343"}, + {file = "onnx-1.15.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:51fa79c9ea9af033638ec51f9177b8e76c55fad65bb83ea96ee88fafade18ee7"}, + {file = "onnx-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f277d4861729f5253a51fa41ce91bfec1c4574ee41b5637056b43500917295ce"}, + {file = "onnx-1.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8a7c94d2ebead8f739fdb70d1ce5a71726f4e17b3e5b8ad64455ea1b2801a85"}, + {file = "onnx-1.15.0-cp311-cp311-win32.whl", hash = "sha256:17dcfb86a8c6bdc3971443c29b023dd9c90ff1d15d8baecee0747a6b7f74e650"}, + {file = "onnx-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:60a3e28747e305cd2e766e6a53a0a6d952cf9e72005ec6023ce5e07666676a4e"}, + {file = "onnx-1.15.0.tar.gz", hash = "sha256:b18461a7d38f286618ca2a6e78062a2a9c634ce498e631e708a8041b00094825"}, +] + +[[package]] +name = "onnxruntime" +version = "1.17.1" +summary = "ONNX Runtime is a runtime accelerator for Machine Learning models" +groups = ["all", "default", "dev"] +dependencies = [ + "coloredlogs", + "flatbuffers", + "numpy>=1.21.6", + "packaging", + "protobuf", + "sympy", +] +files = [ + {file = "onnxruntime-1.17.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:d43ac17ac4fa3c9096ad3c0e5255bb41fd134560212dc124e7f52c3159af5d21"}, + {file = "onnxruntime-1.17.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55b5e92a4c76a23981c998078b9bf6145e4fb0b016321a8274b1607bd3c6bd35"}, + {file = "onnxruntime-1.17.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ebbcd2bc3a066cf54e6f18c75708eb4d309ef42be54606d22e5bdd78afc5b0d7"}, + {file = "onnxruntime-1.17.1-cp310-cp310-win32.whl", hash = "sha256:5e3716b5eec9092e29a8d17aab55e737480487deabfca7eac3cd3ed952b6ada9"}, + {file = "onnxruntime-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:fbb98cced6782ae1bb799cc74ddcbbeeae8819f3ad1d942a74d88e72b6511337"}, + {file = "onnxruntime-1.17.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:36fd6f87a1ecad87e9c652e42407a50fb305374f9a31d71293eb231caae18784"}, + {file = "onnxruntime-1.17.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99a8bddeb538edabc524d468edb60ad4722cff8a49d66f4e280c39eace70500b"}, + {file = "onnxruntime-1.17.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:fd7fddb4311deb5a7d3390cd8e9b3912d4d963efbe4dfe075edbaf18d01c024e"}, + {file = "onnxruntime-1.17.1-cp311-cp311-win32.whl", hash = "sha256:606a7cbfb6680202b0e4f1890881041ffc3ac6e41760a25763bd9fe146f0b335"}, + {file = "onnxruntime-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:53e4e06c0a541696ebdf96085fd9390304b7b04b748a19e02cf3b35c869a1e76"}, + {file = "onnxruntime-1.17.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:40f08e378e0f85929712a2b2c9b9a9cc400a90c8a8ca741d1d92c00abec60843"}, + {file = "onnxruntime-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac79da6d3e1bb4590f1dad4bb3c2979d7228555f92bb39820889af8b8e6bd472"}, + {file = "onnxruntime-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ae9ba47dc099004e3781f2d0814ad710a13c868c739ab086fc697524061695ea"}, + {file = "onnxruntime-1.17.1-cp312-cp312-win32.whl", hash = "sha256:2dff1a24354220ac30e4a4ce2fb1df38cb1ea59f7dac2c116238d63fe7f4c5ff"}, + {file = "onnxruntime-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:6226a5201ab8cafb15e12e72ff2a4fc8f50654e8fa5737c6f0bd57c5ff66827e"}, +] + [[package]] name = "opencv-python-headless" version = "4.9.0.80" @@ -1541,6 +1636,17 @@ files = [ {file = "pymdown_extensions-10.7.tar.gz", hash = "sha256:c0d64d5cf62566f59e6b2b690a4095c931107c250a8c8e1351c1de5f6b036deb"}, ] +[[package]] +name = "pyreadline3" +version = "3.4.1" +summary = "A python implementation of GNU readline." +groups = ["all", "default", "dev"] +marker = "sys_platform == \"win32\" and python_version >= \"3.8\"" +files = [ + {file = "pyreadline3-3.4.1-py3-none-any.whl", hash = "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"}, + {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, +] + [[package]] name = "pyright" version = "1.1.351" @@ -1687,7 +1793,7 @@ name = "regex" version = "2023.12.25" requires_python = ">=3.7" summary = "Alternative regular expression module, to replace re." 
-groups = ["dev", "docs"] +groups = ["all", "default", "dev", "docs"] files = [ {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0694219a1d54336fd0445ea382d49d36882415c0134ee1e8332afd1529f0baa5"}, {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b014333bd0217ad3d54c143de9d4b9a3ca1c5a29a6d0d554952ea071cff0f1f8"}, @@ -1980,6 +2086,92 @@ files = [ {file = "timm-0.9.16.tar.gz", hash = "sha256:891e54f375d55adf31a71ab0c117761f0e472f9f3971858ecdd1e7376b7071e6"}, ] +[[package]] +name = "tokenizers" +version = "0.15.2" +requires_python = ">=3.7" +summary = "" +groups = ["all", "default", "dev"] +dependencies = [ + "huggingface-hub<1.0,>=0.16.4", +] +files = [ + {file = "tokenizers-0.15.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:52f6130c9cbf70544287575a985bf44ae1bda2da7e8c24e97716080593638012"}, + {file = "tokenizers-0.15.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:054c1cc9c6d68f7ffa4e810b3d5131e0ba511b6e4be34157aa08ee54c2f8d9ee"}, + {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a9b9b070fdad06e347563b88c278995735292ded1132f8657084989a4c84a6d5"}, + {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea621a7eef4b70e1f7a4e84dd989ae3f0eeb50fc8690254eacc08acb623e82f1"}, + {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cf7fd9a5141634fa3aa8d6b7be362e6ae1b4cda60da81388fa533e0b552c98fd"}, + {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44f2a832cd0825295f7179eaf173381dc45230f9227ec4b44378322d900447c9"}, + {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8b9ec69247a23747669ec4b0ca10f8e3dfb3545d550258129bd62291aabe8605"}, + {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40b6a4c78da863ff26dbd5ad9a8ecc33d8a8d97b535172601cf00aee9d7ce9ce"}, + {file = "tokenizers-0.15.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5ab2a4d21dcf76af60e05af8063138849eb1d6553a0d059f6534357bce8ba364"}, + {file = "tokenizers-0.15.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a47acfac7e511f6bbfcf2d3fb8c26979c780a91e06fb5b9a43831b2c0153d024"}, + {file = "tokenizers-0.15.2-cp310-none-win32.whl", hash = "sha256:064ff87bb6acdbd693666de9a4b692add41308a2c0ec0770d6385737117215f2"}, + {file = "tokenizers-0.15.2-cp310-none-win_amd64.whl", hash = "sha256:3b919afe4df7eb6ac7cafd2bd14fb507d3f408db7a68c43117f579c984a73843"}, + {file = "tokenizers-0.15.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:89cd1cb93e4b12ff39bb2d626ad77e35209de9309a71e4d3d4672667b4b256e7"}, + {file = "tokenizers-0.15.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cfed5c64e5be23d7ee0f0e98081a25c2a46b0b77ce99a4f0605b1ec43dd481fa"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a907d76dcfda37023ba203ab4ceeb21bc5683436ebefbd895a0841fd52f6f6f2"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20ea60479de6fc7b8ae756b4b097572372d7e4032e2521c1bbf3d90c90a99ff0"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:48e2b9335be2bc0171df9281385c2ed06a15f5cf121c44094338306ab7b33f2c"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:112a1dd436d2cc06e6ffdc0b06d55ac019a35a63afd26475205cb4b1bf0bfbff"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4620cca5c2817177ee8706f860364cc3a8845bc1e291aaf661fb899e5d1c45b0"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ccd73a82751c523b3fc31ff8194702e4af4db21dc20e55b30ecc2079c5d43cb7"}, + {file = "tokenizers-0.15.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:107089f135b4ae7817affe6264f8c7a5c5b4fd9a90f9439ed495f54fcea56fb4"}, + {file = "tokenizers-0.15.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0ff110ecc57b7aa4a594396525a3451ad70988e517237fe91c540997c4e50e29"}, + {file = "tokenizers-0.15.2-cp311-none-win32.whl", hash = "sha256:6d76f00f5c32da36c61f41c58346a4fa7f0a61be02f4301fd30ad59834977cc3"}, + {file = "tokenizers-0.15.2-cp311-none-win_amd64.whl", hash = "sha256:cc90102ed17271cf0a1262babe5939e0134b3890345d11a19c3145184b706055"}, + {file = "tokenizers-0.15.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f86593c18d2e6248e72fb91c77d413a815153b8ea4e31f7cd443bdf28e467670"}, + {file = "tokenizers-0.15.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0774bccc6608eca23eb9d620196687c8b2360624619623cf4ba9dc9bd53e8b51"}, + {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d0222c5b7c9b26c0b4822a82f6a7011de0a9d3060e1da176f66274b70f846b98"}, + {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3835738be1de66624fff2f4f6f6684775da4e9c00bde053be7564cbf3545cc66"}, + {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0143e7d9dcd811855c1ce1ab9bf5d96d29bf5e528fd6c7824d0465741e8c10fd"}, + {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db35825f6d54215f6b6009a7ff3eedee0848c99a6271c870d2826fbbedf31a38"}, + {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f5e64b0389a2be47091d8cc53c87859783b837ea1a06edd9d8e04004df55a5c"}, + {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e0480c452217edd35eca56fafe2029fb4d368b7c0475f8dfa3c5c9c400a7456"}, + {file = "tokenizers-0.15.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a33ab881c8fe70474980577e033d0bc9a27b7ab8272896e500708b212995d834"}, + {file = "tokenizers-0.15.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a308a607ca9de2c64c1b9ba79ec9a403969715a1b8ba5f998a676826f1a7039d"}, + {file = "tokenizers-0.15.2-cp312-none-win32.whl", hash = "sha256:b8fcfa81bcb9447df582c5bc96a031e6df4da2a774b8080d4f02c0c16b42be0b"}, + {file = "tokenizers-0.15.2-cp312-none-win_amd64.whl", hash = "sha256:38d7ab43c6825abfc0b661d95f39c7f8af2449364f01d331f3b51c94dcff7221"}, + {file = "tokenizers-0.15.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:38bfb0204ff3246ca4d5e726e8cc8403bfc931090151e6eede54d0e0cf162ef0"}, + {file = "tokenizers-0.15.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c861d35e8286a53e06e9e28d030b5a05bcbf5ac9d7229e561e53c352a85b1fc"}, + {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:936bf3842db5b2048eaa53dade907b1160f318e7c90c74bfab86f1e47720bdd6"}, + {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:620beacc3373277700d0e27718aa8b25f7b383eb8001fba94ee00aeea1459d89"}, + 
{file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2735ecbbf37e52db4ea970e539fd2d450d213517b77745114f92867f3fc246eb"}, + {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:473c83c5e2359bb81b0b6fde870b41b2764fcdd36d997485e07e72cc3a62264a"}, + {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:968fa1fb3c27398b28a4eca1cbd1e19355c4d3a6007f7398d48826bbe3a0f728"}, + {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:865c60ae6eaebdde7da66191ee9b7db52e542ed8ee9d2c653b6d190a9351b980"}, + {file = "tokenizers-0.15.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7c0d8b52664ab2d4a8d6686eb5effc68b78608a9008f086a122a7b2996befbab"}, + {file = "tokenizers-0.15.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:f33dfbdec3784093a9aebb3680d1f91336c56d86cc70ddf88708251da1fe9064"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6a9b648a58281c4672212fab04e60648fde574877d0139cd4b4f93fe28ca8944"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7c7d18b733be6bbca8a55084027f7be428c947ddf871c500ee603e375013ffba"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:13ca3611de8d9ddfbc4dc39ef54ab1d2d4aaa114ac8727dfdc6a6ec4be017378"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:237d1bf3361cf2e6463e6c140628e6406766e8b27274f5fcc62c747ae3c6f094"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67a0fe1e49e60c664915e9fb6b0cb19bac082ab1f309188230e4b2920230edb3"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4e022fe65e99230b8fd89ebdfea138c24421f91c1a4f4781a8f5016fd5cdfb4d"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d857be2df69763362ac699f8b251a8cd3fac9d21893de129bc788f8baaef2693"}, + {file = "tokenizers-0.15.2-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:708bb3e4283177236309e698da5fcd0879ce8fd37457d7c266d16b550bcbbd18"}, + {file = "tokenizers-0.15.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:64c35e09e9899b72a76e762f9854e8750213f67567787d45f37ce06daf57ca78"}, + {file = "tokenizers-0.15.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1257f4394be0d3b00de8c9e840ca5601d0a4a8438361ce9c2b05c7d25f6057b"}, + {file = "tokenizers-0.15.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02272fe48280e0293a04245ca5d919b2c94a48b408b55e858feae9618138aeda"}, + {file = "tokenizers-0.15.2-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dc3ad9ebc76eabe8b1d7c04d38be884b8f9d60c0cdc09b0aa4e3bcf746de0388"}, + {file = "tokenizers-0.15.2-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:32e16bdeffa7c4f46bf2152172ca511808b952701d13e7c18833c0b73cb5c23f"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fb16ba563d59003028b678d2361a27f7e4ae0ab29c7a80690efa20d829c81fdb"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:2277c36d2d6cdb7876c274547921a42425b6810d38354327dd65a8009acf870c"}, + {file = 
"tokenizers-0.15.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1cf75d32e8d250781940d07f7eece253f2fe9ecdb1dc7ba6e3833fa17b82fcbc"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1b3b31884dc8e9b21508bb76da80ebf7308fdb947a17affce815665d5c4d028"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b10122d8d8e30afb43bb1fe21a3619f62c3e2574bff2699cf8af8b0b6c5dc4a3"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d88b96ff0fe8e91f6ef01ba50b0d71db5017fa4e3b1d99681cec89a85faf7bf7"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:37aaec5a52e959892870a7c47cef80c53797c0db9149d458460f4f31e2fb250e"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e2ea752f2b0fe96eb6e2f3adbbf4d72aaa1272079b0dfa1145507bd6a5d537e6"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:4b19a808d8799fda23504a5cd31d2f58e6f52f140380082b352f877017d6342b"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:64c86e5e068ac8b19204419ed8ca90f9d25db20578f5881e337d203b314f4104"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de19c4dc503c612847edf833c82e9f73cd79926a384af9d801dcf93f110cea4e"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea09acd2fe3324174063d61ad620dec3bcf042b495515f27f638270a7d466e8b"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cf27fd43472e07b57cf420eee1e814549203d56de00b5af8659cb99885472f1f"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7ca22bd897537a0080521445d91a58886c8c04084a6a19e6c78c586e0cfa92a5"}, + {file = "tokenizers-0.15.2.tar.gz", hash = "sha256:e6e9c6e019dd5484be5beafc775ae6c925f4c69a3487040ed09b45e13df2cb91"}, +] + [[package]] name = "tomli" version = "2.0.1" @@ -2096,6 +2288,29 @@ files = [ {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"}, ] +[[package]] +name = "transformers" +version = "4.38.2" +requires_python = ">=3.8.0" +summary = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +groups = ["all", "default", "dev"] +dependencies = [ + "filelock", + "huggingface-hub<1.0,>=0.19.3", + "numpy>=1.17", + "packaging>=20.0", + "pyyaml>=5.1", + "regex!=2019.12.17", + "requests", + "safetensors>=0.4.1", + "tokenizers<0.19,>=0.14", + "tqdm>=4.27", +] +files = [ + {file = "transformers-4.38.2-py3-none-any.whl", hash = "sha256:c4029cb9f01b3dd335e52f364c52d2b37c65b4c78e02e6a08b1919c5c928573e"}, + {file = "transformers-4.38.2.tar.gz", hash = "sha256:c5fc7ad682b8a50a48b2a4c05d4ea2de5567adb1bdd00053619dbe5960857dd5"}, +] + [[package]] name = "triton" version = "2.2.0" diff --git a/pyproject.toml b/pyproject.toml index 66b46d8b..7347744e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,9 @@ dependencies = [ "nibabel>=5.2.0", "timm>=0.9.12", "tensorboard>=2.16.2", + "transformers>=4.38.2", + "onnxruntime>=1.17.1", + "onnx>=1.15.0", ] [project.optional-dependencies] diff --git a/src/eva/interface/interface.py b/src/eva/interface/interface.py index b57ca3d9..74c1caeb 100644 --- a/src/eva/interface/interface.py +++ b/src/eva/interface/interface.py @@ 
-1,8 +1,7 @@ """Main interface class.""" -from eva import trainers +from eva import trainers as eva_trainer from eva.data import datamodules -from eva.data.datamodules import schemas from eva.models import modules @@ -15,57 +14,55 @@ class Interface: def fit( self, + trainer: eva_trainer.Trainer, model: modules.ModelModule, data: datamodules.DataModule, - trainer: trainers.Trainer, ) -> None: - """Perform model training and evaluation in place. + """Perform model training and evaluation out-of-place. This method uses the specified trainer to fit the model using the provided data. - Example usecases: + Example use cases: - Using a model consisting of a frozen backbone and a head, the backbone will generate the embeddings on the fly which are then used as input features to train the head on the downstream task specified by the given dataset. - Fitting only the head network using a dataset that loads pre-computed embeddings. - Args: - model: The model module. + trainer: The base trainer to use but not modify. + model: The model module to use but not modify. data: The data module. - trainer: The trainer which processes the model and data. + n_runs: The amount of runs (fit and evaluate) to perform. """ - trainer.fit(model=model, datamodule=data) - trainer.validate(datamodule=data) - if data.datasets.test is not None: - trainer.test(datamodule=data) + trainer.run_evaluation_session(model=model, datamodule=data) def predict( self, + trainer: eva_trainer.Trainer, model: modules.ModelModule, data: datamodules.DataModule, - trainer: trainers.Trainer, ) -> None: - """Perform model prediction in place. + """Perform model prediction out-of-place. This method performs inference with a pre-trained foundation model to compute embeddings. Args: - model: The model module. + trainer: The base trainer to use but not modify. + model: The model module to use but not modify. data: The data module. - trainer: The trainer which processes the model and data. """ - predict_datamodule = datamodules.DataModule( - dataloaders=schemas.DataloadersSchema(predict=data.dataloaders.predict), - datasets=schemas.DatasetsSchema(predict=data.datasets.predict), + eva_trainer.infer_model( + base_trainer=trainer, + base_model=model, + datamodule=data, + return_predictions=False, ) - trainer.predict(model=model, datamodule=predict_datamodule, return_predictions=False) def predict_fit( self, + trainer: eva_trainer.Trainer, model: modules.ModelModule, data: datamodules.DataModule, - trainer: trainers.Trainer, ) -> None: """Combines the predict and fit commands in one method. @@ -74,9 +71,9 @@ def predict_fit( 2. fit: training the head network using the embeddings generated in step 1. Args: - model: The model module. + trainer: The base trainer to use but not modify. + model: The model module to use but not modify. data: The data module. - trainer: The trainer which processes the model and data. 
""" - self.predict(model=model, data=data, trainer=trainer) - self.fit(model=model, data=data, trainer=trainer) + self.predict(trainer=trainer, model=model, data=data) + self.fit(trainer=trainer, model=model, data=data) diff --git a/src/eva/models/networks/__init__.py b/src/eva/models/networks/__init__.py index 11a9a80e..54bf968d 100644 --- a/src/eva/models/networks/__init__.py +++ b/src/eva/models/networks/__init__.py @@ -1,6 +1,6 @@ """Networks API.""" -from eva.models.networks.from_function import ModelFromFunction from eva.models.networks.mlp import MLP +from eva.models.wrappers.from_function import ModelFromFunction __all__ = ["ModelFromFunction", "MLP"] diff --git a/src/eva/models/wrappers/__init__.py b/src/eva/models/wrappers/__init__.py new file mode 100644 index 00000000..dbe6d4de --- /dev/null +++ b/src/eva/models/wrappers/__init__.py @@ -0,0 +1,8 @@ +"""Model Wrappers API.""" + +from eva.models.wrappers.base import BaseModel +from eva.models.wrappers.from_function import ModelFromFunction +from eva.models.wrappers.huggingface import HuggingFaceModel +from eva.models.wrappers.onnx import ONNXModel + +__all__ = ["BaseModel", "HuggingFaceModel", "ONNXModel", "ModelFromFunction"] diff --git a/src/eva/models/wrappers/base.py b/src/eva/models/wrappers/base.py new file mode 100644 index 00000000..4112a1f9 --- /dev/null +++ b/src/eva/models/wrappers/base.py @@ -0,0 +1,46 @@ +"""Base class for model wrappers.""" + +import abc +from typing import Callable + +import torch +import torch.nn as nn +from typing_extensions import override + + +class BaseModel(nn.Module): + """Base class for model wrappers.""" + + def __init__(self, tensor_transforms: Callable | None = None) -> None: + """Initializes the model. + + Args: + tensor_transforms: The transforms to apply to the output tensor produced by the model. + """ + super().__init__() + + self._output_transforms = tensor_transforms + + @override + def forward(self, tensor: torch.Tensor) -> torch.Tensor: + tensor = self.model_forward(tensor) + return self._apply_transforms(tensor) + + @abc.abstractmethod + def load_model(self) -> Callable[..., torch.Tensor]: + """Loads the model.""" + raise NotImplementedError + + @abc.abstractmethod + def model_forward(self, tensor: torch.Tensor) -> torch.Tensor: + """Implements the forward pass of the model. + + Args: + tensor: The input tensor to the model. + """ + raise NotImplementedError + + def _apply_transforms(self, tensor: torch.Tensor) -> torch.Tensor: + if self._output_transforms is not None: + tensor = self._output_transforms(tensor) + return tensor diff --git a/src/eva/models/networks/from_function.py b/src/eva/models/wrappers/from_function.py similarity index 64% rename from src/eva/models/networks/from_function.py rename to src/eva/models/wrappers/from_function.py index ddaca38a..7295d5c1 100644 --- a/src/eva/models/networks/from_function.py +++ b/src/eva/models/wrappers/from_function.py @@ -7,10 +7,11 @@ from torch import nn from typing_extensions import override +from eva.models import wrappers from eva.models.networks import _utils -class ModelFromFunction(nn.Module): +class ModelFromFunction(wrappers.BaseModel): """Wrapper class for models which are initialized from functions. This is helpful for initializing models in a `.yaml` configuration file. @@ -21,24 +22,29 @@ def __init__( path: Callable[..., nn.Module], arguments: Dict[str, Any] | None = None, checkpoint_path: str | None = None, + tensor_transforms: Callable | None = None, ) -> None: """Initializes and constructs the model. 
Args: path: The path to the callable object (class or function). arguments: The extra callable function / class arguments. - checkpoint_path: The path to the checkpoint to load the model weights from. + checkpoint_path: The path to the checkpoint to load the model weights from. This is + currently only supported for torch model checkpoints. For other formats, the + checkpoint loading should be handled within the provided callable object. + tensor_transforms: The transforms to apply to the output tensor produced by the model. """ super().__init__() self._path = path self._arguments = arguments self._checkpoint_path = checkpoint_path + self._tensor_transforms = tensor_transforms - self._network = self.build_model() + self._model = self.load_model() - def build_model(self) -> nn.Module: - """Builds and returns the model.""" + @override + def load_model(self) -> nn.Module: class_path = jsonargparse.class_from_function(self._path, func_return=nn.Module) model = class_path(**self._arguments or {}) if self._checkpoint_path is not None: @@ -46,5 +52,5 @@ def build_model(self) -> nn.Module: return model @override - def forward(self, tensor: torch.Tensor) -> torch.Tensor: - return self._network(tensor) + def model_forward(self, tensor: torch.Tensor) -> torch.Tensor: + return self._model(tensor) diff --git a/src/eva/models/wrappers/huggingface.py b/src/eva/models/wrappers/huggingface.py new file mode 100644 index 00000000..5fdb652a --- /dev/null +++ b/src/eva/models/wrappers/huggingface.py @@ -0,0 +1,35 @@ +"""Wrappers for HuggingFace `transformers` models.""" + +from typing import Any, Callable + +import torch +import transformers +from typing_extensions import override + +from eva.models import wrappers + + +class HuggingFaceModel(wrappers.BaseModel): + """Wrapper class for loading HuggingFace `transformers` models.""" + + def __init__(self, model_name_or_path: str, tensor_transforms: Callable | None = None) -> None: + """Initializes the model. + + Args: + model_name_or_path: The model name or path to load the model from. This can be a local + path or a model name from the HuggingFace model hub. + tensor_transforms: The transforms to apply to the output tensor produced by the model. + """ + super().__init__(tensor_transforms=tensor_transforms) + + self._model_name_or_path = model_name_or_path + self._model = self.load_model() + + @override + def load_model(self) -> Any: + config = transformers.AutoConfig.from_pretrained(self._model_name_or_path) + return transformers.AutoModel.from_pretrained(self._model_name_or_path, config=config) + + @override + def model_forward(self, tensor: torch.Tensor) -> torch.Tensor: + return self._model(tensor) diff --git a/src/eva/models/wrappers/onnx.py b/src/eva/models/wrappers/onnx.py new file mode 100644 index 00000000..a96e0b43 --- /dev/null +++ b/src/eva/models/wrappers/onnx.py @@ -0,0 +1,47 @@ +"""Wrapper class for ONNX models.""" + +from typing import Any, Callable, Literal + +import onnxruntime as ort +import torch +from typing_extensions import override + +from eva.models import wrappers + + +class ONNXModel(wrappers.BaseModel): + """Wrapper class for loading ONNX models.""" + + def __init__( + self, + path: str, + device: Literal["cpu", "cuda"] | None = "cpu", + tensor_transforms: Callable | None = None, + ): + """Initializes the model. + + Args: + path: The path to the .onnx model file. + device: The device to run the model on. This can be either "cpu" or "cuda". + tensor_transforms: The transforms to apply to the output tensor produced by the model.
+ """ + super().__init__(tensor_transforms=tensor_transforms) + + self._path = path + self._device = device + self._model = self.load_model() + + @override + def load_model(self) -> Any: + if self._device == "cuda" and not torch.cuda.is_available(): + raise ValueError("Device is set to 'cuda', but CUDA is not available.") + provider = "CUDAExecutionProvider" if self._device == "cuda" else "CPUExecutionProvider" + return ort.InferenceSession(self._path, providers=[provider]) + + @override + def model_forward(self, tensor: torch.Tensor) -> torch.Tensor: + # TODO: Use IO binding to avoid copying the tensor to CPU. + # https://onnxruntime.ai/docs/api/python/api_summary.html#data-on-device + inputs = {self._model.get_inputs()[0].name: tensor.detach().cpu().numpy()} + outputs = self._model.run(None, inputs)[0] + return torch.from_numpy(outputs).float().to(tensor.device) diff --git a/src/eva/setup.py b/src/eva/setup.py index 7a2e5810..185d0a79 100644 --- a/src/eva/setup.py +++ b/src/eva/setup.py @@ -4,9 +4,18 @@ import sys import warnings +import jsonargparse from loguru import logger + +def _configure_jsonargparse() -> None: + """Configures the `jsonargparse` library.""" + jsonargparse.set_config_read_mode( + urls_enabled=True, + fsspec_enabled=True, + ) + + def _initialize_logger() -> None: """Initializes, manipulates and customizes the logger. @@ -44,6 +53,7 @@ def _enable_mps_fallback() -> None: def setup() -> None: """Sets up the environment before the module is imported.""" + _configure_jsonargparse() _initialize_logger() _suppress_warnings() _enable_mps_fallback() diff --git a/src/eva/trainers/__init__.py b/src/eva/trainers/__init__.py index 403e799b..302fa06d 100644 --- a/src/eva/trainers/__init__.py +++ b/src/eva/trainers/__init__.py @@ -1,5 +1,6 @@ """Trainers API.""" +from eva.trainers.functional import infer_model, run_evaluation_session from eva.trainers.trainer import Trainer -__all__ = ["Trainer"] +__all__ = ["infer_model", "run_evaluation_session", "Trainer"] diff --git a/src/eva/trainers/_logging.py b/src/eva/trainers/_logging.py new file mode 100644 index 00000000..75334ed7 --- /dev/null +++ b/src/eva/trainers/_logging.py @@ -0,0 +1,81 @@ +"""Helper functions and utilities for trainer logging.""" + +import hashlib +import sys +from datetime import datetime + +from lightning_fabric.utilities import cloud_io +from loguru import logger + + +def generate_session_id() -> str: + """Generates and returns a unique string ID of an experiment. + + The ID is composed of the run timestamp and its config hash. If the + configuration hash is an empty string, it will use only the timestamp. + """ + timestamp = _generate_timestamp_hash() + config_hash = _generate_config_hash() + return f"{timestamp}_{config_hash}" if config_hash else timestamp + + +def _generate_timestamp_hash() -> str: + """Generate a time-based hash id.""" + timestamp = datetime.now() + return timestamp.strftime("%Y%m%d-%H%M%S%f") + + +def _generate_config_hash(max_hash_len: int = 8) -> str: + """Generates a hash id based on a yaml configuration file. + + Args: + max_hash_len: The maximum length of the produced hash id. + """ + config_path = _fetch_config_path() + if config_path is None: + logger.warning( + "No or multiple configuration files found in the command line arguments. " + "No configuration hash will be created for this experiment."
+ ) + return "" + + return _generate_hash_from_config(config_path, max_hash_len) + + +def _fetch_config_path() -> str | None: + """Retrieves the configuration path from command line arguments. + + It returns `None` if no or multiple configuration files found in + the system arguments. + + Returns: + The path to the configuration file. + """ + inputs = sys.argv + config_paths = [inputs[i + 1] for i, arg in enumerate(inputs) if arg == "--config"] + if len(config_paths) == 0 or len(config_paths) > 1: + # TODO combine the multiple configuration files + # and produced hash for the merged one. + return None + + return config_paths[0] + + +def _generate_hash_from_config(path: str, max_hash_len: int = 8) -> str: + """Return a hash from the contents of the configuration file. + + Args: + path: Path to the configuration file. + max_hash_len: Maximum length of the returned hash. + + Returns: + Hash of the configuration file content. + """ + fs = cloud_io.get_filesystem(path) + with fs.open(path, "r") as stream: + config = stream.read() + if isinstance(config, str): + config = config.encode("utf-8") + config_sha256 = hashlib.sha256(config) + hash_id = config_sha256.hexdigest() + return hash_id[:max_hash_len] diff --git a/src/eva/trainers/_utils.py b/src/eva/trainers/_utils.py new file mode 100644 index 00000000..c1cac144 --- /dev/null +++ b/src/eva/trainers/_utils.py @@ -0,0 +1,12 @@ +"""Training related utilities.""" + +import copy +from collections import abc +from typing import Any + + +def clone(*inputs: Any) -> Any: + """Deep copies a list of object and returns them.""" + if not isinstance(inputs, abc.Iterable): + return copy.deepcopy(inputs) + return [copy.deepcopy(obj) for obj in inputs] diff --git a/src/eva/trainers/functional.py b/src/eva/trainers/functional.py new file mode 100644 index 00000000..cb634730 --- /dev/null +++ b/src/eva/trainers/functional.py @@ -0,0 +1,108 @@ +"""Fit session related functions.""" + +from typing import Tuple + +from pytorch_lightning.utilities.types import _EVALUATE_OUTPUT + +from eva.data import datamodules +from eva.models import modules +from eva.trainers import _utils +from eva.trainers import trainer as eva_trainer + + +def run_evaluation_session( + base_trainer: eva_trainer.Trainer, + base_model: modules.ModelModule, + datamodule: datamodules.DataModule, + *, + n_runs: int = 1, +) -> None: + """Runs a downstream evaluation session out-of-place. + + It performs an evaluation run (fit and evaluate) the model + multiple times. Note that as the input `base_trainer` and + `base_model` would be cloned, the input object would not + be modified. + + Args: + base_trainer: The base trainer module to use. + base_model: The base model module to use. + datamodule: The data module. + n_runs: The amount of runs (fit and evaluate) to perform. + """ + for run_index in range(n_runs): + run_evaluation(base_trainer, base_model, datamodule, run_id=f"run_{run_index}") + + +def run_evaluation( + base_trainer: eva_trainer.Trainer, + base_model: modules.ModelModule, + datamodule: datamodules.DataModule, + *, + run_id: str | None = None, +) -> Tuple[_EVALUATE_OUTPUT, _EVALUATE_OUTPUT | None]: + """Fits and evaluates a model out-of-place. + + Args: + base_trainer: The base trainer to use but not modify. + base_model: The model module to use but not modify. + datamodule: The data module. + run_id: The run id to be appended to the output log directory. + If `None`, it will use the log directory of the trainer as is. 
+ + Returns: + A tuple with the validation and the test metrics (if a test set exists). + """ + trainer, model = _utils.clone(base_trainer, base_model) + trainer.setup_log_dirs(run_id or "") + return fit_and_validate(trainer, model, datamodule) + + +def fit_and_validate( + trainer: eva_trainer.Trainer, + model: modules.ModelModule, + datamodule: datamodules.DataModule, +) -> Tuple[_EVALUATE_OUTPUT, _EVALUATE_OUTPUT | None]: + """Fits and evaluates a model in-place. + + If the test set is set in the datamodule, it will evaluate the model + on the test set as well. + + Args: + trainer: The trainer module to use and update in-place. + model: The model module to use and update in-place. + datamodule: The data module. + + Returns: + A tuple with the validation and the test metrics (if a test set exists). + """ + trainer.fit(model, datamodule=datamodule) + validation_scores = trainer.validate(datamodule=datamodule) + test_scores = None if datamodule.datasets.test is None else trainer.test(datamodule=datamodule) + return validation_scores, test_scores + + +def infer_model( + base_trainer: eva_trainer.Trainer, + base_model: modules.ModelModule, + datamodule: datamodules.DataModule, + *, + return_predictions: bool = False, +) -> Any: + """Performs model inference out-of-place. + + Note that the input `base_model` and `base_trainer` are + not modified. + + Args: + base_trainer: The base trainer to use but not modify. + base_model: The model module to use but not modify. + datamodule: The data module. + return_predictions: Whether to return the model predictions. + """ + trainer, model = _utils.clone(base_trainer, base_model) + return trainer.predict( + model=model, + datamodule=datamodule, + return_predictions=return_predictions, + ) diff --git a/src/eva/trainers/trainer.py b/src/eva/trainers/trainer.py index d48a8aa6..d47140f7 100644 --- a/src/eva/trainers/trainer.py +++ b/src/eva/trainers/trainer.py @@ -1,6 +1,97 @@ """Core trainer module.""" -from pytorch_lightning import trainer +import os +from typing import Any -Trainer = trainer.Trainer -"""Core trainer class.""" +from pytorch_lightning import loggers as pl_loggers +from pytorch_lightning import trainer as pl_trainer +from pytorch_lightning.utilities import argparse +from typing_extensions import override + +from eva.data import datamodules +from eva.models import modules +from eva.trainers import _logging, functional + + +class Trainer(pl_trainer.Trainer): + """Core trainer class. + + It is an extended version of lightning's core trainer class. + """ + + @argparse._defaults_from_env_vars + def __init__( + self, + *args: Any, + default_root_dir: str = "logs", + n_runs: int = 1, + **kwargs: Any, + ) -> None: + """Initializes the trainer. + + For the input arguments, refer to ::class::`pytorch_lightning.Trainer`. + + Args: + args: Positional arguments of ::class::`pytorch_lightning.Trainer`. + default_root_dir: The default root directory to store the output logs. + In contrast to ::class::`pytorch_lightning.Trainer`, this path + takes priority as the output destination. + n_runs: The number of runs (fit and evaluate) to perform in an evaluation session. + kwargs: Keyword arguments of ::class::`pytorch_lightning.Trainer`.
+ """ + super().__init__(*args, default_root_dir=default_root_dir, **kwargs) + + self._n_runs = n_runs + + self._session_id: str = _logging.generate_session_id() + self._log_dir: str = self.default_log_dir + + self.setup_log_dirs() + + @property + def default_log_dir(self) -> str: + """Returns the default log directory.""" + return os.path.join(self.default_root_dir, self._session_id) + + @property + @override + def log_dir(self) -> str | None: + return self.strategy.broadcast(self._log_dir) + + def setup_log_dirs(self, subdirectory: str = "") -> None: + """Setups the logging directory of the trainer and experimental loggers in-place. + + Args: + subdirectory: Whether to append a subdirectory to the output log. + """ + self._log_dir = os.path.join(self.default_root_dir, self._session_id, subdirectory) + os.fspath(self._log_dir) + + for logger in self.loggers: + if isinstance(logger, (pl_loggers.CSVLogger, pl_loggers.TensorBoardLogger)): + logger._root_dir = self.default_root_dir + logger._name = self._session_id + logger._version = subdirectory + + def run_evaluation_session( + self, + model: modules.ModelModule, + datamodule: datamodules.DataModule, + ) -> None: + """Runs a evaluation session out-of-place. + + It performs an evaluation run (fit and evaluate) the model + `self._n_run` times. Note that the input `base_model` would + not be modified, so the weights of the input model will remain + as is. + + Args: + model: The base model module to evaluate. + datamodule: The data module. + """ + functional.run_evaluation_session( + base_trainer=self, + base_model=model, + datamodule=datamodule, + n_runs=self._n_runs, + ) diff --git a/src/eva/vision/data/datasets/classification/total_segmentator.py b/src/eva/vision/data/datasets/classification/total_segmentator.py index 8bb95342..ca674a84 100644 --- a/src/eva/vision/data/datasets/classification/total_segmentator.py +++ b/src/eva/vision/data/datasets/classification/total_segmentator.py @@ -186,13 +186,11 @@ def _load_dataset(self) -> pd.DataFrame: def _save_manifest(self, df: pd.DataFrame) -> None: """Saves the dataset manifest to a CSV file.""" - manifest_path = os.path.join(self._root, "manifest.csv") - df.to_csv(manifest_path, index=False) + df.to_csv(self._manifest_path, index=False) def _load_manifest(self) -> pd.DataFrame: """Loads the dataset manifest from a CSV file.""" - manifest_path = os.path.join(self._root, "manifest.csv") - return pd.read_csv(manifest_path) + return pd.read_csv(self._manifest_path) def _generate_ordered_splits(self, df: pd.DataFrame) -> pd.DataFrame: """Orders each class by path and then splits it into train, val and test sets.""" diff --git a/src/eva/vision/data/transforms/__init__.py b/src/eva/vision/data/transforms/__init__.py index 8ba10985..f97cc2ff 100644 --- a/src/eva/vision/data/transforms/__init__.py +++ b/src/eva/vision/data/transforms/__init__.py @@ -1,5 +1,6 @@ """Vision data transforms.""" from eva.vision.data.transforms.common import ArrayToFloatTensor, ArrayToTensor, ResizeAndCrop +from eva.vision.data.transforms.model_output import ExtractCLSFeatures -__all__ = ["ArrayToTensor", "ResizeAndCrop", "ArrayToFloatTensor"] +__all__ = ["ArrayToTensor", "ResizeAndCrop", "ArrayToFloatTensor", "ExtractCLSFeatures"] diff --git a/src/eva/vision/data/transforms/model_output/__init__.py b/src/eva/vision/data/transforms/model_output/__init__.py new file mode 100644 index 00000000..40fc3e2d --- /dev/null +++ b/src/eva/vision/data/transforms/model_output/__init__.py @@ -0,0 +1,5 @@ +"""Transforms to process model 
outputs.""" + +from eva.vision.data.transforms.model_output.cls import ExtractCLSFeatures + +__all__ = ["ExtractCLSFeatures"] diff --git a/src/eva/vision/data/transforms/model_output/cls.py b/src/eva/vision/data/transforms/model_output/cls.py new file mode 100644 index 00000000..2240bef5 --- /dev/null +++ b/src/eva/vision/data/transforms/model_output/cls.py @@ -0,0 +1,23 @@ +"""Transforms for extracting the CLS output from a model output.""" + +import torch +from transformers import modeling_outputs + + +class ExtractCLSFeatures: + """Extracts the CLS token from a ViT model output.""" + + def __call__( + self, tensor: torch.Tensor | modeling_outputs.BaseModelOutputWithPooling + ) -> torch.Tensor: + """Call method for the transformation. + + Args: + tensor: The tensor representing the model output. + """ + if isinstance(tensor, torch.Tensor): + return tensor[:, 0, :] + elif isinstance(tensor, modeling_outputs.BaseModelOutputWithPooling): + return tensor.last_hidden_state[:, 0, :] + else: + raise ValueError(f"Unsupported type {type(tensor)}") diff --git a/tests/eva/models/modules/conftest.py b/tests/eva/models/modules/conftest.py index 790f3ec2..d4696e4a 100644 --- a/tests/eva/models/modules/conftest.py +++ b/tests/eva/models/modules/conftest.py @@ -6,8 +6,8 @@ import torch from torch.utils import data as torch_data -from eva import trainers from eva.data import dataloaders, datamodules, datasets +from eva.trainers import trainer as eva_trainer @pytest.fixture(scope="function") @@ -33,9 +33,13 @@ def datamodule( @pytest.fixture(scope="function") -def trainer(max_epochs: int = 1) -> trainers.Trainer: +def trainer(max_epochs: int = 1) -> eva_trainer.Trainer: """Returns a model trainer fixture.""" - return trainers.Trainer(max_epochs=max_epochs, accelerator="cpu") + return eva_trainer.Trainer( + max_epochs=max_epochs, + accelerator="cpu", + default_root_dir="logs/test", + ) @pytest.fixture(scope="function") diff --git a/tests/eva/models/modules/test_inference.py b/tests/eva/models/modules/test_inference.py index bc22a6a6..42fe7719 100644 --- a/tests/eva/models/modules/test_inference.py +++ b/tests/eva/models/modules/test_inference.py @@ -7,9 +7,9 @@ import torch from torch import nn -from eva import trainers from eva.data import datamodules from eva.models import modules +from eva.trainers import trainer as eva_trainer N_CLASSES = 4 """The number of classes in the dataset.""" @@ -25,7 +25,7 @@ def test_inference_module_predict( model: modules.InferenceModule, datamodule: datamodules.DataModule, - trainer: trainers.Trainer, + trainer: eva_trainer.Trainer, ) -> None: """Tests the HeadModule fit pipeline.""" predictions = trainer.predict(model, datamodule=datamodule) diff --git a/tests/eva/models/wrappers/__init__.py b/tests/eva/models/wrappers/__init__.py new file mode 100644 index 00000000..436e4f37 --- /dev/null +++ b/tests/eva/models/wrappers/__init__.py @@ -0,0 +1 @@ +"""Model wrapper related tests.""" diff --git a/tests/eva/models/networks/test_from_function.py b/tests/eva/models/wrappers/test_from_function.py similarity index 100% rename from tests/eva/models/networks/test_from_function.py rename to tests/eva/models/wrappers/test_from_function.py diff --git a/tests/eva/models/wrappers/test_huggingface.py b/tests/eva/models/wrappers/test_huggingface.py new file mode 100644 index 00000000..7c71e6af --- /dev/null +++ b/tests/eva/models/wrappers/test_huggingface.py @@ -0,0 +1,33 @@ +"""HuggingFaceModel wrapper tests.""" + +from typing import Callable, Tuple + +import pytest +import torch +from 
transformers import modeling_outputs + +from eva.models import wrappers +from eva.vision.data.transforms import ExtractCLSFeatures + + +@pytest.mark.parametrize( + "model_name_or_path, tensor_transforms, expected_output_shape", + [ + ("hf-internal-testing/tiny-random-ViTModel", None, (16, 226, 32)), + ("hf-internal-testing/tiny-random-ViTModel", ExtractCLSFeatures(), (16, 32)), + ], +) +def test_huggingface_model( + model_name_or_path: str, + tensor_transforms: Callable | None, + expected_output_shape: Tuple[int, ...], +) -> None: + """Tests the forward pass using the HuggingFaceModel wrapper.""" + model = wrappers.HuggingFaceModel(model_name_or_path, tensor_transforms) + input_tensor = torch.rand(16, 3, 30, 30) + output_tensor = model(input_tensor) + + if isinstance(output_tensor, modeling_outputs.BaseModelOutputWithPooling): + assert output_tensor.last_hidden_state.shape == expected_output_shape + else: + assert output_tensor.shape == expected_output_shape diff --git a/tests/eva/models/wrappers/test_onnx.py b/tests/eva/models/wrappers/test_onnx.py new file mode 100644 index 00000000..3a0f8a76 --- /dev/null +++ b/tests/eva/models/wrappers/test_onnx.py @@ -0,0 +1,46 @@ +"""ONNXModel wrapper tests.""" + +from collections.abc import Iterator +from pathlib import Path +from typing import Tuple + +import pytest +import torch +from pytorch_lightning.demos import boring_classes + +from eva.models.wrappers import ONNXModel + + +@pytest.mark.parametrize( + "input_shape,expected_output_shape", + [ + ((1, 32), (1, 2)), + ((4, 32), (4, 2)), + ], +) +def test_onnx_model( + model_path: str, input_shape: Tuple[int, ...], expected_output_shape: Tuple[int, ...] +) -> None: + """Tests the forward pass using the ONNXModel wrapper.""" + model = ONNXModel(path=model_path) + model.eval() + + input_tensor = torch.rand(*input_shape) + output_tensor = model(input_tensor) + + assert output_tensor.shape == expected_output_shape + + +@pytest.fixture +def model_path(tmp_path: Path) -> Iterator[str]: + """Fixture that creates a temporary .onnx model file.""" + model = boring_classes.BoringModel() + input_tensor = torch.randn(1, 32) + file_path = tmp_path / "model.onnx" + dynamic_axes = {"input": {0: "batch_size"}, "output": {0: "batch_size"}} + model.to_onnx( + file_path, input_sample=input_tensor, export_params=True, dynamic_axes=dynamic_axes + ) + + yield file_path.as_posix() + file_path.unlink(missing_ok=True) diff --git a/tests/eva/vision/data/datasets/classification/test_total_segmentator.py b/tests/eva/vision/data/datasets/classification/test_total_segmentator.py index ed00ee23..412b532b 100644 --- a/tests/eva/vision/data/datasets/classification/test_total_segmentator.py +++ b/tests/eva/vision/data/datasets/classification/test_total_segmentator.py @@ -1,7 +1,7 @@ """TotalSegmentator dataset tests.""" import os -from typing import Literal +from typing import Iterator, Literal from unittest.mock import patch import numpy as np @@ -33,7 +33,7 @@ def test_sample(total_segmentator_dataset: datasets.TotalSegmentatorClassificati def total_segmentator_dataset( split: Literal["train", "val", "test"], assets_path: str, -) -> datasets.TotalSegmentatorClassification: +) -> Iterator[datasets.TotalSegmentatorClassification]: """TotalSegmentator dataset fixture.""" with patch("eva.vision.data.datasets.TotalSegmentatorClassification._verify_dataset") as _: dataset = datasets.TotalSegmentatorClassification( @@ -44,4 +44,7 @@ ) dataset.prepare_data() dataset.setup() - return dataset + yield dataset + + if
os.path.isfile(dataset._manifest_path): + os.remove(dataset._manifest_path)
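Reviewer note: a minimal usage sketch of the new wrapper classes added in this diff. It assumes that the `wrappers.BaseModel` base class (not shown here) calls `model_forward` and then applies `tensor_transforms` to its output, as exercised by `test_huggingface_model`; the model id and the .onnx path below are placeholders, not values from this change.

# Illustrative only -- not part of the diff.
import torch

from eva.models import wrappers
from eva.vision.data.transforms import ExtractCLSFeatures

# HuggingFace ViT backbone whose output is reduced to the CLS token features.
hf_backbone = wrappers.HuggingFaceModel(
    model_name_or_path="hf-internal-testing/tiny-random-ViTModel",  # placeholder model id
    tensor_transforms=ExtractCLSFeatures(),
)
cls_features = hf_backbone(torch.rand(16, 3, 30, 30))  # expected shape: (16, hidden_size)

# ONNX backbone executed with onnxruntime on CPU; inputs are copied to numpy
# and the outputs are returned on the input tensor's device.
onnx_backbone = wrappers.ONNXModel(path="model.onnx", device="cpu")  # placeholder path
predictions = onnx_backbone(torch.rand(4, 32))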