diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml
index c50215da49a..3eebc5d0582 100644
--- a/.github/workflows/precommit.yml
+++ b/.github/workflows/precommit.yml
@@ -95,7 +95,7 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-20.04-4-cores
     steps:
       - uses: actions/checkout@v3
         with:
@@ -125,6 +125,8 @@ jobs:
       - name: Runner info
         continue-on-error: true
         run: |
+          export PATH=/usr/local/cuda-12.1/bin${PATH:+:${PATH}}
+          export LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
           nvidia-smi
           cat /proc/cpuinfo
           nvcc --version
@@ -140,4 +142,7 @@ jobs:
         run: |
           python -c "import torch; print(torch.cuda.is_available())"
       - name: Run PyTorch precommit test scope
-        run: make test-torch-cuda
+        run: |
+          export PATH=/usr/local/cuda-12.1/bin${PATH:+:${PATH}}
+          export LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+          make test-torch-cuda
diff --git a/tests/torch/pruning/test_tensor_processor.py b/tests/torch/pruning/test_tensor_processor.py
index 67ae0a3947b..b4267df1b12 100644
--- a/tests/torch/pruning/test_tensor_processor.py
+++ b/tests/torch/pruning/test_tensor_processor.py
@@ -16,10 +16,10 @@
 from nncf.torch.tensor import PTNNCFTensor


-@pytest.mark.parametrize("device", (torch.device("cpu"), torch.device("cuda")))
-def test_ones(device):
-    if not torch.cuda.is_available() and device == torch.device("cuda"):
+def test_ones(use_cuda):
+    if use_cuda and not torch.cuda.is_available():
         pytest.skip("There are no available CUDA devices")
+    device = torch.device("cuda" if use_cuda else "cpu")
     shape = [1, 3, 10, 100]
     tensor = PTNNCFPruningTensorProcessor.ones(shape, device)
     assert torch.is_tensor(tensor.tensor)
diff --git a/tests/torch/ptq/test_fast_bias_correction.py b/tests/torch/ptq/test_fast_bias_correction.py
index 6bbdc342e0b..61d98ac7bdd 100644
--- a/tests/torch/ptq/test_fast_bias_correction.py
+++ b/tests/torch/ptq/test_fast_bias_correction.py
@@ -62,6 +62,7 @@ def check_bias(model: NNCFNetwork, ref_bias: list):
     raise ValueError("Not found node with bias")


+@pytest.mark.cuda
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="Skipping for CPU-only setups")
 class TestTorchCudaFBCAlgorithm(TestTorchFBCAlgorithm):
     @staticmethod
diff --git a/tests/torch/ptq/test_reducers_and_aggregators.py b/tests/torch/ptq/test_reducers_and_aggregators.py
index 1af7b4e4683..84cb20fb9ea 100644
--- a/tests/torch/ptq/test_reducers_and_aggregators.py
+++ b/tests/torch/ptq/test_reducers_and_aggregators.py
@@ -87,6 +87,7 @@ def all_close(self, val: torch.Tensor, ref) -> bool:
         return super().all_close(val, ref)


+@pytest.mark.cuda
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="Cuda is not available in current environment")
 class TestCudaReducersAggregators(BaseTestReducersAggregators):
     def get_nncf_tensor(self, x: np.array, dtype: Optional[Dtype] = None):
@@ -97,11 +98,11 @@ def all_close(self, val: torch.Tensor, ref) -> bool:
         return super().all_close(val, ref)


-@pytest.mark.parametrize("device", ["cuda", "cpu"])
 @pytest.mark.parametrize("size,ref", [(16_000_000, 1_600_000.8750), (17_000_000, 1_700_000.7500)])
-def test_quantile_percentile_function(device, size, ref):
-    if not torch.cuda.is_available() and device == "cuda":
+def test_quantile_percentile_function(use_cuda, size, ref):
+    if use_cuda and not torch.cuda.is_available():
         pytest.skip("Cuda is not available in current environment")
+    device = "cuda" if use_cuda else "cpu"
     tensor = PTNNCFTensor(torch.arange(1, size, 1).float().to(device))
     res_quantile = PTNNCFCollectorTensorProcessor.quantile(tensor, [0.1], axis=0)
     res_percentile = PTNNCFCollectorTensorProcessor.percentile(tensor, [10], axis=0)
@@ -111,11 +112,11 @@ def test_quantile_percentile_function(device, size, ref):
     assert tensor.is_cuda == (device == "cuda")


-@pytest.mark.parametrize("device", ["cuda", "cpu"])
 @pytest.mark.parametrize("size,ref", [(16_000_000, 8_000_000), (17_000_000, 8_500_000)])
-def test_median_function(device, size, ref):
-    if not torch.cuda.is_available() and device == "cuda":
+def test_median_function(use_cuda, size, ref):
+    if use_cuda and not torch.cuda.is_available():
         pytest.skip("Cuda is not available in current environment")
+    device = "cuda" if use_cuda else "cpu"
     tensor = PTNNCFTensor(torch.arange(1, size, 1).float().to(device))
     res = PTNNCFCollectorTensorProcessor.median(tensor, axis=0)
     assert res.tensor == ref
diff --git a/tests/torch/ptq/test_weights_compression.py b/tests/torch/ptq/test_weights_compression.py
index ba64f1341b8..8cb5e00932f 100644
--- a/tests/torch/ptq/test_weights_compression.py
+++ b/tests/torch/ptq/test_weights_compression.py
@@ -243,12 +243,11 @@ def test_get_dtype_attribute_of_parameter():
     assert compressed_model.weight.dtype == torch.uint8


-@pytest.mark.parametrize("device", ("cpu", "cuda"))
 @pytest.mark.parametrize("dtype", ("float16", "float32"))
-def test_model_devices_and_precisions(device, dtype):
-    if device == "cuda" and not torch.cuda.is_available():
+def test_model_devices_and_precisions(use_cuda, dtype):
+    if use_cuda and not torch.cuda.is_available():
         pytest.skip("Skipping for CPU-only setups")
-    device = torch.device(device)
+    device = torch.device("cuda" if use_cuda else "cpu")
     dtype = torch.float16 if dtype == "float16" else torch.float32

     model = MatMulModel().to(device)
diff --git a/tests/torch/quantization/test_algo_quantization.py b/tests/torch/quantization/test_algo_quantization.py
index 4a70ceebee2..7ad9ce2d598 100644
--- a/tests/torch/quantization/test_algo_quantization.py
+++ b/tests/torch/quantization/test_algo_quantization.py
@@ -232,6 +232,7 @@ def activation_quantizers_dumping_worker(current_gpu, config, tmp_path):
             f.writelines("%s\n" % str(aq_id))


+@pytest.mark.cuda
 def test_activation_quantizers_order_is_the_same__for_resnet50(tmp_path, runs_subprocess_in_precommit):
     if not torch.cuda.is_available():
         pytest.skip("Skipping CUDA test cases for CPU only setups")
@@ -803,7 +804,8 @@ def test_internal_autocast_model(self, initializing_config: NNCFConfig):
         compressed_model(inputs)

     @pytest.mark.parametrize(
-        "device", [pytest.param("cuda"), pytest.param("cpu", marks=pytest.mark.skip(reason="CVS-86697"))]
+        "device",
+        [pytest.param("cuda", marks=pytest.mark.cuda), pytest.param("cpu", marks=pytest.mark.skip(reason="CVS-86697"))],
     )
     def test_manual_partial_half_precision_model(self, initializing_config: NNCFConfig, device: str):
         model = TestHalfPrecisionModels.ModelWithManualPartialHalfPrecision()
@@ -821,11 +823,10 @@ def test_manual_partial_half_precision_model(self, initializing_config: NNCFConfig, device: str):
         # Should complete successfully, including init.
         compressed_model(inputs)

-    @pytest.mark.parametrize("device", ["cpu", "cuda"])
-    def test_external_autocast(self, initializing_config: NNCFConfig, device: str):
+    def test_external_autocast(self, initializing_config: NNCFConfig, use_cuda):
         model = TestHalfPrecisionModels.RegularModel()
         inputs = torch.ones([1, 1, 1, 1])
-        if device == "cuda":
+        if use_cuda:
             if not torch.cuda.is_available():
                 pytest.skip("CUDA not available")
             inputs = inputs.cuda()
@@ -941,6 +942,7 @@ def test_can_quantize_user_module_with_addmm():
     create_compressed_model_and_algo_for_test(ModelWithUserModule(), nncf_config)


+@pytest.mark.cuda
 def test_works_when_wrapped_with_dataparallel():
     if not torch.cuda.is_available():
         pytest.xfail("The executing host must have > 1 CUDA GPU in order for this test to be relevant.")
diff --git a/tests/torch/quantization/test_autoq_precision_init.py b/tests/torch/quantization/test_autoq_precision_init.py
index 084caf8f6d1..97b851d591f 100644
--- a/tests/torch/quantization/test_autoq_precision_init.py
+++ b/tests/torch/quantization/test_autoq_precision_init.py
@@ -137,6 +137,7 @@ def __str__(self):
 )


+@pytest.mark.cuda
 @pytest.mark.parametrize("params", AUTOQ_TEST_PARAMS, ids=[str(p) for p in AUTOQ_TEST_PARAMS])
 def test_autoq_precision_init(_seed, dataset_dir, tmp_path, mocker, params):
     config = params.config_builder.build()
diff --git a/tests/torch/quantization/test_functions.py b/tests/torch/quantization/test_functions.py
index b468f8c43d5..18ebd23afa6 100644
--- a/tests/torch/quantization/test_functions.py
+++ b/tests/torch/quantization/test_functions.py
@@ -614,12 +614,12 @@ class TestParametrizedLong(BaseParametrized):
     pass


-@pytest.mark.parametrize("device", ["cuda", "cpu"])
-def test_mapping_to_zero(quantization_mode, device):
+def test_mapping_to_zero(use_cuda, quantization_mode):
     torch.manual_seed(42)
-    if not torch.cuda.is_available() and device == "cuda":
+    if use_cuda and not torch.cuda.is_available():
         pytest.skip("Skipping CUDA test cases for CPU only setups")

+    device = "cuda" if use_cuda else "cpu"
     x_zero = torch.zeros([1]).to(torch.device(device))
     levels = 256
     eps = 1e-6
diff --git a/tests/torch/quantization/test_hawq_precision_init.py b/tests/torch/quantization/test_hawq_precision_init.py
index 440981a2360..c056ba96216 100644
--- a/tests/torch/quantization/test_hawq_precision_init.py
+++ b/tests/torch/quantization/test_hawq_precision_init.py
@@ -614,6 +614,7 @@ def precision_init_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
     torch.save(act_bitwidth_per_scope, str(out_file_path))


+@pytest.mark.cuda
 def test_can_broadcast_initialized_precisions_in_distributed_mode(tmp_path, runs_subprocess_in_precommit):
     if not torch.cuda.is_available():
         pytest.skip("Skipping CUDA test cases for CPU only setups")
diff --git a/tests/torch/sparsity/const/test_algo.py b/tests/torch/sparsity/const/test_algo.py
index baf61a4057d..ebc91d511ff 100644
--- a/tests/torch/sparsity/const/test_algo.py
+++ b/tests/torch/sparsity/const/test_algo.py
@@ -76,7 +76,9 @@ def test_can_restore_binary_mask_on_magnitude_algo_resume():
     PTTensorListComparator.check_equal(ref_mask_2, op.operand.binary_mask)


-@pytest.mark.parametrize("use_data_parallel", [True, False], ids=["dataparallel", "regular"])
+@pytest.mark.parametrize(
+    "use_data_parallel", [pytest.param(True, marks=pytest.mark.cuda), False], ids=["dataparallel", "regular"]
+)
 def test_can_restore_binary_mask_on_magnitude_quant_algo_resume(tmp_path, use_data_parallel):
     config = get_empty_config()
     config["compression"] = [
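[Editor's note] The per-parameter marking pattern in the hunk above is worth a short illustration: attaching `pytest.mark.cuda` to a single `pytest.param` keeps the CPU flavor in the default test scope while letting `-m` expressions select or drop only the GPU flavor. A minimal, self-contained sketch follows; the test name and assertion are hypothetical, not taken from this patch:

    # hypothetical_example.py -- illustration only, not part of this diff
    import pytest

    @pytest.mark.parametrize(
        "use_data_parallel",
        [pytest.param(True, marks=pytest.mark.cuda), False],
        ids=["dataparallel", "regular"],
    )
    def test_example(use_data_parallel):
        # `pytest -m "not cuda"` collects only the "regular" case;
        # `pytest -m cuda` collects only the "dataparallel" case.
        assert isinstance(use_data_parallel, bool)

This is why the patch converts plain `[True, False]` parametrizations into `pytest.param(...)` entries instead of marking whole test functions.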
diff --git a/tests/torch/sparsity/movement/test_components.py b/tests/torch/sparsity/movement/test_components.py
index 5db4292301f..13b75f20728 100644
--- a/tests/torch/sparsity/movement/test_components.py
+++ b/tests/torch/sparsity/movement/test_components.py
@@ -323,7 +323,6 @@ class TestFunctions:
         ],
     )
     @pytest.mark.parametrize("requires_grad", [True, False])
-    @pytest.mark.parametrize("use_cuda", [True, False])
     def test_binary_mask_by_threshold(
         self,
         input_tensor: torch.Tensor,
@@ -385,6 +384,7 @@ def test_importance_loss_forward(self, desc, requires_grad: bool, use_cuda: bool):
         assert output.requires_grad is requires_grad
         assert torch.allclose(output, torch.tensor(desc["ref_output"]))

+    @pytest.mark.cuda
     def test_importance_loss_adapts_to_device_change(self):
         if not torch.cuda.is_available():
             pytest.skip("requires GPU")
diff --git a/tests/torch/test_algo_common.py b/tests/torch/test_algo_common.py
index 8b7be3fc382..a7d64d851db 100644
--- a/tests/torch/test_algo_common.py
+++ b/tests/torch/test_algo_common.py
@@ -379,6 +379,7 @@ def get_basic_rb_sparsity_int8_config():
 ]


+@pytest.mark.cuda
 @pytest.mark.parametrize(
     "config",
     comp_loss_configs,
diff --git a/tests/torch/test_api_behavior.py b/tests/torch/test_api_behavior.py
index 9e18479e94e..e7cfe3a6618 100644
--- a/tests/torch/test_api_behavior.py
+++ b/tests/torch/test_api_behavior.py
@@ -122,7 +122,10 @@ def forward(self, x):
         return self.model.forward(x)


-@pytest.mark.parametrize("original_device", ["cpu", "cuda", "cuda:0"])
+@pytest.mark.parametrize(
+    "original_device",
+    ["cpu", pytest.param("cuda", marks=pytest.mark.cuda), pytest.param("cuda:0", marks=pytest.mark.cuda)],
+)
 def test_model_is_inited_with_own_device_by_default(nncf_config_with_default_init_args, original_device):
     if not torch.cuda.is_available() and "cuda" in original_device:
         pytest.skip("Skipping for CPU-only setups")
diff --git a/tests/torch/test_graph_building.py b/tests/torch/test_graph_building.py
index 01b6695045d..11ff4d984c1 100644
--- a/tests/torch/test_graph_building.py
+++ b/tests/torch/test_graph_building.py
@@ -374,10 +374,10 @@ def test_filler_input_info_arg_generation(filler_gen_test_struct: FillerInputInfo
     ],
     ids=["filler", "example", "loader"],
 )
-@pytest.mark.parametrize("device", ["cuda", "cpu"])
-def test_input_infos_respect_device_setting(input_info: ModelInputInfo, device: str):
-    if device == "cuda" and not torch.cuda.is_available():
+def test_input_infos_respect_device_setting(input_info: ModelInputInfo, use_cuda: bool):
+    if use_cuda and not torch.cuda.is_available():
         pytest.skip("Skipped checking CUDA device test cases on CPU-only hosts")
+    device = "cuda" if use_cuda else "cpu"
     forward_inputs = input_info.get_forward_inputs(device)

     def assert_on_device(x: torch.Tensor):
diff --git a/tests/torch/test_knowledge_distillation.py b/tests/torch/test_knowledge_distillation.py
index da4b4fdd471..b129c816e6e 100644
--- a/tests/torch/test_knowledge_distillation.py
+++ b/tests/torch/test_knowledge_distillation.py
@@ -62,7 +62,15 @@ def get_sparsity_config_with_sparsity_init(config: NNCFConfig, sparsity_init=0.5):
     return config


-@pytest.mark.parametrize("inference_type", ["cpu", "single_GPU", "DP", "DDP"])
+@pytest.mark.parametrize(
+    "inference_type",
+    [
+        "cpu",
+        pytest.param("single_GPU", marks=pytest.mark.cuda),
+        pytest.param("DP", marks=pytest.mark.cuda),
+        pytest.param("DDP", marks=pytest.mark.cuda),
+    ],
+)
 def test_knowledge_distillation_training_process(inference_type: str):
     if not torch.cuda.is_available() and inference_type != "cpu":
pytest.skip("Skipping CUDA test cases for CPU only setups") @@ -311,7 +319,15 @@ def test_kd_sparsity_statistics(algo: str): @pytest.mark.parametrize("device_placing", ["before", "after"]) -@pytest.mark.parametrize("inference_type", ["cpu", "single_GPU", "DP", "DDP"]) +@pytest.mark.parametrize( + "inference_type", + [ + "cpu", + pytest.param("single_GPU", marks=pytest.mark.cuda), + pytest.param("DP", marks=pytest.mark.cuda), + pytest.param("DDP", marks=pytest.mark.cuda), + ], +) def test_model_device_before_create_compressed_model(device_placing, inference_type): if not torch.cuda.is_available() and inference_type != "cpu": pytest.skip("Skipping CUDA test cases for CPU only setups") diff --git a/tests/torch/test_model_transformer.py b/tests/torch/test_model_transformer.py index c554a39ccb3..ce9c2c5e6e1 100644 --- a/tests/torch/test_model_transformer.py +++ b/tests/torch/test_model_transformer.py @@ -201,7 +201,7 @@ def to(self, device): self.to_device = device @pytest.mark.parametrize("target_point", available_points) - @pytest.mark.parametrize("multidevice", (False, True)) + @pytest.mark.parametrize("multidevice", (False, pytest.param(True, marks=pytest.mark.cuda))) @pytest.mark.parametrize("hook", (lambda x: x, BaseOpWithParam(lambda x: x).cpu())) def test_pt_insertion_command(self, target_point: PTTargetPoint, multidevice: bool, hook): model = wrap_model(InsertionPointTestModel(), torch.ones([1, 1, 10, 10])) @@ -696,7 +696,7 @@ def test_create_shared_quantizer_insertion_command(): "priority", [TransformationPriority.FP32_TENSOR_STATISTICS_OBSERVATION, TransformationPriority.DEFAULT_PRIORITY] ) @pytest.mark.parametrize("compression_module_registered", [False, True]) -@pytest.mark.parametrize("multidevice_model", (False, True)) +@pytest.mark.parametrize("multidevice_model", (False, pytest.param(True, marks=pytest.mark.cuda))) def test_shared_fn_insertion_point( priority, compression_module_registered, compression_module_type, multidevice_model, mocker ): @@ -786,7 +786,7 @@ def _insert_external_op_mocked(): "priority", [TransformationPriority.FP32_TENSOR_STATISTICS_OBSERVATION, TransformationPriority.DEFAULT_PRIORITY] ) @pytest.mark.parametrize("compression_module_registered", [False, True]) -@pytest.mark.parametrize("multidevice_model", (False, True)) +@pytest.mark.parametrize("multidevice_model", (False, pytest.param(True, marks=pytest.mark.cuda))) def test_shared_fn_insertion_command_several_module_types( priority, compression_module_registered, multidevice_model, mocker ): diff --git a/tests/torch/test_nncf_network.py b/tests/torch/test_nncf_network.py index c4da4be8c82..77a03f4ebf6 100644 --- a/tests/torch/test_nncf_network.py +++ b/tests/torch/test_nncf_network.py @@ -835,6 +835,7 @@ def forward(self, x, y): return res +@pytest.mark.cuda def test_multidevice_model(): if not torch.cuda.is_available(): pytest.skip("GPU required")