From d10afcf0e229fb985271feaf1b5e2e5f939b6797 Mon Sep 17 00:00:00 2001
From: Aleksei Kashapov
Date: Wed, 24 Apr 2024 15:05:25 +0200
Subject: [PATCH] [PTQ] Add default batch sizes for PTQ conformance test (#2643)

### Changes

Add a default batch_size for the calibration dataset of every model. If a model has no "batch_size" parameter, it means the model does not support batch_size > 1. (A sketch of the resulting resolution logic follows the diff.)

### Reason for changes

Speed up quantization.

### Related tickets

N/A

### Tests

N/A
---
 tests/post_training/conftest.py    |   2 +-
 .../data/ptq_reference_data.yaml   | 126 +++++++++---
 tests/post_training/model_scope.py |  30 ++++-
 .../test_quantize_conformance.py   |   6 +-
 4 files changed, 91 insertions(+), 73 deletions(-)

diff --git a/tests/post_training/conftest.py b/tests/post_training/conftest.py
index 8288b993b31..0cc92c29866 100644
--- a/tests/post_training/conftest.py
+++ b/tests/post_training/conftest.py
@@ -19,7 +19,7 @@ def pytest_addoption(parser):
     parser.addoption("--data", action="store", help="Data directory")
     parser.addoption("--output", action="store", default="./tmp/", help="Directory to store artifacts")
     parser.addoption("--no-eval", action="store_true", help="Skip validation step")
-    parser.addoption("--batch-size", action="store", default=1, type=int, help="Batch size of calibration dataset")
+    parser.addoption("--batch-size", action="store", default=None, type=int, help="Batch size of calibration dataset")
     parser.addoption("--subset-size", type=int, default=None, help="Set subset size")
     parser.addoption("--fp32", action="store_true", help="Test original model")
     parser.addoption("--cuda", action="store_true", help="Enable CUDA_TORCH backend")
diff --git a/tests/post_training/data/ptq_reference_data.yaml b/tests/post_training/data/ptq_reference_data.yaml
index b2aa7826ac8..e1a1838336b 100644
--- a/tests/post_training/data/ptq_reference_data.yaml
+++ b/tests/post_training/data/ptq_reference_data.yaml
@@ -19,97 +19,97 @@ timm/crossvit_9_240_backend_CUDA_TORCH:
 timm/crossvit_9_240_backend_FP32:
   metric_value: 0.73982
 timm/crossvit_9_240_backend_ONNX:
-  metric_value: 0.72854
+  metric_value: 0.73484
 timm/crossvit_9_240_backend_OV:
-  metric_value: 0.72812
+  metric_value: 0.72788
 timm/crossvit_9_240_backend_TORCH:
-  metric_value: 0.72816
+  metric_value: 0.72744
 timm/darknet53_backend_CUDA_TORCH:
   metric_value: 0.79176
 timm/darknet53_backend_FP32:
   metric_value: 0.80006
 timm/darknet53_backend_ONNX:
-  metric_value: 0.79336
+  metric_value: 0.79176
 timm/darknet53_backend_OV:
-  metric_value: 0.79222
+  metric_value: 0.79216
 timm/darknet53_backend_TORCH:
-  metric_value: 0.7916
+  metric_value: 0.79094
 timm/deit3_small_patch16_224_backend_CUDA_TORCH:
   metric_value: 0.76816
 timm/deit3_small_patch16_224_backend_FP32:
   metric_value: 0.81358
 timm/deit3_small_patch16_224_backend_ONNX:
-  metric_value: 0.81154
+  metric_value: 0.81116
 timm/deit3_small_patch16_224_backend_OV:
   metric_value: 0.81276
 timm/deit3_small_patch16_224_backend_TORCH:
-  metric_value: 0.81278
+  metric_value: 0.81274
 timm/dla34_backend_CUDA_TORCH:
   metric_value: 0.73978
 timm/dla34_backend_FP32:
   metric_value: 0.74628
 timm/dla34_backend_ONNX:
-  metric_value: 0.7455
+  metric_value: 0.74564
 timm/dla34_backend_OV:
-  metric_value: 0.74556
+  metric_value: 0.74532
 timm/dla34_backend_TORCH:
-  metric_value: 0.74242
+  metric_value: 0.74256
 timm/dpn68_backend_CUDA_TORCH:
   metric_value: 0.75492
 timm/dpn68_backend_FP32:
   metric_value: 0.76342
 timm/dpn68_backend_ONNX:
-  metric_value: 0.7595
+  metric_value: 0.75906
 timm/dpn68_backend_OV:
-  metric_value: 0.75968
+  metric_value: 0.75972
 timm/dpn68_backend_TORCH:
-  metric_value: 0.75826
+  metric_value: 0.75868
 timm/efficientnet_b0_BC_backend_FP32:
   metric_value: 0.77698
 timm/efficientnet_b0_BC_backend_ONNX:
-  metric_value: 0.77212
+  metric_value: 0.77132
 timm/efficientnet_b0_BC_backend_OV:
-  metric_value: 0.77218
+  metric_value: 0.77166
 timm/efficientnet_b0_backend_CUDA_TORCH:
   metric_value: 0.768
 timm/efficientnet_b0_backend_FP32:
   metric_value: 0.77698
 timm/efficientnet_b0_backend_ONNX:
-  metric_value: 0.77208
+  metric_value: 0.7719
 timm/efficientnet_b0_backend_OV:
-  metric_value: 0.77196
+  metric_value: 0.77104
 timm/efficientnet_b0_backend_TORCH:
-  metric_value: 0.77124
+  metric_value: 0.77042
 timm/efficientnet_lite0_backend_CUDA_TORCH:
   metric_value: 0.74686
 timm/efficientnet_lite0_backend_FP32:
   metric_value: 0.75496
 timm/efficientnet_lite0_backend_ONNX:
-  metric_value: 0.75214
+  metric_value: 0.75184
 timm/efficientnet_lite0_backend_OV:
-  metric_value: 0.7515
+  metric_value: 0.75176
 timm/efficientnet_lite0_backend_TORCH:
-  metric_value: 0.75236
+  metric_value: 0.7517
 timm/hrnet_w18_backend_CUDA_TORCH:
   metric_value: 0.76712
 timm/hrnet_w18_backend_FP32:
   metric_value: 0.78124
 timm/hrnet_w18_backend_ONNX:
-  metric_value: 0.7747
+  metric_value: 0.7743
 timm/hrnet_w18_backend_OV:
-  metric_value: 0.77526
+  metric_value: 0.7743
 timm/hrnet_w18_backend_TORCH:
-  metric_value: 0.77316
+  metric_value: 0.7722
 timm/inception_resnet_v2_backend_CUDA_TORCH:
   metric_value: 0.80024
 timm/inception_resnet_v2_backend_FP32:
   metric_value: 0.80448
 timm/inception_resnet_v2_backend_ONNX:
-  metric_value: 0.804
+  metric_value: 0.80396
 timm/inception_resnet_v2_backend_OV:
   metric_value: 0.80422
 timm/inception_resnet_v2_backend_TORCH:
-  metric_value: 0.803
+  metric_value: 0.80334
 timm/levit_128_backend_CUDA_TORCH:
   metric_value: 0.7324
 timm/levit_128_backend_FP32:
@@ -117,120 +117,120 @@ timm/levit_128_backend_FP32:
 timm/levit_128_backend_ONNX:
   metric_value: 0.7762
 timm/levit_128_backend_OV:
-  metric_value: 0.77696
+  metric_value: 0.77644
 timm/levit_128_backend_TORCH:
-  metric_value: 0.77752
+  metric_value: 0.77814
 timm/mobilenetv2_050_BC_backend_FP32:
   metric_value: 0.6594
 timm/mobilenetv2_050_BC_backend_ONNX:
-  metric_value: 0.65466
+  metric_value: 0.65486
 timm/mobilenetv2_050_BC_backend_OV:
-  metric_value: 0.6543
+  metric_value: 0.65332
 timm/mobilenetv2_050_backend_CUDA_TORCH:
   metric_value: 0.64278
 timm/mobilenetv2_050_backend_FP32:
   metric_value: 0.6594
 timm/mobilenetv2_050_backend_ONNX:
-  metric_value: 0.65332
+  metric_value: 0.6537
 timm/mobilenetv2_050_backend_OV:
-  metric_value: 0.65282
+  metric_value: 0.65314
 timm/mobilenetv2_050_backend_TORCH:
-  metric_value: 0.65364
+  metric_value: 0.65334
 timm/mobilenetv3_small_050_backend_CUDA_TORCH:
   metric_value: 0.41888
 timm/mobilenetv3_small_050_backend_FP32:
   metric_value: 0.57906
 timm/mobilenetv3_small_050_backend_ONNX:
-  metric_value: 0.42104
+  metric_value: 0.41828
 timm/mobilenetv3_small_050_backend_OV:
-  metric_value: 0.42184
+  metric_value: 0.41874
 timm/mobilenetv3_small_050_backend_TORCH:
-  metric_value: 0.4291
+  metric_value: 0.4267
 timm/mobilenetv3_small_050_BC_backend_FP32:
   metric_value: 0.57906
 timm/mobilenetv3_small_050_BC_backend_ONNX:
-  metric_value: 0.56496
+  metric_value: 0.56556
 timm/mobilenetv3_small_050_BC_backend_OV:
-  metric_value: 0.56476
+  metric_value: 0.5655
 timm/regnetx_002_backend_CUDA_TORCH:
   metric_value: 0.67452
 timm/regnetx_002_backend_FP32:
   metric_value: 0.68756
 timm/regnetx_002_backend_ONNX:
-  metric_value: 0.68476
+  metric_value: 0.6848
 timm/regnetx_002_backend_OV:
-  metric_value: 0.6853
+  metric_value: 0.6852
 timm/regnetx_002_backend_TORCH:
-  metric_value: 0.68492
+  metric_value: 0.68576
 timm/resnest14d_backend_CUDA_TORCH:
   metric_value: 0.74176
 timm/resnest14d_backend_FP32:
   metric_value: 0.75516
 timm/resnest14d_backend_ONNX:
-  metric_value: 0.74968
+  metric_value: 0.75428
 timm/resnest14d_backend_OV:
-  metric_value: 0.74984
+  metric_value: 0.75
 timm/resnest14d_backend_TORCH:
-  metric_value: 0.74838
+  metric_value: 0.7485
 timm/resnet18_backend_CUDA_TORCH:
   metric_value: 0.69748
 timm/resnet18_backend_FP32:
   metric_value: 0.71502
 timm/resnet18_backend_ONNX:
-  metric_value: 0.71104
+  metric_value: 0.71102
 timm/resnet18_backend_OV:
-  metric_value: 0.71042
+  metric_value: 0.71116
 timm/resnet18_backend_TORCH:
-  metric_value: 0.71024
+  metric_value: 0.70982
 timm/swin_base_patch4_window7_224_backend_FP32:
   metric_value: 0.85274
 timm/swin_base_patch4_window7_224_backend_OV:
-  metric_value: 0.83636
+  metric_value: 0.83566
 timm/swin_base_patch4_window7_224_no_sq_backend_FP32:
   metric_value: 0.85274
 timm/swin_base_patch4_window7_224_no_sq_backend_CUDA_TORCH:
   metric_value: 0.85142
 timm/swin_base_patch4_window7_224_no_sq_backend_ONNX:
-  metric_value: 0.85158
+  metric_value: 0.85212
 timm/swin_base_patch4_window7_224_no_sq_backend_TORCH:
-  metric_value: 0.85142
+  metric_value: 0.85178
 timm/tf_inception_v3_backend_CUDA_TORCH:
   metric_value: 0.77542
 timm/tf_inception_v3_backend_FP32:
   metric_value: 0.7786
 timm/tf_inception_v3_backend_ONNX:
-  metric_value: 0.77766
+  metric_value: 0.77762
 timm/tf_inception_v3_backend_OV:
-  metric_value: 0.77742
+  metric_value: 0.77748
 timm/tf_inception_v3_backend_TORCH:
-  metric_value: 0.77642
+  metric_value: 0.77586
 timm/vgg11_backend_CUDA_TORCH:
   metric_value: 0.6809
 timm/vgg11_backend_FP32:
   metric_value: 0.6904
 timm/vgg11_backend_ONNX:
-  metric_value: 0.68754
+  metric_value: 0.68788
 timm/vgg11_backend_OV:
-  metric_value: 0.68732
+  metric_value: 0.68788
 timm/vgg11_backend_TORCH:
-  metric_value: 0.68754
+  metric_value: 0.6879
 timm/visformer_small_backend_CUDA_TORCH:
   metric_value: 0.77728
 timm/visformer_small_backend_FP32:
   metric_value: 0.82098
 timm/visformer_small_backend_ONNX:
-  metric_value: 0.81604
+  metric_value: 0.81562
 timm/visformer_small_backend_OV:
-  metric_value: 0.81692
+  metric_value: 0.81674
 timm/visformer_small_backend_TORCH:
-  metric_value: 0.81624
+  metric_value: 0.8162
 timm/wide_resnet50_2_backend_CUDA_TORCH:
   metric_value: 0.81186
 timm/wide_resnet50_2_backend_FP32:
   metric_value: 0.81454
 timm/wide_resnet50_2_backend_ONNX:
-  metric_value: 0.81228
+  metric_value: 0.8119
 timm/wide_resnet50_2_backend_OV:
-  metric_value: 0.8125
+  metric_value: 0.81232
 timm/wide_resnet50_2_backend_TORCH:
-  metric_value: 0.81234
+  metric_value: 0.81206
diff --git a/tests/post_training/model_scope.py b/tests/post_training/model_scope.py
index e6c46f715d0..0ad69a54317 100644
--- a/tests/post_training/model_scope.py
+++ b/tests/post_training/model_scope.py
@@ -39,7 +39,6 @@
             "subset_size": 2,
         },
         "backends": ALL_PTQ_BACKENDS + [BackendType.OPTIMUM],
-        "is_batch_size_supported": False,
     },
     {
         "reported_name": "hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
@@ -51,7 +50,6 @@
             "subset_size": 2,
         },
         "backends": [BackendType.OPTIMUM],
-        "is_batch_size_supported": False,
     },
     # Timm models
     {
@@ -64,6 +62,7 @@
             "advanced_parameters": AdvancedQuantizationParameters(smooth_quant_alpha=-1.0),
         },
         "backends": ALL_PTQ_BACKENDS,
+        "batch_size": 128,
     },
     {
         "reported_name": "timm/darknet53",
@@ -73,6 +72,7 @@
             "preset": QuantizationPreset.MIXED,
         },
         "backends": ALL_PTQ_BACKENDS,
+        "batch_size": 128,
"batch_size": 128, }, { "reported_name": "timm/deit3_small_patch16_224", @@ -86,6 +86,7 @@ ), }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/dla34", @@ -95,6 +96,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/dpn68", @@ -104,6 +106,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/efficientnet_b0", @@ -113,6 +116,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/efficientnet_b0_BC", @@ -123,6 +127,7 @@ "fast_bias_correction": False, }, "backends": [BackendType.ONNX, BackendType.OV], + "batch_size": 128, }, { "reported_name": "timm/efficientnet_lite0", @@ -132,6 +137,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/hrnet_w18", @@ -141,6 +147,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/inception_resnet_v2", @@ -148,6 +155,7 @@ "pipeline_cls": ImageClassificationTimm, "compression_params": {}, "backends": NNCF_PTQ_BACKENDS, + "batch_size": 64, }, { "reported_name": "timm/levit_128", @@ -161,7 +169,6 @@ ), }, "backends": NNCF_PTQ_BACKENDS, - "is_batch_size_supported": False, # Issue is raised during export with dynamich shape. }, { "reported_name": "timm/mobilenetv2_050", @@ -171,6 +178,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/mobilenetv2_050_BC", @@ -181,6 +189,7 @@ "fast_bias_correction": False, }, "backends": [BackendType.ONNX, BackendType.OV], + "batch_size": 128, }, { "reported_name": "timm/mobilenetv3_small_050", @@ -190,6 +199,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/mobilenetv3_small_050_BC", @@ -200,6 +210,7 @@ "fast_bias_correction": False, }, "backends": [BackendType.ONNX, BackendType.OV], + "batch_size": 128, }, { "reported_name": "timm/regnetx_002", @@ -209,6 +220,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/resnest14d", @@ -218,6 +230,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/resnet18", @@ -225,6 +238,7 @@ "pipeline_cls": ImageClassificationTimm, "compression_params": {}, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/swin_base_patch4_window7_224", @@ -235,6 +249,7 @@ "model_type": ModelType.TRANSFORMER, }, "backends": [BackendType.OV], + "batch_size": 32, }, { "reported_name": "timm/swin_base_patch4_window7_224_no_sq", @@ -248,6 +263,7 @@ ), }, "backends": [BackendType.TORCH, BackendType.CUDA_TORCH, BackendType.ONNX], + "batch_size": 128, }, { "reported_name": "timm/tf_inception_v3", @@ -257,6 +273,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/vgg11", @@ -264,6 +281,7 @@ "pipeline_cls": ImageClassificationTimm, "compression_params": {}, "backends": NNCF_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/visformer_small", @@ -274,6 +292,7 @@ "model_type": ModelType.TRANSFORMER, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/wide_resnet50_2", @@ -283,6 +302,7 @@ "preset": QuantizationPreset.MIXED, }, 
"backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, ] @@ -299,7 +319,6 @@ "sensitivity_metric": SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, }, "backends": [BackendType.OV], - "is_batch_size_supported": False, }, { "reported_name": "tinyllama_data_aware", @@ -307,7 +326,6 @@ "pipeline_cls": LMWeightCompression, "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM}, "backends": [BackendType.OV], - "is_batch_size_supported": False, }, { "reported_name": "tinyllama_data_aware_awq", @@ -315,7 +333,6 @@ "pipeline_cls": LMWeightCompression, "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM, "awq": True}, "backends": [BackendType.OV], - "is_batch_size_supported": False, }, { "reported_name": "tinyllama_data_aware_awq_stateful", @@ -324,7 +341,6 @@ "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM, "awq": True}, "params": {"is_stateful": True}, "backends": [BackendType.OV], - "is_batch_size_supported": False, }, ] diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py index 815c1f33b7b..0a42e81c70f 100644 --- a/tests/post_training/test_quantize_conformance.py +++ b/tests/post_training/test_quantize_conformance.py @@ -137,7 +137,7 @@ def maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_back pytest.skip("To run test for not quantized model use --fp32 argument") if test_model_param["backend"] == BackendType.CUDA_TORCH and not run_torch_cuda_backend: pytest.skip("To run test for CUDA_TORCH backend use --cuda argument") - if batch_size > 1 and not test_model_param["is_batch_size_supported"]: + if batch_size and batch_size > 1 and test_model_param.get("batch_size", 1) == 1: pytest.skip("The model does not support batch_size > 1. Please use --batch-size 1.") return test_model_param @@ -203,7 +203,7 @@ def test_ptq_quantization( output_dir: Path, ptq_result_data: Dict[str, RunInfo], no_eval: bool, - batch_size: int, + batch_size: Optional[int], run_fp32_backend: bool, run_torch_cuda_backend: bool, subset_size: Optional[int], @@ -222,6 +222,8 @@ def test_ptq_quantization( maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size) pipeline_cls = test_model_param["pipeline_cls"] # Recalculates subset_size when subset_size is None + if batch_size is None: + batch_size = test_model_param.get("batch_size", 1) if batch_size > 1 and subset_size is None: subset_size = 300 // batch_size print(f"Update subset_size value based on provided batch_size to {subset_size}.")