From d10afcf0e229fb985271feaf1b5e2e5f939b6797 Mon Sep 17 00:00:00 2001
From: Aleksei Kashapov
Date: Wed, 24 Apr 2024 15:05:25 +0200
Subject: [PATCH] [PTQ] Add default batch sizes for PTQ conformance test (#2643)

### Changes

Add a default batch_size for the calibration dataset of every model. If a model has no "batch_size" parameter, it means the model does not support batch_size > 1. (A sketch of the resulting resolution logic follows the diff.)

### Reason for changes

Speed up quantization.

### Related tickets

N/A

### Tests

N/A
---
 tests/post_training/conftest.py    |   2 +-
 .../data/ptq_reference_data.yaml   | 126 +++++++++---
 tests/post_training/model_scope.py |  30 ++++-
 .../test_quantize_conformance.py   |   6 +-
 4 files changed, 91 insertions(+), 73 deletions(-)

diff --git a/tests/post_training/conftest.py b/tests/post_training/conftest.py
index 8288b993b31..0cc92c29866 100644
--- a/tests/post_training/conftest.py
+++ b/tests/post_training/conftest.py
@@ -19,7 +19,7 @@ def pytest_addoption(parser):
     parser.addoption("--data", action="store", help="Data directory")
     parser.addoption("--output", action="store", default="./tmp/", help="Directory to store artifacts")
     parser.addoption("--no-eval", action="store_true", help="Skip validation step")
-    parser.addoption("--batch-size", action="store", default=1, type=int, help="Batch size of calibration dataset")
+    parser.addoption("--batch-size", action="store", default=None, type=int, help="Batch size of calibration dataset")
     parser.addoption("--subset-size", type=int, default=None, help="Set subset size")
     parser.addoption("--fp32", action="store_true", help="Test original model")
     parser.addoption("--cuda", action="store_true", help="Enable CUDA_TORCH backend")
diff --git a/tests/post_training/data/ptq_reference_data.yaml b/tests/post_training/data/ptq_reference_data.yaml
index b2aa7826ac8..e1a1838336b 100644
--- a/tests/post_training/data/ptq_reference_data.yaml
+++ b/tests/post_training/data/ptq_reference_data.yaml
@@ -19,97 +19,97 @@ timm/crossvit_9_240_backend_CUDA_TORCH:
 timm/crossvit_9_240_backend_FP32:
   metric_value: 0.73982
 timm/crossvit_9_240_backend_ONNX:
-  metric_value: 0.72854
+  metric_value: 0.73484
 timm/crossvit_9_240_backend_OV:
-  metric_value: 0.72812
+  metric_value: 0.72788
 timm/crossvit_9_240_backend_TORCH:
-  metric_value: 0.72816
+  metric_value: 0.72744
 timm/darknet53_backend_CUDA_TORCH:
   metric_value: 0.79176
 timm/darknet53_backend_FP32:
   metric_value: 0.80006
 timm/darknet53_backend_ONNX:
-  metric_value: 0.79336
+  metric_value: 0.79176
 timm/darknet53_backend_OV:
-  metric_value: 0.79222
+  metric_value: 0.79216
 timm/darknet53_backend_TORCH:
-  metric_value: 0.7916
+  metric_value: 0.79094
 timm/deit3_small_patch16_224_backend_CUDA_TORCH:
   metric_value: 0.76816
 timm/deit3_small_patch16_224_backend_FP32:
   metric_value: 0.81358
 timm/deit3_small_patch16_224_backend_ONNX:
-  metric_value: 0.81154
+  metric_value: 0.81116
 timm/deit3_small_patch16_224_backend_OV:
   metric_value: 0.81276
 timm/deit3_small_patch16_224_backend_TORCH:
-  metric_value: 0.81278
+  metric_value: 0.81274
 timm/dla34_backend_CUDA_TORCH:
   metric_value: 0.73978
 timm/dla34_backend_FP32:
   metric_value: 0.74628
 timm/dla34_backend_ONNX:
-  metric_value: 0.7455
+  metric_value: 0.74564
 timm/dla34_backend_OV:
-  metric_value: 0.74556
+  metric_value: 0.74532
 timm/dla34_backend_TORCH:
-  metric_value: 0.74242
+  metric_value: 0.74256
 timm/dpn68_backend_CUDA_TORCH:
   metric_value: 0.75492
 timm/dpn68_backend_FP32:
   metric_value: 0.76342
 timm/dpn68_backend_ONNX:
-  metric_value: 0.7595
+  metric_value: 0.75906
 timm/dpn68_backend_OV:
-  metric_value: 0.75968
+  metric_value: 0.75972
 timm/dpn68_backend_TORCH:
-  metric_value: 0.75826
+  metric_value: 0.75868
 timm/efficientnet_b0_BC_backend_FP32:
   metric_value: 0.77698
 timm/efficientnet_b0_BC_backend_ONNX:
-  metric_value: 0.77212
+  metric_value: 0.77132
 timm/efficientnet_b0_BC_backend_OV:
-  metric_value: 0.77218
+  metric_value: 0.77166
 timm/efficientnet_b0_backend_CUDA_TORCH:
   metric_value: 0.768
 timm/efficientnet_b0_backend_FP32:
   metric_value: 0.77698
 timm/efficientnet_b0_backend_ONNX:
-  metric_value: 0.77208
+  metric_value: 0.7719
 timm/efficientnet_b0_backend_OV:
-  metric_value: 0.77196
+  metric_value: 0.77104
 timm/efficientnet_b0_backend_TORCH:
-  metric_value: 0.77124
+  metric_value: 0.77042
 timm/efficientnet_lite0_backend_CUDA_TORCH:
   metric_value: 0.74686
 timm/efficientnet_lite0_backend_FP32:
   metric_value: 0.75496
 timm/efficientnet_lite0_backend_ONNX:
-  metric_value: 0.75214
+  metric_value: 0.75184
 timm/efficientnet_lite0_backend_OV:
-  metric_value: 0.7515
+  metric_value: 0.75176
 timm/efficientnet_lite0_backend_TORCH:
-  metric_value: 0.75236
+  metric_value: 0.7517
 timm/hrnet_w18_backend_CUDA_TORCH:
   metric_value: 0.76712
 timm/hrnet_w18_backend_FP32:
   metric_value: 0.78124
 timm/hrnet_w18_backend_ONNX:
-  metric_value: 0.7747
+  metric_value: 0.7743
 timm/hrnet_w18_backend_OV:
-  metric_value: 0.77526
+  metric_value: 0.7743
 timm/hrnet_w18_backend_TORCH:
-  metric_value: 0.77316
+  metric_value: 0.7722
 timm/inception_resnet_v2_backend_CUDA_TORCH:
   metric_value: 0.80024
 timm/inception_resnet_v2_backend_FP32:
   metric_value: 0.80448
 timm/inception_resnet_v2_backend_ONNX:
-  metric_value: 0.804
+  metric_value: 0.80396
 timm/inception_resnet_v2_backend_OV:
   metric_value: 0.80422
 timm/inception_resnet_v2_backend_TORCH:
-  metric_value: 0.803
+  metric_value: 0.80334
 timm/levit_128_backend_CUDA_TORCH:
   metric_value: 0.7324
 timm/levit_128_backend_FP32:
@@ -117,120 +117,120 @@ timm/levit_128_backend_FP32:
 timm/levit_128_backend_ONNX:
   metric_value: 0.7762
 timm/levit_128_backend_OV:
-  metric_value: 0.77696
+  metric_value: 0.77644
 timm/levit_128_backend_TORCH:
-  metric_value: 0.77752
+  metric_value: 0.77814
 timm/mobilenetv2_050_BC_backend_FP32:
   metric_value: 0.6594
 timm/mobilenetv2_050_BC_backend_ONNX:
-  metric_value: 0.65466
+  metric_value: 0.65486
 timm/mobilenetv2_050_BC_backend_OV:
-  metric_value: 0.6543
+  metric_value: 0.65332
 timm/mobilenetv2_050_backend_CUDA_TORCH:
   metric_value: 0.64278
 timm/mobilenetv2_050_backend_FP32:
   metric_value: 0.6594
 timm/mobilenetv2_050_backend_ONNX:
-  metric_value: 0.65332
+  metric_value: 0.6537
 timm/mobilenetv2_050_backend_OV:
-  metric_value: 0.65282
+  metric_value: 0.65314
 timm/mobilenetv2_050_backend_TORCH:
-  metric_value: 0.65364
+  metric_value: 0.65334
 timm/mobilenetv3_small_050_backend_CUDA_TORCH:
   metric_value: 0.41888
 timm/mobilenetv3_small_050_backend_FP32:
   metric_value: 0.57906
 timm/mobilenetv3_small_050_backend_ONNX:
-  metric_value: 0.42104
+  metric_value: 0.41828
 timm/mobilenetv3_small_050_backend_OV:
-  metric_value: 0.42184
+  metric_value: 0.41874
 timm/mobilenetv3_small_050_backend_TORCH:
-  metric_value: 0.4291
+  metric_value: 0.4267
 timm/mobilenetv3_small_050_BC_backend_FP32:
   metric_value: 0.57906
 timm/mobilenetv3_small_050_BC_backend_ONNX:
-  metric_value: 0.56496
+  metric_value: 0.56556
 timm/mobilenetv3_small_050_BC_backend_OV:
-  metric_value: 0.56476
+  metric_value: 0.5655
 timm/regnetx_002_backend_CUDA_TORCH:
   metric_value: 0.67452
 timm/regnetx_002_backend_FP32:
   metric_value: 0.68756
 timm/regnetx_002_backend_ONNX:
-  metric_value: 0.68476
+  metric_value: 0.6848
 timm/regnetx_002_backend_OV:
-  metric_value: 0.6853
+  metric_value: 0.6852
 timm/regnetx_002_backend_TORCH:
-  metric_value: 0.68492
+  metric_value: 0.68576
 timm/resnest14d_backend_CUDA_TORCH:
   metric_value: 0.74176
 timm/resnest14d_backend_FP32:
   metric_value: 0.75516
 timm/resnest14d_backend_ONNX:
-  metric_value: 0.74968
+  metric_value: 0.75428
 timm/resnest14d_backend_OV:
-  metric_value: 0.74984
+  metric_value: 0.75
 timm/resnest14d_backend_TORCH:
-  metric_value: 0.74838
+  metric_value: 0.7485
 timm/resnet18_backend_CUDA_TORCH:
   metric_value: 0.69748
 timm/resnet18_backend_FP32:
   metric_value: 0.71502
 timm/resnet18_backend_ONNX:
-  metric_value: 0.71104
+  metric_value: 0.71102
 timm/resnet18_backend_OV:
-  metric_value: 0.71042
+  metric_value: 0.71116
 timm/resnet18_backend_TORCH:
-  metric_value: 0.71024
+  metric_value: 0.70982
 timm/swin_base_patch4_window7_224_backend_FP32:
   metric_value: 0.85274
 timm/swin_base_patch4_window7_224_backend_OV:
-  metric_value: 0.83636
+  metric_value: 0.83566
 timm/swin_base_patch4_window7_224_no_sq_backend_FP32:
   metric_value: 0.85274
 timm/swin_base_patch4_window7_224_no_sq_backend_CUDA_TORCH:
   metric_value: 0.85142
 timm/swin_base_patch4_window7_224_no_sq_backend_ONNX:
-  metric_value: 0.85158
+  metric_value: 0.85212
 timm/swin_base_patch4_window7_224_no_sq_backend_TORCH:
-  metric_value: 0.85142
+  metric_value: 0.85178
 timm/tf_inception_v3_backend_CUDA_TORCH:
   metric_value: 0.77542
 timm/tf_inception_v3_backend_FP32:
   metric_value: 0.7786
 timm/tf_inception_v3_backend_ONNX:
-  metric_value: 0.77766
+  metric_value: 0.77762
 timm/tf_inception_v3_backend_OV:
-  metric_value: 0.77742
+  metric_value: 0.77748
 timm/tf_inception_v3_backend_TORCH:
-  metric_value: 0.77642
+  metric_value: 0.77586
 timm/vgg11_backend_CUDA_TORCH:
   metric_value: 0.6809
 timm/vgg11_backend_FP32:
   metric_value: 0.6904
 timm/vgg11_backend_ONNX:
-  metric_value: 0.68754
+  metric_value: 0.68788
 timm/vgg11_backend_OV:
-  metric_value: 0.68732
+  metric_value: 0.68788
 timm/vgg11_backend_TORCH:
-  metric_value: 0.68754
+  metric_value: 0.6879
 timm/visformer_small_backend_CUDA_TORCH:
   metric_value: 0.77728
 timm/visformer_small_backend_FP32:
   metric_value: 0.82098
 timm/visformer_small_backend_ONNX:
-  metric_value: 0.81604
+  metric_value: 0.81562
 timm/visformer_small_backend_OV:
-  metric_value: 0.81692
+  metric_value: 0.81674
 timm/visformer_small_backend_TORCH:
-  metric_value: 0.81624
+  metric_value: 0.8162
 timm/wide_resnet50_2_backend_CUDA_TORCH:
   metric_value: 0.81186
 timm/wide_resnet50_2_backend_FP32:
   metric_value: 0.81454
 timm/wide_resnet50_2_backend_ONNX:
-  metric_value: 0.81228
+  metric_value: 0.8119
 timm/wide_resnet50_2_backend_OV:
-  metric_value: 0.8125
+  metric_value: 0.81232
 timm/wide_resnet50_2_backend_TORCH:
-  metric_value: 0.81234
+  metric_value: 0.81206
diff --git a/tests/post_training/model_scope.py b/tests/post_training/model_scope.py
index e6c46f715d0..0ad69a54317 100644
--- a/tests/post_training/model_scope.py
+++ b/tests/post_training/model_scope.py
@@ -39,7 +39,6 @@
             "subset_size": 2,
         },
         "backends": ALL_PTQ_BACKENDS + [BackendType.OPTIMUM],
-        "is_batch_size_supported": False,
     },
     {
         "reported_name": "hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
@@ -51,7 +50,6 @@
             "subset_size": 2,
         },
         "backends": [BackendType.OPTIMUM],
-        "is_batch_size_supported": False,
     },
     # Timm models
     {
@@ -64,6 +62,7 @@
             "advanced_parameters": AdvancedQuantizationParameters(smooth_quant_alpha=-1.0),
         },
         "backends": ALL_PTQ_BACKENDS,
+        "batch_size": 128,
     },
     {
         "reported_name": "timm/darknet53",
@@ -73,6 +72,7 @@
             "preset": QuantizationPreset.MIXED,
         },
         "backends": ALL_PTQ_BACKENDS,
+        "batch_size": 128,
"batch_size": 128, }, { "reported_name": "timm/deit3_small_patch16_224", @@ -86,6 +86,7 @@ ), }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/dla34", @@ -95,6 +96,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/dpn68", @@ -104,6 +106,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/efficientnet_b0", @@ -113,6 +116,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/efficientnet_b0_BC", @@ -123,6 +127,7 @@ "fast_bias_correction": False, }, "backends": [BackendType.ONNX, BackendType.OV], + "batch_size": 128, }, { "reported_name": "timm/efficientnet_lite0", @@ -132,6 +137,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/hrnet_w18", @@ -141,6 +147,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/inception_resnet_v2", @@ -148,6 +155,7 @@ "pipeline_cls": ImageClassificationTimm, "compression_params": {}, "backends": NNCF_PTQ_BACKENDS, + "batch_size": 64, }, { "reported_name": "timm/levit_128", @@ -161,7 +169,6 @@ ), }, "backends": NNCF_PTQ_BACKENDS, - "is_batch_size_supported": False, # Issue is raised during export with dynamich shape. }, { "reported_name": "timm/mobilenetv2_050", @@ -171,6 +178,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/mobilenetv2_050_BC", @@ -181,6 +189,7 @@ "fast_bias_correction": False, }, "backends": [BackendType.ONNX, BackendType.OV], + "batch_size": 128, }, { "reported_name": "timm/mobilenetv3_small_050", @@ -190,6 +199,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/mobilenetv3_small_050_BC", @@ -200,6 +210,7 @@ "fast_bias_correction": False, }, "backends": [BackendType.ONNX, BackendType.OV], + "batch_size": 128, }, { "reported_name": "timm/regnetx_002", @@ -209,6 +220,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/resnest14d", @@ -218,6 +230,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/resnet18", @@ -225,6 +238,7 @@ "pipeline_cls": ImageClassificationTimm, "compression_params": {}, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/swin_base_patch4_window7_224", @@ -235,6 +249,7 @@ "model_type": ModelType.TRANSFORMER, }, "backends": [BackendType.OV], + "batch_size": 32, }, { "reported_name": "timm/swin_base_patch4_window7_224_no_sq", @@ -248,6 +263,7 @@ ), }, "backends": [BackendType.TORCH, BackendType.CUDA_TORCH, BackendType.ONNX], + "batch_size": 128, }, { "reported_name": "timm/tf_inception_v3", @@ -257,6 +273,7 @@ "preset": QuantizationPreset.MIXED, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/vgg11", @@ -264,6 +281,7 @@ "pipeline_cls": ImageClassificationTimm, "compression_params": {}, "backends": NNCF_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/visformer_small", @@ -274,6 +292,7 @@ "model_type": ModelType.TRANSFORMER, }, "backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, { "reported_name": "timm/wide_resnet50_2", @@ -283,6 +302,7 @@ "preset": QuantizationPreset.MIXED, }, 
"backends": ALL_PTQ_BACKENDS, + "batch_size": 128, }, ] @@ -299,7 +319,6 @@ "sensitivity_metric": SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, }, "backends": [BackendType.OV], - "is_batch_size_supported": False, }, { "reported_name": "tinyllama_data_aware", @@ -307,7 +326,6 @@ "pipeline_cls": LMWeightCompression, "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM}, "backends": [BackendType.OV], - "is_batch_size_supported": False, }, { "reported_name": "tinyllama_data_aware_awq", @@ -315,7 +333,6 @@ "pipeline_cls": LMWeightCompression, "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM, "awq": True}, "backends": [BackendType.OV], - "is_batch_size_supported": False, }, { "reported_name": "tinyllama_data_aware_awq_stateful", @@ -324,7 +341,6 @@ "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM, "awq": True}, "params": {"is_stateful": True}, "backends": [BackendType.OV], - "is_batch_size_supported": False, }, ] diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py index 815c1f33b7b..0a42e81c70f 100644 --- a/tests/post_training/test_quantize_conformance.py +++ b/tests/post_training/test_quantize_conformance.py @@ -137,7 +137,7 @@ def maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_back pytest.skip("To run test for not quantized model use --fp32 argument") if test_model_param["backend"] == BackendType.CUDA_TORCH and not run_torch_cuda_backend: pytest.skip("To run test for CUDA_TORCH backend use --cuda argument") - if batch_size > 1 and not test_model_param["is_batch_size_supported"]: + if batch_size and batch_size > 1 and test_model_param.get("batch_size", 1) == 1: pytest.skip("The model does not support batch_size > 1. Please use --batch-size 1.") return test_model_param @@ -203,7 +203,7 @@ def test_ptq_quantization( output_dir: Path, ptq_result_data: Dict[str, RunInfo], no_eval: bool, - batch_size: int, + batch_size: Optional[int], run_fp32_backend: bool, run_torch_cuda_backend: bool, subset_size: Optional[int], @@ -222,6 +222,8 @@ def test_ptq_quantization( maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size) pipeline_cls = test_model_param["pipeline_cls"] # Recalculates subset_size when subset_size is None + if batch_size is None: + batch_size = test_model_param.get("batch_size", 1) if batch_size > 1 and subset_size is None: subset_size = 300 // batch_size print(f"Update subset_size value based on provided batch_size to {subset_size}.")