From 0a8e3e77f3914176ce0bcab79652585dffb1c849 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Tue, 14 Jan 2025 10:12:02 +0100 Subject: [PATCH] Apply comments --- optimum/intel/openvino/configuration.py | 20 ++++++++------------ tests/openvino/test_exporters_cli.py | 8 ++++---- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py index cfe2c9c60..cb09110b6 100644 --- a/optimum/intel/openvino/configuration.py +++ b/optimum/intel/openvino/configuration.py @@ -658,6 +658,13 @@ def __init__( self.overflow_fix = overflow_fix self.smooth_quant_alpha = smooth_quant_alpha self.activation_format = activation_format + + f8_formats = ["f8e4m3", "f8e5m2"] + if self.activation_format in f8_formats and self.weight_format in f8_formats: + logger.info( + f"{self.activation_format} for activations and {self.weight_format} weights were found. A symmetrical scheme will be used." + ) + self.sym = True self.post_init() def post_init(self): @@ -674,16 +681,6 @@ def post_init(self): f"SmoothQuant alpha parameter must be in range [0, 1], but found {self.smooth_quant_alpha}" ) - if not self.sym: - if self.activation_format != "int8": - raise ValueError( - f"Asymmetric quantization can not be performed in {self.activation_format} activation format." - ) - if self.weight_format != "int8": - raise ValueError( - f"Asymmetric quantization can not be performed in {self.weight_format} weight format." - ) - class OVConfig(BaseConfig): CONFIG_NAME = "openvino_config.json" @@ -708,8 +705,7 @@ def __init__( "compression", None ) # A field for backward-compatability of training-time compression parameters if self.quantization_config is not None: - if isinstance(self.quantization_config, OVWeightQuantizationConfig): - self.dtype = self.quantization_config.weight_format + self.dtype = self.quantization_config.weight_format else: self.dtype = dtype diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py index 3d31e3594..840c6d4eb 100644 --- a/tests/openvino/test_exporters_cli.py +++ b/tests/openvino/test_exporters_cli.py @@ -123,7 +123,7 @@ class OVCLIExportTestCase(unittest.TestCase): "text-generation", "llama", "f8e4m3", - "--dataset wikitext2 --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code --sym", + "--dataset wikitext2 --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code", (13,), (16,), ), @@ -418,7 +418,7 @@ def test_exporters_cli_full_quantization( model_type: str, quant_mode: str, option: str, - expected_num_fq_nodes_per_model: Tuple[int], + expected_num_f_nodes_per_model: Tuple[int], expected_num_weight_nodes_per_model: Tuple[int], ): with TemporaryDirectory() as tmpdir: @@ -432,10 +432,10 @@ def test_exporters_cli_full_quantization( models = [model] if task == "automatic-speech-recognition": models = [model.encoder, model.decoder, model.decoder_with_past] - self.assertEqual(len(expected_num_fq_nodes_per_model), len(models)) + self.assertEqual(len(expected_num_f_nodes_per_model), len(models)) for i, model in enumerate(models): actual_num_f_nodes, actual_num_weight_nodes = get_num_quantized_nodes(model) - self.assertEqual(expected_num_fq_nodes_per_model[i], actual_num_f_nodes) + self.assertEqual(expected_num_f_nodes_per_model[i], actual_num_f_nodes) self.assertEqual(expected_num_weight_nodes_per_model[i], actual_num_weight_nodes[quant_mode]) def test_exporters_cli_int4_with_local_model_and_default_config(self):