-
Notifications
You must be signed in to change notification settings - Fork 240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Computation of compression parameters via OpenVINO models #2727
base: develop
Are you sure you want to change the base?
Changes from 76 commits
10d1ddb
bd2629b
3e69252
166dd04
edbe913
b636c66
f0129ef
e887e70
9141a8a
a43c514
1216f65
8611b75
0718668
283a821
6964844
80e2c92
fc82866
f3891cd
353aac1
d20e593
dc30d8d
c5606ce
e6a9d56
ab90a08
2e308b7
6289c5c
f60fd17
57a0931
1010fcf
6e54fba
8ac0fe2
ded66f3
69ae5fa
d0f49ae
a282976
b13f186
9e90d5a
5f46593
e7617f1
925f830
5831fcd
9160de3
c7c63eb
764f722
4a448e1
73f61fc
16ccf50
cd884eb
608cfe9
9569e1e
f962bd1
5dcd83d
6e22ef5
b45e788
b2cebd0
3a71141
eeadf1d
40aef54
ab3d35f
d48c748
9a56fae
e10d806
b372dc7
63858d3
87b5c10
7134e6d
5a1866f
6a2c9fc
204fb21
dca5376
92fbba5
b27c720
6ab1c08
a0fe91a
97bd61d
58963ab
ec21996
aeffc8b
476287b
d2d66b1
f4a08b9
9f2a79b
05b3eb8
84c88fc
d821e7d
467b5b8
1c485ec
57de030
882c9b1
a9f4e70
fc64966
68e734f
48d47c8
234f698
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ | |
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import inspect | ||
from importlib import import_module | ||
from typing import Any, Callable, Dict, List | ||
|
||
|
@@ -51,3 +52,62 @@ def wrapped_f(*args: Any, **kwargs: Any): # type: ignore | |
return wrapped_f | ||
|
||
return wrap | ||
|
||
|
||
class ResultsCacheContainer:
    """
    A container for results decorated with @cache_results decorator.

    Besides the cached values, the container keeps a counter per key with the number of times
    the stored value was read since it was last written.
    """

    def __init__(self) -> None:
        # Stores the results of the decorated function
        self._cache: Dict[Any, Any] = {}
        # Stores the number of times the cached result was accessed
        self._access_count: Dict[Any, int] = {}

    def clear(self) -> None:
        """
        Removes all stored results together with their access counters.
        """
        self._cache.clear()
        self._access_count.clear()

    def is_empty(self) -> bool:
        """
        :return: True if no results are stored in the container, False otherwise.
        """
        return not self._cache

    def __getitem__(self, item: Any) -> Any:
        """
        Returns the result stored under the given key and increments its access counter.

        :param item: The key of the stored result.
        :return: The stored result.
        :raises KeyError: If nothing is stored under the given key.
        """
        # The counter is looked up first, so a missing key raises KeyError before anything is modified.
        self._access_count[item] += 1
        return self._cache[item]

    def __setitem__(self, key: Any, value: Any) -> None:
        """
        Stores the result under the given key and resets the access counter of this key to zero.

        :param key: The key to store the result under.
        :param value: The result to store.
        """
        self._access_count[key] = 0
        self._cache[key] = value

    def __contains__(self, item: Any) -> bool:
        """
        :param item: The key to look for.
        :return: True if a result is stored under the given key, False otherwise.
        """
        return item in self._cache
|
||
|
||
def cache_results(cache: ResultsCacheContainer) -> Callable:  # type: ignore
    """
    Decorator to cache the results of a function.

    Decorated function additionally accepts a `disable_caching` argument to disable caching if needed. If it is True,
    the result will not be saved to the cache. Also, if there is a corresponding result in the cache, it will be
    recomputed.

    The cache key is built from the function name and the (parameter name, argument value) pairs of the call, so
    all argument values must be hashable. Default values which are not passed explicitly are not part of the key.
    NOTE: positional arguments are matched to parameter names one by one; for a function declaring `*args` the
    positional values beyond the declared parameters do not take part in the key.

    :param cache: A cache container where results will be stored.
    :return: A decorator adding the caching logic to a function.
    """
    # Imported locally to keep this block independent of the module-level import list.
    from functools import wraps

    def decorator(func: Callable) -> Callable:  # type: ignore
        # Preserve the name, docstring and other metadata of the decorated function.
        @wraps(func)
        def wrapper(*args, disable_caching: bool = False, **kwargs) -> Any:  # type: ignore
            if disable_caching:
                # Neither read from the cache nor write to it.
                return func(*args, **kwargs)
            sig = inspect.signature(func)
            # Normalize positional arguments to keyword form, so that f(1) and f(a=1) share one cache entry.
            new_kwargs = dict(zip(sig.parameters, args))
            new_kwargs.update(kwargs)
            cache_key = (func.__name__, frozenset(new_kwargs.items()))
            if cache_key in cache:
                return cache[cache_key]
            result = func(*args, **kwargs)
            cache[cache_key] = result
            return result

        return wrapper

    return decorator
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Copyright (c) 2024 Intel Corporation | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import importlib | ||
|
||
# `import importlib` alone does not guarantee that the `importlib.util` submodule is loaded,
# so it is imported explicitly before `find_spec` is used.
import importlib.util

# True only if the `openvino` package can be found and `openvino.runtime` can be imported.
_openvino_available = importlib.util.find_spec("openvino") is not None
# Stays "N/A" when OpenVINO is not available.
_openvino_version = "N/A"
if _openvino_available:
    try:
        from openvino.runtime import get_version

        version = get_version()
        # avoid invalid format: keep only the part before the first "-" and the first segment after it
        if "-" in version:
            ov_major_version, dev_info = version.split("-", 1)
            commit_id = dev_info.split("-")[0]
            version = f"{ov_major_version}-{commit_id}"
        _openvino_version = version
    except ImportError:
        # The package was found, but its runtime can not be imported.
        _openvino_available = False


def is_openvino_available() -> bool:
    """
    Check if OpenVINO is available.

    The check itself is performed once, at module import time.

    :return: True if openvino package is installed and its runtime was imported successfully, False otherwise.
    """
    return _openvino_available
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -14,6 +14,7 @@ | |||||
import numpy as np | ||||||
import openvino.runtime as ov | ||||||
import openvino.runtime.opset13 as opset | ||||||
from openvino._pyopenvino.op import Constant | ||||||
nikita-savelyevv marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
import nncf | ||||||
from nncf.common.graph.graph import NNCFGraph | ||||||
|
@@ -41,6 +42,8 @@ | |||||
from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype | ||||||
from nncf.openvino.graph.metatypes.openvino_metatypes import OVOpMetatype | ||||||
from nncf.openvino.graph.metatypes.openvino_metatypes import get_node_metatype | ||||||
from nncf.tensor import Tensor | ||||||
from nncf.tensor import TensorBackend | ||||||
|
||||||
InplaceInsertionFnType = Callable[[ov.Node, int, str], ov.Node] | ||||||
|
||||||
|
@@ -107,16 +110,17 @@ def cnt_if_op(model: ov.Model, cnt: int) -> int: | |||||
return cnt_if_op(model, 0) | ||||||
|
||||||
|
||||||
def get_const_value(const_node: ov.Node, cast_bf16_to_fp32: Optional[bool] = True) -> np.ndarray:
    """
    Returns the constant tensor for the node.
    This method is applicable only for the floating-point constant data.

    :param const_node: OpenVINO node.
    :param cast_bf16_to_fp32: Whether to cast bf16 node data to fp32 or not. If False and the node contains bf16 data,
        the resulting bf16 value will be returned encoded inside a numpy.float16 array. Any falsy value, including
        None, disables the cast.
    :return: The constant value.
    """
    # Fixed FP32 data type as the result for BF16 constant
    if const_node.get_element_type() == ov.Type.bf16 and cast_bf16_to_fp32:
        return const_node.get_data(dtype=np.float32)
    # Every other case returns the node data as is.
    return const_node.data
|
||||||
|
@@ -631,3 +635,41 @@ def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: Tuple | |||||
channel_axis = activations_layout.index(OVLayoutElem.C_IN) | ||||||
|
||||||
return channel_axis | ||||||
|
||||||
|
||||||
def convert_if_needed(node: ov.Node, target_dtype: ov.Type) -> ov.Node:
    """
    Brings the given node to the target data type, doing nothing when it already has this type.

    :param node: The node whose element type is checked.
    :param target_dtype: The data type the returned node must have.
    :return: The very same node if its element type equals the target one, otherwise the result of
        `opset.convert` applied to the node.
    """
    has_target_dtype = node.get_element_type() == target_dtype
    return node if has_target_dtype else opset.convert(node, target_dtype)
nikita-savelyevv marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
|
||||||
def non_convertable_divide(a: ov.Node, b: ov.Node) -> ov.Node:
    """
    Divides `a` by `b` and marks the resulting node as "non-convertable", i.e. it is not supposed to be
    replaced with a*(1/b).

    :param a: The dividend node.
    :param b: The divisor node.
    :return: The divide node with the "nonconvertable_divide_0" runtime info flag set to True.
    """
    quotient = a / b
    rt_info = quotient.get_rt_info()
    rt_info["nonconvertable_divide_0"] = True
    return quotient
|
||||||
|
||||||
def create_ov_const_from_tensor(x: Tensor, dtype: ov.Type, name: Optional[str] = None) -> Constant:
    """
    Builds an OpenVINO Constant node holding the data of the given tensor.

    :param x: Data tensor. Supports NumPy and OV tensor backends. For the OV backend the constant is created
        directly from the underlying OV tensor, whose element type must already be equal to `dtype`.
    :param dtype: Data type of the constant.
    :param name: Optional name of the constant.
    :return: OpenVINO Constant node.
    """
    is_ov_tensor = x.backend == TensorBackend.ov
    if is_ov_tensor:
        # No dtype is passed in this branch, hence the element type of the tensor is checked instead.
        assert x.data.get_element_type() == dtype
        return opset.constant(x.data, name=name)
    return opset.constant(x.data, dtype=dtype, name=name)
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -40,12 +40,23 @@ def num_bits(self): | |||||
""" | ||||||
return 8 if self.mode in [CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM] else 4 | ||||||
|
||||||
@property
def is_int_asym(self):
    """
    :return: True if the compression mode is INT4_ASYM or INT8_ASYM, else False.
    """
    asymmetric_modes = (CompressWeightsMode.INT4_ASYM, CompressWeightsMode.INT8_ASYM)
    return self.mode in asymmetric_modes
|
||||||
@property
def is_integer(self):
    """
    :return: True if compression type is integer, i.e. the mode is neither NF4 nor E2M1, else False.
    """
    non_integer_modes = (CompressWeightsMode.NF4, CompressWeightsMode.E2M1)
    return self.mode not in non_integer_modes
|
||||||
def __hash__(self):
    """
    :return: Hash of the (mode value, group size) pair.
    """
    identity = (self.mode.value, self.group_size)
    return hash(identity)
|
||||||
def __str__(self):
    """
    :return: String of the form "<mode value>_<group size>".
    """
    return f"{self.mode.value}_{self.group_size}"
|
||||||
|
||||||
@dataclass | ||||||
class WeightCompressionParameters: | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NNCF already has a solution for single logging with
DuplicateFilter
:nncf/nncf/torch/quantization/algo.py
Line 627 in d90d285
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the suggestion!
With the current approach the given message will be logged exactly once. The problem is, to achieve the same behavior with duplicate filter, it needs to be applied at a very high level, e.g. before
apply()
method. However, doing so is not a good idea, because there may be other log messages that we do want to be logged multiple times while the algorithm is running.