Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
488cacc
Support scale estimation inside GPTQ
alexsu52 Jun 10, 2024
ee64877
fix for INT4_ASYM
alexsu52 Sep 4, 2024
f22e411
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Sep 23, 2024
51b4d7b
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Sep 26, 2024
f66cd1e
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Sep 30, 2024
7ce5a53
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Oct 2, 2024
f74d156
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Nov 11, 2024
5288c79
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Nov 11, 2024
1becf15
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Nov 14, 2024
047d7d9
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Dec 10, 2024
c0c7e57
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Dec 16, 2024
b74dea1
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Dec 27, 2024
26a9a77
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jan 7, 2025
25fcc2c
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Feb 25, 2025
26d4887
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Mar 12, 2025
7748233
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Apr 1, 2025
df251b3
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Apr 8, 2025
4c134c4
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Apr 9, 2025
6147097
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Apr 14, 2025
2b94d28
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 7, 2025
5e312a5
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 9, 2025
2c5e983
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 12, 2025
1d8db1e
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 23, 2025
7244f18
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 28, 2025
443048c
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jun 2, 2025
80d2d8a
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jun 11, 2025
06bb19b
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jun 26, 2025
5d97d87
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jul 2, 2025
ae7cece
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jul 10, 2025
04ca66c
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jul 14, 2025
6bc3fe4
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jul 23, 2025
ea7530b
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jul 28, 2025
08a9e82
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Sep 2, 2025
7c01fe2
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Sep 19, 2025
7938a36
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Nov 24, 2025
e0952bd
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Dec 8, 2025
a1daca6
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Dec 23, 2025
b62b7b9
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jan 30, 2026
c5715de
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Feb 5, 2026
0ecd8fe
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Feb 9, 2026
33c21e8
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Mar 10, 2026
d7622a5
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Mar 25, 2026
6f10fff
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Apr 21, 2026
529b8aa
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 4, 2026
7f2c5cf
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 15, 2026
65aed7c
Fixed asym compression for the case when all values are positive or negative.
andreyanufr May 15, 2026
6364003
Fixed OV optimization.
andreyanufr May 15, 2026
33ecbab
Merge remote-tracking branch 'upstream/develop' into aanuf/fix_for_asym
andreyanufr May 21, 2026
5d39420
Updated OV test references.
andreyanufr May 21, 2026
2aa48d5
Updated OV test references.
andreyanufr May 21, 2026
2004096
Updated references for OV test_scale_estimation
andreyanufr May 21, 2026
275b9dc
Updated references for torch scale estimation test.
andreyanufr May 21, 2026
18feba3
Updated reference values for test_scale_estimation ONNX backend.
andreyanufr May 22, 2026
02239da
Fixed test_fq_lora_export.
andreyanufr May 22, 2026
daace3b
Aligned weight values between OV and Torch.
andreyanufr May 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/nncf/openvino/optimized_functions/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,9 @@ def _build_integer_quantization_model(
min_values, max_values = opset.convert(min_values, ov.Type.f32), opset.convert(max_values, ov.Type.f32)

if is_asym_mode:
zero = opset.constant(0.0, ov.Type.f32)
min_values = opset.minimum(zero, min_values)
max_values = opset.maximum(zero, max_values)
levels = level_high - level_low + 1
scale = divide_op(max_values - min_values, opset.constant(levels - 1, ov.Type.f32))
scale = opset.select(opset.less(opset.abs(scale), eps), eps, scale)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,11 @@ def calculate_integer_quantization_params(
level_high = 2**num_bits - 1
min_values = fns.min(weight, axis=reduction_axes, keepdims=True) # [a1, r, a2] -> [a1, 1, a2]
max_values = fns.max(weight, axis=reduction_axes, keepdims=True) # [a1, r, a2] -> [a1, 1, a2]

zero = fns.zeros_like(min_values)
min_values = fns.minimum(zero, min_values)
max_values = fns.maximum(zero, max_values)
Comment on lines +321 to +323

scale, zero_point = calculate_scale_zero_point(
min_values, max_values, level_low, level_high, narrow_range=False
)
Expand Down Expand Up @@ -365,6 +370,7 @@ def get_integer_quantization_error(

decompressed_weight = integer_quantize_dequantize_weight(weight, config, reduction_axes)
decompressed_weight = decompressed_weight.reshape(weight.shape)

if reduction == "max_mean":
diff = (decompressed_weight - weight) ** 2
layer_err = fns.mean(diff, axis=reduction_axes)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,27 +209,28 @@ def wrap_model(model, data) -> CompressionParams:
("mode", "all_layers", "ratio", "ref_ids"),
(
(SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 1, [0, 1, 2, 3, 4]),
(SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.8, [0, 3, 4]),
(SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.8, [0, 1, 4]),
(SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.4, [0]),
(SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.2, []),
(SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 1, [0, 1, 2, 3]),
(SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.8, [0, 1, 3]),
(SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.8, [0, 1, 2]),
(SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.4, [0]),
(SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.2, []),
(SensitivityMetric.HESSIAN_INPUT_ACTIVATION, True, 0.8, [0, 1, 2]),
(SensitivityMetric.HESSIAN_INPUT_ACTIVATION, False, 0.8, [0, 1, 2]),
(SensitivityMetric.MEAN_ACTIVATION_VARIANCE, True, 0.8, [0, 1, 2]),
(SensitivityMetric.MEAN_ACTIVATION_VARIANCE, True, 0.8, [0, 1, 4]),
(SensitivityMetric.MEAN_ACTIVATION_VARIANCE, False, 0.8, [0, 1, 2]),
(SensitivityMetric.MAX_ACTIVATION_VARIANCE, True, 0.8, [0, 1, 2]),
(SensitivityMetric.MAX_ACTIVATION_VARIANCE, True, 0.8, [0, 1, 4]),
(SensitivityMetric.MAX_ACTIVATION_VARIANCE, False, 0.8, [0, 1, 2]),
(SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE, True, 0.8, [0, 1, 2]),
(SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE, True, 0.8, [0, 1, 4]),
(SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE, False, 0.8, [0, 1, 2]),
),
)
@pytest.mark.parametrize("transpose_a", (False, True))
def test_mixed_precision(self, mode, all_layers, ratio, ref_ids, transpose_a, mocker):
model = self.get_sequential_matmul_model(transpose_a=transpose_a)
input_shape = (4, 4) if transpose_a else (1, 4, 4)

first = self.to_tensor(np.ones(input_shape, dtype=np.float32))
second = self.to_tensor(np.arange(16, dtype=np.float32)).reshape(input_shape)
dataset = Dataset([first, second], self.get_transform_func())
Expand Down Expand Up @@ -323,6 +324,7 @@ def test_scale_estimation(self, mocker, transpose_a, is_moe, check_sampling_acti
reference = self.get_moe_scale_estimation_ref(check_sampling_activation_stats_flow)
else:
reference = self.get_scale_estimation_ref(check_sampling_activation_stats_flow)

assert fns.allclose(Tensor(reference), computed_scale)

@staticmethod
Expand Down
176 changes: 88 additions & 88 deletions tests/onnx/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,40 +553,40 @@ def get_scale_estimation_ref(check_sampling_activation_stats_flow):
return (
np.array(
[
[[0.473328]],
[[0.929023]],
[[1.446527]],
[[1.920595]],
[[2.517054]],
[[3.030102]],
[[3.584279]],
[[4.043509]],
[[4.620008]],
[[5.165322]],
[[5.710637]],
[[6.122581]],
[[6.655914]],
[[7.237174]],
[[7.722580]],
[[0.47332805]],
[[1.0]],
[[1.4732642]],
[[2.0380495]],
[[2.6054149]],
[[3.0301015]],
[[3.679056]],
[[4.175322]],
[[4.700384]],
[[5.2552223]],
[[5.8100615]],
[[6.3083715]],
[[6.858295]],
[[7.4082184]],
[[7.722581]],
[[8.255914]],
]
).T,
np.array(
[
[[0.47344488]],
[[0.9287766]],
[[1.4463282]],
[[1.920052]],
[[2.5167778]],
[[1.0]],
[[1.5450557]],
[[2.0380037]],
[[2.6055446]],
[[3.02987]],
[[3.5842714]],
[[4.0429296]],
[[4.619769]],
[[5.165224]],
[[5.7106786]],
[[6.121212]],
[[6.654546]],
[[7.2366524]],
[[3.679132]],
[[4.1754694]],
[[4.7001443]],
[[5.2551227]],
[[5.810101]],
[[6.308658]],
[[6.8587303]],
[[7.4]],
[[7.7212124]],
[[8.254545]],
]
Expand All @@ -601,44 +601,44 @@ def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow):
[
[
[
7.5732,
7.4667,
7.4667,
7.4667,
7.4667,
7.2602,
7.4667,
7.4667,
7.4667,
7.4667,
7.3083,
7.8467,
7.2233,
7.2715,
7.4205,
7.4667,
7.573249,
7.58195,
7.6,
7.6666665,
7.1209445,
7.260152,
7.866667,
7.9333334,
8.0,
8.066667,
8.528544,
8.659291,
8.879055,
8.469787,
8.4,
8.364824,
]
]
],
[
[
[
14.8205,
14.9032,
14.9858,
15.0685,
15.1512,
14.3400,
14.4173,
14.4945,
14.5718,
14.6491,
14.7264,
14.8037,
14.8810,
14.9583,
15.0355,
15.1128,
16.0,
16.089771,
16.179543,
16.269318,
16.359089,
16.44886,
16.538631,
16.628407,
16.718176,
16.80795,
16.89772,
16.987492,
15.812495,
15.89516,
15.977826,
16.060493,
]
]
],
Expand All @@ -650,43 +650,43 @@ def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow):
[
[
7.575118,
7.4666667,
7.4666667,
7.4666667,
7.4666667,
7.5841107,
7.6,
7.6666665,
7.112954,
7.254837,
7.4666667,
7.4666667,
7.4666667,
7.4666667,
7.495066,
7.866667,
7.9333334,
8.0,
8.066667,
8.531546,
7.850108,
7.219489,
7.2685375,
7.418597,
7.4666667,
8.887045,
8.468656,
8.4,
8.361673,
]
]
],
[
[
[
14.820066,
14.902746,
14.985427,
15.068108,
15.150787,
14.3391285,
14.416424,
14.493721,
14.571016,
14.648311,
14.725608,
14.802904,
14.8801985,
14.957496,
15.034791,
15.112087,
16.0,
16.089788,
16.17958,
16.269371,
16.359161,
16.448954,
16.538742,
16.628534,
16.718325,
16.808115,
16.897905,
16.987696,
15.812232,
15.894914,
15.977593,
16.060274,
]
]
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"compressed_weight": [
[
[
7.0,
8.0,
0.0,
13.0
]
Expand All @@ -17,8 +17,8 @@
],
[
[
10.0,
1.0,
12.0,
4.0,
0.0
]
],
Expand Down Expand Up @@ -79,7 +79,7 @@
"scale": [
[
[
0.040008544921875
0.046630859375
]
],
[
Expand All @@ -89,7 +89,7 @@
],
[
[
0.041839599609375
0.0545654296875
]
],
[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
"matmul_2_data": {
"compressed_weight": [
[
116,
136,
0,
213
219
],
[
255,
61,
0
],
[
177,
10,
196,
67,
0
],
[
Expand Down Expand Up @@ -54,13 +54,13 @@
],
"scale": [
[
0.002353668212890625
0.002742767333984375
],
[
0.00583648681640625
],
[
0.002460479736328125
0.0032100677490234375
],
[
0.0029277801513671875
Expand Down
Loading