diff --git a/src/nncf/openvino/optimized_functions/models.py b/src/nncf/openvino/optimized_functions/models.py index 55a9373fa6b..de5c36970c6 100644 --- a/src/nncf/openvino/optimized_functions/models.py +++ b/src/nncf/openvino/optimized_functions/models.py @@ -532,6 +532,9 @@ def _build_integer_quantization_model( min_values, max_values = opset.convert(min_values, ov.Type.f32), opset.convert(max_values, ov.Type.f32) if is_asym_mode: + zero = opset.constant(0.0, ov.Type.f32) + min_values = opset.minimum(zero, min_values) + max_values = opset.maximum(zero, max_values) levels = level_high - level_low + 1 scale = divide_op(max_values - min_values, opset.constant(levels - 1, ov.Type.f32)) scale = opset.select(opset.less(opset.abs(scale), eps), eps, scale) diff --git a/src/nncf/quantization/algorithms/weight_compression/weight_lowering.py b/src/nncf/quantization/algorithms/weight_compression/weight_lowering.py index 7f1bdf3dfae..548c5252fd3 100644 --- a/src/nncf/quantization/algorithms/weight_compression/weight_lowering.py +++ b/src/nncf/quantization/algorithms/weight_compression/weight_lowering.py @@ -317,6 +317,11 @@ def calculate_integer_quantization_params( level_high = 2**num_bits - 1 min_values = fns.min(weight, axis=reduction_axes, keepdims=True) # [a1, r, a2] -> [a1, 1, a2] max_values = fns.max(weight, axis=reduction_axes, keepdims=True) # [a1, r, a2] -> [a1, 1, a2] + + zero = fns.zeros_like(min_values) + min_values = fns.minimum(zero, min_values) + max_values = fns.maximum(zero, max_values) + scale, zero_point = calculate_scale_zero_point( min_values, max_values, level_low, level_high, narrow_range=False ) @@ -365,6 +370,7 @@ def get_integer_quantization_error( decompressed_weight = integer_quantize_dequantize_weight(weight, config, reduction_axes) decompressed_weight = decompressed_weight.reshape(weight.shape) + if reduction == "max_mean": diff = (decompressed_weight - weight) ** 2 layer_err = fns.mean(diff, axis=reduction_axes) diff --git a/tests/cross_fw/test_templates/template_test_weights_compression.py b/tests/cross_fw/test_templates/template_test_weights_compression.py index 6f55e6a2c5f..c72c1964f2b 100644 --- a/tests/cross_fw/test_templates/template_test_weights_compression.py +++ b/tests/cross_fw/test_templates/template_test_weights_compression.py @@ -209,20 +209,20 @@ def wrap_model(model, data) -> CompressionParams: ("mode", "all_layers", "ratio", "ref_ids"), ( (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 1, [0, 1, 2, 3, 4]), - (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.8, [0, 3, 4]), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.8, [0, 1, 4]), (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.4, [0]), (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.2, []), (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 1, [0, 1, 2, 3]), - (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.8, [0, 1, 3]), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.8, [0, 1, 2]), (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.4, [0]), (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.2, []), (SensitivityMetric.HESSIAN_INPUT_ACTIVATION, True, 0.8, [0, 1, 2]), (SensitivityMetric.HESSIAN_INPUT_ACTIVATION, False, 0.8, [0, 1, 2]), - (SensitivityMetric.MEAN_ACTIVATION_VARIANCE, True, 0.8, [0, 1, 2]), + (SensitivityMetric.MEAN_ACTIVATION_VARIANCE, True, 0.8, [0, 1, 4]), (SensitivityMetric.MEAN_ACTIVATION_VARIANCE, False, 0.8, [0, 1, 2]), - (SensitivityMetric.MAX_ACTIVATION_VARIANCE, True, 0.8, [0, 1, 2]), + (SensitivityMetric.MAX_ACTIVATION_VARIANCE, True, 0.8, [0, 1, 4]), (SensitivityMetric.MAX_ACTIVATION_VARIANCE, False, 0.8, [0, 1, 2]), - (SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE, True, 0.8, [0, 1, 2]), + (SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE, True, 0.8, [0, 1, 4]), (SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE, False, 0.8, [0, 1, 2]), ), ) @@ -230,6 +230,7 @@ def wrap_model(model, data) -> CompressionParams: def test_mixed_precision(self, mode, all_layers, ratio, ref_ids, transpose_a, mocker): model = self.get_sequential_matmul_model(transpose_a=transpose_a) input_shape = (4, 4) if transpose_a else (1, 4, 4) + first = self.to_tensor(np.ones(input_shape, dtype=np.float32)) second = self.to_tensor(np.arange(16, dtype=np.float32)).reshape(input_shape) dataset = Dataset([first, second], self.get_transform_func()) @@ -323,6 +324,7 @@ def test_scale_estimation(self, mocker, transpose_a, is_moe, check_sampling_acti reference = self.get_moe_scale_estimation_ref(check_sampling_activation_stats_flow) else: reference = self.get_scale_estimation_ref(check_sampling_activation_stats_flow) + assert fns.allclose(Tensor(reference), computed_scale) @staticmethod diff --git a/tests/onnx/quantization/test_weights_compression.py b/tests/onnx/quantization/test_weights_compression.py index 7c52a61764e..2501c360c0a 100644 --- a/tests/onnx/quantization/test_weights_compression.py +++ b/tests/onnx/quantization/test_weights_compression.py @@ -553,40 +553,40 @@ def get_scale_estimation_ref(check_sampling_activation_stats_flow): return ( np.array( [ - [[0.473328]], - [[0.929023]], - [[1.446527]], - [[1.920595]], - [[2.517054]], - [[3.030102]], - [[3.584279]], - [[4.043509]], - [[4.620008]], - [[5.165322]], - [[5.710637]], - [[6.122581]], - [[6.655914]], - [[7.237174]], - [[7.722580]], + [[0.47332805]], + [[1.0]], + [[1.4732642]], + [[2.0380495]], + [[2.6054149]], + [[3.0301015]], + [[3.679056]], + [[4.175322]], + [[4.700384]], + [[5.2552223]], + [[5.8100615]], + [[6.3083715]], + [[6.858295]], + [[7.4082184]], + [[7.722581]], [[8.255914]], ] ).T, np.array( [ [[0.47344488]], - [[0.9287766]], - [[1.4463282]], - [[1.920052]], - [[2.5167778]], + [[1.0]], + [[1.5450557]], + [[2.0380037]], + [[2.6055446]], [[3.02987]], - [[3.5842714]], - [[4.0429296]], - [[4.619769]], - [[5.165224]], - [[5.7106786]], - [[6.121212]], - [[6.654546]], - [[7.2366524]], + [[3.679132]], + [[4.1754694]], + [[4.7001443]], + [[5.2551227]], + [[5.810101]], + [[6.308658]], + [[6.8587303]], + [[7.4]], [[7.7212124]], [[8.254545]], ] @@ -601,44 +601,44 @@ def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow): [ [ [ - 7.5732, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.2602, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.3083, - 7.8467, - 7.2233, - 7.2715, - 7.4205, - 7.4667, + 7.573249, + 7.58195, + 7.6, + 7.6666665, + 7.1209445, + 7.260152, + 7.866667, + 7.9333334, + 8.0, + 8.066667, + 8.528544, + 8.659291, + 8.879055, + 8.469787, + 8.4, + 8.364824, ] ] ], [ [ [ - 14.8205, - 14.9032, - 14.9858, - 15.0685, - 15.1512, - 14.3400, - 14.4173, - 14.4945, - 14.5718, - 14.6491, - 14.7264, - 14.8037, - 14.8810, - 14.9583, - 15.0355, - 15.1128, + 16.0, + 16.089771, + 16.179543, + 16.269318, + 16.359089, + 16.44886, + 16.538631, + 16.628407, + 16.718176, + 16.80795, + 16.89772, + 16.987492, + 15.812495, + 15.89516, + 15.977826, + 16.060493, ] ] ], @@ -650,43 +650,43 @@ def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow): [ [ 7.575118, - 7.4666667, - 7.4666667, - 7.4666667, - 7.4666667, + 7.5841107, + 7.6, + 7.6666665, + 7.112954, 7.254837, - 7.4666667, - 7.4666667, - 7.4666667, - 7.4666667, - 7.495066, + 7.866667, + 7.9333334, + 8.0, + 8.066667, + 8.531546, 7.850108, - 7.219489, - 7.2685375, - 7.418597, - 7.4666667, + 8.887045, + 8.468656, + 8.4, + 8.361673, ] ] ], [ [ [ - 14.820066, - 14.902746, - 14.985427, - 15.068108, - 15.150787, - 14.3391285, - 14.416424, - 14.493721, - 14.571016, - 14.648311, - 14.725608, - 14.802904, - 14.8801985, - 14.957496, - 15.034791, - 15.112087, + 16.0, + 16.089788, + 16.17958, + 16.269371, + 16.359161, + 16.448954, + 16.538742, + 16.628534, + 16.718325, + 16.808115, + 16.897905, + 16.987696, + 15.812232, + 15.894914, + 15.977593, + 16.060274, ] ] ], diff --git a/tests/openvino/native/data/2024.1/reference_scales/IntegerModel_compressed_weights_int4_asym.json b/tests/openvino/native/data/2024.1/reference_scales/IntegerModel_compressed_weights_int4_asym.json index b4528b858d5..fd00b6a4b09 100644 --- a/tests/openvino/native/data/2024.1/reference_scales/IntegerModel_compressed_weights_int4_asym.json +++ b/tests/openvino/native/data/2024.1/reference_scales/IntegerModel_compressed_weights_int4_asym.json @@ -3,7 +3,7 @@ "compressed_weight": [ [ [ - 7.0, + 8.0, 0.0, 13.0 ] @@ -17,8 +17,8 @@ ], [ [ - 10.0, - 1.0, + 12.0, + 4.0, 0.0 ] ], @@ -79,7 +79,7 @@ "scale": [ [ [ - 0.040008544921875 + 0.046630859375 ] ], [ @@ -89,7 +89,7 @@ ], [ [ - 0.041839599609375 + 0.0545654296875 ] ], [ diff --git a/tests/openvino/native/data/2024.1/reference_scales/IntegerModel_compressed_weights_int8_asym.json b/tests/openvino/native/data/2024.1/reference_scales/IntegerModel_compressed_weights_int8_asym.json index a1cb92a00d5..1729fac42cf 100644 --- a/tests/openvino/native/data/2024.1/reference_scales/IntegerModel_compressed_weights_int8_asym.json +++ b/tests/openvino/native/data/2024.1/reference_scales/IntegerModel_compressed_weights_int8_asym.json @@ -2,9 +2,9 @@ "matmul_2_data": { "compressed_weight": [ [ - 116, + 136, 0, - 213 + 219 ], [ 255, @@ -12,8 +12,8 @@ 0 ], [ - 177, - 10, + 196, + 67, 0 ], [ @@ -54,13 +54,13 @@ ], "scale": [ [ - 0.002353668212890625 + 0.002742767333984375 ], [ 0.00583648681640625 ], [ - 0.002460479736328125 + 0.0032100677490234375 ], [ 0.0029277801513671875 diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py index d8c6932d72d..a5df7c0c735 100644 --- a/tests/openvino/native/quantization/test_weights_compression.py +++ b/tests/openvino/native/quantization/test_weights_compression.py @@ -739,11 +739,18 @@ def __str__(self): weight=TWO_GROUPS_IN_TWO_ROWS_ASYM, config=int4_asym_grouped_config, ), - # non-zero error - QuantErrorDesc(name="2 rows scaled [1, 254] linspace", weight=TWO_ROWS_LINSPACE[:, 1:-1], ref_error=239, atol=1), + QuantErrorDesc(name="2 rows scaled [1, 254] linspace", weight=TWO_ROWS_LINSPACE[:, 1:-1], ref_error=0, atol=1), + QuantErrorDesc( + name="2 columns of scaled [0, 255] linspace", weight=np.transpose(TWO_ROWS_LINSPACE), ref_error=0, atol=1 + ), QuantErrorDesc( - name="2 columns of scaled [0, 255] linspace", weight=np.transpose(TWO_ROWS_LINSPACE), ref_error=46818, atol=1 + name="2 columns of [0-15] linspace for asym", + weight=np.transpose(TWO_ROWS_LINSPACE_INT4_ASYM), + config=int4_asym_config, + ref_error=0, + atol=1, ), + # non-zero error QuantErrorDesc( name="2 rows of scaled [0, 15] linspace for sym", weight=TWO_ROWS_LINSPACE_INT4_ASYM, @@ -765,13 +772,6 @@ def __str__(self): ref_error=1.49, atol=1, ), - QuantErrorDesc( - name="2 columns of [0-15] linspace for asym", - weight=np.transpose(TWO_ROWS_LINSPACE_INT4_ASYM), - config=int4_asym_config, - ref_error=162, - atol=1, - ), ] @@ -1286,12 +1286,12 @@ def test_call_gptq_with_dataset_scale_estimation_neg_group_size(mode): ("sensitivity_metric", "all_layers", "ratio", "ref_ids", "group_size"), ( (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 1, [0, 1, 2, 3, 4], None), - (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.8, [0, 1, 2], None), - (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.4, [1], None), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.8, [0, 3, 4], None), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.4, [0], None), (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.2, [], None), (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 1, [0, 1, 2, 3], None), - (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.8, [0, 1, 2], None), - (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.4, [1], None), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.8, [0, 1, 3], None), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.4, [0], None), (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.2, [], None), (SensitivityMetric.HESSIAN_INPUT_ACTIVATION, True, 0.8, [0, 1, 2], None), (SensitivityMetric.HESSIAN_INPUT_ACTIVATION, False, 0.8, [0, 1, 2], None), @@ -1338,6 +1338,7 @@ def test_mixed_precision_mxfp(sensitivity_metric, all_layers, ratio, ref_ids, mo names_fp = {op.get_friendly_name() for op in ops} ref_fp_nodes = {f"weights_{i}" for i in ref_ids} + assert ref_fp_nodes == names_fp names_e8m0 = { @@ -1351,12 +1352,12 @@ def test_mixed_precision_mxfp(sensitivity_metric, all_layers, ratio, ref_ids, mo ("sensitivity_metric", "all_layers", "ratio", "ref_ids", "group_size"), ( (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 1, [0, 1, 2, 3, 4], None), - (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.8, [0, 1, 2], None), - (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.4, [0], None), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.8, [0, 1, 4], None), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.4, [1], None), (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.2, [], None), (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 1, [0, 1, 2, 3], None), - (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.8, [0, 1, 2], None), - (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.4, [0], None), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.8, [0, 1, 3], None), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.4, [1], None), (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.2, [], None), (SensitivityMetric.HESSIAN_INPUT_ACTIVATION, True, 0.8, [0, 1, 2], None), (SensitivityMetric.HESSIAN_INPUT_ACTIVATION, False, 0.8, [0, 1, 2], None), @@ -1405,6 +1406,7 @@ def test_mixed_precision_fp(sensitivity_metric, all_layers, ratio, ref_ids, mode names_fp = {op.get_friendly_name() for op in ops} ref_fp_nodes = {f"weights_{i}" for i in ref_ids} + assert ref_fp_nodes == names_fp scale_dtypes = (ov.Type.f16, ov.Type.f8e4m3) names_scales = { @@ -2469,40 +2471,40 @@ def get_scale_estimation_ref(check_sampling_activation_stats_flow): return ( np.array( [ - [[0.473328]], - [[0.929023]], - [[1.446527]], - [[1.920595]], - [[2.517054]], - [[3.030102]], - [[3.584279]], - [[4.043509]], - [[4.620008]], - [[5.165322]], - [[5.710637]], - [[6.122581]], - [[6.655914]], - [[7.237174]], - [[7.722580]], + [[0.47332805]], + [[1.0]], + [[1.4732642]], + [[2.0380495]], + [[2.6054149]], + [[3.0301015]], + [[3.679056]], + [[4.175322]], + [[4.700384]], + [[5.2552223]], + [[5.8100615]], + [[6.3083715]], + [[6.858295]], + [[7.4082184]], + [[7.722581]], [[8.255914]], ] ), np.array( [ [[0.47344488]], - [[0.9287766]], - [[1.4463282]], - [[1.920052]], - [[2.5167778]], + [[1.0]], + [[1.5450557]], + [[2.0380037]], + [[2.6055446]], [[3.02987]], - [[3.5842714]], - [[4.0429296]], - [[4.619769]], - [[5.165224]], - [[5.7106786]], - [[6.121212]], - [[6.654546]], - [[7.2366524]], + [[3.679132]], + [[4.1754694]], + [[4.7001443]], + [[5.2551227]], + [[5.810101]], + [[6.308658]], + [[6.8587303]], + [[7.4]], [[7.7212124]], [[8.254545]], ] @@ -2517,44 +2519,44 @@ def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow): [ [ [ - 7.5732, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.2602, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.3083, - 7.8467, - 7.2233, - 7.2715, - 7.4205, - 7.4667, + 7.573249, + 7.58195, + 7.6, + 7.6666665, + 7.1209445, + 7.260152, + 7.866667, + 7.9333334, + 8.0, + 8.066667, + 8.528544, + 8.659291, + 8.879055, + 8.469787, + 8.4, + 8.364824, ] ] ], [ [ [ - 14.8205, - 14.9032, - 14.9858, - 15.0685, - 15.1512, - 14.3400, - 14.4173, - 14.4945, - 14.5718, - 14.6491, - 14.7264, - 14.8037, - 14.8810, - 14.9583, - 15.0355, - 15.1128, + 16.0, + 16.089771, + 16.179543, + 16.269318, + 16.359089, + 16.44886, + 16.538631, + 16.628407, + 16.718176, + 16.80795, + 16.89772, + 16.987492, + 15.812495, + 15.89516, + 15.977826, + 16.060493, ] ] ], @@ -2566,43 +2568,43 @@ def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow): [ [ 7.575118, - 7.4666667, - 7.4666667, - 7.4666667, - 7.4666667, + 7.5841107, + 7.6, + 7.6666665, + 7.112954, 7.254837, - 7.4666667, - 7.4666667, - 7.4666667, - 7.4666667, - 7.495066, + 7.866667, + 7.9333334, + 8.0, + 8.066667, + 8.531546, 7.850108, - 7.219489, - 7.2685375, - 7.418597, - 7.4666667, + 8.887045, + 8.468656, + 8.4, + 8.361673, ] ] ], [ [ [ - 14.820066, - 14.902746, - 14.985427, - 15.068108, - 15.150787, - 14.3391285, - 14.416424, - 14.493721, - 14.571016, - 14.648311, - 14.725608, - 14.802904, - 14.8801985, - 14.957496, - 15.034791, - 15.112087, + 16.0, + 16.089788, + 16.17958, + 16.269371, + 16.359161, + 16.448954, + 16.538742, + 16.628534, + 16.718325, + 16.808115, + 16.897905, + 16.987696, + 15.812232, + 15.894914, + 15.977593, + 16.060274, ] ] ], diff --git a/tests/torch/function_hook/quantization/test_fq_lora.py b/tests/torch/function_hook/quantization/test_fq_lora.py index 9b4ab58912b..81073f47c8a 100644 --- a/tests/torch/function_hook/quantization/test_fq_lora.py +++ b/tests/torch/function_hook/quantization/test_fq_lora.py @@ -118,7 +118,8 @@ def test_fq_lora_export(compression_kwargs, _seed): Tests FQ-LoRA (Fake-Quantize with Low-Rank Adaptation) can be stripped and exported to OpenVINO. """ device = "cuda" - example_input = 0.01 * torch.arange(0, 4 * 8, device=device).reshape(1, 4, 8) + 0.02 + example_input = 0.01 * torch.arange(0, 4 * 8, device=device).reshape(1, 4, 8) + example_input = example_input - example_input.mean(dim=-1, keepdim=True) model = AWQLinearModel().to(device) model = nncf.compress_weights( @@ -140,6 +141,7 @@ def test_fq_lora_export(compression_kwargs, _seed): example_inputs_numpy = example_input.detach().cpu().numpy() stripped_ov_output = torch.tensor(model(example_inputs_numpy)[0], device=example_input.device) + # TODO(aanuf): fix input_low, input_range computation for AsymmetricQuantizer assert torch.allclose(tuned_output, stripped_output, atol=1e-1) assert torch.allclose(tuned_output, stripped_ov_output, atol=1e-1) diff --git a/tests/torch/function_hook/quantization/test_weights_compression.py b/tests/torch/function_hook/quantization/test_weights_compression.py index a70e0f83879..a970b808557 100644 --- a/tests/torch/function_hook/quantization/test_weights_compression.py +++ b/tests/torch/function_hook/quantization/test_weights_compression.py @@ -75,7 +75,7 @@ def __init__(self): weights_data[-1, -1] = main_value weight_tensor = weights_data.detach().clone() layer = nn.Linear(4, 4, bias=False) - layer.weight = nn.Parameter(weight_tensor.t()) + layer.weight = nn.Parameter(weight_tensor) self.layers.append(layer) def forward(self, x): @@ -659,41 +659,41 @@ def get_scale_estimation_ref(check_sampling_activation_stats_flow): return ( torch.tensor( [ - [[0.473328]], - [[0.929023]], - [[1.446527]], - [[1.920595]], - [[2.517054]], - [[3.030102]], - [[3.584279]], - [[4.043509]], - [[4.620008]], - [[5.165322]], - [[5.710637]], - [[6.122581]], - [[6.655914]], - [[7.237174]], - [[7.722580]], + [[0.47332805]], + [[1.0]], + [[1.4732642]], + [[2.0380495]], + [[2.6054149]], + [[3.0301015]], + [[3.679056]], + [[4.175322]], + [[4.700384]], + [[5.2552223]], + [[5.8100615]], + [[6.3083715]], + [[6.858295]], + [[7.4082184]], + [[7.722581]], [[8.255914]], ] ), torch.tensor( [ - [[0.473445]], - [[0.928777]], - [[1.446328]], - [[1.920052]], - [[2.516778]], - [[3.029870]], - [[3.584271]], - [[4.042929]], - [[4.619769]], - [[5.165224]], - [[5.710679]], - [[6.121212]], - [[6.654546]], - [[7.236652]], - [[7.721212]], + [[0.47344488]], + [[1.0]], + [[1.5450557]], + [[2.0380037]], + [[2.6055446]], + [[3.02987]], + [[3.679132]], + [[4.1754694]], + [[4.7001443]], + [[5.2551227]], + [[5.810101]], + [[6.308658]], + [[6.8587303]], + [[7.4]], + [[7.7212124]], [[8.254545]], ] ), @@ -707,44 +707,44 @@ def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow): [ [ [ - 7.5732, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.2602, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.3083, - 7.8467, - 7.2233, - 7.2715, - 7.4205, - 7.4667, + 7.573249, + 7.58195, + 7.6, + 7.6666665, + 7.1209445, + 7.260152, + 7.866667, + 7.9333334, + 8.0, + 8.066667, + 8.528544, + 8.659291, + 8.879055, + 8.469787, + 8.4, + 8.364824, ] ] ], [ [ [ - 14.8205, - 14.9032, - 14.9858, - 15.0685, - 15.1512, - 14.3400, - 14.4173, - 14.4945, - 14.5718, - 14.6491, - 14.7264, - 14.8037, - 14.8810, - 14.9583, - 15.0355, - 15.1128, + 16.0, + 16.089771, + 16.179543, + 16.269318, + 16.359089, + 16.44886, + 16.538631, + 16.628407, + 16.718176, + 16.80795, + 16.89772, + 16.987492, + 15.812495, + 15.89516, + 15.977826, + 16.060493, ] ] ], @@ -755,44 +755,44 @@ def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow): [ [ [ - 7.5751, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.2548, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.4951, - 7.8501, - 7.2195, - 7.2685, - 7.4186, - 7.4667, + 7.575118, + 7.5841107, + 7.6, + 7.6666665, + 7.112954, + 7.254837, + 7.866667, + 7.9333334, + 8.0, + 8.066667, + 8.531546, + 7.850108, + 8.887045, + 8.468656, + 8.4, + 8.361673, ] ] ], [ [ [ - 14.8201, - 14.9027, - 14.9854, - 15.0681, - 15.1508, - 14.3391, - 14.4164, - 14.4937, - 14.5710, - 14.6483, - 14.7256, - 14.8029, - 14.8802, - 14.9575, - 15.0348, - 15.1121, + 16.0, + 16.089788, + 16.17958, + 16.269371, + 16.359161, + 16.448954, + 16.538742, + 16.628534, + 16.718325, + 16.808115, + 16.897905, + 16.987696, + 15.812232, + 15.894914, + 15.977593, + 16.060274, ] ] ], diff --git a/tests/torch/fx/test_weights_compression.py b/tests/torch/fx/test_weights_compression.py index d05e002cd7d..004f0e018d0 100644 --- a/tests/torch/fx/test_weights_compression.py +++ b/tests/torch/fx/test_weights_compression.py @@ -437,41 +437,41 @@ def get_scale_estimation_ref(check_sampling_activation_stats_flow): return ( torch.tensor( [ - [[0.473328]], - [[0.929023]], - [[1.446527]], - [[1.920595]], - [[2.517054]], - [[3.030102]], - [[3.584279]], - [[4.043509]], - [[4.620008]], - [[5.165322]], - [[5.710637]], - [[6.122581]], - [[6.655914]], - [[7.237174]], - [[7.722580]], + [[0.47332805]], + [[1.0]], + [[1.4732642]], + [[2.0380495]], + [[2.6054149]], + [[3.0301015]], + [[3.679056]], + [[4.175322]], + [[4.700384]], + [[5.2552223]], + [[5.8100615]], + [[6.3083715]], + [[6.858295]], + [[7.4082184]], + [[7.722581]], [[8.255914]], ] ), torch.tensor( [ - [[0.473445]], - [[0.928777]], - [[1.446328]], - [[1.920052]], - [[2.516778]], - [[3.029870]], - [[3.584271]], - [[4.042929]], - [[4.619769]], - [[5.165224]], - [[5.710679]], - [[6.121212]], - [[6.654546]], - [[7.236652]], - [[7.721212]], + [[0.47344488]], + [[1.0]], + [[1.5450557]], + [[2.0380037]], + [[2.6055446]], + [[3.02987]], + [[3.679132]], + [[4.1754694]], + [[4.7001443]], + [[5.2551227]], + [[5.810101]], + [[6.308658]], + [[6.8587303]], + [[7.4]], + [[7.7212124]], [[8.254545]], ] ), @@ -485,44 +485,44 @@ def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow): [ [ [ - 7.5732, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.2602, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.3083, - 7.8467, - 7.2233, - 7.2715, - 7.4205, - 7.4667, + 7.573249, + 7.58195, + 7.6, + 7.6666665, + 7.1209445, + 7.260152, + 7.866667, + 7.9333334, + 8.0, + 8.066667, + 8.528544, + 8.659291, + 8.879055, + 8.469787, + 8.4, + 8.364824, ] ] ], [ [ [ - 14.8205, - 14.9032, - 14.9858, - 15.0685, - 15.1512, - 14.3400, - 14.4173, - 14.4945, - 14.5718, - 14.6491, - 14.7264, - 14.8037, - 14.8810, - 14.9583, - 15.0355, - 15.1128, + 16.0, + 16.089771, + 16.179543, + 16.269318, + 16.359089, + 16.44886, + 16.538631, + 16.628407, + 16.718176, + 16.80795, + 16.89772, + 16.987492, + 15.812495, + 15.89516, + 15.977826, + 16.060493, ] ] ], @@ -533,44 +533,44 @@ def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow): [ [ [ - 7.5751, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.2548, - 7.4667, - 7.4667, - 7.4667, - 7.4667, - 7.4951, - 7.8501, - 7.2195, - 7.2685, - 7.4186, - 7.4667, + 7.575118, + 7.5841107, + 7.6, + 7.6666665, + 7.112954, + 7.254837, + 7.866667, + 7.9333334, + 8.0, + 8.066667, + 8.531546, + 7.850108, + 8.887045, + 8.468656, + 8.4, + 8.361673, ] ] ], [ [ [ - 14.8201, - 14.9027, - 14.9854, - 15.0681, - 15.1508, - 14.3391, - 14.4164, - 14.4937, - 14.5710, - 14.6483, - 14.7256, - 14.8029, - 14.8802, - 14.9575, - 15.0348, - 15.1121, + 16.0, + 16.089788, + 16.17958, + 16.269371, + 16.359161, + 16.448954, + 16.538742, + 16.628534, + 16.718325, + 16.808115, + 16.897905, + 16.987696, + 15.812232, + 15.894914, + 15.977593, + 16.060274, ] ] ],