From 7f3bf69d27c3ca767a18358e4a2e6d5e051c2581 Mon Sep 17 00:00:00 2001 From: "s.malakhov" Date: Thu, 5 Mar 2026 16:03:16 +0300 Subject: [PATCH] [quantization] Fix `quantize_decoder_layer_prefill.py` This PR fixes `quantize_decoder_layer_prefill.py` to save `decoder_layer` in `prefill` mode. TICO-DCO-1.0-Signed-off-by: s.malakhov --- .../wrapq/examples/llama/quantize_decoder_layer_prefill.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py b/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py index 237c0b3d..0d7184f5 100644 --- a/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py +++ b/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py @@ -58,7 +58,7 @@ # 1. Swap in the quant wrapper # ------------------------------------------------------------------------- fp32_layer = model.model.layers[0] # keep a reference for diff check -model.model.layers[0] = prepare(fp32_layer, PTQConfig()) +model.model.layers[0] = prepare(fp32_layer, PTQConfig(wrapper_variant="prefill")) model.eval() qlayer = model.model.layers[0] # alias for brevity