diff --git a/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py b/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py
index 237c0b3d..0d7184f5 100644
--- a/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py
+++ b/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py
@@ -58,7 +58,7 @@
 # 1. Swap in the quant wrapper
 # -------------------------------------------------------------------------
 fp32_layer = model.model.layers[0] # keep a reference for diff check
-model.model.layers[0] = prepare(fp32_layer, PTQConfig())
+model.model.layers[0] = prepare(fp32_layer, PTQConfig(wrapper_variant="prefill"))
 model.eval()
 
 qlayer = model.model.layers[0] # alias for brevity