From 3a41d635e2dc38c1fcc2d9f48025a4f90510a48c Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 4 Feb 2026 00:39:43 +0100 Subject: [PATCH 1/6] WA for accuracy issues in qwen3-coder_int4 --- src/server.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/server.cpp b/src/server.cpp index d89d54fcaa..7b1c41eff7 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -516,6 +516,12 @@ int Server::startServerFromSettings(ServerSettingsImpl& serverSettings, ModelsSe // OVMS Start int Server::start(int argc, char** argv) { + // Set default for MOE_USE_MICRO_GEMM_PREFILL if not set + // This is a workaround for OpenVINO issue where prefill causes accuracy problems in long context in qwen3-coder model + const char* moeEnv = std::getenv("MOE_USE_MICRO_GEMM_PREFILL"); + if (moeEnv == nullptr) + setenv("MOE_USE_MICRO_GEMM_PREFILL", "0", 1); + auto paramsOrExit = parseArgs(argc, argv); // Check for error in parsing if (std::holds_alternative>(paramsOrExit)) { From b084aca95f67c750882273e50e0e93d5ab7cc182 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 4 Feb 2026 01:14:22 +0100 Subject: [PATCH 2/6] WA for accuracy issues in qwen3-coder_int4 --- src/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.cpp b/src/server.cpp index 7b1c41eff7..2779969b1e 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -519,7 +519,7 @@ int Server::start(int argc, char** argv) { // Set default for MOE_USE_MICRO_GEMM_PREFILL if not set // This is a workaround for OpenVINO issue where prefill causes accuracy problems in long context in qwen3-coder model const char* moeEnv = std::getenv("MOE_USE_MICRO_GEMM_PREFILL"); - if (moeEnv == nullptr) + if (moeEnv == nullptr) setenv("MOE_USE_MICRO_GEMM_PREFILL", "0", 1); auto paramsOrExit = parseArgs(argc, argv); From 305ecb081c8355eff4b488d14fb56b42203d067d Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 4 Feb 2026 08:51:41 +0100 Subject: [PATCH 3/6] fix --- src/server.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/server.cpp b/src/server.cpp index 2779969b1e..1ab9c23985 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -520,7 +520,8 @@ int Server::start(int argc, char** argv) { // This is a workaround for OpenVINO issue where prefill causes accuracy problems in long context in qwen3-coder model const char* moeEnv = std::getenv("MOE_USE_MICRO_GEMM_PREFILL"); if (moeEnv == nullptr) - setenv("MOE_USE_MICRO_GEMM_PREFILL", "0", 1); + std::unique_ptr envGuard; + envGuard->set("MOE_USE_MICRO_GEMM_PREFILL", "0"); auto paramsOrExit = parseArgs(argc, argv); // Check for error in parsing From 556d925cc58fad049388d75249b51bc51a0f282c Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 4 Feb 2026 09:08:17 +0100 Subject: [PATCH 4/6] fix --- src/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.cpp b/src/server.cpp index 1ab9c23985..db874e9f8b 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -521,7 +521,7 @@ int Server::start(int argc, char** argv) { const char* moeEnv = std::getenv("MOE_USE_MICRO_GEMM_PREFILL"); if (moeEnv == nullptr) std::unique_ptr envGuard; - envGuard->set("MOE_USE_MICRO_GEMM_PREFILL", "0"); + envGuard->set("MOE_USE_MICRO_GEMM_PREFILL", "0"); auto paramsOrExit = parseArgs(argc, argv); // Check for error in parsing From 41e7d418aa4791b8c00c5bf27d60bd89138cf701 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 4 Feb 2026 09:23:00 +0100 Subject: [PATCH 5/6] fix --- src/server.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/server.cpp b/src/server.cpp index db874e9f8b..3734143eb1 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -68,6 +68,7 @@ #include "servables_config_manager_module/servablesconfigmanagermodule.hpp" #include "stringutils.hpp" #include "version.hpp" +#include "src/utils/env_guard.hpp" #if (PYTHON_DISABLE == 0) #include "python/pythoninterpretermodule.hpp" From 07e41671001d25c77a9a6913a7d79425ebb3c9cf Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 4 Feb 2026 10:48:51 +0100 Subject: [PATCH 6/6] fix --- src/server.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/server.cpp b/src/server.cpp index 3734143eb1..e1f198e94c 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -520,9 +520,10 @@ int Server::start(int argc, char** argv) { // Set default for MOE_USE_MICRO_GEMM_PREFILL if not set // This is a workaround for OpenVINO issue where prefill causes accuracy problems in long context in qwen3-coder model const char* moeEnv = std::getenv("MOE_USE_MICRO_GEMM_PREFILL"); - if (moeEnv == nullptr) - std::unique_ptr envGuard; - envGuard->set("MOE_USE_MICRO_GEMM_PREFILL", "0"); + if (moeEnv == nullptr){ + std::unique_ptr envGuard = std::make_unique(); + envGuard->set("MOE_USE_MICRO_GEMM_PREFILL", "0"); + } auto paramsOrExit = parseArgs(argc, argv); // Check for error in parsing