diff --git a/src/server.cpp b/src/server.cpp index d89d54fcaa..e1f198e94c 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -68,6 +68,7 @@ #include "servables_config_manager_module/servablesconfigmanagermodule.hpp" #include "stringutils.hpp" #include "version.hpp" +#include "src/utils/env_guard.hpp" #if (PYTHON_DISABLE == 0) #include "python/pythoninterpretermodule.hpp" @@ -516,6 +517,14 @@ int Server::startServerFromSettings(ServerSettingsImpl& serverSettings, ModelsSe // OVMS Start int Server::start(int argc, char** argv) { + // Set default for MOE_USE_MICRO_GEMM_PREFILL if not set + // This is a workaround for OpenVINO issue where prefill causes accuracy problems in long context in qwen3-coder model + const char* moeEnv = std::getenv("MOE_USE_MICRO_GEMM_PREFILL"); + if (moeEnv == nullptr){ + std::unique_ptr envGuard = std::make_unique(); + envGuard->set("MOE_USE_MICRO_GEMM_PREFILL", "0"); + } + auto paramsOrExit = parseArgs(argc, argv); // Check for error in parsing if (std::holds_alternative>(paramsOrExit)) {