-
Notifications
You must be signed in to change notification settings - Fork 239
Qwen3-VL support #3988
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Qwen3-VL support #3988
Changes from all commits
1d9d0a7
3721f50
2f930ed
94203b0
beb8a3f
180e53d
36f522f
7e94c18
21ab2fd
2ef6bb1
c2d5eb5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,9 @@ | |
| #include "openai_completions.hpp" | ||
|
|
||
| #include <cmath> | ||
| #include <limits> | ||
| #include <memory> | ||
| #include <stdexcept> | ||
| #include "src/port/rapidjson_stringbuffer.hpp" | ||
| #include "src/port/rapidjson_writer.hpp" | ||
| #include <set> | ||
|
|
@@ -44,6 +46,57 @@ namespace ovms { | |
|
|
||
| constexpr size_t DEFAULT_MAX_STOP_WORDS = 16; // same as deep-seek | ||
|
|
||
| namespace { | ||
|
|
||
| ov::genai::JsonContainer rapidJsonValueToJsonContainer(const rapidjson::Value& value) { | ||
| if (value.IsNull()) { | ||
| return ov::genai::JsonContainer(nullptr); | ||
| } | ||
| if (value.IsBool()) { | ||
| return ov::genai::JsonContainer(value.GetBool()); | ||
| } | ||
| if (value.IsInt()) { | ||
| return ov::genai::JsonContainer(value.GetInt()); | ||
| } | ||
| if (value.IsUint()) { | ||
| return ov::genai::JsonContainer(static_cast<int64_t>(value.GetUint())); | ||
| } | ||
| if (value.IsInt64()) { | ||
| return ov::genai::JsonContainer(value.GetInt64()); | ||
| } | ||
| if (value.IsUint64()) { | ||
| auto uintValue = value.GetUint64(); | ||
| if (uintValue <= static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) { | ||
| return ov::genai::JsonContainer(static_cast<int64_t>(uintValue)); | ||
| } | ||
| return ov::genai::JsonContainer(static_cast<double>(uintValue)); | ||
| } | ||
| if (value.IsDouble()) { | ||
| return ov::genai::JsonContainer(value.GetDouble()); | ||
| } | ||
| if (value.IsString()) { | ||
| return ov::genai::JsonContainer(std::string(value.GetString(), value.GetStringLength())); | ||
| } | ||
| if (value.IsArray()) { | ||
| ov::genai::JsonContainer arrayContainer = ov::genai::JsonContainer::array(); | ||
| for (const auto& item : value.GetArray()) { | ||
| arrayContainer.push_back(rapidJsonValueToJsonContainer(item)); | ||
| } | ||
| return arrayContainer; | ||
| } | ||
| if (value.IsObject()) { | ||
| ov::genai::JsonContainer objectContainer = ov::genai::JsonContainer::object(); | ||
| for (auto member = value.MemberBegin(); member != value.MemberEnd(); ++member) { | ||
| const std::string key(member->name.GetString(), member->name.GetStringLength()); | ||
| objectContainer[key] = rapidJsonValueToJsonContainer(member->value); | ||
| } | ||
| return objectContainer; | ||
| } | ||
| throw std::invalid_argument("Unsupported JSON value type"); | ||
| } | ||
dkalinowski marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| } // namespace | ||
|
|
||
| absl::Status OpenAIChatCompletionsHandler::parseCompletionsPart() { | ||
| // prompt: string | ||
| auto it = doc.FindMember("prompt"); | ||
|
|
@@ -439,6 +492,27 @@ absl::Status OpenAIChatCompletionsHandler::parseTools() { | |
| return absl::OkStatus(); | ||
| } | ||
|
|
||
| absl::StatusOr<std::optional<ov::genai::JsonContainer>> OpenAIChatCompletionsHandler::parseToolsToJsonContainer() { | ||
| auto it = doc.FindMember("tools"); | ||
| if (it == doc.MemberEnd() || it->value.IsNull()) { | ||
| return std::nullopt; | ||
| } | ||
| try { | ||
| return rapidJsonValueToJsonContainer(it->value); | ||
| } catch (const std::exception& e) { | ||
| SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Direct tools conversion to JsonContainer failed: {}. Falling back to JSON string conversion.", e.what()); | ||
| try { | ||
| rapidjson::StringBuffer toolsBuffer; | ||
| rapidjson::Writer<rapidjson::StringBuffer> toolsWriter(toolsBuffer); | ||
| it->value.Accept(toolsWriter); | ||
| return ov::genai::JsonContainer::from_json_string(toolsBuffer.GetString()); | ||
| } catch (const std::exception& fallbackEx) { | ||
| SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Fallback tools conversion failed: {}", fallbackEx.what()); | ||
| return absl::InvalidArgumentError(absl::StrCat("Invalid tools payload: ", fallbackEx.what())); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| const bool OpenAIChatCompletionsHandler::areToolsAvailable() const { | ||
| return !request.toolNameSchemaMap.empty(); | ||
| } | ||
|
|
@@ -843,6 +917,7 @@ absl::Status OpenAIChatCompletionsHandler::parseRequest(std::optional<uint32_t> | |
|
|
||
| void updateUsage(CompletionUsageStatistics& usage, const std::vector<int64_t>& generatedIds, bool echoPrompt) { | ||
| OVMS_PROFILE_FUNCTION(); | ||
| SPDLOG_INFO("Echo prompt: {}", echoPrompt); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Logging on INFO? |
||
| usage.completionTokens += generatedIds.size(); | ||
| if (echoPrompt) | ||
| usage.completionTokens -= usage.promptTokens; | ||
|
|
@@ -1049,35 +1124,47 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const ov::genai | |
| return jsonResponse.ToString(); | ||
| } | ||
|
|
||
| std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const ov::genai::VLMDecodedResults& results, size_t completionTokens) { | ||
| std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const ov::genai::VLMDecodedResults& results) { | ||
| OVMS_PROFILE_FUNCTION(); | ||
| OpenAiJsonResponse jsonResponse; | ||
| jsonResponse.StartObject(); | ||
|
|
||
| // choices: array of size N, where N is related to n request parameter | ||
| jsonResponse.StartArray("choices"); | ||
| int index = 0; | ||
| usage.completionTokens = completionTokens; | ||
| for (int i = 0; i < results.texts.size(); i++) { | ||
| const std::string& text = results.texts[i]; | ||
| SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Generated text: {}", text); | ||
|
|
||
| // Workaround to use OVMS unary parsers: get tokens from string | ||
| // This way we have detokenized text from GenAI and calculate tokens, to further convert back to text again, in parseOutputIfNeeded... | ||
| auto result = tokenizer.encode(text); | ||
| auto& input_ids = result.input_ids; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Those are only new tokens, right? Prompt not included? |
||
| if (input_ids.get_shape().size() != 2) | ||
| throw std::runtime_error("input_ids should have 2 dimensions"); | ||
| if (input_ids.get_shape()[0] != 1) | ||
| throw std::runtime_error("input_ids should have 1 batch size"); | ||
| if (input_ids.get_element_type() != ov::element::i64) | ||
| throw std::runtime_error("input_ids should have i64 element type"); | ||
|
|
||
| int64_t* input_ids_data = reinterpret_cast<int64_t*>(input_ids.data()); | ||
| std::vector<int64_t> tokens(input_ids_data, input_ids_data + input_ids.get_shape()[1]); | ||
|
|
||
| SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Generated tokens: {}", tokens); | ||
| updateUsage(usage, tokens, request.echo); | ||
| ParsedOutput parsedOutput = parseOutputIfNeeded(tokens); | ||
|
|
||
| jsonResponse.StartObject(); | ||
| // finish_reason: string; always "stop" for this method | ||
| jsonResponse.FinishReason("stop"); | ||
| // index: integer; Choice index, only n=1 supported anyway | ||
| jsonResponse.Index(index++); | ||
| // logprobs: object/null; Log probability information for the choice. TODO | ||
|
|
||
| // message: object | ||
| if (endpoint == Endpoint::CHAT_COMPLETIONS) { | ||
| jsonResponse.StartObject("message"); | ||
| jsonResponse.String("content", text); | ||
| jsonResponse.String("role", "assistant"); // TODO - hardcoded | ||
| // TODO: tools_call | ||
| // TODO: function_call (deprecated) | ||
| jsonResponse.EndObject(); | ||
| jsonResponse.MessageObject(parsedOutput); | ||
| } else if (endpoint == Endpoint::COMPLETIONS) { | ||
| jsonResponse.String("text", text); | ||
| jsonResponse.Text(parsedOutput); | ||
| } | ||
|
|
||
| // finish message object | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -182,8 +182,17 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptr<GenAiServableExecution | |
| #else | ||
| ov::genai::ChatHistory& chatHistory = executionContext->apiHandler->getChatHistory(); | ||
| constexpr bool add_generation_prompt = true; // confirm it should be hardcoded | ||
| auto toolsStatus = executionContext->apiHandler->parseToolsToJsonContainer(); | ||
| if (!toolsStatus.ok()) { | ||
| return toolsStatus.status(); | ||
| } | ||
| const auto& tools = toolsStatus.value(); | ||
| try { | ||
| inputText = getProperties()->tokenizer.apply_chat_template(chatHistory, add_generation_prompt); | ||
| if (tools.has_value()) { | ||
| inputText = getProperties()->tokenizer.apply_chat_template(chatHistory, add_generation_prompt, {}, tools); | ||
| } else { | ||
| inputText = getProperties()->tokenizer.apply_chat_template(chatHistory, add_generation_prompt); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. from different comment:
|
||
| } | ||
| } catch (const std::exception& e) { | ||
| SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to apply chat template: {}", e.what()); | ||
| return absl::Status(absl::StatusCode::kInvalidArgument, "Failed to apply chat template. The model either does not have chat template or has an invalid one."); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How does it work? Why do we have such condition only for uint64?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is because genai::JsonContainer has no support for uint64, only int64.