From 2eabff05f1c5ea1eae4668a60bcfe61f394e1b27 Mon Sep 17 00:00:00 2001 From: atobisze Date: Thu, 16 Jan 2025 15:07:24 +0100 Subject: [PATCH 1/8] Check Revert "Check" This reverts commit dddaf1b9d1dbd1113715889731f8f10a73b67291. Check graph pool TODO: -> this requires additional patch in MP to reset initialized_ flag in CalculatorGraph and verify if that works. Previous MP tests with reruns worked due to using AddVectorSink which changes the underlying graph and does not use OutputStreamPollers. Need to verify if change in MP will enable graph pool or we need to go back to thread pool. Rebase POC MP FW test POC part 2 WIP to stash --- ci/build_test_OnCommit.groovy | 8 +- src/BUILD | 36 ++ src/kfs_frontend/kfs_graph_executor_impl.cpp | 4 + src/logging.cpp | 3 +- .../graph_executor_constants.hpp | 34 ++ src/mediapipe_internal/graph_side_packets.hpp | 69 +++ src/mediapipe_internal/graphqueue.cpp | 119 +++++ src/mediapipe_internal/graphqueue.hpp | 89 ++++ .../mediapipegraphdefinition.cpp | 92 ++-- .../mediapipegraphdefinition.hpp | 51 +-- .../mediapipegraphexecutor.cpp | 30 +- .../mediapipegraphexecutor.hpp | 133 ++++-- .../outputstreamobserver.hpp | 63 +++ src/python/BUILD | 2 +- src/test/mediapipe_framework_test.cpp | 410 +++++++++++++++++- src/test/mediapipe_validation_test.cpp | 1 + src/test/mediapipeflow_test.cpp | 45 +- src/test/pythonnode_test.cpp | 13 +- src/test/streaming_test.cpp | 110 ++++- src/test/test_utils.hpp | 10 +- 20 files changed, 1144 insertions(+), 178 deletions(-) create mode 100644 src/mediapipe_internal/graph_executor_constants.hpp create mode 100644 src/mediapipe_internal/graph_side_packets.hpp create mode 100644 src/mediapipe_internal/graphqueue.cpp create mode 100644 src/mediapipe_internal/graphqueue.hpp create mode 100644 src/mediapipe_internal/outputstreamobserver.hpp diff --git a/ci/build_test_OnCommit.groovy b/ci/build_test_OnCommit.groovy index 21029ec61c..a3e7862c29 100644 --- a/ci/build_test_OnCommit.groovy +++ 
b/ci/build_test_OnCommit.groovy @@ -149,7 +149,7 @@ pipeline { timeout(time: 120, unit: 'MINUTES') } parallel { - stage("Run unit tests") { + /*stage("Run unit tests") { agent { label "${agent_name_linux}" } @@ -165,7 +165,7 @@ pipeline { } } } - } + }*/ stage("Internal tests") { agent { label "${agent_name_linux}" @@ -186,7 +186,7 @@ pipeline { } } } - stage('Test windows') { + /*stage('Test windows') { agent { label "${agent_name_windows}" } @@ -210,7 +210,7 @@ pipeline { } } } - } + }*/ } } } diff --git a/src/BUILD b/src/BUILD index 71321ca7ee..46d1d51a39 100644 --- a/src/BUILD +++ b/src/BUILD @@ -150,6 +150,39 @@ ovms_cc_library( hdrs = ["queue.hpp"], visibility = ["//visibility:public",], ) +ovms_cc_library( + name = "mediapipe_internal_graph_side_packets", + hdrs = ["mediapipe_internal/graph_side_packets.hpp"], + visibility = ["//visibility:public",], +) +ovms_cc_library( + name = "mediapipe_internal_graph_executor_constants", + hdrs = ["mediapipe_internal/graph_executor_constants.hpp"], + visibility = ["//visibility:public"], +) +ovms_cc_library( + name = "mediapipe_internal_graphqueue", + hdrs = [ + "mediapipe_internal/graphqueue.hpp", + "mediapipe_internal/outputstreamobserver.hpp", + ], # TODO FIXME + srcs = ["mediapipe_internal/graphqueue.cpp"], + deps = [ + "libovms_queue", + "libovmslogging", + "libovms_execution_context", + "libovmstimer", + "libovmsmetrics", + "model_metric_reporter", + "mediapipe_internal_graph_executor_constants", + "mediapipe_internal_graph_side_packets", + "//third_party:openvino", + "@mediapipe//mediapipe/framework:calculator_graph", + "//src/python:libovmspythonmodule", # TODO not splitted + "//src/llm:genai_servables", # TODO split! 
+ ], + visibility = ["//visibility:public",], +) ovms_cc_library( name = "libovms_ovinferrequestsqueue", hdrs = ["ovinferrequestsqueue.hpp"], @@ -542,6 +575,7 @@ ovms_cc_library( "mediapipe_internal/mediapipegraphconfig.cpp", "mediapipe_internal/mediapipegraphdefinition.cpp", "mediapipe_internal/mediapipegraphdefinition.hpp", + "mediapipe_internal/outputstreamobserver.hpp", "mediapipe_internal/mediapipegraphexecutor.cpp", "mediapipe_internal/mediapipegraphexecutor.hpp", "mediapipe_internal/packettypes.hpp", @@ -682,6 +716,8 @@ ovms_cc_library( }) + select({ "//conditions:default": [ + "mediapipe_internal_graph_executor_constants", + "mediapipe_internal_graphqueue", "@mediapipe_calculators//:mediapipe_calculators", # Need this dependencies here because we use ovms/src - cannot add in ovms_dependencies because we copy src directory later in Dockerfile "@mediapipe//mediapipe/graphs/holistic_tracking:holistic_tracking_to_render_data", "@mediapipe//mediapipe/graphs/iris_tracking:iris_tracking_cpu_deps", diff --git a/src/kfs_frontend/kfs_graph_executor_impl.cpp b/src/kfs_frontend/kfs_graph_executor_impl.cpp index 034f6f0907..2751a49e94 100644 --- a/src/kfs_frontend/kfs_graph_executor_impl.cpp +++ b/src/kfs_frontend/kfs_graph_executor_impl.cpp @@ -26,6 +26,7 @@ #include "../logging.hpp" #include "../mediapipe_internal/mediapipe_utils.hpp" #include "../mediapipe_internal/mediapipegraphdefinition.hpp" +// TODO FIXME #include "../mediapipe_internal/graph_executor_constants.hpp" #include "../predict_request_validation_utils.hpp" #include "../status.hpp" #include "../tfs_frontend/tfs_utils.hpp" @@ -925,6 +926,7 @@ static Status createPacketAndPushIntoGraph(const std::string& name, std::shared_ } std::unique_ptr inputTensor; OVMS_RETURN_ON_FAIL(deserializeTensor(name, *request, inputTensor, pythonBackend)); + SPDLOG_ERROR("Current Timestamp before actual pushing:{}", timestamp.Value()); MP_RETURN_ON_FAIL(graph.AddPacketToInputStream( name, ::mediapipe::packet_internal::Create( 
@@ -1040,8 +1042,10 @@ static Status deserializeTimestampIfAvailable( return status; } } else { + SPDLOG_ERROR("Current Timestamp before setting:{}", timestamp.Value()); auto now = std::chrono::system_clock::now(); timestamp = ::mediapipe::Timestamp(std::chrono::duration_cast(now.time_since_epoch()).count()); + SPDLOG_ERROR("Current Timestamp setting:{}", timestamp.Value()); } return StatusCode::OK; } diff --git a/src/logging.cpp b/src/logging.cpp index e89fce9a07..c07bb6f8d4 100644 --- a/src/logging.cpp +++ b/src/logging.cpp @@ -41,7 +41,8 @@ std::shared_ptr rerank_calculator_logger = std::make_shared ov_logger = std::make_shared("openvino"); #endif -const std::string default_pattern = "[%Y-%m-%d %T.%e][%t][%n][%l][%s:%#] %v"; +//const std::string default_pattern = "[%i] [%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v"; +const std::string default_pattern = "[%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v"; static void set_log_level(const std::string log_level, std::shared_ptr logger) { logger->set_level(spdlog::level::info); diff --git a/src/mediapipe_internal/graph_executor_constants.hpp b/src/mediapipe_internal/graph_executor_constants.hpp new file mode 100644 index 0000000000..ff565769ce --- /dev/null +++ b/src/mediapipe_internal/graph_executor_constants.hpp @@ -0,0 +1,34 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once + +#include +#include + +namespace ovms { + +inline const std::string PYTHON_SESSION_SIDE_PACKET_TAG = "py"; +inline const std::string LLM_SESSION_SIDE_PACKET_TAG = "llm"; +inline const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes"; +inline const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable"; +inline const std::string RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable"; +inline const std::string STT_SESSION_SIDE_PACKET_TAG = "s2t_servable"; +inline const std::string TTS_SESSION_SIDE_PACKET_TAG = "t2s_servable"; +inline const std::string PYTHON_SIDE_PACKET_NAME = "py"; +inline const std::string LLM_SESSION_PACKET_NAME = "llm"; +inline constexpr int64_t STARTING_TIMESTAMP_VALUE = 0; + +} // namespace ovms diff --git a/src/mediapipe_internal/graph_side_packets.hpp b/src/mediapipe_internal/graph_side_packets.hpp new file mode 100644 index 0000000000..66b0134726 --- /dev/null +++ b/src/mediapipe_internal/graph_side_packets.hpp @@ -0,0 +1,69 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once + +#include +#include +#include + +namespace ovms { + +// Forward declarations - only shared_ptrs are stored so full definitions are not needed +class PythonNodeResources; +class GenAiServable; +struct ImageGenerationPipelines; +struct EmbeddingsServable; +struct RerankServable; +struct SttServable; +class TtsServable; + +using PythonNodeResourcesMap = std::unordered_map>; +using GenAiServableMap = std::unordered_map>; +using RerankServableMap = std::unordered_map>; +using SttServableMap = std::unordered_map>; +using TtsServableMap = std::unordered_map>; +using EmbeddingsServableMap = std::unordered_map>; +using ImageGenerationPipelinesMap = std::unordered_map>; + +struct GraphSidePackets { + PythonNodeResourcesMap pythonNodeResourcesMap; + GenAiServableMap genAiServableMap; + ImageGenerationPipelinesMap imageGenPipelinesMap; + EmbeddingsServableMap embeddingsServableMap; + RerankServableMap rerankServableMap; + SttServableMap sttServableMap; + TtsServableMap ttsServableMap; + void clear() { + pythonNodeResourcesMap.clear(); + genAiServableMap.clear(); + imageGenPipelinesMap.clear(); + embeddingsServableMap.clear(); + rerankServableMap.clear(); + sttServableMap.clear(); + ttsServableMap.clear(); + } + bool empty() { + return (pythonNodeResourcesMap.empty() && + genAiServableMap.empty() && + imageGenPipelinesMap.empty() && + embeddingsServableMap.empty() && + rerankServableMap.empty() && + sttServableMap.empty() && + ttsServableMap.empty()); + } +}; + +} // namespace ovms diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp new file mode 100644 index 0000000000..a3e96febb8 --- /dev/null +++ b/src/mediapipe_internal/graphqueue.cpp @@ -0,0 +1,119 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "graphqueue.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../queue.hpp" +#include "src/python/pythonnoderesources.hpp" +#include "src/llm/servable.hpp" + +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/port/status.h" + +#include "graph_executor_constants.hpp" +//#include "mediapipegraphexecutor.hpp" // for side packet tag names +#include "outputstreamobserver.hpp" +namespace ovms { +GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr sidePacketMaps, int streamsLength) : + Queue(streamsLength), + sidePacketMaps(sidePacketMaps) { + SPDLOG_ERROR("ER Constr graph queue:{}", (void*)this); + inferRequests.reserve(streamsLength); + // TODO FIXME split constructor to init to handle retCodes? 
+ for (auto i = 0; i < streamsLength; ++i) { + auto gh = std::make_shared(); + gh->graph = std::make_shared<::mediapipe::CalculatorGraph>(); + gh->currentTimestamp = ::mediapipe::Timestamp(0); + + auto absStatus = gh->graph->Initialize(config); + if (!absStatus.ok()) { + SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); + throw 42; + } + for (auto& name : config.output_stream()) { + std::string streamName = getStreamName(name); + gh->outStreamObservers[streamName] = std::shared_ptr(new NullOutputStreamObserver()); // TODO use at() FIXME + auto& perGraphObserverFunctor = gh->outStreamObservers[streamName]; + absStatus = gh->graph->ObserveOutputStream(streamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); }); // TODO FIXME throw? + if (!absStatus.ok()) { + SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); + throw 42; + } + } + std::map inputSidePackets; +#if (PYTHON_DISABLE == 0) + inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); +#endif + inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = 
mediapipe::MakePacket(sidePacketMaps->sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + SPDLOG_ERROR("ER"); + absStatus = gh->graph->StartRun(inputSidePackets); + SPDLOG_ERROR("ER"); + if (!absStatus.ok()) { + SPDLOG_ERROR("Input sidePackets size:{}", inputSidePackets.size()); + SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); + throw 42; + } + + SPDLOG_ERROR("ER"); + inferRequests.emplace_back(std::move(gh)); + SPDLOG_ERROR("ER"); + } +} +GraphQueue::~GraphQueue() { + SPDLOG_ERROR("ER Destroy graph queue:{}", (void*)this); + for (auto& graphHelper : inferRequests) { + auto absStatus = graphHelper->graph->WaitUntilIdle(); + if (!absStatus.ok()) { + SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); + // throw 42.2; + } + absStatus = graphHelper->graph->CloseAllPacketSources(); + if (!absStatus.ok()) { + SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); + // throw "as"; + } + absStatus = graphHelper->graph->WaitUntilDone(); + if (!absStatus.ok()) { + SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); + // throw 42.2; + } + graphHelper->graph->Cancel(); + if (!absStatus.ok()) { + SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); + // throw 42.2; + } + SPDLOG_ERROR("ER"); + graphHelper->graph.reset(); + SPDLOG_ERROR("ER"); + } + SPDLOG_ERROR("ER Destroy graph queue:{}", (void*)this); +} +} // namespace ovms diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp new file mode 100644 index 0000000000..7c4d89b33f --- /dev/null +++ b/src/mediapipe_internal/graphqueue.hpp @@ -0,0 +1,89 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// 
you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../queue.hpp" + +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/port/status.h" + +#include "graph_executor_constants.hpp" +#include "graph_side_packets.hpp" +#include "outputstreamobserver.hpp" +namespace ovms { +class OutputStreamObserverI; +class NullOutputStreamObserver; +struct GraphHelper { + std::shared_ptr<::mediapipe::CalculatorGraph> graph; // TODO FIXME this does not have to be shared_ptr + std::unordered_map> outStreamObservers; + ::mediapipe::Timestamp currentTimestamp; // TODO FIXME const + // TODO FIXME move constr/= + GraphHelper() = default; + GraphHelper(const GraphHelper&) = delete; + GraphHelper& operator=(const GraphHelper&) = delete; + GraphHelper(GraphHelper&& gh) : + graph(std::move(gh.graph)), + outStreamObservers(std::move(gh.outStreamObservers)), + currentTimestamp(gh.currentTimestamp) {} + GraphHelper& operator=(GraphHelper&& gh) = default; +}; +// we need to keep Graph alive during MP reload hence shared_ptr +//class GraphQueue : public Queue> { +class GraphQueue : public Queue> { + public: // XXX TODO make private? 
we need to acces in mediapipegraphdefinition to set side packets though + std::shared_ptr sidePacketMaps; + +public: + GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr sidePacketMaps, int streamsLength); + ~GraphQueue(); +}; + +struct GraphIdGuard { + std::weak_ptr weakQueue; + const int id; + std::shared_ptr gh; + // TODO FIXME shared_ptr + ::mediapipe::CalculatorGraph& graph; + GraphIdGuard(std::shared_ptr& queue) : + weakQueue(queue), + id(queue->getIdleStream().get()), + gh((queue->getInferRequest(id))), + graph(*gh->graph) { + SPDLOG_ERROR("ER Guard construct this:{}", (void*)this); + } + GraphIdGuard(GraphIdGuard&&) = default; + GraphIdGuard(const GraphIdGuard&) = delete; + ~GraphIdGuard() { + auto existingQueue = weakQueue.lock(); + SPDLOG_ERROR("ER DEstroy Guard begin qu:{}", (void*)existingQueue.get()); + if (existingQueue) + existingQueue->returnStream(this->id); + SPDLOG_ERROR("ER Destroy Guard end qu:{}", (void*)existingQueue.get()); + SPDLOG_ERROR("ER Guard destroy this:{}", (void*)this); + } +}; +} // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index 9047765e75..8b028d186b 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -24,25 +24,25 @@ #include #include -#include "../execution_context.hpp" -#include "../filesystem.hpp" -#include "../kfs_frontend/kfs_utils.hpp" -#include "../kfs_frontend/kfs_request_utils.hpp" -#include "../deserialization_main.hpp" -#include "../metric.hpp" -#include "../model_metric_reporter.hpp" -#include "../modelmanager.hpp" -#include "../ov_utils.hpp" -#include "../llm/servable.hpp" -#include "../llm/servable_initializer.hpp" +#include "src/execution_context.hpp" +#include "src/filesystem.hpp" +#include "src/kfs_frontend/kfs_utils.hpp" +#include "src/kfs_frontend/kfs_request_utils.hpp" +#include "src/deserialization_main.hpp" +#include 
"src/metric.hpp" +#include "src/model_metric_reporter.hpp" +#include "src/modelmanager.hpp" +#include "src/ov_utils.hpp" +#include "src/llm/servable.hpp" +#include "src/llm/servable_initializer.hpp" #if (PYTHON_DISABLE == 0) -#include "../python/pythonnoderesources.hpp" +#include "src/python/pythonnoderesources.hpp" #endif -#include "../status.hpp" -#include "../stringutils.hpp" -#include "../tensorinfo.hpp" -#include "../timer.hpp" -#include "../version.hpp" +#include "src/status.hpp" +#include "src/stringutils.hpp" +#include "src/tensorinfo.hpp" +#include "src/timer.hpp" +#include "src/version.hpp" #include "mediapipe/framework/port/parse_text_proto.h" #include "mediapipe/framework/port/status.h" #include "mediapipe_utils.hpp" @@ -54,6 +54,13 @@ #include "src/image_gen/imagegen_init.hpp" #include "src/image_gen/image_gen_calculator.pb.h" +#include "src/sidepacket_servable.hpp" +#include "src/embeddings/embeddings_servable.hpp" +#include "src/rerank/rerank_servable.hpp" +#include "src/audio/speech_to_text/s2t_servable.hpp" +#include "src/audio/text_to_speech/t2s_servable.hpp" + + namespace ovms { MediapipeGraphConfig MediapipeGraphDefinition::MGC; @@ -129,7 +136,7 @@ Status MediapipeGraphDefinition::dryInitializeTest() { } Status MediapipeGraphDefinition::validate(ModelManager& manager) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Started validation of mediapipe: {}", getName()); - if (!this->sidePacketMaps.empty()) { + if (!this->sidePacketMaps->empty()) { SPDLOG_ERROR("Internal Error: MediaPipe definition is in unexpected state."); return StatusCode::INTERNAL_ERROR; } @@ -146,12 +153,14 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) { if (!validationResult.ok()) { return validationResult; } + SPDLOG_ERROR("ER"); std::unique_lock lock(metadataMtx); auto status = createInputsInfo(); if (!status.ok()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create inputs info for mediapipe graph definition: {}", getName()); return status; } + 
SPDLOG_ERROR("ER"); status = createOutputsInfo(); if (!status.ok()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create outputs info for mediapipe graph definition: {}", getName()); @@ -177,6 +186,10 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) { if (!status.ok()) { return status; } + // TODO FIXME @atobisze + SPDLOG_ERROR("ER"); + this->queue = std::make_shared(this->config, this->sidePacketMaps, 12); + SPDLOG_ERROR("XXX ER GraphQueue:{}", (void*)this->queue.get()); lock.unlock(); notifier.passed = true; @@ -192,12 +205,20 @@ MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name, MetricRegistry* registry, const MetricConfig* metricConfig, PythonBackend* pythonBackend) : + sidePacketMaps(std::make_shared()), name(name), status(SCHEDULER_CLASS_NAME, this->name), pythonBackend(pythonBackend), reporter(std::make_unique(metricConfig, registry, name)) { mgconfig = config; passKfsRequestFlag = false; + SPDLOG_ERROR("XXX ER new PythonNodeResourcesMap:{}", (void*)&this->sidePacketMaps->pythonNodeResourcesMap); + SPDLOG_ERROR("XXX ER new genAiServableMap:{}", (void*)&this->sidePacketMaps->genAiServableMap); + /*if (!sharedThreadPool) { + SPDLOG_ERROR("Created shared Thread Pool XXX"); + //sharedThreadPool = std::make_shared(std::thread::hardware_concurrency()); // TODO FIXME should be in MP factory + }*/ + // TODO FIXME illegal constructor as we do not create queue here } Status MediapipeGraphDefinition::createInputsInfo() { @@ -261,11 +282,12 @@ Status MediapipeGraphDefinition::create(std::unique_ptr& return status; } SPDLOG_DEBUG("Creating Mediapipe graph executor: {}", getName()); - + GraphIdGuard graphIdGuard(this->queue); // TODO timeout? 
+ SPDLOG_ERROR("ER"); pipeline = std::make_unique(getName(), std::to_string(getVersion()), this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, - this->sidePacketMaps, - this->pythonBackend, this->reporter.get()); + *this->sidePacketMaps, + this->pythonBackend, this->reporter.get(), std::move(graphIdGuard)); return status; } @@ -339,12 +361,20 @@ Status MediapipeGraphDefinition::reload(ModelManager& manager, const MediapipeGr std::this_thread::sleep_for(std::chrono::microseconds(1)); } this->mgconfig = config; - this->sidePacketMaps.clear(); + //this->pythonNodeResourcesMap.reset(); + //this->genAiServableMap.reset(); + this->queue.reset(); + SPDLOG_ERROR("XXX ER cleared queue"); + this->sidePacketMaps.reset(); + SPDLOG_ERROR("XXX ER cleared sidePacketMaps"); + // TODO FIXME @atobisze NOW we created new maps here before return validate(manager); } void MediapipeGraphDefinition::retire(ModelManager& manager) { - this->sidePacketMaps.clear(); + this->queue.reset(); + // now we reset shared ptr maps so ongoing executions can continue + this->sidePacketMaps.reset(); this->status.handle(RetireEvent()); } @@ -411,7 +441,7 @@ class ResourcesCleaningGuard { resources(resources) {} ~ResourcesCleaningGuard() { if (shouldCleanup) { - resources.clear(); + resources.clear(); // TODO FIXME @atobisze check } } void disableCleaning() { @@ -423,7 +453,7 @@ Status MediapipeGraphDefinition::initializeNodes() { SPDLOG_INFO("MediapipeGraphDefinition initializing graph nodes"); for (int i = 0; i < config.node().size(); i++) { #if (PYTHON_DISABLE == 0) - auto& pythonNodeResourcesMap = this->sidePacketMaps.pythonNodeResourcesMap; + auto& pythonNodeResourcesMap = this->sidePacketMaps->pythonNodeResourcesMap; if (config.node(i).calculator() == PYTHON_NODE_CALCULATOR_NAME) { ResourcesCleaningGuard pythonResourcesCleaningGuard(pythonNodeResourcesMap); if (!config.node(i).node_options().size()) { @@ -453,7 +483,7 @@ Status 
MediapipeGraphDefinition::initializeNodes() { #endif // Passed to both calculators that require LLM Engine (gRPC KServe & HTTP OpenAI) if (endsWith(config.node(i).calculator(), LLM_NODE_CALCULATOR_NAME)) { - auto& genAiServableMap = this->sidePacketMaps.genAiServableMap; + auto& genAiServableMap = this->sidePacketMaps->genAiServableMap; ResourcesCleaningGuard genAiServablesCleaningGuard(genAiServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node missing options in graph: {}. ", this->name); @@ -479,7 +509,7 @@ Status MediapipeGraphDefinition::initializeNodes() { } // Passed to both calculators that require Image Generation pipelines if (endsWith(config.node(i).calculator(), IMAGE_GEN_CALCULATOR_NAME)) { - auto& imageGenPipelinesMap = this->sidePacketMaps.imageGenPipelinesMap; + auto& imageGenPipelinesMap = this->sidePacketMaps->imageGenPipelinesMap; ResourcesCleaningGuard guard(imageGenPipelinesMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Image Gen node missing options in graph: {}. ", this->name); @@ -513,7 +543,7 @@ Status MediapipeGraphDefinition::initializeNodes() { guard.disableCleaning(); } if (endsWith(config.node(i).calculator(), EMBEDDINGS_NODE_CALCULATOR_NAME)) { - auto& embeddingsServableMap = this->sidePacketMaps.embeddingsServableMap; + auto& embeddingsServableMap = this->sidePacketMaps->embeddingsServableMap; ResourcesCleaningGuard embeddingsServablesCleaningGuard(embeddingsServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Embeddings node missing options in graph: {}. 
", this->name); @@ -546,7 +576,7 @@ Status MediapipeGraphDefinition::initializeNodes() { embeddingsServablesCleaningGuard.disableCleaning(); } if (endsWith(config.node(i).calculator(), RERANK_NODE_CALCULATOR_NAME)) { - auto& rerankServableMap = this->sidePacketMaps.rerankServableMap; + auto& rerankServableMap = this->sidePacketMaps->rerankServableMap; ResourcesCleaningGuard rerankServablesCleaningGuard(rerankServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Rerank node missing options in graph: {}. ", this->name); @@ -569,7 +599,7 @@ Status MediapipeGraphDefinition::initializeNodes() { rerankServablesCleaningGuard.disableCleaning(); } if (endsWith(config.node(i).calculator(), STT_NODE_CALCULATOR_NAME)) { - auto& sttServableMap = this->sidePacketMaps.sttServableMap; + auto& sttServableMap = this->sidePacketMaps->sttServableMap; ResourcesCleaningGuard sttServablesCleaningGuard(sttServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "SpeechToText node missing options in graph: {}. ", this->name); @@ -595,7 +625,7 @@ Status MediapipeGraphDefinition::initializeNodes() { sttServablesCleaningGuard.disableCleaning(); } if (endsWith(config.node(i).calculator(), TTS_NODE_CALCULATOR_NAME)) { - auto& ttsServableMap = this->sidePacketMaps.ttsServableMap; + auto& ttsServableMap = this->sidePacketMaps->ttsServableMap; ResourcesCleaningGuard ttsServablesCleaningGuard(ttsServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "TextToSpeech node missing options in graph: {}. 
", this->name); diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp index 14c9e0679f..2a0804b01e 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.hpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp @@ -40,14 +40,10 @@ #pragma GCC diagnostic pop #pragma warning(pop) +#include "graph_side_packets.hpp" #include "mediapipegraphconfig.hpp" #include "packettypes.hpp" - -#include "../sidepacket_servable.hpp" -#include "../embeddings/embeddings_servable.hpp" -#include "../rerank/rerank_servable.hpp" -#include "../audio/speech_to_text/s2t_servable.hpp" -#include "../audio/text_to_speech/t2s_servable.hpp" +#include "graphqueue.hpp" namespace ovms { class MediapipeGraphDefinitionUnloadGuard; @@ -58,44 +54,6 @@ class ModelManager; class MediapipeGraphExecutor; class Status; class PythonBackend; -class PythonNodeResources; -class GenAiServable; -struct ImageGenerationPipelines; -using PythonNodeResourcesMap = std::unordered_map>; -using GenAiServableMap = std::unordered_map>; -using RerankServableMap = std::unordered_map>; -using SttServableMap = std::unordered_map>; -using TtsServableMap = std::unordered_map>; -using EmbeddingsServableMap = std::unordered_map>; -using ImageGenerationPipelinesMap = std::unordered_map>; - -struct GraphSidePackets { - PythonNodeResourcesMap pythonNodeResourcesMap; - GenAiServableMap genAiServableMap; - ImageGenerationPipelinesMap imageGenPipelinesMap; - EmbeddingsServableMap embeddingsServableMap; - RerankServableMap rerankServableMap; - SttServableMap sttServableMap; - TtsServableMap ttsServableMap; - void clear() { - pythonNodeResourcesMap.clear(); - genAiServableMap.clear(); - imageGenPipelinesMap.clear(); - embeddingsServableMap.clear(); - rerankServableMap.clear(); - sttServableMap.clear(); - ttsServableMap.clear(); - } - bool empty() { - return (pythonNodeResourcesMap.empty() && - genAiServableMap.empty() && - imageGenPipelinesMap.empty() && - 
embeddingsServableMap.empty() && - rerankServableMap.empty() && - sttServableMap.empty() && - ttsServableMap.empty()); - } -}; class MediapipeGraphDefinition { friend MediapipeGraphDefinitionUnloadGuard; @@ -142,7 +100,7 @@ class MediapipeGraphDefinition { static constexpr model_version_t VERSION = 1; protected: - GraphSidePackets sidePacketMaps; + std::shared_ptr sidePacketMaps; struct ValidationResultNotifier { ValidationResultNotifier(PipelineDefinitionStatus& status, std::condition_variable& loadedNotify) : @@ -179,7 +137,7 @@ class MediapipeGraphDefinition { PipelineDefinitionStatus status; MediapipeGraphConfig mgconfig; - ::mediapipe::CalculatorGraphConfig config; + ::mediapipe::CalculatorGraphConfig config; // TODO rename configs Status createInputsInfo(); Status createOutputsInfo(); @@ -209,6 +167,7 @@ class MediapipeGraphDefinition { PythonBackend* pythonBackend; std::unique_ptr reporter; + std::shared_ptr queue; }; class MediapipeGraphDefinitionUnloadGuard { diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp index 93b53fdf8e..c8825f82c8 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.cpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp @@ -19,6 +19,8 @@ #include #include +#include "graph_executor_constants.hpp" + #pragma warning(push) #pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) #pragma GCC diagnostic push @@ -28,10 +30,10 @@ #pragma warning(pop) #if (PYTHON_DISABLE == 0) -#include "../python/python_backend.hpp" +#include "src/python/python_backend.hpp" #endif -#include "../image_gen/pipelines.hpp" +#include "src/image_gen/pipelines.hpp" namespace ovms { @@ -50,7 +52,8 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( const SttServableMap& sttServableMap, const TtsServableMap& ttsServableMap, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : + MediapipeServableMetricReporter* 
mediapipeServableMetricReporter, + GraphIdGuard&& guard) : name(name), version(version), config(config), @@ -60,8 +63,9 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( outputNames(std::move(outputNames)), sidePacketMaps({pythonNodeResourcesMap, llmNodeResourcesMap, {}, embeddingsServableMap, rerankServableMap, sttServableMap, ttsServableMap}), pythonBackend(pythonBackend), - currentStreamTimestamp(STARTING_TIMESTAMP), - mediapipeServableMetricReporter(mediapipeServableMetricReporter) {} + currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), + mediapipeServableMetricReporter(mediapipeServableMetricReporter), + guard(std::move(guard)) {} MediapipeGraphExecutor::MediapipeGraphExecutor( const std::string& name, const std::string& version, @@ -72,7 +76,8 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( std::vector outputNames, const GraphSidePackets& sidePacketMaps, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : + MediapipeServableMetricReporter* mediapipeServableMetricReporter, + GraphIdGuard&& guard) : name(name), version(version), config(config), @@ -82,16 +87,9 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( outputNames(std::move(outputNames)), sidePacketMaps(sidePacketMaps), pythonBackend(pythonBackend), - currentStreamTimestamp(STARTING_TIMESTAMP), - mediapipeServableMetricReporter(mediapipeServableMetricReporter) {} + currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), + mediapipeServableMetricReporter(mediapipeServableMetricReporter), + guard(std::move(guard)) {} -const std::string MediapipeGraphExecutor::PYTHON_SESSION_SIDE_PACKET_TAG = "py"; -const std::string MediapipeGraphExecutor::LLM_SESSION_SIDE_PACKET_TAG = "llm"; -const std::string MediapipeGraphExecutor::IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes"; -const std::string MediapipeGraphExecutor::EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable"; -const std::string 
MediapipeGraphExecutor::RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable"; -const std::string MediapipeGraphExecutor::STT_SESSION_SIDE_PACKET_TAG = "s2t_servable"; -const std::string MediapipeGraphExecutor::TTS_SESSION_SIDE_PACKET_TAG = "t2s_servable"; -const ::mediapipe::Timestamp MediapipeGraphExecutor::STARTING_TIMESTAMP = ::mediapipe::Timestamp(0); } // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index c165469395..21159d03a3 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -36,9 +36,11 @@ #include "mediapipe/framework/port/status.h" #pragma GCC diagnostic pop #pragma warning(pop) +#include "graph_executor_constants.hpp" #include "mediapipe_utils.hpp" #include "mediapipegraphdefinition.hpp" // for version in response and PythonNodeResourceMap #include "packettypes.hpp" +#include "graphqueue.hpp" namespace ovms { class PythonBackend; @@ -71,9 +73,32 @@ inline StatusCode mediapipeAbslToOvmsStatus(absl::StatusCode code) { } \ _Pragma("warning(pop)") +template +struct MyFunctor : public OutputStreamObserverI { + const std::string& requestId; + MediapipeGraphExecutor& exec; + const std::string outputStreamName; + mediapipe_packet_type_enum packetType; + ResponseType& response; + MyFunctor(const std::string& outputStreamName, mediapipe_packet_type_enum packetType, MediapipeGraphExecutor& exec, const RequestType& request, ResponseType& response) : + requestId(getRequestId(request)), + exec(exec), + outputStreamName(outputStreamName), + packetType(packetType), + response(response) { + SPDLOG_ERROR("ER MyFunctor:{} observer constructed:{}", outputStreamName, (void*)this); + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override; + ~MyFunctor() { + SPDLOG_ERROR("ER Destroy Functor:{} this:{}", outputStreamName, (void*)this); + } +}; class MediapipeGraphExecutor { +public: const std::string name; 
const std::string version; + +private: const ::mediapipe::CalculatorGraphConfig config; stream_types_mapping_t inputTypes; stream_types_mapping_t outputTypes; @@ -86,17 +111,11 @@ class MediapipeGraphExecutor { ::mediapipe::Timestamp currentStreamTimestamp; MediapipeServableMetricReporter* mediapipeServableMetricReporter; + GraphIdGuard guard; public: - static const std::string PYTHON_SESSION_SIDE_PACKET_TAG; - static const std::string LLM_SESSION_SIDE_PACKET_TAG; - static const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG; - static const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG; - static const std::string RERANK_SESSION_SIDE_PACKET_TAG; - static const std::string STT_SESSION_SIDE_PACKET_TAG; - static const std::string TTS_SESSION_SIDE_PACKET_TAG; - static const ::mediapipe::Timestamp STARTING_TIMESTAMP; + [[deprecated("Use constructor with side packets instead")]] MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, @@ -108,14 +127,17 @@ class MediapipeGraphExecutor { const SttServableMap& sttServableMap, const TtsServableMap& ttsServableMap, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter); - MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, + MediapipeServableMetricReporter* mediapipeServableMetricReporter, + GraphIdGuard&& guard); + MediapipeGraphExecutor(const std::string& name, + const std::string& version, + const ::mediapipe::CalculatorGraphConfig& config, stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, const GraphSidePackets& sidePacketMaps, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter); + MediapipeServableMetricReporter* mediapipeServableMetricReporter, 
GraphIdGuard&& guard); template Status infer(const RequestType* request, ResponseType* response, ExecutionContext executionContext) { @@ -123,20 +145,28 @@ class MediapipeGraphExecutor { SPDLOG_DEBUG("Start unary KServe request mediapipe graph: {} execution", this->name); MetricCounterGuard failedRequestsGuard(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, false)); MetricGaugeGuard currentGraphsGuard(this->mediapipeServableMetricReporter->currentGraphs.get()); - ::mediapipe::CalculatorGraph graph; - MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); - enum : unsigned int { - PROCESS, - TIMER_END2 - }; - Timer timer; - timer.start(PROCESS); - std::unordered_map outputPollers; + ::mediapipe::CalculatorGraph& graph = this->guard.graph; + SPDLOG_ERROR("SetExecutor XXX"); + //std::ignore = graph.SetExecutor("", sharedThreadPool); // TODO FIXME + SPDLOG_ERROR("Start unary KServe request mediapipe graph: {} initializationXXXbegin", this->name); + //MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); + //std::unordered_map outputPollers; for (auto& name : this->outputNames) { if (name.empty()) { SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", name); return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR; } + SPDLOG_ERROR("ER XXX Will construct observer for guard:{}, helper:{}, graph:{}", (void*)&this->guard, (void*)this->guard.gh.get(), (void*)&graph); + guard.gh->outStreamObservers[name] = std::make_shared>(name, this->outputTypes.at(name), *this, *request, *response); // TODO use at() FIXME + /* + /////////////// + ///// OutputStreamPollers + /////////// + // CreateAPI Specific observer + // Replace guard ptr with new one + // What to do if + 
//MP_RETURN_ON_FAIL(graph.ObserveOutputStream(outputName, [&serverReaderWriter, &sendMutex, &outputName, &executionContext, this](const ::mediapipe::Packet& packet) -> absl::Status { + auto absStatusOrPoller = graph.AddOutputStreamPoller(name); if (!absStatusOrPoller.ok()) { const std::string absMessage = absStatusOrPoller.status().ToString(); @@ -144,26 +174,33 @@ class MediapipeGraphExecutor { return Status(StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR, std::move(absMessage)); } outputPollers.emplace(name, std::move(absStatusOrPoller).value()); + */ } - std::map inputSidePackets; + /*std::map inputSidePackets; OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, *request)); #if (PYTHON_DISABLE == 0) - inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap).At(STARTING_TIMESTAMP); + inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); #endif - inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.imageGenPipelinesMap).At(STARTING_TIMESTAMP); - inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(STARTING_TIMESTAMP); + inputSidePackets[PYTHON_SIDE_PACKET_NAME] = mediapipe::MakePacket(*this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[LLM_SESSION_PACKET_NAME] = mediapipe::MakePacket(*this->sidePacketMaps.llmNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + + inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = 
mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); - inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.rerankServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.sttServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.ttsServableMap).At(STARTING_TIMESTAMP); + inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); MP_RETURN_ON_FAIL(graph.StartRun(inputSidePackets), std::string("start MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_START_ERROR); ::mediapipe::Packet packet; std::set outputPollersWithReceivedPacket; + // TODO FIXME no mechanism to check that + */ size_t numberOfPacketsCreated = 0; + SPDLOG_ERROR("Current Timestamp pushing:{}", this->guard.gh->currentTimestamp.Value()); auto ovms_status = createAndPushPacketsImpl( std::shared_ptr(request, // Custom deleter to avoid deallocation by custom holder @@ -173,8 +210,10 @@ class MediapipeGraphExecutor { this->inputTypes, this->pythonBackend, graph, - 
this->currentStreamTimestamp, + this->guard.gh->currentTimestamp, + // this->currentStreamTimestamp, numberOfPacketsCreated); + SPDLOG_ERROR("Current Timestamp pushed:{}", this->guard.gh->currentTimestamp.Value()); if (!ovms_status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); return ovms_status; @@ -201,7 +240,9 @@ class MediapipeGraphExecutor { } MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); - MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR); + // MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR); + // + /* for (auto& [outputStreamName, poller] : outputPollers) { size_t receivedOutputs = 0; SPDLOG_DEBUG("Will wait for output stream: {} packet", outputStreamName); @@ -225,19 +266,21 @@ class MediapipeGraphExecutor { } SPDLOG_TRACE("Received all: {} packets for: {}", receivedOutputs, outputStreamName); } - status = graph.WaitUntilDone(); + */ + // status = graph.WaitUntilDone(); + status = graph.WaitUntilIdle(); if (!status.ok()) { // Collect error metric after Process() INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); - if (outputPollers.size() != outputPollersWithReceivedPacket.size()) { + /* if (outputPollers.size() != outputPollersWithReceivedPacket.size()) { SPDLOG_DEBUG("Mediapipe failed to execute. 
Failed to receive all output packets"); return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Unknown error during mediapipe execution"); - } - timer.stop(PROCESS); + }*/ + /*timer.stop(PROCESS); double processTime = timer.template elapsed(PROCESS); OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime); - INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext)); + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext));*/ SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name); return StatusCode::OK; } @@ -299,10 +342,10 @@ class MediapipeGraphExecutor { OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, req)); #if (PYTHON_DISABLE == 0) inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap) - .At(STARTING_TIMESTAMP); + .At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); #endif - inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(STARTING_TIMESTAMP); + inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); // Add image generation side packet in case image generation allow for streaming } @@ -394,4 +437,18 @@ class MediapipeGraphExecutor { } }; +template +absl::Status MyFunctor::handlePacket(const ::mediapipe::Packet& packet) { + SPDLOG_ERROR("ER my functor:{}", (void*)this); + auto status = onPacketReadySerializeImpl( + 
this->requestId, + this->exec.name, + this->exec.version, + this->outputStreamName, + this->packetType, + packet, + response); + return status.ok() ? absl::OkStatus() : absl::Status(absl::StatusCode::kInternal, "Some error"); + ; +} } // namespace ovms diff --git a/src/mediapipe_internal/outputstreamobserver.hpp b/src/mediapipe_internal/outputstreamobserver.hpp new file mode 100644 index 0000000000..e08b99fc43 --- /dev/null +++ b/src/mediapipe_internal/outputstreamobserver.hpp @@ -0,0 +1,63 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../execution_context.hpp" +#include "../model_metric_reporter.hpp" +#include "../profiler.hpp" +#include "../status.hpp" +#include "../timer.hpp" +#pragma warning(push) +#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/port/status.h" +#pragma GCC diagnostic pop +#pragma warning(pop) +#include "mediapipe_utils.hpp" +//#include "mediapipegraphdefinition.hpp" // for version in response and PythonNodeResourceMap +#include "packettypes.hpp" +#include "graphqueue.hpp" + +namespace ovms { +class PythonBackend; +class ServableMetricReporter; +class OutputStreamObserverI { +public: + virtual absl::Status handlePacket(const ::mediapipe::Packet& packet) = 0; + virtual ~OutputStreamObserverI() = default; +}; +class NullOutputStreamObserver : public OutputStreamObserverI { +public: + NullOutputStreamObserver() { + SPDLOG_ERROR("NUll observer constructed:{}", (void*)this); + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + SPDLOG_ERROR("Internal error occured:{}", (void*)this); + throw std::runtime_error("Should not happen"); + return absl::Status(absl::StatusCode::kInternal, "Should not happen"); + } +}; +} // namespace ovms diff --git a/src/python/BUILD b/src/python/BUILD index f4fd4c571e..539abaf355 100644 --- a/src/python/BUILD +++ b/src/python/BUILD @@ -75,7 +75,7 @@ ovms_cc_library( "pythonexecutorcalculator_cc_proto", "utils", ], - visibility = ["//visibility:private"], + visibility = ["//visibility:public"], # TODO FIXME? 
alwayslink = 1, data = ["//src/python/binding:pyovms.so"], ) diff --git a/src/test/mediapipe_framework_test.cpp b/src/test/mediapipe_framework_test.cpp index 85abfbd519..bc7c13bad9 100644 --- a/src/test/mediapipe_framework_test.cpp +++ b/src/test/mediapipe_framework_test.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -29,8 +30,11 @@ #include "../grpcservermodule.hpp" #include "../http_rest_api_handler.hpp" #include "../kfs_frontend/kfs_grpc_inference_service.hpp" +#include "../mediapipe_internal/outputstreamobserver.hpp" #include "../mediapipe_internal/mediapipefactory.hpp" #include "../mediapipe_internal/mediapipegraphdefinition.hpp" +#include "../mediapipe_internal/mediapipe_utils.hpp" +#include "mediapipe/framework/thread_pool_executor.h" #include "../metric_config.hpp" #include "../metric_module.hpp" #include "../model_service.hpp" @@ -79,9 +83,411 @@ class MediapipeFrameworkTest : public TestWithTempDir { class MediapipeNegativeFrameworkTest : public MediapipeFrameworkTest { }; -// purpose of this test is to ensure there is no hang in case of one of the graph nodes -// not producing output packet +using mediapipe::Adopt; +using mediapipe::CalculatorGraphConfig; +using mediapipe::Packet; +using mediapipe::ParseTextProtoOrDie; +using mediapipe::Timestamp; + +#define MP_ERROR_STOP(A) \ + { \ + absStatus = A; \ + if (!absStatus.ok()) { \ + const std::string absMessage = absStatus.ToString(); \ + SPDLOG_DEBUG("{}", absMessage); \ + ASSERT_TRUE(false); \ + } \ + } +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerCheckNoInputPackets) { + // we need it only so that dummy is available via C-API + // ServerGuard servGuard("/ovms/src/test/configs/config_standard_dummy.json"); + ServerGuard servGuard("/ovms/src/test/configs/config_benchmark.json"); + std::string graph_proto = R"( + input_stream: "IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: 
"SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. + ////////////////// + // model mgmt thread + ////////////////// + //std::shared_ptr queue; + //queue = std::make_shared(graphConfig, 1); + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + // Install NullObserver + // it's not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + const std::string outputName{"output"}; + absl::Status absStatus; + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + //ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? 
+ // get graphIdGuard from queue + // create FrontendAppropriateObserver + float expVal = 13.5; + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + SPDLOG_ERROR("MyFunctor observer constructed:{}", (void*)this); + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + SPDLOG_ERROR("ER my functor:{}", (void*)this); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + // now start execution + absStatus = graph.StartRun({}); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); + SPDLOG_ERROR("Now swap Functor, we don't have to call ObserverOutputStream"); + expVal = 42; + data[0] = expVal - 1; + perGraphObserverFunctor = std::make_shared(expVal); + // now add second packet + auto inputTensor2 = std::make_unique(datatype, shape, data.data()); + //MP_ERROR_STOP(graph.AddPacketToInputStream( + // inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++)))); + //MP_ERROR_STOP(graph.WaitUntilIdle()); + MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); +} +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOC) { + // we need it only so that dummy is available via C-API + // ServerGuard 
servGuard("/ovms/src/test/configs/config_standard_dummy.json"); + ServerGuard servGuard("/ovms/src/test/configs/config_benchmark.json"); + std::string graph_proto = R"( + input_stream: "IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. + ////////////////// + // model mgmt thread + ////////////////// + //std::shared_ptr queue; + //queue = std::make_shared(graphConfig, 1); + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + // Install NullObserver + // it's not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + const std::string outputName{"output"}; + absl::Status absStatus; + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + //ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? 
+ // get graphIdGuard from queue + // create FrontendAppropriateObserver + float expVal = 13.5; + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + SPDLOG_ERROR("MyFunctor observer constructed:{}", (void*)this); + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + SPDLOG_ERROR("ER my functor:{}", (void*)this); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + // now start execution + absStatus = graph.StartRun({}); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); + SPDLOG_ERROR("Now swap Functor, we don't have to call ObserverOutputStream"); + expVal = 42; + data[0] = expVal - 1; + perGraphObserverFunctor = std::make_shared(expVal); + // now add second packet + auto inputTensor2 = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); +} +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOCCompare) { + // we need it only so that dummy is available via C-API + ServerGuard servGuard("/ovms/src/test/configs/config_standard_dummy.json"); + std::string graph_proto = R"( + input_stream: 
"IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. + ////////////////// + // model mgmt thread + ////////////////// + //std::shared_ptr queue; + //queue = std::make_shared(graphConfig, 1); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + float expVal = 13.5; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + ovms::Timer<3> timer; + const std::string outputName{"output"}; + int N = 1000; + + absl::Status absStatus; + // here starts new case of ovms + { // new case of ovms + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + // Install NullObserver + // it's not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + //ovms::GraphIdGuard 
graphIdGuard(queue); // TODO timeout? + // get graphIdGuard from queue + // create FrontendAppropriateObserver + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + SPDLOG_ERROR("MyFunctor observer constructed:{}", (void*)this); + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY Getting output tensor:{}", (void*)this); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + absStatus = graph.StartRun({}); + SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX warmup"); + { + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + } + std::this_thread::sleep_for(std::chrono::seconds(1)); + SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX warmup end"); + SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX new"); + timer.start(0); + for (auto i = 0; i < N; ++i) { // iter begin + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY AddingPacket"); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY 
WaitUntilIdle"); + MP_ERROR_STOP(graph.WaitUntilIdle()); + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY After WaitUntilIdle"); + } // iter end + timer.stop(0); + SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX end:{}", timer.elapsed(0) / 1000); + } // end of new case ovms + { // current ovms case + SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ovms"); + timer.start(1); + for (auto i = 0; i < N; ++i) { // iter begin + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto absStatusOrPoller = graph.AddOutputStreamPoller(outputName); + MP_ERROR_STOP(graph.StartRun({})); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY AddingPacket"); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + ::mediapipe::Packet packet; + absStatusOrPoller.value().Next(&packet); + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY Getting output tensor"); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY WaitUntilIdle"); + MP_ERROR_STOP(graph.WaitUntilIdle()); + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY After WaitUntilIdle"); + MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); + } // iter end + timer.stop(1); + SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX end:{}", timer.elapsed(1) / 1000); + } + { // thread pool case + //auto sharedThreadPool = std::make_shared(std::thread::hardware_concurrency()); + auto 
sharedThreadPool = std::make_shared(24); + SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX thread"); + timer.start(2); + for (auto i = 0; i < N; ++i) { // iter begin + ::mediapipe::CalculatorGraph graph; + MP_ERROR_STOP(graph.SetExecutor("", sharedThreadPool)); + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto absStatusOrPoller = graph.AddOutputStreamPoller(outputName); + MP_ERROR_STOP(graph.StartRun({})); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY AddingPacket"); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + ::mediapipe::Packet packet; + absStatusOrPoller.value().Next(&packet); + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY Getting output tensor"); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY WaitUntilIdle"); + MP_ERROR_STOP(graph.WaitUntilIdle()); + SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY After WaitUntilIdle"); + MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); + } // iter end + timer.stop(2); + SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX end:{}", timer.elapsed(2) / 1000); + } // end of thread pool case + double ms = timer.elapsed(0) / 1000; + SPDLOG_ERROR("{} iterations of new flow took:{} ms. FPS:{}", N, ms, N / ms * 1000); + ms = timer.elapsed(1) / 1000; + SPDLOG_ERROR("{} iterations of old flow took:{} ms. FPS:{}", N, ms, N / ms * 1000); + ms = timer.elapsed(2) / 1000; + SPDLOG_ERROR("{} iterations of thread pool flow took:{} ms. 
FPS:{}", N, ms, N / ms * 1000); + SPDLOG_ERROR("Threads: {}", std::thread::hardware_concurrency()); +} + TEST_F(MediapipeNegativeFrameworkTest, NoOutputPacketProduced) { + // purpose of this test is to ensure there is no hang in case of one of the graph nodes + // not producing output packet SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/negative/config_no_calc_output_stream.json").c_str()); const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); diff --git a/src/test/mediapipe_validation_test.cpp b/src/test/mediapipe_validation_test.cpp index bdaa588887..78aae33e4e 100644 --- a/src/test/mediapipe_validation_test.cpp +++ b/src/test/mediapipe_validation_test.cpp @@ -170,4 +170,5 @@ TEST_F(MediapipeValidationTest, WrongPrecision) { prepareSingleInput(); request.mutable_inputs(0)->set_datatype("unknown"); ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); + SPDLOG_ERROR("ER"); } diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp index 55b6ab96ed..aa1eee869b 100644 --- a/src/test/mediapipeflow_test.cpp +++ b/src/test/mediapipeflow_test.cpp @@ -1557,7 +1557,7 @@ TEST_F(MediapipeStreamFlowAddTest, InferOnUnloadedGraph) { // Inference on reloaded mediapipe graph, completely different pipeline // Expects old stream to still use old configuration -// Expect new stream to use new configuration +// Expect new stream to use new configuration XXXXXX TEST_F(MediapipeStreamFlowAddTest, InferOnReloadedGraph) { const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); @@ -1687,6 +1687,8 @@ TEST_P(MediapipeFlowAddTest, InferStreamDisconnectionBeforeFirstRequest) { } TEST_F(MediapipeFlowTest, InferWithParams) { + GTEST_SKIP() << "Not possible with graph queue"; + return; 
SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_graph_with_side_packets.json"); const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); @@ -1991,6 +1993,28 @@ TEST(Mediapipe, MetadataDummyInputTypes) { } } } + node { + calculator: "OVMSOVCalculator" + input_stream: "B:in2" + output_stream: "A:out2" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "dummyUpper" + servable_version: "1" + } + } + } + node { + calculator: "OVMSOVCalculator" + input_stream: "B:in2" + output_stream: "A:out3" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "dummyUpper" + servable_version: "1" + } + } + } )"; ovms::MediapipeGraphConfig mgc{"mediaDummy", "", ""}; @@ -2681,13 +2705,15 @@ class MediapipeSerialization : public ::testing::Test { stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : - MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, pythonNodeResourcesMap, {}, {}, {}, {}, {}, nullptr, mediapipeServableMetricReporter) {} + const std::shared_ptr& pythonNodeResourcesMap, + const std::shared_ptr& gasm, + MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) : + MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, *pythonNodeResourcesMap, *gasm, {}, {}, {}, {}, nullptr, mediapipeServableMetricReporter, std::move(guard)) {} }; protected: std::unique_ptr reporter; + std::shared_ptr queue; std::unique_ptr executor; ::inference::ModelInferResponse mp_response; void SetUp() { @@ -2700,9 +2726,14 @@ class MediapipeSerialization : public ::testing::Test { const std::vector 
inputNames; const std::vector outputNames; const ::mediapipe::CalculatorGraphConfig config; - PythonNodeResourcesMap pythonNodeResourcesMap; this->reporter = std::make_unique(nullptr, nullptr, ""); // disabled reporter - executor = std::make_unique("", "", config, mapping, mapping, inputNames, outputNames, pythonNodeResourcesMap, this->reporter.get()); + auto sidePackets = std::make_shared(); + std::shared_ptr pnsm = std::make_shared(); + std::shared_ptr gasm = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); + executor = std::make_unique("", "", config, mapping, mapping, inputNames, outputNames, pnsm, gasm, this->reporter.get(), std::move(guard)); + SPDLOG_ERROR("Exit SetUp"); } }; @@ -3099,7 +3130,7 @@ class MediapipeFlowStartTest : public TestWithTempDir { auto start = std::chrono::high_resolution_clock::now(); while (!isMpReady(waitForServable) && (std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start).count() < SERVER_START_FROM_CONFIG_TIMEOUT_SECONDS)) { - std::this_thread::sleep_for(std::chrono::microseconds(100)); + std::this_thread::sleep_for(std::chrono::microseconds(1000)); } const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); if (!grpcModule) { diff --git a/src/test/pythonnode_test.cpp b/src/test/pythonnode_test.cpp index 54c9acbfa1..7e2595a58f 100644 --- a/src/test/pythonnode_test.cpp +++ b/src/test/pythonnode_test.cpp @@ -1002,10 +1002,10 @@ class MockedMediapipeGraphExecutorPy : public ovms::MediapipeGraphExecutor { stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, + const std::shared_ptr& pythonNodeResourcesMap, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : - MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, 
pythonNodeResourcesMap, {}, {}, {}, {}, {}, pythonBackend, mediapipeServableMetricReporter) {} + MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) : + MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, *pythonNodeResourcesMap, {}, {}, {}, {}, {}, pythonBackend, mediapipeServableMetricReporter, std::move(guard)) {} }; TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) { @@ -1014,8 +1014,11 @@ TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) { const std::vector inputNames; const std::vector outputNames; const ::mediapipe::CalculatorGraphConfig config; - PythonNodeResourcesMap pythonNodeResourcesMap; - auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, pythonNodeResourcesMap, getPythonBackend(), this->reporter.get()); + auto sidePackets = std::make_shared(); + std::shared_ptr pnsm = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); + auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, pnsm, getPythonBackend(), this->reporter.get(), std::move(guard)); std::string datatype = "FP32"; std::string name = "python_result"; diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp index 02e7c4178a..1a2e6d2b78 100644 --- a/src/test/streaming_test.cpp +++ b/src/test/streaming_test.cpp @@ -355,11 +355,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::KFS_REQUEST}}, {{"out", mediapipe_packet_type_enum::KFS_RESPONSE}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, 
nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // Mock receiving 3 requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}); @@ -412,11 +415,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // Mock receiving 3 requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}); // no timestamp specified, server will assign one @@ -555,11 +561,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // Mock receiving 3 requests with manually (client) assigned ascending order of timestamp and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}, 3); // first request with timestamp 3 @@ -600,11 +609,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); 
+ std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // Mock only 1 request and disconnect immediately prepareRequest(this->firstRequest, {{"in", 3.5f}}); @@ -1220,6 +1232,9 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in1", mediapipe_packet_type_enum::OVTENSOR}, @@ -1230,7 +1245,7 @@ node { {"out3", mediapipe_packet_type_enum::OVTENSOR}}, {"in1", "in2", "in3"}, {"out1", "out2", "out3"}, - {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1272,6 +1287,9 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in1", mediapipe_packet_type_enum::OVTENSOR}, @@ -1282,7 +1300,7 @@ node { {"out3", mediapipe_packet_type_enum::OVTENSOR}}, {"in1", "in2", "in3"}, {"out1", "out2", "out3"}, - {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; std::promise signalPromise; std::future signalFuture = 
signalPromise.get_future(); @@ -1313,11 +1331,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1347,11 +1368,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"wrong_name"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; // cannot install observer due to wrong output name (should never happen due to validation) +{"in"}, {"wrong_name"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // cannot install observer due to wrong output name (should never happen due to validation) EXPECT_CALL(this->stream, Read(_)).Times(0); EXPECT_CALL(this->stream, Write(_, _)).Times(0); @@ -1372,11 +1396,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, 
config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; prepareRequest(this->firstRequest, {}); EXPECT_CALL(this->stream, Read(_)) @@ -1400,11 +1427,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1436,11 +1466,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); @@ -1459,11 +1492,14 @@ node { ::mediapipe::CalculatorGraphConfig config; 
ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // Invalid request - missing data in buffer prepareInvalidRequest(this->firstRequest, {"in"}); // no timestamp specified, server will assign one @@ -1494,11 +1530,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; std::promise signalPromise[3]; std::future signalFuture[3] = { @@ -1541,11 +1580,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; 
prepareRequest(this->firstRequest, {{"in", 3.5f}}, 0); EXPECT_CALL(this->stream, Read(_)) @@ -1569,11 +1611,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); setRequestTimestamp(this->firstRequest, std::string("not an int")); @@ -1604,11 +1649,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // Timestamps not allowed in stream // Expect continuity of operation and response with error message @@ -1646,11 +1694,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", 
mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // Allowed in stream for (auto timestamp : std::vector<::mediapipe::Timestamp>{ @@ -1682,11 +1733,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // Mock receiving 3 requests and disconnection prepareRequestWithParam(this->firstRequest, {{"in", 3.5f}}, {"val", 65}); // request with parameter val @@ -1719,11 +1773,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // Mock receiving the invalid request and disconnection // Request with invalid param py (special pythons session side packet) @@ -1748,11 +1805,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = 
std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); // missing required request param EXPECT_CALL(this->stream, Read(_)).Times(0); @@ -1774,11 +1834,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // Mock receiving 2 requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}, std::nullopt, this->name, this->version); // no timestamp specified, server will assign one @@ -1808,11 +1871,14 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, - {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()}; +{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), 
std::move(guard)}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); diff --git a/src/test/test_utils.hpp b/src/test/test_utils.hpp index 879ab1313e..18c0f6e01d 100644 --- a/src/test/test_utils.hpp +++ b/src/test/test_utils.hpp @@ -816,8 +816,8 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition { std::string inputConfig; #if (PYTHON_DISABLE == 0) ovms::PythonNodeResources* getPythonNodeResources(const std::string& nodeName) { - auto it = this->sidePacketMaps.pythonNodeResourcesMap.find(nodeName); - if (it == std::end(this->sidePacketMaps.pythonNodeResourcesMap)) { + auto it = this->sidePacketMaps->pythonNodeResourcesMap.find(nodeName); + if (it == std::end(this->sidePacketMaps->pythonNodeResourcesMap)) { return nullptr; } else { return it->second.get(); @@ -826,8 +826,8 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition { #endif ovms::GenAiServable* getGenAiServable(const std::string& nodeName) { - auto it = this->sidePacketMaps.genAiServableMap.find(nodeName); - if (it == std::end(this->sidePacketMaps.genAiServableMap)) { + auto it = this->sidePacketMaps->genAiServableMap.find(nodeName); + if (it == std::end(this->sidePacketMaps->genAiServableMap)) { return nullptr; } else { return it->second.get(); @@ -838,7 +838,7 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition { return this->validateForConfigLoadableness(); } - ovms::GenAiServableMap& getGenAiServableMap() { return this->sidePacketMaps.genAiServableMap; } + ovms::GenAiServableMap& getGenAiServableMap() { return this->sidePacketMaps->genAiServableMap; } DummyMediapipeGraphDefinition(const std::string name, const ovms::MediapipeGraphConfig& config, From 829257b497ac9eb3078c53901bacc8028d6aabcf Mon Sep 17 00:00:00 2001 From: Adrian Tobiszewski Date: Tue, 17 Feb 2026 12:16:11 +0100 Subject: [PATCH 2/8] Add config for queue --- .../mediapipegraphconfig.cpp | 11 +++ .../mediapipegraphconfig.hpp | 71 
+++++++++++++++++++ .../mediapipegraphdefinition.cpp | 21 ++++-- .../mediapipegraphdefinition.hpp | 1 + src/test/http_openai_handler_test.cpp | 4 +- src/test/test_utils.hpp | 5 +- 6 files changed, 106 insertions(+), 7 deletions(-) diff --git a/src/mediapipe_internal/mediapipegraphconfig.cpp b/src/mediapipe_internal/mediapipegraphconfig.cpp index 448da4e1b8..7cde853717 100644 --- a/src/mediapipe_internal/mediapipegraphconfig.cpp +++ b/src/mediapipe_internal/mediapipegraphconfig.cpp @@ -118,6 +118,17 @@ Status MediapipeGraphConfig::parseNode(const rapidjson::Value& v) { this->setSubconfigPath(DEFAULT_SUBCONFIG_FILENAME); this->setModelMeshSubconfigPath(DEFAULT_MODELMESH_SUBCONFIG_FILENAME); } + if (v.HasMember("graph_queue_size")) { + const auto& val = v["graph_queue_size"]; + if (val.IsInt()) { + this->setGraphQueueSize(val.GetInt()); + } else if (val.IsString() && std::string(val.GetString()) == "AUTO") { + this->setGraphQueueSizeAuto(); + } else { + SPDLOG_ERROR("Invalid graph_queue_size value. Expected integer or \"AUTO\"."); + return StatusCode::JSON_INVALID; + } + } } catch (std::logic_error& e) { SPDLOG_DEBUG("Relative path error: {}", e.what()); return StatusCode::INTERNAL_ERROR; diff --git a/src/mediapipe_internal/mediapipegraphconfig.hpp b/src/mediapipe_internal/mediapipegraphconfig.hpp index 2e4f3d428e..c4f71b3f6f 100644 --- a/src/mediapipe_internal/mediapipegraphconfig.hpp +++ b/src/mediapipe_internal/mediapipegraphconfig.hpp @@ -15,7 +15,9 @@ //***************************************************************************** #pragma once +#include #include +#include #pragma warning(push) #pragma warning(disable : 6313) #include @@ -27,6 +29,22 @@ extern const std::string DEFAULT_GRAPH_FILENAME; extern const std::string DEFAULT_SUBCONFIG_FILENAME; extern const std::string DEFAULT_MODELMESH_SUBCONFIG_FILENAME; +/** + * @brief Tag type representing AUTO graph queue size (determined at runtime). 
/**
 * @brief Tag type representing AUTO graph queue size (determined at runtime).
 *
 * The tag carries no state, so any two tags are equal. Both == and != are
 * provided because std::variant / std::optional comparison operators require
 * the matching operator on every alternative (C++17 does not synthesize != from ==).
 */
struct GraphQueueAutoTag {
    constexpr bool operator==(const GraphQueueAutoTag&) const noexcept { return true; }
    constexpr bool operator!=(const GraphQueueAutoTag&) const noexcept { return false; }
};

/**
 * @brief Represents the user's graph_queue_size setting.
 *
 * - std::nullopt      => user did not set this field
 * - int               => user explicitly set a numeric value
 * - GraphQueueAutoTag => user explicitly set "AUTO"
 */
using GraphQueueSizeValue = std::optional<std::variant<int, GraphQueueAutoTag>>;
+ */ + int getInitialQueueSize() const { + if (!this->graphQueueSize.has_value()) { + return 1; // not set - default + } + if (std::holds_alternative(*this->graphQueueSize)) { + return 16; // TODO FIXME @atobisze determine optimal size based on nireq / hardware + } + return std::get(*this->graphQueueSize); + } + bool isReloadRequired(const MediapipeGraphConfig& rhs) const; /** diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index 8b028d186b..e0453e52e1 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -186,10 +186,7 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) { if (!status.ok()) { return status; } - // TODO FIXME @atobisze - SPDLOG_ERROR("ER"); - this->queue = std::make_shared(this->config, this->sidePacketMaps, 12); - SPDLOG_ERROR("XXX ER GraphQueue:{}", (void*)this->queue.get()); + this->initializeQueueIfRequired(); lock.unlock(); notifier.passed = true; @@ -200,6 +197,17 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) { return StatusCode::OK; } +void MediapipeGraphDefinition::initializeQueueIfRequired() { + // TODO FIXME @atobisze + int initialQueueSize = this->mgconfig.getInitialQueueSize(); + if (initialQueueSize < 0) { + SPDLOG_DEBUG("Graph queue creation disabled for mediapipe: {} (graph_queue_size={})", getName(), initialQueueSize); + return; + } + this->queue = std::make_shared(this->config, this->sidePacketMaps, initialQueueSize); + SPDLOG_DEBUG("Created graph queue with size {} for mediapipe: {}", initialQueueSize, getName()); +} + MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name, const MediapipeGraphConfig& config, MetricRegistry* registry, @@ -282,6 +290,10 @@ Status MediapipeGraphDefinition::create(std::unique_ptr& return status; } SPDLOG_DEBUG("Creating Mediapipe graph executor: {}", getName()); + if (!this->queue) { + 
SPDLOG_ERROR("Cannot create mediapipe graph executor: {} - graph queue not initialized (graph_queue_size=0)", getName()); + return StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR; + } GraphIdGuard graphIdGuard(this->queue); // TODO timeout? SPDLOG_ERROR("ER"); pipeline = std::make_unique(getName(), std::to_string(getVersion()), @@ -451,6 +463,7 @@ class ResourcesCleaningGuard { Status MediapipeGraphDefinition::initializeNodes() { SPDLOG_INFO("MediapipeGraphDefinition initializing graph nodes"); + this->sidePacketMaps = std::make_shared(); for (int i = 0; i < config.node().size(); i++) { #if (PYTHON_DISABLE == 0) auto& pythonNodeResourcesMap = this->sidePacketMaps->pythonNodeResourcesMap; diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp index 2a0804b01e..7a4739438b 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.hpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp @@ -127,6 +127,7 @@ class MediapipeGraphDefinition { Status setStreamTypes(); Status dryInitializeTest(); + void initializeQueueIfRequired(); std::string chosenConfig; static MediapipeGraphConfig MGC; const std::string name; diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index fec2009867..6494514806 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -212,7 +212,7 @@ Key: content-type; Value: application/json } JSON Parser: -{"model":"gpt","stream":false,"messages":[]}0)"; +{"model":"gpt","stream":false,"messages":[]}012345678)"; // we reuse graph so this appends each time ASSERT_EQ(response, expectedResponse); } @@ -244,7 +244,7 @@ Key: test2; Value: header } JSON Parser: -{"model":"gpt","stream":false,"messages":[]}0)"; +{"model":"gpt","stream":false,"messages":[]}012345678)"; // we reuse graph so this appends each time ASSERT_EQ(response, expectedResponse); } diff --git a/src/test/test_utils.hpp b/src/test/test_utils.hpp 
index 18c0f6e01d..65e72b543c 100644 --- a/src/test/test_utils.hpp +++ b/src/test/test_utils.hpp @@ -844,7 +844,10 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition { const ovms::MediapipeGraphConfig& config, std::string inputConfig, ovms::PythonBackend* pythonBackend = nullptr) : - ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, pythonBackend) { this->inputConfig = inputConfig; } + ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, pythonBackend) { + this->inputConfig = inputConfig; + this->mgconfig.setGraphQueueSize(-1); // TODO FIXME @atobisze + } // Do not read from path - use predefined config contents ovms::Status validateForConfigFileExistence() override { From 5f96c66b4a605abfb56474eec719a3e1099cc39b Mon Sep 17 00:00:00 2001 From: Adrian Tobiszewski Date: Wed, 18 Feb 2026 11:50:54 +0100 Subject: [PATCH 3/8] Checkpoint - switchable queue --- .../mediapipegraphconfig.cpp | 11 -- .../mediapipegraphconfig.hpp | 4 +- .../mediapipegraphdefinition.cpp | 67 +++++++-- .../mediapipegraphdefinition.hpp | 1 + .../mediapipegraphexecutor.cpp | 23 ++- .../mediapipegraphexecutor.hpp | 139 +++++++++++------- src/test/http_openai_handler_test.cpp | 97 +++++++++++- ...ediapipe_openai_chat_completions_mock.json | 3 +- src/test/mediapipeflow_test.cpp | 123 +++++++++++++++- src/test/streaming_test.cpp | 110 +++----------- src/test/test_utils.hpp | 3 +- 11 files changed, 404 insertions(+), 177 deletions(-) diff --git a/src/mediapipe_internal/mediapipegraphconfig.cpp b/src/mediapipe_internal/mediapipegraphconfig.cpp index 7cde853717..448da4e1b8 100644 --- a/src/mediapipe_internal/mediapipegraphconfig.cpp +++ b/src/mediapipe_internal/mediapipegraphconfig.cpp @@ -118,17 +118,6 @@ Status MediapipeGraphConfig::parseNode(const rapidjson::Value& v) { this->setSubconfigPath(DEFAULT_SUBCONFIG_FILENAME); this->setModelMeshSubconfigPath(DEFAULT_MODELMESH_SUBCONFIG_FILENAME); } - if (v.HasMember("graph_queue_size")) { - const auto& val = 
v["graph_queue_size"]; - if (val.IsInt()) { - this->setGraphQueueSize(val.GetInt()); - } else if (val.IsString() && std::string(val.GetString()) == "AUTO") { - this->setGraphQueueSizeAuto(); - } else { - SPDLOG_ERROR("Invalid graph_queue_size value. Expected integer or \"AUTO\"."); - return StatusCode::JSON_INVALID; - } - } } catch (std::logic_error& e) { SPDLOG_DEBUG("Relative path error: {}", e.what()); return StatusCode::INTERNAL_ERROR; diff --git a/src/mediapipe_internal/mediapipegraphconfig.hpp b/src/mediapipe_internal/mediapipegraphconfig.hpp index c4f71b3f6f..64a75a9a12 100644 --- a/src/mediapipe_internal/mediapipegraphconfig.hpp +++ b/src/mediapipe_internal/mediapipegraphconfig.hpp @@ -264,12 +264,12 @@ class MediapipeGraphConfig { * 0 => queue with size 0 (user set 0) * >0 => explicit size or resolved AUTO / default * - * When not set (nullopt): returns default of 1. + * When not set (nullopt): returns -1 (queue disabled). * When AUTO: returns hardcoded value (TODO FIXME @atobisze determine optimal size). 
*/ int getInitialQueueSize() const { if (!this->graphQueueSize.has_value()) { - return 1; // not set - default + return -1; // not set - queue disabled by default } if (std::holds_alternative(*this->graphQueueSize)) { return 16; // TODO FIXME @atobisze determine optimal size based on nireq / hardware diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index e0453e52e1..6b2161ca58 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -18,8 +18,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -99,6 +101,45 @@ Status MediapipeGraphDefinition::validateForConfigFileExistence() { config << ifs.rdbuf(); this->mgconfig.setCurrentGraphPbTxtMD5(ovms::FileSystem::getStringMD5(config.str())); this->chosenConfig.assign(config.str()); + return parseGraphQueueSizeDirective(); +} + +Status MediapipeGraphDefinition::parseGraphQueueSizeDirective() { + // Scan pbtxt content for: # OVMS_GRAPH_QUEUE_SIZE: + static const std::regex directiveRegex( + R"(^\s*#\s*OVMS_GRAPH_QUEUE_SIZE\s*:\s*(\S+)\s*$)", + std::regex::multiline); + std::smatch match; + if (!std::regex_search(this->chosenConfig, match, directiveRegex)) { + SPDLOG_TRACE("OVMS_GRAPH_QUEUE_SIZE directive not found in pbtxt for mediapipe: {}", getName()); + return StatusCode::OK; // directive not present - queue disabled by default + } + std::string value = match[1].str(); + if (value == "AUTO") { + this->mgconfig.setGraphQueueSizeAuto(); + return StatusCode::OK; + } + // Try to parse as integer + auto parsed = stoi32(value); + if (!parsed.has_value()) { + SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: '{}'. Expected integer or 'AUTO'.", value); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + int queueSize = parsed.value(); + if (queueSize < -1) { + SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: {}. 
Must be -1 (disabled), or a positive integer.", queueSize); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + if (queueSize == 0) { + SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: 0. Must be -1 (disabled), or a positive integer."); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + unsigned int maxThreads = std::thread::hardware_concurrency(); + if (maxThreads > 0 && queueSize > static_cast(maxThreads)) { + SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: {}. Exceeds available hardware threads: {}.", queueSize, maxThreads); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + this->mgconfig.setGraphQueueSize(queueSize); return StatusCode::OK; } @@ -290,16 +331,18 @@ Status MediapipeGraphDefinition::create(std::unique_ptr& return status; } SPDLOG_DEBUG("Creating Mediapipe graph executor: {}", getName()); - if (!this->queue) { - SPDLOG_ERROR("Cannot create mediapipe graph executor: {} - graph queue not initialized (graph_queue_size=0)", getName()); - return StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR; + if (this->queue) { + GraphIdGuard graphIdGuard(this->queue); + pipeline = std::make_unique(getName(), std::to_string(getVersion()), + this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, + *this->sidePacketMaps, + this->pythonBackend, this->reporter.get(), std::move(graphIdGuard)); + } else { + pipeline = std::make_unique(getName(), std::to_string(getVersion()), + this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, + *this->sidePacketMaps, + this->pythonBackend, this->reporter.get()); } - GraphIdGuard graphIdGuard(this->queue); // TODO timeout? 
- SPDLOG_ERROR("ER"); - pipeline = std::make_unique(getName(), std::to_string(getVersion()), - this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, - *this->sidePacketMaps, - this->pythonBackend, this->reporter.get(), std::move(graphIdGuard)); return status; } @@ -373,13 +416,10 @@ Status MediapipeGraphDefinition::reload(ModelManager& manager, const MediapipeGr std::this_thread::sleep_for(std::chrono::microseconds(1)); } this->mgconfig = config; - //this->pythonNodeResourcesMap.reset(); - //this->genAiServableMap.reset(); this->queue.reset(); SPDLOG_ERROR("XXX ER cleared queue"); - this->sidePacketMaps.reset(); + this->sidePacketMaps = std::make_shared(); SPDLOG_ERROR("XXX ER cleared sidePacketMaps"); - // TODO FIXME @atobisze NOW we created new maps here before return validate(manager); } @@ -463,7 +503,6 @@ class ResourcesCleaningGuard { Status MediapipeGraphDefinition::initializeNodes() { SPDLOG_INFO("MediapipeGraphDefinition initializing graph nodes"); - this->sidePacketMaps = std::make_shared(); for (int i = 0; i < config.node().size(); i++) { #if (PYTHON_DISABLE == 0) auto& pythonNodeResourcesMap = this->sidePacketMaps->pythonNodeResourcesMap; diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp index 7a4739438b..5f03ff2ba5 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.hpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp @@ -123,6 +123,7 @@ class MediapipeGraphDefinition { }; virtual Status validateForConfigFileExistence(); + Status parseGraphQueueSizeDirective(); Status validateForConfigLoadableness(); Status setStreamTypes(); diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp index c8825f82c8..5c59d00235 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.cpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp @@ -90,6 +90,27 @@ 
MediapipeGraphExecutor::MediapipeGraphExecutor( currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), mediapipeServableMetricReporter(mediapipeServableMetricReporter), guard(std::move(guard)) {} - +MediapipeGraphExecutor::MediapipeGraphExecutor( + const std::string& name, + const std::string& version, + const ::mediapipe::CalculatorGraphConfig& config, + stream_types_mapping_t inputTypes, + stream_types_mapping_t outputTypes, + std::vector inputNames, + std::vector outputNames, + const GraphSidePackets& sidePacketMaps, + PythonBackend* pythonBackend, + MediapipeServableMetricReporter* mediapipeServableMetricReporter) : + name(name), + version(version), + config(config), + inputTypes(std::move(inputTypes)), + outputTypes(std::move(outputTypes)), + inputNames(std::move(inputNames)), + outputNames(std::move(outputNames)), + sidePacketMaps(sidePacketMaps), + pythonBackend(pythonBackend), + currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), + mediapipeServableMetricReporter(mediapipeServableMetricReporter) {} } // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index 21159d03a3..a73f246e66 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -16,6 +16,7 @@ #pragma once #include #include +#include #include #include #include @@ -111,7 +112,7 @@ class MediapipeGraphExecutor { ::mediapipe::Timestamp currentStreamTimestamp; MediapipeServableMetricReporter* mediapipeServableMetricReporter; - GraphIdGuard guard; + std::optional guard; public: @@ -138,6 +139,16 @@ class MediapipeGraphExecutor { const GraphSidePackets& sidePacketMaps, PythonBackend* pythonBackend, MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard); + // Constructor without graph queue (old path - graph created per-request) + MediapipeGraphExecutor(const std::string& name, + const 
std::string& version, + const ::mediapipe::CalculatorGraphConfig& config, + stream_types_mapping_t inputTypes, + stream_types_mapping_t outputTypes, + std::vector inputNames, std::vector outputNames, + const GraphSidePackets& sidePacketMaps, + PythonBackend* pythonBackend, + MediapipeServableMetricReporter* mediapipeServableMetricReporter); template Status infer(const RequestType* request, ResponseType* response, ExecutionContext executionContext) { @@ -145,28 +156,74 @@ class MediapipeGraphExecutor { SPDLOG_DEBUG("Start unary KServe request mediapipe graph: {} execution", this->name); MetricCounterGuard failedRequestsGuard(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, false)); MetricGaugeGuard currentGraphsGuard(this->mediapipeServableMetricReporter->currentGraphs.get()); - ::mediapipe::CalculatorGraph& graph = this->guard.graph; - SPDLOG_ERROR("SetExecutor XXX"); - //std::ignore = graph.SetExecutor("", sharedThreadPool); // TODO FIXME - SPDLOG_ERROR("Start unary KServe request mediapipe graph: {} initializationXXXbegin", this->name); - //MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); - //std::unordered_map outputPollers; + if (this->guard.has_value()) { + return inferWithQueue(request, response, executionContext, failedRequestsGuard); + } else { + return inferWithoutQueue(request, response, executionContext, failedRequestsGuard); + } + } + + template + Status inferWithQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) { + ::mediapipe::CalculatorGraph& graph = this->guard->graph; for (auto& name : this->outputNames) { if (name.empty()) { SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", name); return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR; } - SPDLOG_ERROR("ER XXX Will construct observer for 
guard:{}, helper:{}, graph:{}", (void*)&this->guard, (void*)this->guard.gh.get(), (void*)&graph); - guard.gh->outStreamObservers[name] = std::make_shared>(name, this->outputTypes.at(name), *this, *request, *response); // TODO use at() FIXME - /* - /////////////// - ///// OutputStreamPollers - /////////// - // CreateAPI Specific observer - // Replace guard ptr with new one - // What to do if - //MP_RETURN_ON_FAIL(graph.ObserveOutputStream(outputName, [&serverReaderWriter, &sendMutex, &outputName, &executionContext, this](const ::mediapipe::Packet& packet) -> absl::Status { + guard->gh->outStreamObservers[name] = std::make_shared>(name, this->outputTypes.at(name), *this, *request, *response); + } + + size_t numberOfPacketsCreated = 0; + auto ovms_status = createAndPushPacketsImpl( + std::shared_ptr(request, [](const RequestType*) {}), + this->inputTypes, + this->pythonBackend, + graph, + this->guard->gh->currentTimestamp, + numberOfPacketsCreated); + if (!ovms_status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + return ovms_status; + } + if (this->inputNames.size() > numberOfPacketsCreated) { + SPDLOG_DEBUG("Not all input packets created. Expected: {}, Actual: {}. 
Aborting execution of mediapipe graph: {}", + this->inputNames.size(), numberOfPacketsCreated, this->name); + return Status(StatusCode::INVALID_NO_OF_INPUTS, "Not all input packets created"); + } + + failedRequestsGuard.disable(); + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, true)); + + auto status = graph.WaitUntilIdle(); + if (!status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + } + MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); + + status = graph.WaitUntilIdle(); + if (!status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + } + MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); + SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name); + return StatusCode::OK; + } + + template + Status inferWithoutQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) { + ::mediapipe::CalculatorGraph graph; + MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); + enum : unsigned int { PROCESS, TIMER_END2 }; + Timer timer; + timer.start(PROCESS); + std::unordered_map outputPollers; + for (auto& name : this->outputNames) { + if (name.empty()) { + SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", name); + return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR; + } auto absStatusOrPoller = graph.AddOutputStreamPoller(name); if (!absStatusOrPoller.ok()) { const std::string absMessage = absStatusOrPoller.status().ToString(); @@ -174,20 +231,15 @@ class MediapipeGraphExecutor { return Status(StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR, std::move(absMessage)); } 
outputPollers.emplace(name, std::move(absStatusOrPoller).value()); - */ } - /*std::map inputSidePackets; + std::map inputSidePackets; OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, *request)); #if (PYTHON_DISABLE == 0) inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); #endif - inputSidePackets[PYTHON_SIDE_PACKET_NAME] = mediapipe::MakePacket(*this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); - inputSidePackets[LLM_SESSION_PACKET_NAME] = mediapipe::MakePacket(*this->sidePacketMaps.llmNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); - inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); - inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); @@ -196,53 +248,36 @@ class MediapipeGraphExecutor { ::mediapipe::Packet packet; std::set outputPollersWithReceivedPacket; - // TODO FIXME no mechanism to check that - */ size_t numberOfPacketsCreated = 0; - 
SPDLOG_ERROR("Current Timestamp pushing:{}", this->guard.gh->currentTimestamp.Value()); auto ovms_status = createAndPushPacketsImpl( - std::shared_ptr(request, - // Custom deleter to avoid deallocation by custom holder - // Conversion to shared_ptr is required for unified deserialization method - // for first and subsequent requests - [](const RequestType*) {}), + std::shared_ptr(request, [](const RequestType*) {}), this->inputTypes, this->pythonBackend, graph, - this->guard.gh->currentTimestamp, - // this->currentStreamTimestamp, + this->currentStreamTimestamp, numberOfPacketsCreated); - SPDLOG_ERROR("Current Timestamp pushed:{}", this->guard.gh->currentTimestamp.Value()); if (!ovms_status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); return ovms_status; } - // This differs from inferStream - we require user to feed all streams if (this->inputNames.size() > numberOfPacketsCreated) { SPDLOG_DEBUG("Not all input packets created. Expected: {}, Actual: {}. Aborting execution of mediapipe graph: {}", - this->inputNames.size(), - numberOfPacketsCreated, - this->name); + this->inputNames.size(), numberOfPacketsCreated, this->name); return Status(StatusCode::INVALID_NO_OF_INPUTS, "Not all input packets created"); } failedRequestsGuard.disable(); INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, true)); - // we wait idle since some calculators could hold ownership on packet content while nodes further down the graph - // can be still processing those. 
Closing packet sources triggers Calculator::Close() on nodes that do not expect - // new packets auto status = graph.WaitUntilIdle(); - if (!status.ok()) { // Collect error metric after Open() + if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); - // MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR); - // - /* + MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR); for (auto& [outputStreamName, poller] : outputPollers) { size_t receivedOutputs = 0; SPDLOG_DEBUG("Will wait for output stream: {} packet", outputStreamName); @@ -266,21 +301,19 @@ class MediapipeGraphExecutor { } SPDLOG_TRACE("Received all: {} packets for: {}", receivedOutputs, outputStreamName); } - */ - // status = graph.WaitUntilDone(); - status = graph.WaitUntilIdle(); - if (!status.ok()) { // Collect error metric after Process() + status = graph.WaitUntilDone(); + if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); - /* if (outputPollers.size() != outputPollersWithReceivedPacket.size()) { + if (outputPollers.size() != outputPollersWithReceivedPacket.size()) { SPDLOG_DEBUG("Mediapipe failed to execute. 
Failed to receive all output packets"); return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Unknown error during mediapipe execution"); - }*/ - /*timer.stop(PROCESS); + } + timer.stop(PROCESS); double processTime = timer.template elapsed(PROCESS); OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime); - INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext));*/ + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext)); SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name); return StatusCode::OK; } diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 6494514806..316917f788 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -212,7 +212,7 @@ Key: content-type; Value: application/json } JSON Parser: -{"model":"gpt","stream":false,"messages":[]}012345678)"; // we reuse graph so this appends each time +{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only ASSERT_EQ(response, expectedResponse); } @@ -244,7 +244,7 @@ Key: test2; Value: header } JSON Parser: -{"model":"gpt","stream":false,"messages":[]}012345678)"; // we reuse graph so this appends each time +{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only ASSERT_EQ(response, expectedResponse); } @@ -1456,3 +1456,96 @@ TEST_F(HttpOpenAIHandlerParsingTest, responseFormatNullValue) { EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus()); EXPECT_FALSE(apiHandler->getResponseFormat().has_value()); } + +// ==================== HttpOpenAIHandlerWithQueueTest ==================== +// Same as HttpOpenAIHandlerTest but uses config with graph_queue_size=1 +// to verify the graph pool (GraphQueue) path works 
correctly. +class HttpOpenAIHandlerWithQueueTest : public ::testing::Test { +protected: + ovms::Server& server = ovms::Server::instance(); + std::unique_ptr handler; + + std::unique_ptr t; + std::string port = "9173"; + + std::unordered_map headers{{"content-type", "application/json"}}; + ovms::HttpRequestComponents comp; + std::string endpoint = "/v3/chat/completions"; + std::shared_ptr writer; + std::shared_ptr multiPartParser; + std::string response; + ovms::HttpResponseComponents responseComponents; + + void SetUpServer(const char* configPath) { + ::SetUpServer(this->t, this->server, this->port, configPath); + EnsureServerStartedWithTimeout(this->server, 5); + handler = std::make_unique(server, 5); + } + + void SetUp() { + writer = std::make_shared(); + multiPartParser = std::make_shared(); + SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json").c_str()); + ASSERT_EQ(handler->parseRequestComponents(comp, "POST", endpoint, headers), ovms::StatusCode::OK); + } + + void TearDown() { + handler.reset(); + server.setShutdownRequest(1); + t->join(); + server.setShutdownRequest(0); + } +}; + +TEST_F(HttpOpenAIHandlerWithQueueTest, UnaryWithQueue) { + std::string requestBody = R"( + { + "model": "gpt", + "stream": false, + "messages": [] + } + )"; + + const std::string URI = "/v3/something"; + ASSERT_EQ( + handler->dispatchToProcessor(URI, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + + std::string expectedResponse = R"(URI: /v3/something +Key: content-type; Value: application/json +Body: + + { + "model": "gpt", + "stream": false, + "messages": [] + } + +JSON Parser: +{"model":"gpt","stream":false,"messages":[]}012345678)"; + ASSERT_EQ(response, expectedResponse); +} + +TEST_F(HttpOpenAIHandlerWithQueueTest, StreamWithQueue) { + std::string requestBody = R"( + { + "model": "gpt", + "stream": true, + "messages": [] + } + )"; + + 
EXPECT_CALL(*writer, PartialReplyBegin(::testing::_)).WillOnce(testing::Invoke([](std::function fn) { fn(); })); + EXPECT_CALL(*writer, PartialReplyEnd()).Times(1); + // The calculator produces 9 packets (timestamps 0-8) via loopback, + // each containing the accumulated body + timestamp. The '8' in the body stops the loop. + EXPECT_CALL(*writer, PartialReply(::testing::_)).Times(9); + EXPECT_CALL(*writer, IsDisconnected()).Times(9); + + ASSERT_EQ( + handler->dispatchToProcessor("/v3/completions", requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::PARTIAL_END); + + // For streaming, the response body stays empty (content goes through PartialReply callbacks) + ASSERT_EQ(response, ""); +} diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json index 5137dbea92..848729c2e6 100644 --- a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json +++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json @@ -3,7 +3,8 @@ "mediapipe_config_list": [ { "name": "gpt", - "graph_path": "/ovms/src/test/mediapipe/graph_gpt.pbtxt" + "graph_path": "/ovms/src/test/mediapipe/graph_gpt.pbtxt", + "graph_queue_size": -1 } ] } \ No newline at end of file diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp index aa1eee869b..ca1d1d2d91 100644 --- a/src/test/mediapipeflow_test.cpp +++ b/src/test/mediapipeflow_test.cpp @@ -232,9 +232,11 @@ class MediapipeFlowTest : public ::testing::TestWithParam { void SetUp() override { } void TearDown() { - server.setShutdownRequest(1); - t->join(); - server.setShutdownRequest(0); + if (t) { + server.setShutdownRequest(1); + t->join(); + server.setShutdownRequest(0); + } } }; @@ -4067,3 +4069,118 @@ TEST(WhitelistRegistered, MediapipeSubgraphList) { ASSERT_THAT(mediapipe::SubgraphRegistry::GetRegisteredNames(), UnorderedElementsAreArray(expected)) << 
readableSetError(mediapipe::SubgraphRegistry::GetRegisteredNames(), expected); } + +// --- OVMS_GRAPH_QUEUE_SIZE pbtxt directive tests --- + +// Minimal valid pbtxt that MediaPipe can parse (uses a registered test calculator) +static const char* MINIMAL_PBTXT_TEMPLATE = R"( +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" +node: { + calculator: "OpenAIChatCompletionsMockCalculator" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} +)"; + +static std::string makePbtxtWithDirective(const std::string& directive) { + return directive + "\n" + MINIMAL_PBTXT_TEMPLATE; +} + +TEST(MediapipeGraphQueueSizeDirective, NoDirectiveMeansDisabled) { + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, MINIMAL_PBTXT_TEMPLATE); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_FALSE(mgc.getGraphQueueSize().has_value()); + // getInitialQueueSize on default mgc returns -1 + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), -1); +} + +TEST(MediapipeGraphQueueSizeDirective, ExplicitPositiveValue) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: 4"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), 4); +} + +TEST(MediapipeGraphQueueSizeDirective, DisabledExplicitly) { + std::string pbtxt = makePbtxtWithDirective("# 
OVMS_GRAPH_QUEUE_SIZE: -1"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), -1); +} + +TEST(MediapipeGraphQueueSizeDirective, AutoValue) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: AUTO"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_GT(def.getMediapipeGraphConfig().getInitialQueueSize(), 0); +} + +TEST(MediapipeGraphQueueSizeDirective, ZeroRejected) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: 0"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, NegativeBelowMinusOneRejected) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: -2"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, ExceedsHardwareThreads) { + unsigned int maxThreads = std::thread::hardware_concurrency(); + if (maxThreads == 0) { + GTEST_SKIP() << "hardware_concurrency() returned 0, cannot test thread limit"; + } + int oversized = static_cast(maxThreads) + 1; + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: " + std::to_string(oversized)); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto 
status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, InvalidStringRejected) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: INVALID"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp index 1a2e6d2b78..c50969717e 100644 --- a/src/test/streaming_test.cpp +++ b/src/test/streaming_test.cpp @@ -355,14 +355,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::KFS_REQUEST}}, {{"out", mediapipe_packet_type_enum::KFS_RESPONSE}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}); @@ -415,14 +412,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 
requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}); // no timestamp specified, server will assign one @@ -561,14 +555,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 requests with manually (client) assigned ascending order of timestamp and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}, 3); // first request with timestamp 3 @@ -609,14 +600,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock only 1 request and disconnect immediately prepareRequest(this->firstRequest, {{"in", 3.5f}}); @@ -1232,9 +1220,6 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in1", 
mediapipe_packet_type_enum::OVTENSOR}, @@ -1245,7 +1230,7 @@ node { {"out3", mediapipe_packet_type_enum::OVTENSOR}}, {"in1", "in2", "in3"}, {"out1", "out2", "out3"}, -{}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1287,9 +1272,6 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in1", mediapipe_packet_type_enum::OVTENSOR}, @@ -1300,7 +1282,7 @@ node { {"out3", mediapipe_packet_type_enum::OVTENSOR}}, {"in1", "in2", "in3"}, {"out1", "out2", "out3"}, -{}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1331,14 +1313,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1368,14 +1347,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = 
std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"wrong_name"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; // cannot install observer due to wrong output name (should never happen due to validation) +{"in"}, {"wrong_name"}, {}, nullptr, this->reporter.get()}; // cannot install observer due to wrong output name (should never happen due to validation) EXPECT_CALL(this->stream, Read(_)).Times(0); EXPECT_CALL(this->stream, Write(_, _)).Times(0); @@ -1396,14 +1372,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {}); EXPECT_CALL(this->stream, Read(_)) @@ -1427,14 +1400,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future 
signalFuture = signalPromise.get_future(); @@ -1466,14 +1436,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); @@ -1492,14 +1459,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Invalid request - missing data in buffer prepareInvalidRequest(this->firstRequest, {"in"}); // no timestamp specified, server will assign one @@ -1530,14 +1494,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", 
mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise[3]; std::future signalFuture[3] = { @@ -1580,14 +1541,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}, 0); EXPECT_CALL(this->stream, Read(_)) @@ -1611,14 +1569,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); setRequestTimestamp(this->firstRequest, std::string("not an int")); @@ -1649,14 +1604,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, 
this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Timestamps not allowed in stream // Expect continuity of operation and response with error message @@ -1694,14 +1646,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Allowed in stream for (auto timestamp : std::vector<::mediapipe::Timestamp>{ @@ -1733,14 +1682,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 requests and disconnection prepareRequestWithParam(this->firstRequest, {{"in", 3.5f}}, {"val", 65}); // request with parameter val @@ -1773,14 +1719,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - 
std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving the invalid request and disconnection // Request with invalid param py (special pythons session side packet) @@ -1805,14 +1748,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); // missing required request param EXPECT_CALL(this->stream, Read(_)).Times(0); @@ -1834,14 +1774,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 2 requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}, std::nullopt, this->name, 
this->version); // no timestamp specified, server will assign one @@ -1871,14 +1808,11 @@ node { ::mediapipe::CalculatorGraphConfig config; ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); - auto sidePackets = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); - GraphIdGuard guard(queue); MediapipeGraphExecutor executor{ this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)}; +{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); diff --git a/src/test/test_utils.hpp b/src/test/test_utils.hpp index 65e72b543c..d9e256621b 100644 --- a/src/test/test_utils.hpp +++ b/src/test/test_utils.hpp @@ -846,13 +846,12 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition { ovms::PythonBackend* pythonBackend = nullptr) : ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, pythonBackend) { this->inputConfig = inputConfig; - this->mgconfig.setGraphQueueSize(-1); // TODO FIXME @atobisze } // Do not read from path - use predefined config contents ovms::Status validateForConfigFileExistence() override { this->chosenConfig = this->inputConfig; - return ovms::StatusCode::OK; + return parseGraphQueueSizeDirective(); } }; #endif From 3fb09d9c9ff59ac41e9525e0c3dc9dce0d38afa7 Mon Sep 17 00:00:00 2001 From: Adrian Tobiszewski Date: Thu, 19 Feb 2026 16:37:49 +0100 Subject: [PATCH 4/8] Checkpoint --- common_settings.bzl | 2 - src/mediapipe_internal/graphqueue.cpp | 38 +++++-------------- src/mediapipe_internal/graphqueue.hpp | 4 -- .../mediapipegraphdefinition.cpp | 12 +----- .../mediapipegraphexecutor.hpp | 12 +----- .../outputstreamobserver.hpp | 9 ++--- 6 files changed, 15 insertions(+), 62 deletions(-) diff --git a/common_settings.bzl 
b/common_settings.bzl index 2a995d59c5..c5bc6ddcc6 100644 --- a/common_settings.bzl +++ b/common_settings.bzl @@ -209,8 +209,6 @@ COMMON_STATIC_TEST_COPTS = select({ "-Wall", "-Wno-unknown-pragmas", "-Werror", - # ov::Tensor::data method call results in deprecated warning and we use it in multiple places - "-Wno-deprecated-declarations", "-Isrc", "-fconcepts", # for gmock related utils "-fvisibility=hidden",# Needed for pybind targets diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp index a3e96febb8..ea9a2680f4 100644 --- a/src/mediapipe_internal/graphqueue.cpp +++ b/src/mediapipe_internal/graphqueue.cpp @@ -40,7 +40,6 @@ namespace ovms { GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr sidePacketMaps, int streamsLength) : Queue(streamsLength), sidePacketMaps(sidePacketMaps) { - SPDLOG_ERROR("ER Constr graph queue:{}", (void*)this); inferRequests.reserve(streamsLength); // TODO FIXME split constructor to init to handle retCodes? for (auto i = 0; i < streamsLength; ++i) { @@ -50,17 +49,17 @@ GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::sh auto absStatus = gh->graph->Initialize(config); if (!absStatus.ok()) { - SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); - throw 42; + SPDLOG_ERROR("Graph queue initialization failed: {}", absStatus.ToString()); + throw std::runtime_error(absStatus.ToString()); } for (auto& name : config.output_stream()) { std::string streamName = getStreamName(name); gh->outStreamObservers[streamName] = std::shared_ptr(new NullOutputStreamObserver()); // TODO use at() FIXME auto& perGraphObserverFunctor = gh->outStreamObservers[streamName]; - absStatus = gh->graph->ObserveOutputStream(streamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); }); // TODO FIXME throw? 
+ absStatus = gh->graph->ObserveOutputStream(streamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); }); if (!absStatus.ok()) { - SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); - throw 42; + SPDLOG_ERROR("Graph queue ObserveOutputStream failed: {}", absStatus.ToString()); + throw std::runtime_error(absStatus.ToString()); } } std::map inputSidePackets; @@ -73,47 +72,30 @@ GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::sh inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); - SPDLOG_ERROR("ER"); absStatus = gh->graph->StartRun(inputSidePackets); - SPDLOG_ERROR("ER"); if (!absStatus.ok()) { - SPDLOG_ERROR("Input sidePackets size:{}", inputSidePackets.size()); - SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); - throw 42; + SPDLOG_ERROR("Graph queue StartRun failed: {}", absStatus.ToString()); + throw std::runtime_error(absStatus.ToString()); } - - SPDLOG_ERROR("ER"); inferRequests.emplace_back(std::move(gh)); - SPDLOG_ERROR("ER"); } } GraphQueue::~GraphQueue() { - SPDLOG_ERROR("ER Destroy graph queue:{}", (void*)this); for (auto& graphHelper : inferRequests) { auto absStatus = graphHelper->graph->WaitUntilIdle(); if (!absStatus.ok()) { - SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); - // throw 42.2; + SPDLOG_DEBUG("Graph queue WaitUntilIdle error: {}", absStatus.ToString()); } absStatus = graphHelper->graph->CloseAllPacketSources(); if (!absStatus.ok()) { - SPDLOG_ERROR("ER 
issue:{} {}", absStatus.ToString(), (void*)this); - // throw "as"; + SPDLOG_DEBUG("Graph queue CloseAllPacketSources error: {}", absStatus.ToString()); } absStatus = graphHelper->graph->WaitUntilDone(); if (!absStatus.ok()) { - SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); - // throw 42.2; + SPDLOG_DEBUG("Graph queue WaitUntilDone error: {}", absStatus.ToString()); } graphHelper->graph->Cancel(); - if (!absStatus.ok()) { - SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this); - // throw 42.2; - } - SPDLOG_ERROR("ER"); graphHelper->graph.reset(); - SPDLOG_ERROR("ER"); } - SPDLOG_ERROR("ER Destroy graph queue:{}", (void*)this); } } // namespace ovms diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp index 7c4d89b33f..a570557211 100644 --- a/src/mediapipe_internal/graphqueue.hpp +++ b/src/mediapipe_internal/graphqueue.hpp @@ -73,17 +73,13 @@ struct GraphIdGuard { id(queue->getIdleStream().get()), gh((queue->getInferRequest(id))), graph(*gh->graph) { - SPDLOG_ERROR("ER Guard construct this:{}", (void*)this); } GraphIdGuard(GraphIdGuard&&) = default; GraphIdGuard(const GraphIdGuard&) = delete; ~GraphIdGuard() { auto existingQueue = weakQueue.lock(); - SPDLOG_ERROR("ER DEstroy Guard begin qu:{}", (void*)existingQueue.get()); if (existingQueue) existingQueue->returnStream(this->id); - SPDLOG_ERROR("ER Destroy Guard end qu:{}", (void*)existingQueue.get()); - SPDLOG_ERROR("ER Guard destroy this:{}", (void*)this); } }; } // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index 6b2161ca58..8cb3443f48 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -194,14 +194,12 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) { if (!validationResult.ok()) { return validationResult; } - SPDLOG_ERROR("ER"); std::unique_lock 
lock(metadataMtx); auto status = createInputsInfo(); if (!status.ok()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create inputs info for mediapipe graph definition: {}", getName()); return status; } - SPDLOG_ERROR("ER"); status = createOutputsInfo(); if (!status.ok()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create outputs info for mediapipe graph definition: {}", getName()); @@ -261,13 +259,6 @@ MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name, reporter(std::make_unique(metricConfig, registry, name)) { mgconfig = config; passKfsRequestFlag = false; - SPDLOG_ERROR("XXX ER new PythonNodeResourcesMap:{}", (void*)&this->sidePacketMaps->pythonNodeResourcesMap); - SPDLOG_ERROR("XXX ER new genAiServableMap:{}", (void*)&this->sidePacketMaps->genAiServableMap); - /*if (!sharedThreadPool) { - SPDLOG_ERROR("Created shared Thread Pool XXX"); - //sharedThreadPool = std::make_shared(std::thread::hardware_concurrency()); // TODO FIXME should be in MP factory - }*/ - // TODO FIXME illegal constructor as we do not create queue here } Status MediapipeGraphDefinition::createInputsInfo() { @@ -343,6 +334,7 @@ Status MediapipeGraphDefinition::create(std::unique_ptr& *this->sidePacketMaps, this->pythonBackend, this->reporter.get()); } + SPDLOG_DEBUG("Created Mediapipe graph executor: {}", getName()); return status; } @@ -417,9 +409,7 @@ Status MediapipeGraphDefinition::reload(ModelManager& manager, const MediapipeGr } this->mgconfig = config; this->queue.reset(); - SPDLOG_ERROR("XXX ER cleared queue"); this->sidePacketMaps = std::make_shared(); - SPDLOG_ERROR("XXX ER cleared sidePacketMaps"); return validate(manager); } diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index a73f246e66..54996fddca 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -87,12 +87,9 @@ struct MyFunctor : public 
OutputStreamObserverI { outputStreamName(outputStreamName), packetType(packetType), response(response) { - SPDLOG_ERROR("ER MyFunctor:{} observer constructed:{}", outputStreamName, (void*)this); } absl::Status handlePacket(const ::mediapipe::Packet& packet) override; - ~MyFunctor() { - SPDLOG_ERROR("ER Destroy Functor:{} this:{}", outputStreamName, (void*)this); - } + ~MyFunctor() = default; }; class MediapipeGraphExecutor { public: @@ -201,12 +198,6 @@ class MediapipeGraphExecutor { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); - - status = graph.WaitUntilIdle(); - if (!status.ok()) { - INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); - } - MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name); return StatusCode::OK; } @@ -472,7 +463,6 @@ class MediapipeGraphExecutor { template absl::Status MyFunctor::handlePacket(const ::mediapipe::Packet& packet) { - SPDLOG_ERROR("ER my functor:{}", (void*)this); auto status = onPacketReadySerializeImpl( this->requestId, this->exec.name, diff --git a/src/mediapipe_internal/outputstreamobserver.hpp b/src/mediapipe_internal/outputstreamobserver.hpp index e08b99fc43..1a314e73ae 100644 --- a/src/mediapipe_internal/outputstreamobserver.hpp +++ b/src/mediapipe_internal/outputstreamobserver.hpp @@ -51,13 +51,10 @@ class OutputStreamObserverI { }; class NullOutputStreamObserver : public OutputStreamObserverI { public: - NullOutputStreamObserver() { - SPDLOG_ERROR("NUll observer constructed:{}", (void*)this); - } + NullOutputStreamObserver() = default; absl::Status handlePacket(const ::mediapipe::Packet& packet) override { - SPDLOG_ERROR("Internal error occured:{}", (void*)this); - throw std::runtime_error("Should not 
happen"); - return absl::Status(absl::StatusCode::kInternal, "Should not happen"); + SPDLOG_ERROR("NullOutputStreamObserver::handlePacket called - graph observer was not replaced before execution"); + throw std::runtime_error("NullOutputStreamObserver should have been replaced before graph execution"); } }; } // namespace ovms From 0ae35e18c032c7b8480564e4e7aeb29178634c26 Mon Sep 17 00:00:00 2001 From: Adrian Tobiszewski Date: Fri, 20 Feb 2026 08:06:06 +0100 Subject: [PATCH 5/8] Streaming with queue --- .../http_graph_executor_impl.cpp | 4 + .../http_graph_executor_impl.hpp | 3 + src/kfs_frontend/kfs_graph_executor_impl.cpp | 10 + src/kfs_frontend/kfs_graph_executor_impl.hpp | 4 + .../mediapipegraphdefinition.cpp | 21 +- .../mediapipegraphdefinition.hpp | 2 +- .../mediapipegraphexecutor.cpp | 29 --- .../mediapipegraphexecutor.hpp | 188 ++++++++++++++-- src/test/ensemble_config_change_stress.cpp | 89 +++++++- ...ediapipe_openai_chat_completions_mock.json | 5 +- ...enai_chat_completions_mock_with_queue.json | 9 + src/test/mediapipe/graph_gpt_with_queue.pbtxt | 40 ++++ ...eue_dummyadapterfull_dummyinputnames.pbtxt | 46 ++++ ...yadapterfull_dummyinputnames_newpath.pbtxt | 46 ++++ ...yadapterfull_dummyinputnames_newpath.pbtxt | 45 ++++ src/test/mediapipeflow_test.cpp | 17 +- src/test/pythonnode_test.cpp | 9 +- src/test/streaming_test.cpp | 207 ++++++++++++++++++ src/test/stress_test_utils.hpp | 144 +++++++++++- 19 files changed, 848 insertions(+), 70 deletions(-) create mode 100644 src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json create mode 100644 src/test/mediapipe/graph_gpt_with_queue.pbtxt create mode 100644 src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt create mode 100644 src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt create mode 100644 src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt diff --git a/src/http_frontend/http_graph_executor_impl.cpp 
b/src/http_frontend/http_graph_executor_impl.cpp index b970f62594..4848f3760a 100644 --- a/src/http_frontend/http_graph_executor_impl.cpp +++ b/src/http_frontend/http_graph_executor_impl.cpp @@ -38,6 +38,10 @@ namespace ovms { static const std::string UNUSED_REQUEST_ID = ""; +bool requestHasInputSidePackets(const HttpPayload& request) { + return false; +} + Status deserializeInputSidePacketsFromFirstRequestImpl( std::map& inputSidePackets, // out const HttpPayload& request) { // in diff --git a/src/http_frontend/http_graph_executor_impl.hpp b/src/http_frontend/http_graph_executor_impl.hpp index 9846b10158..205d428a1b 100644 --- a/src/http_frontend/http_graph_executor_impl.hpp +++ b/src/http_frontend/http_graph_executor_impl.hpp @@ -48,6 +48,9 @@ class PythonBackend; using HttpReaderWriter = HttpAsyncWriter; +// Checks whether the request contains user-provided input side packets. +bool requestHasInputSidePackets(const HttpPayload& request); + // Deserialization of parameters inside KServe gRPC request // into mediapipe Packets. // To be used by both - infer & inferStream. 
diff --git a/src/kfs_frontend/kfs_graph_executor_impl.cpp b/src/kfs_frontend/kfs_graph_executor_impl.cpp index 2751a49e94..2a2d0ff3b8 100644 --- a/src/kfs_frontend/kfs_graph_executor_impl.cpp +++ b/src/kfs_frontend/kfs_graph_executor_impl.cpp @@ -1156,6 +1156,16 @@ Status createAndPushPacketsImpl( return StatusCode::OK; } +bool requestHasInputSidePackets(const KFSRequest& request) { + static const std::string TIMESTAMP_PARAM{"OVMS_MP_TIMESTAMP"}; + for (const auto& [name, valueChoice] : request.parameters()) { + if (name != TIMESTAMP_PARAM) { + return true; + } + } + return false; +} + Status deserializeInputSidePacketsFromFirstRequestImpl( std::map& inputSidePackets, const KFSRequest& request) { diff --git a/src/kfs_frontend/kfs_graph_executor_impl.hpp b/src/kfs_frontend/kfs_graph_executor_impl.hpp index cfa65b6a57..1c6e697455 100644 --- a/src/kfs_frontend/kfs_graph_executor_impl.hpp +++ b/src/kfs_frontend/kfs_graph_executor_impl.hpp @@ -36,6 +36,10 @@ namespace ovms { class PythonBackend; class Status; +// Checks whether the request contains user-provided input side packets +// (parameters other than the reserved OVMS_MP_TIMESTAMP). +bool requestHasInputSidePackets(const KFSRequest& request); + // Deserialization of parameters inside KServe gRPC request // into mediapipe Packets. // To be used by both - infer & inferStream. 
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index 8cb3443f48..38533093b7 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -225,7 +225,10 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) { if (!status.ok()) { return status; } - this->initializeQueueIfRequired(); + status = this->initializeQueueIfRequired(); + if (!status.ok()) { + return status; + } lock.unlock(); notifier.passed = true; @@ -236,15 +239,23 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) { return StatusCode::OK; } -void MediapipeGraphDefinition::initializeQueueIfRequired() { - // TODO FIXME @atobisze +Status MediapipeGraphDefinition::initializeQueueIfRequired() { int initialQueueSize = this->mgconfig.getInitialQueueSize(); if (initialQueueSize < 0) { SPDLOG_DEBUG("Graph queue creation disabled for mediapipe: {} (graph_queue_size={})", getName(), initialQueueSize); - return; + return StatusCode::OK; + } + try { + this->queue = std::make_shared(this->config, this->sidePacketMaps, initialQueueSize); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create graph queue for mediapipe: {} error: {}", getName(), e.what()); + return StatusCode::INTERNAL_ERROR; + } catch (...) 
{ + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create graph queue for mediapipe: {} unknown error", getName()); + return StatusCode::INTERNAL_ERROR; } - this->queue = std::make_shared(this->config, this->sidePacketMaps, initialQueueSize); SPDLOG_DEBUG("Created graph queue with size {} for mediapipe: {}", initialQueueSize, getName()); + return StatusCode::OK; } MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name, diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp index 5f03ff2ba5..808d0eb531 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.hpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp @@ -128,7 +128,7 @@ class MediapipeGraphDefinition { Status setStreamTypes(); Status dryInitializeTest(); - void initializeQueueIfRequired(); + Status initializeQueueIfRequired(); std::string chosenConfig; static MediapipeGraphConfig MGC; const std::string name; diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp index 5c59d00235..601a164f61 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.cpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp @@ -37,35 +37,6 @@ namespace ovms { -MediapipeGraphExecutor::MediapipeGraphExecutor( - const std::string& name, - const std::string& version, - const ::mediapipe::CalculatorGraphConfig& config, - stream_types_mapping_t inputTypes, - stream_types_mapping_t outputTypes, - std::vector inputNames, - std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - const GenAiServableMap& llmNodeResourcesMap, - const EmbeddingsServableMap& embeddingsServableMap, - const RerankServableMap& rerankServableMap, - const SttServableMap& sttServableMap, - const TtsServableMap& ttsServableMap, - PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter, - GraphIdGuard&& guard) : - name(name), - 
version(version), - config(config), - inputTypes(std::move(inputTypes)), - outputTypes(std::move(outputTypes)), - inputNames(std::move(inputNames)), - outputNames(std::move(outputNames)), - sidePacketMaps({pythonNodeResourcesMap, llmNodeResourcesMap, {}, embeddingsServableMap, rerankServableMap, sttServableMap, ttsServableMap}), - pythonBackend(pythonBackend), - currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), - mediapipeServableMetricReporter(mediapipeServableMetricReporter), - guard(std::move(guard)) {} MediapipeGraphExecutor::MediapipeGraphExecutor( const std::string& name, const std::string& version, diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index 54996fddca..391d1849fb 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -91,6 +91,33 @@ struct MyFunctor : public OutputStreamObserverI { absl::Status handlePacket(const ::mediapipe::Packet& packet) override; ~MyFunctor() = default; }; + +template +struct StreamingFunctor : public OutputStreamObserverI { + ReaderWriterType& serverReaderWriter; + std::mutex& sendMutex; + const std::string& executorName; + const std::string& executorVersion; + const std::string outputStreamName; + mediapipe_packet_type_enum packetType; + ExecutionContext executionContext; + MediapipeServableMetricReporter* metricReporter; + StreamingFunctor(const std::string& outputStreamName, mediapipe_packet_type_enum packetType, + const std::string& executorName, const std::string& executorVersion, + ReaderWriterType& serverReaderWriter, std::mutex& sendMutex, + ExecutionContext executionContext, MediapipeServableMetricReporter* metricReporter) : + serverReaderWriter(serverReaderWriter), + sendMutex(sendMutex), + executorName(executorName), + executorVersion(executorVersion), + outputStreamName(outputStreamName), + packetType(packetType), + executionContext(executionContext), + 
metricReporter(metricReporter) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override; + ~StreamingFunctor() = default; +}; class MediapipeGraphExecutor { public: const std::string name; @@ -112,21 +139,6 @@ class MediapipeGraphExecutor { std::optional guard; public: - - [[deprecated("Use constructor with side packets instead")]] - MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, - stream_types_mapping_t inputTypes, - stream_types_mapping_t outputTypes, - std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - const GenAiServableMap& llmNodeResourcesMap, - const EmbeddingsServableMap& embeddingsServableMap, - const RerankServableMap& rerankServableMap, - const SttServableMap& sttServableMap, - const TtsServableMap& ttsServableMap, - PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter, - GraphIdGuard&& guard); MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, @@ -312,6 +324,123 @@ class MediapipeGraphExecutor { template Status inferStream(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { OVMS_PROFILE_FUNCTION(); + if (this->guard.has_value()) { + return inferStreamWithQueue(req, serverReaderWriter, executionContext); + } else { + return inferStreamWithoutQueue(req, serverReaderWriter, executionContext); + } + } + + template + Status inferStreamWithQueue(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { + SPDLOG_DEBUG("Start streaming mediapipe graph: {} execution (queue path)", this->name); + std::mutex sendMutex; + try { + // Graph queue does not support user-provided input side packets. + // Side packets are set at queue construction time. 
+ if (requestHasInputSidePackets(req)) { + SPDLOG_DEBUG("Graph queue does not support user-provided input side packets. " + "Side packets are set at graph queue construction time. Graph: {}", this->name); + return Status(StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR, + "Input side packets are not supported for graphs with queue enabled"); + } + MetricGaugeGuard currentGraphs(this->mediapipeServableMetricReporter->currentGraphs.get()); + ::mediapipe::CalculatorGraph& graph = this->guard->graph; + + enum : unsigned int { + PROCESS, + TIMER_END2 + }; + Timer timer; + timer.start(PROCESS); + + // Swap output stream observers to streaming functors. + // Observers are already installed on the graph at queue construction time; + // we only replace the functor implementation to serialize+send to the client. + // Lifetime: the functors hold references to sendMutex (a local of this method) and serverReaderWriter (a reference parameter). + // Both stay valid for callbacks on the normal path because we WaitUntilIdle() before returning; the catch (...) path returns without waiting. + for (const auto& outputName : this->outputNames) { + if (outputName.empty()) { + SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", outputName); + return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR; + } + guard->gh->outStreamObservers[outputName] = std::make_shared>( + outputName, this->outputTypes.at(outputName), + this->name, this->version, + serverReaderWriter, sendMutex, + executionContext, this->mediapipeServableMetricReporter); + } + + size_t numberOfPacketsCreated = 0; + { + OVMS_PROFILE_SCOPE("Mediapipe graph deserializing first request"); + bool isSuccess = true; + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE( + createAndPushPacketsImpl( + std::shared_ptr(&req, + [](const RequestType*) {}), + this->inputTypes, + this->pythonBackend, + graph, + this->guard->gh->currentTimestamp, + numberOfPacketsCreated), + "partial deserialization of first request", isSuccess); + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, isSuccess)); + } + + // Read 
loop + auto newReq = std::make_shared(); + while (waitForNewRequest(serverReaderWriter, *newReq)) { + auto pstatus = validateSubsequentRequestImpl( + *newReq, + this->name, + this->version, + this->inputTypes); + bool isSuccess = true; + if (pstatus.ok()) { + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE( + createAndPushPacketsImpl( + newReq, + this->inputTypes, + this->pythonBackend, + graph, + this->guard->gh->currentTimestamp, + numberOfPacketsCreated), + "partial deserialization of subsequent requests", isSuccess); + } else { + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE(std::move(pstatus), "validate subsequent requests", isSuccess); + } + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, isSuccess)); + + if (graph.HasError()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + SPDLOG_DEBUG("Graph {}: encountered an error, stopping the execution", this->name); + break; + } + + newReq = std::make_shared(); + } + + // Do NOT CloseAllPacketSources or WaitUntilDone - graph stays alive for reuse + auto status = graph.WaitUntilIdle(); + if (!status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + } + MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); + SPDLOG_DEBUG("Graph {}: Done streaming execution (queue path)", this->name); + + timer.stop(PROCESS); + double processTime = timer.template elapsed(PROCESS); + OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime); + return StatusCode::OK; + } catch (...) 
{ + SPDLOG_DEBUG("Graph {}: Exception while processing MediaPipe graph (queue path)", this->name); + return Status(StatusCode::UNKNOWN_ERROR, "Exception while processing MediaPipe graph"); + } + } + + template + Status inferStreamWithoutQueue(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { SPDLOG_DEBUG("Start MediapipeGraphExecutor::inferEx mediapipe graph: {} execution", this->name); std::mutex sendMutex; try { @@ -472,6 +601,33 @@ absl::Status MyFunctor::handlePacket(const ::mediapip packet, response); return status.ok() ? absl::OkStatus() : absl::Status(absl::StatusCode::kInternal, "Some error"); - ; +} + +template +absl::Status StreamingFunctor::handlePacket(const ::mediapipe::Packet& packet) { + OVMS_PROFILE_SCOPE("Mediapipe Packet Ready Callback"); + try { + std::lock_guard lock(sendMutex); + auto status = onPacketReadySerializeAndSendImpl( + "" /*no ids for streaming*/, + executorName, + executorVersion, + outputStreamName, + packetType, + packet, + serverReaderWriter); + if (!status.ok()) { + SPDLOG_DEBUG("error in send packet routine {}", status.string()); + return absl::Status(absl::StatusCode::kInternal, "error in send packet routine"); + } + auto now = std::chrono::system_clock::now(); + auto currentTimestamp = ::mediapipe::Timestamp(std::chrono::duration_cast(now.time_since_epoch()).count()); + OBSERVE_IF_ENABLED(metricReporter->getRequestLatencyMetric(executionContext), (currentTimestamp - packet.Timestamp()).Microseconds()); + INCREMENT_IF_ENABLED(metricReporter->getResponsesMetric(executionContext)); + return absl::OkStatus(); + } catch (...) 
{ + SPDLOG_DEBUG("Error occurred during packet serialization in mediapipe graph: {}", executorName); + return absl::Status(absl::StatusCode::kCancelled, "error in serialization"); + } } } // namespace ovms diff --git a/src/test/ensemble_config_change_stress.cpp b/src/test/ensemble_config_change_stress.cpp index 7fa5a70d31..6ebaeb0e18 100644 --- a/src/test/ensemble_config_change_stress.cpp +++ b/src/test/ensemble_config_change_stress.cpp @@ -813,7 +813,8 @@ TEST_F(StressMediapipeChanges, ReloadMediapipeGraphDuringMetadataLoad) { SetUpConfig(basicMediapipeConfig); bool performWholeConfigReload = true; std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuity of operation - std::set allowedLoadResults = {}; + // Graph path change triggers real reload, briefly entering NOT_LOADED_YET state + std::set allowedLoadResults = {StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET}; performStressTest( &ConfigChangeStressTest::triggerKFSGetPipelineMetadataInALoop, &ConfigChangeStressTest::reloadMediapipeGraph, @@ -821,4 +822,90 @@ TEST_F(StressMediapipeChanges, ReloadMediapipeGraphDuringMetadataLoad) { requiredLoadResults, allowedLoadResults); } + +class StressMediapipeQueueChanges : public StressPipelineConfigChanges { + const std::string modelName = PIPELINE_1_DUMMY_NAME; + const std::string modelInputName = "b"; + const std::string modelOutputName = "a"; + +public: + std::string getServableName() override { + return modelName; + } + void SetUp() override { + SetUpCAPIServerInstance(createStressTestPipelineOneDummyConfig()); + } +}; +TEST_F(StressMediapipeQueueChanges, AddGraphDuringPredictLoad) { + // we add another graph definition during load (queue-enabled graph) + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuity of operation + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + 
&ConfigChangeStressTest::addNewMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, RemoveGraphDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK, + StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE}; + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::removeMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, RemoveModelDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + // With queue path, pre-initialized graphs may keep working with cached sessions + // even after model removal, so MEDIAPIPE_PRECONDITION_FAILED may not occur + std::set requiredLoadResults = { + StatusCode::OK, + }; + std::set allowedLoadResults = { + StatusCode::MEDIAPIPE_EXECUTION_ERROR, + StatusCode::MEDIAPIPE_GRAPH_ADD_PACKET_INPUT_STREAM, + StatusCode::MEDIAPIPE_PRECONDITION_FAILED, + }; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::removeMediapipeQueueGraphUsedModel, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, ReloadModelDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::reloadMediapipeQueueGraphUsedModel, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, ReloadMediapipeGraphDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + 
std::set requiredLoadResults = {StatusCode::OK}; + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::reloadMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +// Status and metadata tests are not duplicated for queue fixture because +// neither status nor metadata operations exercise the graph queue path. #endif diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json index 848729c2e6..d2803b795f 100644 --- a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json +++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json @@ -3,8 +3,7 @@ "mediapipe_config_list": [ { "name": "gpt", - "graph_path": "/ovms/src/test/mediapipe/graph_gpt.pbtxt", - "graph_queue_size": -1 + "graph_path": "/ovms/src/test/mediapipe/graph_gpt.pbtxt" } ] -} \ No newline at end of file +} diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json new file mode 100644 index 0000000000..ea25079556 --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json @@ -0,0 +1,9 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name": "gpt", + "graph_path": "/ovms/src/test/mediapipe/graph_gpt_with_queue.pbtxt" + } + ] +} diff --git a/src/test/mediapipe/graph_gpt_with_queue.pbtxt b/src/test/mediapipe/graph_gpt_with_queue.pbtxt new file mode 100644 index 0000000000..43c2ef68c1 --- /dev/null +++ b/src/test/mediapipe/graph_gpt_with_queue.pbtxt @@ -0,0 +1,40 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# OVMS_GRAPH_QUEUE_SIZE: 1 +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" + +node: { + calculator: "OpenAIChatCompletionsMockCalculator" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} diff --git a/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt new file mode 100644 index 0000000000..2a5016a7fb --- /dev/null +++ b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt @@ -0,0 +1,46 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# OVMS_GRAPH_QUEUE_SIZE: 16 +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt new file mode 100644 index 0000000000..2a5016a7fb --- /dev/null +++ b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt @@ -0,0 +1,46 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# OVMS_GRAPH_QUEUE_SIZE: 16 +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt new file mode 100644 index 0000000000..01521b1c08 --- /dev/null +++ b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt @@ -0,0 +1,45 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp index ca1d1d2d91..f96cf584b2 100644 --- a/src/test/mediapipeflow_test.cpp +++ b/src/test/mediapipeflow_test.cpp @@ -2707,14 +2707,16 @@ class MediapipeSerialization : public ::testing::Test { stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const std::shared_ptr& pythonNodeResourcesMap, - const std::shared_ptr& gasm, + const GraphSidePackets& sidePackets, MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) : - MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, *pythonNodeResourcesMap, *gasm, {}, {}, {}, {}, nullptr, mediapipeServableMetricReporter, std::move(guard)) {} + MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, + sidePackets, + nullptr, mediapipeServableMetricReporter, std::move(guard)) {} }; protected: std::unique_ptr reporter; + std::shared_ptr sidePackets; std::shared_ptr queue; std::unique_ptr executor; ::inference::ModelInferResponse mp_response; @@ -2729,13 +2731,10 @@ class MediapipeSerialization : public ::testing::Test { const std::vector outputNames; const 
::mediapipe::CalculatorGraphConfig config; this->reporter = std::make_unique(nullptr, nullptr, ""); // disabled reporter - auto sidePackets = std::make_shared(); - std::shared_ptr pnsm = std::make_shared(); - std::shared_ptr gasm = std::make_shared(); - std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + sidePackets = std::make_shared(); + queue = std::make_shared(config, sidePackets, 1); GraphIdGuard guard(queue); - executor = std::make_unique("", "", config, mapping, mapping, inputNames, outputNames, pnsm, gasm, this->reporter.get(), std::move(guard)); - SPDLOG_ERROR("Exit SetUp"); + executor = std::make_unique("", "", config, mapping, mapping, inputNames, outputNames, *sidePackets, this->reporter.get(), std::move(guard)); } }; diff --git a/src/test/pythonnode_test.cpp b/src/test/pythonnode_test.cpp index 7e2595a58f..6f9dc6bfa8 100644 --- a/src/test/pythonnode_test.cpp +++ b/src/test/pythonnode_test.cpp @@ -1002,10 +1002,12 @@ class MockedMediapipeGraphExecutorPy : public ovms::MediapipeGraphExecutor { stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const std::shared_ptr& pythonNodeResourcesMap, + const GraphSidePackets& sidePackets, PythonBackend* pythonBackend, MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) : - MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, *pythonNodeResourcesMap, {}, {}, {}, {}, {}, pythonBackend, mediapipeServableMetricReporter, std::move(guard)) {} + MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, + sidePackets, + pythonBackend, mediapipeServableMetricReporter, std::move(guard)) {} }; TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) { @@ -1015,10 +1017,9 @@ TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) { const std::vector outputNames; const ::mediapipe::CalculatorGraphConfig config; 
auto sidePackets = std::make_shared(); - std::shared_ptr pnsm = std::make_shared(); std::shared_ptr queue = std::make_shared(config, sidePackets, 1); GraphIdGuard guard(queue); - auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, pnsm, getPythonBackend(), this->reporter.get(), std::move(guard)); + auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, *sidePackets, getPythonBackend(), this->reporter.get(), std::move(guard)); std::string datatype = "FP32"; std::string name = "python_result"; diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp index c50969717e..997d2048ef 100644 --- a/src/test/streaming_test.cpp +++ b/src/test/streaming_test.cpp @@ -70,6 +70,35 @@ class StreamingTest : public Test { } }; +class StreamingQueueTest : public StreamingTest { +protected: + std::shared_ptr queue; + + MediapipeGraphExecutor createQueueExecutor( + const ::mediapipe::CalculatorGraphConfig& config, + stream_types_mapping_t inputTypes, + stream_types_mapping_t outputTypes, + std::vector inputNames, + std::vector outputNames, + int queueSize = 1) { + auto sidePackets = std::make_shared(); + queue = std::make_shared(config, sidePackets, queueSize); + GraphIdGuard graphIdGuard(queue); + return MediapipeGraphExecutor{ + this->name, + this->version, + config, + std::move(inputTypes), + std::move(outputTypes), + std::move(inputNames), + std::move(outputNames), + *sidePackets, + nullptr, + this->reporter.get(), + std::move(graphIdGuard)}; + } +}; + #if (PYTHON_DISABLE == 0) class PythonStreamingTest : public StreamingTest { protected: @@ -621,6 +650,184 @@ node { ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK); } +TEST_F(StreamingQueueTest, SingleStreamSend3Receive3AutomaticTimestamp) { + const std::string pbTxt{R"( +input_stream: "in" +output_stream: "out" +node { + calculator: 
"AddOneSingleStreamTestCalculator" + input_stream: "in" + output_stream: "out" +} + )"}; + ::mediapipe::CalculatorGraphConfig config; + ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + + auto executor = createQueueExecutor( + config, + {{"in", mediapipe_packet_type_enum::OVTENSOR}}, + {{"out", mediapipe_packet_type_enum::OVTENSOR}}, + {"in"}, + {"out"}, + 2); + + prepareRequest(this->firstRequest, {{"in", 3.5f}}); + EXPECT_CALL(this->stream, Read(_)) + .WillOnce(Receive({{"in", 7.2f}})) + .WillOnce(Receive({{"in", 102.4f}})) + .WillOnce(Disconnect()); + + auto timestamp = std::make_shared(-1); + EXPECT_CALL(this->stream, Write(_, _)) + .WillOnce(SendWithAutomaticTimestamp({{"out", 4.5f}}, timestamp)) + .WillOnce(SendWithAutomaticTimestamp({{"out", 8.2f}}, timestamp)) + .WillOnce(SendWithAutomaticTimestamp({{"out", 103.4f}}, timestamp)); + + ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK); +} + +TEST_F(StreamingQueueTest, SingleStreamSend1Receive3) { + const std::string pbTxt{R"( +input_stream: "in" +output_stream: "out" +node { + calculator: "AddOne3CycleIterationsTestCalculator" + input_stream: "in" + input_stream: "signal" + input_stream_info: { + tag_index: ':1', + back_edge: true + } + input_stream_handler { + input_stream_handler: 'ImmediateInputStreamHandler' + } + output_stream: "out" + output_stream: "signal" +} + )"}; + ::mediapipe::CalculatorGraphConfig config; + ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + + auto executor = createQueueExecutor( + config, + {{"in", mediapipe_packet_type_enum::OVTENSOR}}, + {{"out", mediapipe_packet_type_enum::OVTENSOR}}, + {"in"}, + {"out"}, + 2); + + prepareRequest(this->firstRequest, {{"in", 3.5f}}); + EXPECT_CALL(this->stream, Read(_)) + .WillOnce(Disconnect()); + + EXPECT_CALL(this->stream, Write(_, _)) + .WillOnce(SendWithTimestamp({{"out", 4.5f}}, 1)) + .WillOnce(SendWithTimestamp({{"out", 
5.5f}}, 2)) + .WillOnce(SendWithTimestamp({{"out", 6.5f}}, 3)); + + ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK); +} + +TEST_F(StreamingQueueTest, ExitOnDisconnectionDuringRead) { + const std::string pbTxt{R"( +input_stream: "in" +output_stream: "out" +node { + calculator: "AddOneSingleStreamTestCalculator" + input_stream: "in" + output_stream: "out" +} + )"}; + ::mediapipe::CalculatorGraphConfig config; + ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + + auto executor = createQueueExecutor( + config, + {{"in", mediapipe_packet_type_enum::OVTENSOR}}, + {{"out", mediapipe_packet_type_enum::OVTENSOR}}, + {"in"}, + {"out"}, + 2); + + prepareRequest(this->firstRequest, {}); + EXPECT_CALL(this->stream, Read(_)) + .WillOnce(Disconnect()); + + EXPECT_CALL(this->stream, Write(_, _)).Times(0); + + ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK); +} + +TEST_F(StreamingQueueTest, ErrorOnDisconnectionDuringWrite) { + const std::string pbTxt{R"( +input_stream: "in" +output_stream: "out" +node { + calculator: "AddOneSingleStreamTestCalculator" + input_stream: "in" + output_stream: "out" +} + )"}; + ::mediapipe::CalculatorGraphConfig config; + ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + + auto executor = createQueueExecutor( + config, + {{"in", mediapipe_packet_type_enum::OVTENSOR}}, + {{"out", mediapipe_packet_type_enum::OVTENSOR}}, + {"in"}, + {"out"}, + 2); + + std::promise signalPromise; + std::future signalFuture = signalPromise.get_future(); + + prepareRequest(this->firstRequest, {{"in", 3.5f}}); + EXPECT_CALL(this->stream, Read(_)) + .WillOnce(DisconnectWhenNotified(signalFuture)); + + EXPECT_CALL(this->stream, Write(_, _)) + .WillOnce(DisconnectOnWriteAndNotifyEnd(signalPromise)); + + ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), 
StatusCode::MEDIAPIPE_EXECUTION_ERROR); +} + +TEST_F(StreamingQueueTest, ErrorDuringFirstRequestDeserialization) { + const std::string pbTxt{R"( +input_stream: "in" +output_stream: "out" +node { + calculator: "AddOneSingleStreamTestCalculator" + input_stream: "in" + output_stream: "out" +} + )"}; + ::mediapipe::CalculatorGraphConfig config; + ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config)); + + auto executor = createQueueExecutor( + config, + {{"in", mediapipe_packet_type_enum::OVTENSOR}}, + {{"out", mediapipe_packet_type_enum::OVTENSOR}}, + {"in"}, + {"out"}, + 2); + + prepareInvalidRequest(this->firstRequest, {"in"}); + + std::promise signalPromise; + std::future signalFuture = signalPromise.get_future(); + + EXPECT_CALL(this->stream, Read(_)) + .WillOnce(DisconnectWhenNotified(signalFuture)); + EXPECT_CALL(this->stream, Write(_, _)) + .WillOnce(SendErrorAndNotifyEnd( + Status(StatusCode::INVALID_CONTENT_SIZE).string() + std::string{" - Expected: 4 bytes; Actual: 0 bytes; input name: in; partial deserialization of first request"}, + signalPromise)); + + ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK); +} + // PYTHON CALCULATOR CASES #if (PYTHON_DISABLE == 0) diff --git a/src/test/stress_test_utils.hpp b/src/test/stress_test_utils.hpp index ccbdd60758..836f9f8f36 100644 --- a/src/test/stress_test_utils.hpp +++ b/src/test/stress_test_utils.hpp @@ -50,6 +50,7 @@ #include "../server.hpp" #include "../status.hpp" #include "../stringutils.hpp" +#include "src/timer.hpp" #include "../tfs_frontend/tfs_utils.hpp" #include "c_api_test_utils.hpp" #include "test_utils.hpp" @@ -1067,7 +1068,99 @@ static const std::string basicMediapipeConfigWithNewGraphPath = R"({ "mediapipe_config_list": [ { "name":"pipeline1Dummy", - "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt" + 
"graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt" + } + ] +})"; + +const std::string basicMediapipeQueueConfig = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt" + } + ] +})"; + +static const std::string basicMediapipeQueueConfigWithAddedGraph = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt" + }, + { + "name":"pipeline2Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt" + } + ] +})"; + +static const std::string basicMediapipeQueueConfigWithRemovedGraph = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + ] +})"; + +static const std::string basicMediapipeQueueConfigWithRemovedModel = R"({ + "model_config_list": [ + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt" + } + ] +})"; + +static const std::string basicMediapipeQueueConfigWithReloadedModel = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy", + "nireq": 47 + } + } + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt" + } + ] +})"; + +static const std::string basicMediapipeQueueConfigWithNewGraphPath = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + 
"mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt" } ] })"; @@ -1094,9 +1187,16 @@ static void mediacreate(std::unique_ptr& executorPtr, ov sc = static_cast(code); \ } +enum StressTimerSlot : unsigned int { + STRESS_LOOP, + CREATE, + EXECUTE, + TIMER_END +}; + class ConfigChangeStressTest : public TestWithTempDir { protected: - const uint32_t loadThreadCount = 20; + const uint32_t loadThreadCount = 16; const uint32_t beforeConfigChangeLoadTimeMs = 30; const uint32_t afterConfigChangeLoadTimeMs = 50; const int stressIterationsLimit = 10000; @@ -1291,6 +1391,12 @@ class ConfigChangeStressTest : public TestWithTempDir { createConfigFileWithContent(ovmsConfig, configFilePath); SPDLOG_INFO("{} end", __FUNCTION__); } + void addNewMediapipeQueueGraph() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeQueueConfigWithAddedGraph); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } void removeMediapipeGraph() { SPDLOG_INFO("{} start", __FUNCTION__); SetUpConfig(basicMediapipeConfigWithRemovedGraph); @@ -1315,6 +1421,30 @@ class ConfigChangeStressTest : public TestWithTempDir { createConfigFileWithContent(ovmsConfig, configFilePath); SPDLOG_INFO("{} end", __FUNCTION__); } + void removeMediapipeQueueGraph() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeQueueConfigWithRemovedGraph); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } + void removeMediapipeQueueGraphUsedModel() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeQueueConfigWithRemovedModel); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } + void reloadMediapipeQueueGraphUsedModel() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeQueueConfigWithReloadedModel); + 
createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } + void reloadMediapipeQueueGraph() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeQueueConfigWithNewGraphPath); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } void checkMetricGreaterThan(const std::string& metricName, double value, std::string& metricOutput, bool& result) { ASSERT_THAT(metricOutput, ::testing::HasSubstr(metricName + std::string{"{name=\"dummy\",version=\"1\"} "})) << "cannot find dummys " << metricName << " metric\n" << metricOutput; @@ -1706,6 +1836,8 @@ class ConfigChangeStressTest : public TestWithTempDir { auto stressIterationsCounter = stressIterationsLimit; bool breakLoop = false; while (stressIterationsCounter-- > 0) { + ovms::Timer timer; + timer.start(STRESS_LOOP); auto futureWaitResult = stopSignal.wait_for(std::chrono::milliseconds(0)); if (true == breakLoop) { SPDLOG_INFO("Ending Load"); @@ -1725,6 +1857,7 @@ class ConfigChangeStressTest : public TestWithTempDir { RequestType request2; RequestType request = preparePipelinePredictRequest(request2); ovms::Status createPipelineStatus = StatusCode::UNKNOWN_ERROR; + timer.start(CREATE); if (typeid(ServableType) == typeid(ovms::Pipeline)) { createPipelineStatus = this->manager->createPipeline(pipelinePtr, pipelineName, &request, &response); #if (MEDIAPIPE_DISABLE == 0) @@ -1732,6 +1865,8 @@ class ConfigChangeStressTest : public TestWithTempDir { mediacreate(executorPtr, *(this->manager), request, response, createPipelineStatus); #endif } + timer.stop(CREATE); + SPDLOG_TRACE("XYZ creation time: {} us", timer.elapsed(CREATE)); // we need to make sure that expected status happened and still accept // some that could happen but we may not hit them EXPECT_TRUE((requiredLoadResults.find(createPipelineStatus.getCode()) != requiredLoadResults.end()) || @@ -1743,6 +1878,7 @@ class ConfigChangeStressTest : public 
TestWithTempDir { } ovms::Status executePipelineStatus = StatusCode::UNKNOWN_ERROR; + timer.start(EXECUTE); if (typeid(ServableType) == typeid(ovms::Pipeline)) { executePipelineStatus = pipelinePtr->execute(ovms::ExecutionContext( ovms::ExecutionContext::Interface::GRPC, @@ -1752,6 +1888,8 @@ class ConfigChangeStressTest : public TestWithTempDir { mediaexec(executorPtr, *(this->manager), request, response, executePipelineStatus); #endif } + timer.stop(EXECUTE); + SPDLOG_TRACE("XYZ execution time: {} us", timer.elapsed(EXECUTE)); createPipelineRetCodesCounters[executePipelineStatus.getCode()]++; EXPECT_TRUE((requiredLoadResults.find(executePipelineStatus.getCode()) != requiredLoadResults.end()) || (allowedLoadResults.find(executePipelineStatus.getCode()) != allowedLoadResults.end())) @@ -1763,6 +1901,8 @@ class ConfigChangeStressTest : public TestWithTempDir { SPDLOG_INFO("Earlier fail detected. Stopping execution"); break; } + timer.stop(STRESS_LOOP); + SPDLOG_TRACE("XYZ loop iteration time: {} us", timer.elapsed(STRESS_LOOP)); } for (auto& [retCode, counter] : createPipelineRetCodesCounters) { if (counter > 0) { From e61a1744a72263b54cee570875ad46131b4766e7 Mon Sep 17 00:00:00 2001 From: Adrian Tobiszewski Date: Mon, 23 Feb 2026 08:40:44 +0100 Subject: [PATCH 6/8] All gtest tests pass --- ci/build_test_OnCommit.groovy | 8 +- src/kfs_frontend/kfs_graph_executor_impl.cpp | 1 - src/llm/BUILD | 1 + src/llm/http_llm_calculator.cc | 25 ++- .../graph_executor_constants.hpp | 1 + src/mediapipe_internal/graph_side_packets.hpp | 11 + src/mediapipe_internal/graphqueue.cpp | 5 +- src/mediapipe_internal/graphqueue.hpp | 4 +- .../mediapipegraphdefinition.cpp | 9 +- .../mediapipegraphexecutor.cpp | 29 +++ .../mediapipegraphexecutor.hpp | 36 +++- src/test/http_openai_handler_test.cpp | 4 +- src/test/llm/config_queue.json | 9 + src/test/llm/llmnode_test.cpp | 196 ++++++++++++++++++ src/test/llm/lm_cb_regular_queue.pbtxt | 47 +++++ src/test/streaming_test.cpp | 54 ++--- 16 files 
changed, 399 insertions(+), 41 deletions(-) create mode 100644 src/test/llm/config_queue.json create mode 100644 src/test/llm/lm_cb_regular_queue.pbtxt diff --git a/ci/build_test_OnCommit.groovy b/ci/build_test_OnCommit.groovy index a3e7862c29..21029ec61c 100644 --- a/ci/build_test_OnCommit.groovy +++ b/ci/build_test_OnCommit.groovy @@ -149,7 +149,7 @@ pipeline { timeout(time: 120, unit: 'MINUTES') } parallel { - /*stage("Run unit tests") { + stage("Run unit tests") { agent { label "${agent_name_linux}" } @@ -165,7 +165,7 @@ pipeline { } } } - }*/ + } stage("Internal tests") { agent { label "${agent_name_linux}" @@ -186,7 +186,7 @@ pipeline { } } } - /*stage('Test windows') { + stage('Test windows') { agent { label "${agent_name_windows}" } @@ -210,7 +210,7 @@ pipeline { } } } - }*/ + } } } } diff --git a/src/kfs_frontend/kfs_graph_executor_impl.cpp b/src/kfs_frontend/kfs_graph_executor_impl.cpp index 2a2d0ff3b8..86778ca899 100644 --- a/src/kfs_frontend/kfs_graph_executor_impl.cpp +++ b/src/kfs_frontend/kfs_graph_executor_impl.cpp @@ -26,7 +26,6 @@ #include "../logging.hpp" #include "../mediapipe_internal/mediapipe_utils.hpp" #include "../mediapipe_internal/mediapipegraphdefinition.hpp" -// TODO FIXME #include "../mediapipe_internal/graph_executor_constants.hpp" #include "../predict_request_validation_utils.hpp" #include "../status.hpp" #include "../tfs_frontend/tfs_utils.hpp" diff --git a/src/llm/BUILD b/src/llm/BUILD index ae37d936ca..5f64ad197f 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -24,6 +24,7 @@ ovms_cc_library( "//third_party:openvino", "@mediapipe//mediapipe/framework:calculator_framework", "@com_github_tencent_rapidjson//:rapidjson", + "//src:mediapipe_internal_graph_side_packets", "//src/kfserving_api:kfserving_api_cpp", "//src:libovmsprofiler", ":genai_servables", diff --git a/src/llm/http_llm_calculator.cc b/src/llm/http_llm_calculator.cc index ae6461c61a..2415ae08da 100644 --- a/src/llm/http_llm_calculator.cc +++ 
b/src/llm/http_llm_calculator.cc @@ -14,6 +14,7 @@ // limitations under the License. //***************************************************************************** #include +#include #include #pragma warning(push) @@ -27,6 +28,7 @@ #include "../http_payload.hpp" #include "../logging.hpp" +#include "../mediapipe_internal/graph_side_packets.hpp" #include "../profiler.hpp" #include "apis/openai_completions.hpp" #include "servable.hpp" @@ -36,9 +38,11 @@ using namespace ovms; namespace mediapipe { const std::string LLM_SESSION_SIDE_PACKET_TAG = "LLM_NODE_RESOURCES"; +const std::string LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG = "LLM_NODE_EXECUTION_CONTEXTS"; class HttpLLMCalculator : public CalculatorBase { std::shared_ptr servable; + std::shared_ptr executionContextHolder; std::shared_ptr executionContext; static const std::string INPUT_TAG_NAME; @@ -54,6 +58,9 @@ class HttpLLMCalculator : public CalculatorBase { cc->Inputs().Tag(INPUT_TAG_NAME).Set(); cc->Inputs().Tag(LOOPBACK_TAG_NAME).Set(); cc->InputSidePackets().Tag(LLM_SESSION_SIDE_PACKET_TAG).Set(); + if (cc->InputSidePackets().HasTag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG)) { + cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).Set(); + } cc->Outputs().Tag(OUTPUT_TAG_NAME).Set(); cc->Outputs().Tag(LOOPBACK_TAG_NAME).Set(); return absl::OkStatus(); @@ -72,7 +79,17 @@ class HttpLLMCalculator : public CalculatorBase { auto it = servableMap.find(cc->NodeName()); RET_CHECK(it != servableMap.end()) << "Could not find initialized LLM node named: " << cc->NodeName(); this->servable = it->second; - this->executionContext = servable->createExecutionContext(); + + if (cc->InputSidePackets().HasTag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG) && !cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).IsEmpty()) { + ovms::GenAiExecutionContextMap executionContextMap = cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).Get(); + auto contextIt = executionContextMap.find(cc->NodeName()); + 
RET_CHECK(contextIt != executionContextMap.end()) << "Could not find LLM execution context holder for node named: " << cc->NodeName(); + this->executionContextHolder = contextIt->second; + } + + if (!this->executionContextHolder) { + this->executionContext = servable->createExecutionContext(); + } SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "LLMCalculator [Node: {}] Open end", cc->NodeName()); return absl::OkStatus(); } @@ -81,6 +98,12 @@ class HttpLLMCalculator : public CalculatorBase { OVMS_PROFILE_FUNCTION(); RET_CHECK(this->servable != nullptr); + if (this->executionContextHolder) { + std::lock_guard lock(this->executionContextHolder->mutex); + this->executionContext = this->executionContextHolder->executionContext; + } + RET_CHECK(this->executionContext != nullptr) << "LLM execution context not initialized for node: " << cc->NodeName(); + // For cases where MediaPipe decides to trigger Process() when there are no inputs if (cc->Inputs().Tag(INPUT_TAG_NAME).IsEmpty() && cc->Inputs().Tag(LOOPBACK_TAG_NAME).IsEmpty()) { return absl::OkStatus(); diff --git a/src/mediapipe_internal/graph_executor_constants.hpp b/src/mediapipe_internal/graph_executor_constants.hpp index ff565769ce..55e3af7f59 100644 --- a/src/mediapipe_internal/graph_executor_constants.hpp +++ b/src/mediapipe_internal/graph_executor_constants.hpp @@ -22,6 +22,7 @@ namespace ovms { inline const std::string PYTHON_SESSION_SIDE_PACKET_TAG = "py"; inline const std::string LLM_SESSION_SIDE_PACKET_TAG = "llm"; +inline const std::string LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG = "llm_ctx"; inline const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes"; inline const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable"; inline const std::string RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable"; diff --git a/src/mediapipe_internal/graph_side_packets.hpp b/src/mediapipe_internal/graph_side_packets.hpp index 66b0134726..8b67bd3bc0 100644 --- 
a/src/mediapipe_internal/graph_side_packets.hpp +++ b/src/mediapipe_internal/graph_side_packets.hpp @@ -15,6 +15,7 @@ //***************************************************************************** #pragma once +#include #include #include #include @@ -24,6 +25,7 @@ namespace ovms { // Forward declarations - only shared_ptrs are stored so full definitions are not needed class PythonNodeResources; class GenAiServable; +struct GenAiServableExecutionContext; struct ImageGenerationPipelines; struct EmbeddingsServable; struct RerankServable; @@ -38,9 +40,16 @@ using TtsServableMap = std::unordered_map>; using ImageGenerationPipelinesMap = std::unordered_map>; +struct GenAiExecutionContextHolder { + std::mutex mutex; + std::shared_ptr executionContext; +}; +using GenAiExecutionContextMap = std::unordered_map>; + struct GraphSidePackets { PythonNodeResourcesMap pythonNodeResourcesMap; GenAiServableMap genAiServableMap; + GenAiExecutionContextMap genAiExecutionContextMap; ImageGenerationPipelinesMap imageGenPipelinesMap; EmbeddingsServableMap embeddingsServableMap; RerankServableMap rerankServableMap; @@ -49,6 +58,7 @@ struct GraphSidePackets { void clear() { pythonNodeResourcesMap.clear(); genAiServableMap.clear(); + genAiExecutionContextMap.clear(); imageGenPipelinesMap.clear(); embeddingsServableMap.clear(); rerankServableMap.clear(); @@ -58,6 +68,7 @@ struct GraphSidePackets { bool empty() { return (pythonNodeResourcesMap.empty() && genAiServableMap.empty() && + genAiExecutionContextMap.empty() && imageGenPipelinesMap.empty() && embeddingsServableMap.empty() && rerankServableMap.empty() && diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp index ea9a2680f4..ccce0f65c3 100644 --- a/src/mediapipe_internal/graphqueue.cpp +++ b/src/mediapipe_internal/graphqueue.cpp @@ -34,7 +34,6 @@ #include "mediapipe/framework/port/status.h" #include "graph_executor_constants.hpp" -//#include "mediapipegraphexecutor.hpp" // for side packet tag names 
#include "outputstreamobserver.hpp" namespace ovms { GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr sidePacketMaps, int streamsLength) : @@ -62,11 +61,15 @@ GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::sh throw std::runtime_error(absStatus.ToString()); } } + for (const auto& [nodeName, _] : sidePacketMaps->genAiServableMap) { + gh->genAiExecutionContextMap[nodeName] = std::make_shared(); + } std::map inputSidePackets; #if (PYTHON_DISABLE == 0) inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); #endif inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(gh->genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(sidePacketMaps->rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp index a570557211..ba09edd85e 100644 --- a/src/mediapipe_internal/graphqueue.hpp +++ b/src/mediapipe_internal/graphqueue.hpp @@ -40,6 +40,7 @@ class NullOutputStreamObserver; struct GraphHelper { std::shared_ptr<::mediapipe::CalculatorGraph> graph; // TODO FIXME this does not have to be shared_ptr std::unordered_map> outStreamObservers; + GenAiExecutionContextMap 
genAiExecutionContextMap; ::mediapipe::Timestamp currentTimestamp; // TODO FIXME const // TODO FIXME move constr/= GraphHelper() = default; @@ -48,13 +49,14 @@ struct GraphHelper { GraphHelper(GraphHelper&& gh) : graph(std::move(gh.graph)), outStreamObservers(std::move(gh.outStreamObservers)), + genAiExecutionContextMap(std::move(gh.genAiExecutionContextMap)), currentTimestamp(gh.currentTimestamp) {} GraphHelper& operator=(GraphHelper&& gh) = default; }; // we need to keep Graph alive during MP reload hence shared_ptr //class GraphQueue : public Queue> { class GraphQueue : public Queue> { - public: // XXX TODO make private? we need to acces in mediapipegraphdefinition to set side packets though +public: // XXX TODO make private? we need to acces in mediapipegraphdefinition to set side packets though std::shared_ptr sidePacketMaps; public: diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index 38533093b7..a063ac5dd5 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -62,7 +62,6 @@ #include "src/audio/speech_to_text/s2t_servable.hpp" #include "src/audio/text_to_speech/t2s_servable.hpp" - namespace ovms { MediapipeGraphConfig MediapipeGraphDefinition::MGC; @@ -494,7 +493,7 @@ class ResourcesCleaningGuard { resources(resources) {} ~ResourcesCleaningGuard() { if (shouldCleanup) { - resources.clear(); // TODO FIXME @atobisze check + resources.clear(); // TODO FIXME @atobisze check } } void disableCleaning() { @@ -537,6 +536,7 @@ Status MediapipeGraphDefinition::initializeNodes() { // Passed to both calculators that require LLM Engine (gRPC KServe & HTTP OpenAI) if (endsWith(config.node(i).calculator(), LLM_NODE_CALCULATOR_NAME)) { auto& genAiServableMap = this->sidePacketMaps->genAiServableMap; + auto& genAiExecutionContextMap = this->sidePacketMaps->genAiExecutionContextMap; ResourcesCleaningGuard 
genAiServablesCleaningGuard(genAiServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node missing options in graph: {}. ", this->name); @@ -551,6 +551,10 @@ Status MediapipeGraphDefinition::initializeNodes() { SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node name: {} already used in graph: {}. ", nodeName, this->name); return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; } + if (genAiExecutionContextMap.find(nodeName) != genAiExecutionContextMap.end()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM execution context holder for node name: {} already exists in graph: {}. ", nodeName, this->name); + return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; + } std::shared_ptr servable; Status status = initializeGenAiServable(servable, config.node(i), mgconfig.getBasePath()); if (!status.ok()) { @@ -558,6 +562,7 @@ Status MediapipeGraphDefinition::initializeNodes() { return status; } genAiServableMap.insert(std::pair>(nodeName, std::move(servable))); + genAiExecutionContextMap.insert(std::pair>(nodeName, std::make_shared())); genAiServablesCleaningGuard.disableCleaning(); } // Passed to both calculators that require Image Generation pipelines diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp index 601a164f61..b821d1fef1 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.cpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp @@ -34,6 +34,7 @@ #endif #include "src/image_gen/pipelines.hpp" +#include "src/llm/servable.hpp" namespace ovms { @@ -84,4 +85,32 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), mediapipeServableMetricReporter(mediapipeServableMetricReporter) {} +Status MediapipeGraphExecutor::initializeLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap) { + for (const auto& [nodeName, servable] : this->sidePacketMaps.genAiServableMap) { + auto it = 
executionContextMap.find(nodeName); + if (it == executionContextMap.end() || !it->second) { + SPDLOG_DEBUG("Missing LLM execution context holder for node: {}", nodeName); + return StatusCode::INTERNAL_ERROR; + } + auto& holder = it->second; + std::lock_guard lock(holder->mutex); + holder->executionContext = servable->createExecutionContext(); + if (!holder->executionContext) { + SPDLOG_DEBUG("Failed to create LLM execution context for node: {}", nodeName); + return StatusCode::INTERNAL_ERROR; + } + } + return StatusCode::OK; +} + +void MediapipeGraphExecutor::resetLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap) { + for (auto& [_, holder] : executionContextMap) { + if (!holder) { + continue; + } + std::lock_guard lock(holder->mutex); + holder->executionContext.reset(); + } +} + } // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index 391d1849fb..1e36d27e42 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -159,6 +159,10 @@ class MediapipeGraphExecutor { PythonBackend* pythonBackend, MediapipeServableMetricReporter* mediapipeServableMetricReporter); + Status initializeLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap); + + void resetLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap); + template Status infer(const RequestType* request, ResponseType* response, ExecutionContext executionContext) { OVMS_PROFILE_FUNCTION(); @@ -175,6 +179,10 @@ class MediapipeGraphExecutor { template Status inferWithQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) { ::mediapipe::CalculatorGraph& graph = this->guard->graph; + auto llmContextStatus = initializeLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } for (auto& name 
: this->outputNames) { if (name.empty()) { SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", name); @@ -209,7 +217,10 @@ class MediapipeGraphExecutor { if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } + resetLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); + // Increment timestamp for next request reusing this graph from the queue + this->guard->gh->currentTimestamp = ::mediapipe::Timestamp(this->guard->gh->currentTimestamp.Value() + 1); SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name); return StatusCode::OK; } @@ -218,7 +229,12 @@ class MediapipeGraphExecutor { Status inferWithoutQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) { ::mediapipe::CalculatorGraph graph; MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); - enum : unsigned int { PROCESS, TIMER_END2 }; + auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } + enum : unsigned int { PROCESS, + TIMER_END2 }; Timer timer; timer.start(PROCESS); std::unordered_map outputPollers; @@ -241,6 +257,7 @@ class MediapipeGraphExecutor { inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); #endif inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = 
mediapipe::MakePacket(this->sidePacketMaps.genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); @@ -278,6 +295,7 @@ class MediapipeGraphExecutor { if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } + resetLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR); @@ -340,12 +358,17 @@ class MediapipeGraphExecutor { // Side packets are set at queue construction time. if (requestHasInputSidePackets(req)) { SPDLOG_DEBUG("Graph queue does not support user-provided input side packets. " - "Side packets are set at graph queue construction time. Graph: {}", this->name); + "Side packets are set at graph queue construction time. 
Graph: {}", + this->name); return Status(StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR, "Input side packets are not supported for graphs with queue enabled"); } MetricGaugeGuard currentGraphs(this->mediapipeServableMetricReporter->currentGraphs.get()); ::mediapipe::CalculatorGraph& graph = this->guard->graph; + auto llmContextStatus = initializeLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } enum : unsigned int { PROCESS, @@ -426,7 +449,10 @@ class MediapipeGraphExecutor { if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } + resetLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); + // Increment timestamp for next request reusing this graph from the queue + this->guard->gh->currentTimestamp = ::mediapipe::Timestamp(this->guard->gh->currentTimestamp.Value() + 1); SPDLOG_DEBUG("Graph {}: Done streaming execution (queue path)", this->name); timer.stop(PROCESS); @@ -451,6 +477,10 @@ class MediapipeGraphExecutor { // Init MP_RETURN_ON_FAIL(graph.Initialize(this->config), "graph initialization", StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); } + auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } enum : unsigned int { PROCESS, TIMER_END2 @@ -498,6 +528,7 @@ class MediapipeGraphExecutor { .At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); #endif inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); 
inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); // Add image generation side packet in case image generation allow for streaming } @@ -576,6 +607,7 @@ class MediapipeGraphExecutor { if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } + resetLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); SPDLOG_DEBUG("Graph {}: Done execution", this->name); } diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 316917f788..6179b65d69 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -212,7 +212,7 @@ Key: content-type; Value: application/json } JSON Parser: -{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only +{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only ASSERT_EQ(response, expectedResponse); } @@ -244,7 +244,7 @@ Key: test2; Value: header } JSON Parser: -{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only +{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only ASSERT_EQ(response, expectedResponse); } diff --git a/src/test/llm/config_queue.json b/src/test/llm/config_queue.json new file mode 100644 index 0000000000..1e16802ed9 --- /dev/null +++ b/src/test/llm/config_queue.json @@ -0,0 +1,9 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name":"lm_cb_regular_queue", + "graph_path":"/ovms/src/test/llm/lm_cb_regular_queue.pbtxt" + } + ] +} diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp index 
19e2d75246..355e6856ac 100644 --- a/src/test/llm/llmnode_test.cpp +++ b/src/test/llm/llmnode_test.cpp @@ -174,6 +174,51 @@ std::shared_ptr LLMFlowHttpTest::cbPipe; std::shared_ptr LLMFlowHttpTest::llmExecutorWrapper; std::unique_ptr LLMFlowHttpTest::t; +class LLMFlowHttpQueueGraphTest : public ::testing::Test { +protected: + static std::unique_ptr t; + +public: + std::unique_ptr handler; + std::unordered_map headers{{"content-type", "application/json"}}; + ovms::HttpRequestComponents comp; + const std::string endpointChatCompletions = "/v3/chat/completions"; + const std::string endpointCompletions = "/v3/completions"; + std::shared_ptr writer; + std::shared_ptr multiPartParser; + std::string response; + rapidjson::Document parsedResponse; + ovms::HttpResponseComponents responseComponents; + + static void SetUpTestSuite() { + std::string port = "9173"; + ovms::Server& server = ovms::Server::instance(); + ::SetUpServer(t, server, port, getGenericFullPathForSrcTest("/ovms/src/test/llm/config_queue.json").c_str(), 60); + } + + static void TearDownTestSuite() { + ovms::Server& server = ovms::Server::instance(); + server.setShutdownRequest(1); + t->join(); + server.setShutdownRequest(0); + } + + void SetUp() { + writer = std::make_shared(); + multiPartParser = std::make_shared(); + ON_CALL(*writer, PartialReplyBegin(::testing::_)).WillByDefault(testing::Invoke([](std::function fn) { fn(); })); + ovms::Server& server = ovms::Server::instance(); + handler = std::make_unique(server, 5); + ASSERT_EQ(handler->parseRequestComponents(comp, "POST", endpointCompletions, headers), ovms::StatusCode::OK); + } + + void TearDown() { + handler.reset(); + } +}; + +std::unique_ptr LLMFlowHttpQueueGraphTest::t; + // --------------------------------------- OVMS LLM nodes tests /* @@ -249,6 +294,157 @@ TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJson) { } } +TEST_F(LLMFlowHttpQueueGraphTest, unaryCompletionsJsonQueueGraph) { + std::string requestBody = R"( + { + "model": 
"lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "best_of": 16, + "max_tokens": 5, + "prompt": "What is OpenVINO?" + } + )"; + + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + for (auto& choice : parsedResponse["choices"].GetArray()) { + ASSERT_TRUE(choice["finish_reason"].IsString()); + ASSERT_FALSE(choice["logprobs"].IsObject()); + ASSERT_TRUE(choice["text"].IsString()); + } + + ASSERT_TRUE(parsedResponse["usage"].IsObject()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["prompt_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["completion_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["total_tokens"].IsInt()); + ASSERT_EQ(parsedResponse["usage"].GetObject()["completion_tokens"].GetInt(), 5); + EXPECT_STREQ(parsedResponse["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(parsedResponse["object"].GetString(), "text_completion"); +} + +TEST_F(LLMFlowHttpQueueGraphTest, unaryChatCompletionsJsonQueueGraph) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "max_tokens": 5, + "messages": [ + { + "role": "user", + "content": "What is OpenVINO?" 
+ } + ] + } + )"; + + ASSERT_EQ( + handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + for (auto& choice : parsedResponse["choices"].GetArray()) { + ASSERT_TRUE(choice["finish_reason"].IsString()); + ASSERT_TRUE(choice["message"].IsObject()); + ASSERT_TRUE(choice["message"]["content"].IsString()); + EXPECT_STREQ(choice["message"]["role"].GetString(), "assistant"); + } + + ASSERT_TRUE(parsedResponse["usage"].IsObject()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["prompt_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["completion_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["total_tokens"].IsInt()); + ASSERT_EQ(parsedResponse["usage"].GetObject()["completion_tokens"].GetInt(), 5); + EXPECT_STREQ(parsedResponse["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(parsedResponse["object"].GetString(), "chat.completion"); +} + +TEST_F(LLMFlowHttpQueueGraphTest, streamChatCompletionsQueueGraph) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": true, + "seed" : 1, + "max_tokens": 5, + "ignore_eos": true, + "messages": [ + { + "role": "user", + "content": "What is OpenVINO?" 
+ } + ] + } + )"; + ON_CALL(*writer, PartialReply).WillByDefault([this](std::string response) { + rapidjson::Document d; + std::string dataPrefix = "data:"; + ASSERT_STREQ(response.substr(0, dataPrefix.size()).c_str(), dataPrefix.c_str()); + size_t pos = response.find("\n"); + ASSERT_NE(pos, response.npos); + rapidjson::ParseResult parsingSucceeded = d.Parse(response.substr(dataPrefix.size(), (pos - dataPrefix.size())).c_str()); + ASSERT_EQ(parsingSucceeded.Code(), 0); + ASSERT_TRUE(d["choices"].IsArray()); + ASSERT_EQ(d["choices"].Capacity(), 1); + int i = 0; + for (auto& choice : d["choices"].GetArray()) { + if (choice["finish_reason"].IsString()) { + EXPECT_STREQ(choice["finish_reason"].GetString(), "length"); + } else { + ASSERT_TRUE(choice["finish_reason"].IsNull()); + } + ASSERT_EQ(choice["index"], i++); + ASSERT_TRUE(choice["delta"].IsObject()); + ASSERT_TRUE(choice["delta"]["content"].IsString()); + } + EXPECT_STREQ(d["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(d["object"].GetString(), "chat.completion.chunk"); + }); + ASSERT_EQ( + handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::PARTIAL_END); +} + +// Test that verifies graph reuse works correctly with queue size 1 +// Sends 2 sequential requests to ensure the same graph instance is reused +TEST_F(LLMFlowHttpQueueGraphTest, queueGraphReuseTwoRequests) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "max_tokens": 5, + "prompt": "What is OpenVINO?" 
+ } + )"; + + // First request + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + ASSERT_TRUE(parsedResponse["choices"].GetArray()[0]["text"].IsString()); + + // Second request - reuses the same graph from the queue + // This validates that timestamp increment works for graph reuse + response.clear(); + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + ASSERT_TRUE(parsedResponse["choices"].GetArray()[0]["text"].IsString()); + // Note: Responses may differ due to KV cache state despite same seed +} + TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJsonEchoWithCompletion) { auto params = GetParam(); // TODO: In the next step we should break this suite into smaller ones, use proper configuration instead of skipping diff --git a/src/test/llm/lm_cb_regular_queue.pbtxt b/src/test/llm/lm_cb_regular_queue.pbtxt new file mode 100644 index 0000000000..60ef13f6b7 --- /dev/null +++ b/src/test/llm/lm_cb_regular_queue.pbtxt @@ -0,0 +1,47 @@ +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# OVMS_GRAPH_QUEUE_SIZE: 1 +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" +node { + name: "llmNode1" + calculator: "HttpLLMCalculator" + input_side_packet: "LLM_NODE_RESOURCES:llm" + input_side_packet: "LLM_NODE_EXECUTION_CONTEXTS:llm_ctx" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + node_options: { + [type.googleapis.com/mediapipe.LLMCalculatorOptions]: { + models_path: "/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct" + cache_size: 1 + } + } + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp index 997d2048ef..b61d8a48ef 100644 --- a/src/test/streaming_test.cpp +++ b/src/test/streaming_test.cpp @@ -388,7 +388,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::KFS_REQUEST}}, {{"out", mediapipe_packet_type_enum::KFS_RESPONSE}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}); @@ -445,7 +445,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}); // no timestamp specified, server will assign one @@ -588,7 +588,7 @@ 
node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 requests with manually (client) assigned ascending order of timestamp and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}, 3); // first request with timestamp 3 @@ -633,7 +633,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock only 1 request and disconnect immediately prepareRequest(this->firstRequest, {{"in", 3.5f}}); @@ -669,7 +669,7 @@ node { {{"out", mediapipe_packet_type_enum::OVTENSOR}}, {"in"}, {"out"}, - 2); + 1); prepareRequest(this->firstRequest, {{"in", 3.5f}}); EXPECT_CALL(this->stream, Read(_)) @@ -714,7 +714,7 @@ node { {{"out", mediapipe_packet_type_enum::OVTENSOR}}, {"in"}, {"out"}, - 2); + 1); prepareRequest(this->firstRequest, {{"in", 3.5f}}); EXPECT_CALL(this->stream, Read(_)) @@ -747,7 +747,7 @@ node { {{"out", mediapipe_packet_type_enum::OVTENSOR}}, {"in"}, {"out"}, - 2); + 1); prepareRequest(this->firstRequest, {}); EXPECT_CALL(this->stream, Read(_)) @@ -777,7 +777,7 @@ node { {{"out", mediapipe_packet_type_enum::OVTENSOR}}, {"in"}, {"out"}, - 2); + 1); std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -811,7 +811,7 @@ node { {{"out", mediapipe_packet_type_enum::OVTENSOR}}, {"in"}, {"out"}, - 2); + 1); prepareInvalidRequest(this->firstRequest, {"in"}); @@ -1437,7 +1437,7 @@ node { {"out3", mediapipe_packet_type_enum::OVTENSOR}}, {"in1", "in2", "in3"}, {"out1", "out2", "out3"}, -{}, nullptr, this->reporter.get()}; + {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = 
signalPromise.get_future(); @@ -1489,7 +1489,7 @@ node { {"out3", mediapipe_packet_type_enum::OVTENSOR}}, {"in1", "in2", "in3"}, {"out1", "out2", "out3"}, -{}, nullptr, this->reporter.get()}; + {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1524,7 +1524,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1558,7 +1558,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"wrong_name"}, {}, nullptr, this->reporter.get()}; // cannot install observer due to wrong output name (should never happen due to validation) + {"in"}, {"wrong_name"}, {}, nullptr, this->reporter.get()}; // cannot install observer due to wrong output name (should never happen due to validation) EXPECT_CALL(this->stream, Read(_)).Times(0); EXPECT_CALL(this->stream, Write(_, _)).Times(0); @@ -1583,7 +1583,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {}); EXPECT_CALL(this->stream, Read(_)) @@ -1611,7 +1611,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); @@ -1647,7 +1647,7 @@ node { this->name, this->version, config, {{"in", 
mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); @@ -1670,7 +1670,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Invalid request - missing data in buffer prepareInvalidRequest(this->firstRequest, {"in"}); // no timestamp specified, server will assign one @@ -1705,7 +1705,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise[3]; std::future signalFuture[3] = { @@ -1752,7 +1752,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}, 0); EXPECT_CALL(this->stream, Read(_)) @@ -1780,7 +1780,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); setRequestTimestamp(this->firstRequest, std::string("not an int")); @@ -1815,7 +1815,7 @@ node { this->name, this->version, config, {{"in", 
mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Timestamps not allowed in stream // Expect continuity of operation and response with error message @@ -1857,7 +1857,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Allowed in stream for (auto timestamp : std::vector<::mediapipe::Timestamp>{ @@ -1893,7 +1893,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 3 requests and disconnection prepareRequestWithParam(this->firstRequest, {{"in", 3.5f}}, {"val", 65}); // request with parameter val @@ -1930,7 +1930,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving the invalid request and disconnection // Request with invalid param py (special pythons session side packet) @@ -1959,7 +1959,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; prepareRequest(this->firstRequest, {{"in", 3.5f}}); // missing required request param EXPECT_CALL(this->stream, Read(_)).Times(0); @@ -1985,7 +1985,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", 
mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; // Mock receiving 2 requests and disconnection prepareRequest(this->firstRequest, {{"in", 3.5f}}, std::nullopt, this->name, this->version); // no timestamp specified, server will assign one @@ -2019,7 +2019,7 @@ node { this->name, this->version, config, {{"in", mediapipe_packet_type_enum::OVTENSOR}}, {{"out", mediapipe_packet_type_enum::OVTENSOR}}, -{"in"}, {"out"}, {}, nullptr, this->reporter.get()}; + {"in"}, {"out"}, {}, nullptr, this->reporter.get()}; std::promise signalPromise; std::future signalFuture = signalPromise.get_future(); From 662a8f1ad734ed239738d3540660984d62f97c39 Mon Sep 17 00:00:00 2001 From: Adrian Tobiszewski Date: Mon, 23 Feb 2026 10:09:25 +0100 Subject: [PATCH 7/8] Style fixes --- src/BUILD | 2 +- src/logging.cpp | 2 +- src/mediapipe_internal/graphqueue.cpp | 1 + src/mediapipe_internal/graphqueue.hpp | 5 ++-- .../outputstreamobserver.hpp | 1 - src/test/mediapipe_framework_test.cpp | 30 +++++++++---------- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/BUILD b/src/BUILD index 46d1d51a39..9cf49970fe 100644 --- a/src/BUILD +++ b/src/BUILD @@ -178,7 +178,7 @@ ovms_cc_library( "mediapipe_internal_graph_side_packets", "//third_party:openvino", "@mediapipe//mediapipe/framework:calculator_graph", - "//src/python:libovmspythonmodule", # TODO not splitted + "//src/python:libovmspythonmodule", # TODO not split "//src/llm:genai_servables", # TODO split! 
], visibility = ["//visibility:public",], diff --git a/src/logging.cpp b/src/logging.cpp index c07bb6f8d4..aee9e4bc2e 100644 --- a/src/logging.cpp +++ b/src/logging.cpp @@ -41,7 +41,7 @@ std::shared_ptr rerank_calculator_logger = std::make_shared ov_logger = std::make_shared("openvino"); #endif -//const std::string default_pattern = "[%i] [%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v"; +// const std::string default_pattern = "[%i] [%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v"; const std::string default_pattern = "[%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v"; static void set_log_level(const std::string log_level, std::shared_ptr logger) { diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp index ccce0f65c3..37d6b742b8 100644 --- a/src/mediapipe_internal/graphqueue.cpp +++ b/src/mediapipe_internal/graphqueue.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp index ba09edd85e..5d59e6bac2 100644 --- a/src/mediapipe_internal/graphqueue.hpp +++ b/src/mediapipe_internal/graphqueue.hpp @@ -21,8 +21,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -54,9 +56,8 @@ struct GraphHelper { GraphHelper& operator=(GraphHelper&& gh) = default; }; // we need to keep Graph alive during MP reload hence shared_ptr -//class GraphQueue : public Queue> { class GraphQueue : public Queue> { -public: // XXX TODO make private? we need to acces in mediapipegraphdefinition to set side packets though +public: // XXX TODO make private? 
we need to access in mediapipegraphdefinition to set side packets though std::shared_ptr sidePacketMaps; public: diff --git a/src/mediapipe_internal/outputstreamobserver.hpp b/src/mediapipe_internal/outputstreamobserver.hpp index 1a314e73ae..f2f8a5023e 100644 --- a/src/mediapipe_internal/outputstreamobserver.hpp +++ b/src/mediapipe_internal/outputstreamobserver.hpp @@ -37,7 +37,6 @@ #pragma GCC diagnostic pop #pragma warning(pop) #include "mediapipe_utils.hpp" -//#include "mediapipegraphdefinition.hpp" // for version in response and PythonNodeResourceMap #include "packettypes.hpp" #include "graphqueue.hpp" diff --git a/src/test/mediapipe_framework_test.cpp b/src/test/mediapipe_framework_test.cpp index bc7c13bad9..53c86f001b 100644 --- a/src/test/mediapipe_framework_test.cpp +++ b/src/test/mediapipe_framework_test.cpp @@ -137,12 +137,12 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerCheckNoInputPackets) ParseTextProtoOrDie(graph_proto); const std::string inputStreamName = "input"; const std::string outputStreamName = "output"; - // avoid creating pollers, retreiving packets etc. + // avoid creating pollers, retrieving packets etc. ////////////////// // model mgmt thread ////////////////// - //std::shared_ptr queue; - //queue = std::make_shared(graphConfig, 1); + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); ::mediapipe::CalculatorGraph graph; EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); // Install NullObserver @@ -153,7 +153,7 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerCheckNoInputPackets) MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); // Here ends model management // Here starts mp graph executor - //ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? 
// get graphIdGuard from queue // create FrontendAppropriateObserver float expVal = 13.5; @@ -193,9 +193,9 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerCheckNoInputPackets) perGraphObserverFunctor = std::make_shared(expVal); // now add second packet auto inputTensor2 = std::make_unique(datatype, shape, data.data()); - //MP_ERROR_STOP(graph.AddPacketToInputStream( + // MP_ERROR_STOP(graph.AddPacketToInputStream( // inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++)))); - //MP_ERROR_STOP(graph.WaitUntilIdle()); + // MP_ERROR_STOP(graph.WaitUntilIdle()); MP_ERROR_STOP(graph.CloseAllPacketSources()); MP_ERROR_STOP(graph.WaitUntilDone()); } @@ -238,12 +238,12 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOC) { ParseTextProtoOrDie(graph_proto); const std::string inputStreamName = "input"; const std::string outputStreamName = "output"; - // avoid creating pollers, retreiving packets etc. + // avoid creating pollers, retrieving packets etc. ////////////////// // model mgmt thread ////////////////// - //std::shared_ptr queue; - //queue = std::make_shared(graphConfig, 1); + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); ::mediapipe::CalculatorGraph graph; EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); // Install NullObserver @@ -254,7 +254,7 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOC) { MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); // Here ends model management // Here starts mp graph executor - //ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? 
// get graphIdGuard from queue // create FrontendAppropriateObserver float expVal = 13.5; @@ -336,12 +336,12 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOCCompare) { ParseTextProtoOrDie(graph_proto); const std::string inputStreamName = "input"; const std::string outputStreamName = "output"; - // avoid creating pollers, retreiving packets etc. + // avoid creating pollers, retrieving packets etc. ////////////////// // model mgmt thread ////////////////// - //std::shared_ptr queue; - //queue = std::make_shared(graphConfig, 1); + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); auto datatype = ov::element::Type_t::f32; ov::Shape shape{1, 10}; int timestamp{0}; @@ -363,7 +363,7 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOCCompare) { MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); // Here ends model management // Here starts mp graph executor - //ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? 
// get graphIdGuard from queue // create FrontendAppropriateObserver struct MyFunctor : public OutputStreamObserverI { @@ -443,7 +443,7 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOCCompare) { SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX end:{}", timer.elapsed(1) / 1000); } { // thread pool case - //auto sharedThreadPool = std::make_shared(std::thread::hardware_concurrency()); + // auto sharedThreadPool = std::make_shared(std::thread::hardware_concurrency()); auto sharedThreadPool = std::make_shared(24); SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX thread"); timer.start(2); From 79f91f35143d81ddab4f92ff662eb017b731e20c Mon Sep 17 00:00:00 2001 From: Adrian Tobiszewski Date: Mon, 23 Feb 2026 12:40:47 +0100 Subject: [PATCH 8/8] Fix windows --- src/kfs_frontend/kfs_graph_executor_impl.cpp | 2 +- src/mediapipe_internal/graphqueue.cpp | 3 +++ src/mediapipe_internal/graphqueue.hpp | 6 ++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/kfs_frontend/kfs_graph_executor_impl.cpp b/src/kfs_frontend/kfs_graph_executor_impl.cpp index 86778ca899..2935b90a23 100644 --- a/src/kfs_frontend/kfs_graph_executor_impl.cpp +++ b/src/kfs_frontend/kfs_graph_executor_impl.cpp @@ -24,6 +24,7 @@ #include "../kfs_frontend/kfs_utils.hpp" #include "../logging.hpp" +#include "../mediapipe_internal/graph_executor_constants.hpp" #include "../mediapipe_internal/mediapipe_utils.hpp" #include "../mediapipe_internal/mediapipegraphdefinition.hpp" #include "../predict_request_validation_utils.hpp" @@ -1168,7 +1169,6 @@ bool requestHasInputSidePackets(const KFSRequest& request) { Status deserializeInputSidePacketsFromFirstRequestImpl( std::map& inputSidePackets, const KFSRequest& request) { - static const std::string PYTHON_SESSION_SIDE_PACKET_TAG{"py"}; for (const auto& [name, valueChoice] : request.parameters()) { SPDLOG_DEBUG("Found: {}; parameter in request for: 
{};", name, request.model_name()); if (name == TIMESTAMP_PARAMETER_NAME) { diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp index 37d6b742b8..b5b0146192 100644 --- a/src/mediapipe_internal/graphqueue.cpp +++ b/src/mediapipe_internal/graphqueue.cpp @@ -31,8 +31,11 @@ #include "src/python/pythonnoderesources.hpp" #include "src/llm/servable.hpp" +#pragma warning(push) +#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) #include "mediapipe/framework/calculator_graph.h" #include "mediapipe/framework/port/status.h" +#pragma warning(pop) #include "graph_executor_constants.hpp" #include "outputstreamobserver.hpp" diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp index 5d59e6bac2..6884f31877 100644 --- a/src/mediapipe_internal/graphqueue.hpp +++ b/src/mediapipe_internal/graphqueue.hpp @@ -30,8 +30,14 @@ #include "../queue.hpp" +#pragma warning(push) +#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include "mediapipe/framework/calculator_graph.h" #include "mediapipe/framework/port/status.h" +#pragma GCC diagnostic pop +#pragma warning(pop) #include "graph_executor_constants.hpp" #include "graph_side_packets.hpp"