From 2eabff05f1c5ea1eae4668a60bcfe61f394e1b27 Mon Sep 17 00:00:00 2001
From: atobisze <adrian.tobiszewski@intel.com>
Date: Thu, 16 Jan 2025 15:07:24 +0100
Subject: [PATCH 1/8] Check

Revert "Check"

This reverts commit dddaf1b9d1dbd1113715889731f8f10a73b67291.

Check graph pool

TODO:
-> this requires additional patch in MP to reset initialized_ flag in
CalculatorGraph and verify if that works. Previous MP tests with reruns
worked due to using AddVectorSink which changes the underlying graph and
does not use OutputStreamPollers. Need to verify if change in MP will
enable graph pool or we need to go back to thread pool.

Rebase

POC MP FW test

POC part 2

WIP to stash
---
 ci/build_test_OnCommit.groovy                 |   8 +-
 src/BUILD                                     |  36 ++
 src/kfs_frontend/kfs_graph_executor_impl.cpp  |   4 +
 src/logging.cpp                               |   3 +-
 .../graph_executor_constants.hpp              |  34 ++
 src/mediapipe_internal/graph_side_packets.hpp |  69 +++
 src/mediapipe_internal/graphqueue.cpp         | 119 +++++
 src/mediapipe_internal/graphqueue.hpp         |  89 ++++
 .../mediapipegraphdefinition.cpp              |  92 ++--
 .../mediapipegraphdefinition.hpp              |  51 +--
 .../mediapipegraphexecutor.cpp                |  30 +-
 .../mediapipegraphexecutor.hpp                | 133 ++++--
 .../outputstreamobserver.hpp                  |  63 +++
 src/python/BUILD                              |   2 +-
 src/test/mediapipe_framework_test.cpp         | 410 +++++++++++++++++-
 src/test/mediapipe_validation_test.cpp        |   1 +
 src/test/mediapipeflow_test.cpp               |  45 +-
 src/test/pythonnode_test.cpp                  |  13 +-
 src/test/streaming_test.cpp                   | 110 ++++-
 src/test/test_utils.hpp                       |  10 +-
 20 files changed, 1144 insertions(+), 178 deletions(-)
 create mode 100644 src/mediapipe_internal/graph_executor_constants.hpp
 create mode 100644 src/mediapipe_internal/graph_side_packets.hpp
 create mode 100644 src/mediapipe_internal/graphqueue.cpp
 create mode 100644 src/mediapipe_internal/graphqueue.hpp
 create mode 100644 src/mediapipe_internal/outputstreamobserver.hpp

diff --git a/ci/build_test_OnCommit.groovy b/ci/build_test_OnCommit.groovy
index 21029ec61c..a3e7862c29 100644
--- a/ci/build_test_OnCommit.groovy
+++ b/ci/build_test_OnCommit.groovy
@@ -149,7 +149,7 @@ pipeline {
             timeout(time: 120, unit: 'MINUTES')
           }
           parallel {
-            stage("Run unit tests") {
+            /*stage("Run unit tests") {
               agent {
                 label "${agent_name_linux}"
               }
@@ -165,7 +165,7 @@ pipeline {
               }
               } 
               }
-            }
+            }*/
             stage("Internal tests") {
               agent {
                 label "${agent_name_linux}"
@@ -186,7 +186,7 @@ pipeline {
                 }
               }            
             }
-            stage('Test windows') {
+            /*stage('Test windows') {
               agent {
                 label "${agent_name_windows}"
               }
@@ -210,7 +210,7 @@ pipeline {
                       }
                   }
               }
-            }
+            }*/
           }
         }
     }
diff --git a/src/BUILD b/src/BUILD
index 71321ca7ee..46d1d51a39 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -150,6 +150,39 @@ ovms_cc_library(
     hdrs = ["queue.hpp"],
     visibility = ["//visibility:public",],
 )
+ovms_cc_library(
+    name = "mediapipe_internal_graph_side_packets",
+    hdrs = ["mediapipe_internal/graph_side_packets.hpp"],
+    visibility = ["//visibility:public",],
+)
+ovms_cc_library(
+    name = "mediapipe_internal_graph_executor_constants",
+    hdrs = ["mediapipe_internal/graph_executor_constants.hpp"],
+    visibility = ["//visibility:public"],
+)
+ovms_cc_library(
+    name = "mediapipe_internal_graphqueue",
+    hdrs = [
+    "mediapipe_internal/graphqueue.hpp",
+    "mediapipe_internal/outputstreamobserver.hpp",
+    ], # TODO FIXME
+    srcs = ["mediapipe_internal/graphqueue.cpp"],
+    deps = [
+        "libovms_queue",
+        "libovmslogging",
+        "libovms_execution_context",
+        "libovmstimer",
+        "libovmsmetrics",
+        "model_metric_reporter",
+        "mediapipe_internal_graph_executor_constants",
+        "mediapipe_internal_graph_side_packets",
+        "//third_party:openvino",
+        "@mediapipe//mediapipe/framework:calculator_graph",
+        "//src/python:libovmspythonmodule", # TODO not splitted
+        "//src/llm:genai_servables", # TODO split!
+    ],
+    visibility = ["//visibility:public",],
+)
 ovms_cc_library(
     name = "libovms_ovinferrequestsqueue",
     hdrs = ["ovinferrequestsqueue.hpp"],
@@ -542,6 +575,7 @@ ovms_cc_library(
                 "mediapipe_internal/mediapipegraphconfig.cpp",
                 "mediapipe_internal/mediapipegraphdefinition.cpp",
                 "mediapipe_internal/mediapipegraphdefinition.hpp",
+                "mediapipe_internal/outputstreamobserver.hpp",
                 "mediapipe_internal/mediapipegraphexecutor.cpp",
                 "mediapipe_internal/mediapipegraphexecutor.hpp",
                 "mediapipe_internal/packettypes.hpp",
@@ -682,6 +716,8 @@ ovms_cc_library(
                 })
             + select({
             "//conditions:default": [
+                "mediapipe_internal_graph_executor_constants",
+                "mediapipe_internal_graphqueue",
                 "@mediapipe_calculators//:mediapipe_calculators", # Need this dependencies here because we use ovms/src - cannot add in ovms_dependencies because we copy src directory later in Dockerfile
                 "@mediapipe//mediapipe/graphs/holistic_tracking:holistic_tracking_to_render_data",
                 "@mediapipe//mediapipe/graphs/iris_tracking:iris_tracking_cpu_deps",
diff --git a/src/kfs_frontend/kfs_graph_executor_impl.cpp b/src/kfs_frontend/kfs_graph_executor_impl.cpp
index 034f6f0907..2751a49e94 100644
--- a/src/kfs_frontend/kfs_graph_executor_impl.cpp
+++ b/src/kfs_frontend/kfs_graph_executor_impl.cpp
@@ -26,6 +26,7 @@
 #include "../logging.hpp"
 #include "../mediapipe_internal/mediapipe_utils.hpp"
 #include "../mediapipe_internal/mediapipegraphdefinition.hpp"
+// TODO FIXME #include "../mediapipe_internal/graph_executor_constants.hpp"
 #include "../predict_request_validation_utils.hpp"
 #include "../status.hpp"
 #include "../tfs_frontend/tfs_utils.hpp"
@@ -925,6 +926,7 @@ static Status createPacketAndPushIntoGraph(const std::string& name, std::shared_
     }
     std::unique_ptr<T> inputTensor;
     OVMS_RETURN_ON_FAIL(deserializeTensor(name, *request, inputTensor, pythonBackend));
+    SPDLOG_ERROR("Current Timestamp before actual pushing:{}", timestamp.Value());
     MP_RETURN_ON_FAIL(graph.AddPacketToInputStream(
                           name,
                           ::mediapipe::packet_internal::Create(
@@ -1040,8 +1042,10 @@ static Status deserializeTimestampIfAvailable(
             return status;
         }
     } else {
+        SPDLOG_ERROR("Current Timestamp before setting:{}", timestamp.Value());
         auto now = std::chrono::system_clock::now();
         timestamp = ::mediapipe::Timestamp(std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
+        SPDLOG_ERROR("Current Timestamp setting:{}", timestamp.Value());
     }
     return StatusCode::OK;
 }
diff --git a/src/logging.cpp b/src/logging.cpp
index e89fce9a07..c07bb6f8d4 100644
--- a/src/logging.cpp
+++ b/src/logging.cpp
@@ -41,7 +41,8 @@ std::shared_ptr<spdlog::logger> rerank_calculator_logger = std::make_shared<spdl
 #if (OV_TRACE == 1)
 std::shared_ptr<spdlog::logger> ov_logger = std::make_shared<spdlog::logger>("openvino");
 #endif
-const std::string default_pattern = "[%Y-%m-%d %T.%e][%t][%n][%l][%s:%#] %v";
+//const std::string default_pattern = "[%i] [%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v";
+const std::string default_pattern = "[%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v";
 
 static void set_log_level(const std::string log_level, std::shared_ptr<spdlog::logger> logger) {
     logger->set_level(spdlog::level::info);
diff --git a/src/mediapipe_internal/graph_executor_constants.hpp b/src/mediapipe_internal/graph_executor_constants.hpp
new file mode 100644
index 0000000000..ff565769ce
--- /dev/null
+++ b/src/mediapipe_internal/graph_executor_constants.hpp
@@ -0,0 +1,34 @@
+//*****************************************************************************
+// Copyright 2026 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+namespace ovms {
+
+inline const std::string PYTHON_SESSION_SIDE_PACKET_TAG = "py";
+inline const std::string LLM_SESSION_SIDE_PACKET_TAG = "llm";
+inline const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes";
+inline const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable";
+inline const std::string RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable";
+inline const std::string STT_SESSION_SIDE_PACKET_TAG = "s2t_servable";
+inline const std::string TTS_SESSION_SIDE_PACKET_TAG = "t2s_servable";
+inline const std::string PYTHON_SIDE_PACKET_NAME = "py";
+inline const std::string LLM_SESSION_PACKET_NAME = "llm";
+inline constexpr int64_t STARTING_TIMESTAMP_VALUE = 0;
+
+}  // namespace ovms
diff --git a/src/mediapipe_internal/graph_side_packets.hpp b/src/mediapipe_internal/graph_side_packets.hpp
new file mode 100644
index 0000000000..66b0134726
--- /dev/null
+++ b/src/mediapipe_internal/graph_side_packets.hpp
@@ -0,0 +1,69 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+namespace ovms {
+
+// Forward declarations - only shared_ptrs are stored so full definitions are not needed
+class PythonNodeResources;
+class GenAiServable;
+struct ImageGenerationPipelines;
+struct EmbeddingsServable;
+struct RerankServable;
+struct SttServable;
+class TtsServable;
+
+using PythonNodeResourcesMap = std::unordered_map<std::string, std::shared_ptr<PythonNodeResources>>;
+using GenAiServableMap = std::unordered_map<std::string, std::shared_ptr<GenAiServable>>;
+using RerankServableMap = std::unordered_map<std::string, std::shared_ptr<RerankServable>>;
+using SttServableMap = std::unordered_map<std::string, std::shared_ptr<SttServable>>;
+using TtsServableMap = std::unordered_map<std::string, std::shared_ptr<TtsServable>>;
+using EmbeddingsServableMap = std::unordered_map<std::string, std::shared_ptr<EmbeddingsServable>>;
+using ImageGenerationPipelinesMap = std::unordered_map<std::string, std::shared_ptr<ImageGenerationPipelines>>;
+
+struct GraphSidePackets {
+    PythonNodeResourcesMap pythonNodeResourcesMap;
+    GenAiServableMap genAiServableMap;
+    ImageGenerationPipelinesMap imageGenPipelinesMap;
+    EmbeddingsServableMap embeddingsServableMap;
+    RerankServableMap rerankServableMap;
+    SttServableMap sttServableMap;
+    TtsServableMap ttsServableMap;
+    void clear() {
+        pythonNodeResourcesMap.clear();
+        genAiServableMap.clear();
+        imageGenPipelinesMap.clear();
+        embeddingsServableMap.clear();
+        rerankServableMap.clear();
+        sttServableMap.clear();
+        ttsServableMap.clear();
+    }
+    bool empty() {
+        return (pythonNodeResourcesMap.empty() &&
+                genAiServableMap.empty() &&
+                imageGenPipelinesMap.empty() &&
+                embeddingsServableMap.empty() &&
+                rerankServableMap.empty() &&
+                sttServableMap.empty() &&
+                ttsServableMap.empty());
+    }
+};
+
+}  // namespace ovms
diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp
new file mode 100644
index 0000000000..a3e96febb8
--- /dev/null
+++ b/src/mediapipe_internal/graphqueue.cpp
@@ -0,0 +1,119 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include "graphqueue.hpp"
+
+#include <atomic>
+#include <condition_variable>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <queue>
+#include <thread>
+#include <utility>
+#include <vector>
+
+#include "../queue.hpp"
+#include "src/python/pythonnoderesources.hpp"
+#include "src/llm/servable.hpp"
+
+#include "mediapipe/framework/calculator_graph.h"
+#include "mediapipe/framework/port/status.h"
+
+#include "graph_executor_constants.hpp"
+//#include "mediapipegraphexecutor.hpp"  // for side packet tag names
+#include "outputstreamobserver.hpp"
+namespace ovms {
+GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr<GraphSidePackets> sidePacketMaps, int streamsLength) :
+    Queue(streamsLength),
+    sidePacketMaps(sidePacketMaps) {
+    SPDLOG_ERROR("ER Constr graph queue:{}", (void*)this);
+    inferRequests.reserve(streamsLength);
+    // TODO FIXME split constructor to init to handle retCodes?
+    for (auto i = 0; i < streamsLength; ++i) {
+        auto gh = std::make_shared<GraphHelper>();
+        gh->graph = std::make_shared<::mediapipe::CalculatorGraph>();
+        gh->currentTimestamp = ::mediapipe::Timestamp(0);
+
+        auto absStatus = gh->graph->Initialize(config);
+        if (!absStatus.ok()) {
+            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
+            throw 42;
+        }
+        for (auto& name : config.output_stream()) {
+            std::string streamName = getStreamName(name);
+            gh->outStreamObservers[streamName] = std::shared_ptr<OutputStreamObserverI>(new NullOutputStreamObserver());  // TODO use at() FIXME
+            auto& perGraphObserverFunctor = gh->outStreamObservers[streamName];
+            absStatus = gh->graph->ObserveOutputStream(streamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); });  // TODO FIXME throw?
+            if (!absStatus.ok()) {
+                SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
+                throw 42;
+            }
+        }
+        std::map<std::string, mediapipe::Packet> inputSidePackets;
+#if (PYTHON_DISABLE == 0)
+        inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<PythonNodeResourcesMap>(sidePacketMaps->pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+#endif
+        inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiServableMap>(sidePacketMaps->genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<ImageGenerationPipelinesMap>(sidePacketMaps->imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(sidePacketMaps->embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<RerankServableMap>(sidePacketMaps->rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<SttServableMap>(sidePacketMaps->sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<TtsServableMap>(sidePacketMaps->ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        SPDLOG_ERROR("ER");
+        absStatus = gh->graph->StartRun(inputSidePackets);
+        SPDLOG_ERROR("ER");
+        if (!absStatus.ok()) {
+            SPDLOG_ERROR("Input sidePackets size:{}", inputSidePackets.size());
+            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
+            throw 42;
+        }
+
+        SPDLOG_ERROR("ER");
+        inferRequests.emplace_back(std::move(gh));
+        SPDLOG_ERROR("ER");
+    }
+}
+GraphQueue::~GraphQueue() {
+    SPDLOG_ERROR("ER Destroy graph queue:{}", (void*)this);
+    for (auto& graphHelper : inferRequests) {
+        auto absStatus = graphHelper->graph->WaitUntilIdle();
+        if (!absStatus.ok()) {
+            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
+            //        throw 42.2;
+        }
+        absStatus = graphHelper->graph->CloseAllPacketSources();
+        if (!absStatus.ok()) {
+            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
+            //      throw "as";
+        }
+        absStatus = graphHelper->graph->WaitUntilDone();
+        if (!absStatus.ok()) {
+            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
+            //    throw 42.2;
+        }
+        graphHelper->graph->Cancel();
+        if (!absStatus.ok()) {
+            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
+            //    throw 42.2;
+        }
+        SPDLOG_ERROR("ER");
+        graphHelper->graph.reset();
+        SPDLOG_ERROR("ER");
+    }
+    SPDLOG_ERROR("ER Destroy graph queue:{}", (void*)this);
+}
+}  // namespace ovms
diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp
new file mode 100644
index 0000000000..7c4d89b33f
--- /dev/null
+++ b/src/mediapipe_internal/graphqueue.hpp
@@ -0,0 +1,89 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <atomic>
+#include <condition_variable>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <queue>
+#include <thread>
+#include <utility>
+#include <vector>
+
+#include "../queue.hpp"
+
+#include "mediapipe/framework/calculator_graph.h"
+#include "mediapipe/framework/port/status.h"
+
+#include "graph_executor_constants.hpp"
+#include "graph_side_packets.hpp"
+#include "outputstreamobserver.hpp"
+namespace ovms {
+class OutputStreamObserverI;
+class NullOutputStreamObserver;
+struct GraphHelper {
+    std::shared_ptr<::mediapipe::CalculatorGraph> graph;  // TODO FIXME this does not have to be shared_ptr
+    std::unordered_map<std::string, std::shared_ptr<OutputStreamObserverI>> outStreamObservers;
+    ::mediapipe::Timestamp currentTimestamp;  // TODO FIXME const
+    // TODO FIXME move constr/=
+    GraphHelper() = default;
+    GraphHelper(const GraphHelper&) = delete;
+    GraphHelper& operator=(const GraphHelper&) = delete;
+    GraphHelper(GraphHelper&& gh) :
+        graph(std::move(gh.graph)),
+        outStreamObservers(std::move(gh.outStreamObservers)),
+        currentTimestamp(gh.currentTimestamp) {}
+    GraphHelper& operator=(GraphHelper&& gh) = default;
+};
+// we need to keep Graph alive during MP reload hence shared_ptr
+//class GraphQueue : public Queue<std::shared_ptr<::mediapipe::CalculatorGraph>> {
+class GraphQueue : public Queue<std::shared_ptr<GraphHelper>> {
+    public: // XXX TODO make private? we need to acces in mediapipegraphdefinition to set side packets though
+    std::shared_ptr<GraphSidePackets> sidePacketMaps;
+
+public:
+    GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr<GraphSidePackets> sidePacketMaps, int streamsLength);
+    ~GraphQueue();
+};
+
+struct GraphIdGuard {
+    std::weak_ptr<GraphQueue> weakQueue;
+    const int id;
+    std::shared_ptr<GraphHelper> gh;
+    // TODO FIXME shared_ptr
+    ::mediapipe::CalculatorGraph& graph;
+    GraphIdGuard(std::shared_ptr<GraphQueue>& queue) :
+        weakQueue(queue),
+        id(queue->getIdleStream().get()),
+        gh((queue->getInferRequest(id))),
+        graph(*gh->graph) {
+        SPDLOG_ERROR("ER Guard construct this:{}", (void*)this);
+    }
+    GraphIdGuard(GraphIdGuard&&) = default;
+    GraphIdGuard(const GraphIdGuard&) = delete;
+    ~GraphIdGuard() {
+        auto existingQueue = weakQueue.lock();
+        SPDLOG_ERROR("ER DEstroy Guard begin qu:{}", (void*)existingQueue.get());
+        if (existingQueue)
+            existingQueue->returnStream(this->id);
+        SPDLOG_ERROR("ER Destroy Guard end qu:{}", (void*)existingQueue.get());
+        SPDLOG_ERROR("ER Guard destroy this:{}", (void*)this);
+    }
+};
+}  // namespace ovms
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp
index 9047765e75..8b028d186b 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.cpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp
@@ -24,25 +24,25 @@
 #include <utility>
 #include <vector>
 
-#include "../execution_context.hpp"
-#include "../filesystem.hpp"
-#include "../kfs_frontend/kfs_utils.hpp"
-#include "../kfs_frontend/kfs_request_utils.hpp"
-#include "../deserialization_main.hpp"
-#include "../metric.hpp"
-#include "../model_metric_reporter.hpp"
-#include "../modelmanager.hpp"
-#include "../ov_utils.hpp"
-#include "../llm/servable.hpp"
-#include "../llm/servable_initializer.hpp"
+#include "src/execution_context.hpp"
+#include "src/filesystem.hpp"
+#include "src/kfs_frontend/kfs_utils.hpp"
+#include "src/kfs_frontend/kfs_request_utils.hpp"
+#include "src/deserialization_main.hpp"
+#include "src/metric.hpp"
+#include "src/model_metric_reporter.hpp"
+#include "src/modelmanager.hpp"
+#include "src/ov_utils.hpp"
+#include "src/llm/servable.hpp"
+#include "src/llm/servable_initializer.hpp"
 #if (PYTHON_DISABLE == 0)
-#include "../python/pythonnoderesources.hpp"
+#include "src/python/pythonnoderesources.hpp"
 #endif
-#include "../status.hpp"
-#include "../stringutils.hpp"
-#include "../tensorinfo.hpp"
-#include "../timer.hpp"
-#include "../version.hpp"
+#include "src/status.hpp"
+#include "src/stringutils.hpp"
+#include "src/tensorinfo.hpp"
+#include "src/timer.hpp"
+#include "src/version.hpp"
 #include "mediapipe/framework/port/parse_text_proto.h"
 #include "mediapipe/framework/port/status.h"
 #include "mediapipe_utils.hpp"
@@ -54,6 +54,13 @@
 #include "src/image_gen/imagegen_init.hpp"
 #include "src/image_gen/image_gen_calculator.pb.h"
 
+#include "src/sidepacket_servable.hpp"
+#include "src/embeddings/embeddings_servable.hpp"
+#include "src/rerank/rerank_servable.hpp"
+#include "src/audio/speech_to_text/s2t_servable.hpp"
+#include "src/audio/text_to_speech/t2s_servable.hpp"
+
+
 namespace ovms {
 MediapipeGraphConfig MediapipeGraphDefinition::MGC;
 
@@ -129,7 +136,7 @@ Status MediapipeGraphDefinition::dryInitializeTest() {
 }
 Status MediapipeGraphDefinition::validate(ModelManager& manager) {
     SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Started validation of mediapipe: {}", getName());
-    if (!this->sidePacketMaps.empty()) {
+    if (!this->sidePacketMaps->empty()) {
         SPDLOG_ERROR("Internal Error: MediaPipe definition is in unexpected state.");
         return StatusCode::INTERNAL_ERROR;
     }
@@ -146,12 +153,14 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) {
     if (!validationResult.ok()) {
         return validationResult;
     }
+    SPDLOG_ERROR("ER");
     std::unique_lock lock(metadataMtx);
     auto status = createInputsInfo();
     if (!status.ok()) {
         SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create inputs info for mediapipe graph definition: {}", getName());
         return status;
     }
+    SPDLOG_ERROR("ER");
     status = createOutputsInfo();
     if (!status.ok()) {
         SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create outputs info for mediapipe graph definition: {}", getName());
@@ -177,6 +186,10 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) {
     if (!status.ok()) {
         return status;
     }
+    // TODO FIXME @atobisze
+    SPDLOG_ERROR("ER");
+    this->queue = std::make_shared<GraphQueue>(this->config, this->sidePacketMaps, 12);
+    SPDLOG_ERROR("XXX ER GraphQueue:{}", (void*)this->queue.get());
 
     lock.unlock();
     notifier.passed = true;
@@ -192,12 +205,20 @@ MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name,
     MetricRegistry* registry,
     const MetricConfig* metricConfig,
     PythonBackend* pythonBackend) :
+    sidePacketMaps(std::make_shared<GraphSidePackets>()),
     name(name),
     status(SCHEDULER_CLASS_NAME, this->name),
     pythonBackend(pythonBackend),
     reporter(std::make_unique<MediapipeServableMetricReporter>(metricConfig, registry, name)) {
     mgconfig = config;
     passKfsRequestFlag = false;
+    SPDLOG_ERROR("XXX ER new PythonNodeResourcesMap:{}", (void*)&this->sidePacketMaps->pythonNodeResourcesMap);
+    SPDLOG_ERROR("XXX ER new genAiServableMap:{}", (void*)&this->sidePacketMaps->genAiServableMap);
+    /*if (!sharedThreadPool) {
+        SPDLOG_ERROR("Created shared Thread Pool XXX");
+        //sharedThreadPool = std::make_shared<mediapipe::ThreadPoolExecutor>(std::thread::hardware_concurrency());  // TODO FIXME should be in MP factory
+    }*/
+   // TODO FIXME illegal constructor as we do not create queue here
 }
 
 Status MediapipeGraphDefinition::createInputsInfo() {
@@ -261,11 +282,12 @@ Status MediapipeGraphDefinition::create(std::unique_ptr<MediapipeGraphExecutor>&
         return status;
     }
     SPDLOG_DEBUG("Creating Mediapipe graph executor: {}", getName());
-
+    GraphIdGuard graphIdGuard(this->queue);  // TODO timeout?
+    SPDLOG_ERROR("ER");
     pipeline = std::make_unique<MediapipeGraphExecutor>(getName(), std::to_string(getVersion()),
         this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames,
-        this->sidePacketMaps,
-        this->pythonBackend, this->reporter.get());
+        *this->sidePacketMaps,
+        this->pythonBackend, this->reporter.get(), std::move(graphIdGuard));
     return status;
 }
 
@@ -339,12 +361,20 @@ Status MediapipeGraphDefinition::reload(ModelManager& manager, const MediapipeGr
         std::this_thread::sleep_for(std::chrono::microseconds(1));
     }
     this->mgconfig = config;
-    this->sidePacketMaps.clear();
+    //this->pythonNodeResourcesMap.reset();
+    //this->genAiServableMap.reset();
+    this->queue.reset();
+    SPDLOG_ERROR("XXX ER cleared queue");
+    this->sidePacketMaps.reset(); 
+    SPDLOG_ERROR("XXX ER cleared sidePacketMaps");
+    // TODO FIXME @atobisze NOW we created new maps here before
     return validate(manager);
 }
 
 void MediapipeGraphDefinition::retire(ModelManager& manager) {
-    this->sidePacketMaps.clear();
+    this->queue.reset();
+    // now we reset shared ptr maps so ongoing executions can continue
+    this->sidePacketMaps.reset();
     this->status.handle(RetireEvent());
 }
 
@@ -411,7 +441,7 @@ class ResourcesCleaningGuard {
         resources(resources) {}
     ~ResourcesCleaningGuard() {
         if (shouldCleanup) {
-            resources.clear();
+            resources.clear(); // TODO FIXME @atobisze check
         }
     }
     void disableCleaning() {
@@ -423,7 +453,7 @@ Status MediapipeGraphDefinition::initializeNodes() {
     SPDLOG_INFO("MediapipeGraphDefinition initializing graph nodes");
     for (int i = 0; i < config.node().size(); i++) {
 #if (PYTHON_DISABLE == 0)
-        auto& pythonNodeResourcesMap = this->sidePacketMaps.pythonNodeResourcesMap;
+        auto& pythonNodeResourcesMap = this->sidePacketMaps->pythonNodeResourcesMap;
         if (config.node(i).calculator() == PYTHON_NODE_CALCULATOR_NAME) {
             ResourcesCleaningGuard<PythonNodeResourcesMap> pythonResourcesCleaningGuard(pythonNodeResourcesMap);
             if (!config.node(i).node_options().size()) {
@@ -453,7 +483,7 @@ Status MediapipeGraphDefinition::initializeNodes() {
 #endif
         // Passed to both calculators that require LLM Engine (gRPC KServe & HTTP OpenAI)
         if (endsWith(config.node(i).calculator(), LLM_NODE_CALCULATOR_NAME)) {
-            auto& genAiServableMap = this->sidePacketMaps.genAiServableMap;
+            auto& genAiServableMap = this->sidePacketMaps->genAiServableMap;
             ResourcesCleaningGuard<GenAiServableMap> genAiServablesCleaningGuard(genAiServableMap);
             if (!config.node(i).node_options().size()) {
                 SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node missing options in graph: {}. ", this->name);
@@ -479,7 +509,7 @@ Status MediapipeGraphDefinition::initializeNodes() {
         }
         // Passed to both calculators that require Image Generation pipelines
         if (endsWith(config.node(i).calculator(), IMAGE_GEN_CALCULATOR_NAME)) {
-            auto& imageGenPipelinesMap = this->sidePacketMaps.imageGenPipelinesMap;
+            auto& imageGenPipelinesMap = this->sidePacketMaps->imageGenPipelinesMap;
             ResourcesCleaningGuard<ImageGenerationPipelinesMap> guard(imageGenPipelinesMap);
             if (!config.node(i).node_options().size()) {
                 SPDLOG_LOGGER_ERROR(modelmanager_logger, "Image Gen node missing options in graph: {}. ", this->name);
@@ -513,7 +543,7 @@ Status MediapipeGraphDefinition::initializeNodes() {
             guard.disableCleaning();
         }
         if (endsWith(config.node(i).calculator(), EMBEDDINGS_NODE_CALCULATOR_NAME)) {
-            auto& embeddingsServableMap = this->sidePacketMaps.embeddingsServableMap;
+            auto& embeddingsServableMap = this->sidePacketMaps->embeddingsServableMap;
             ResourcesCleaningGuard<EmbeddingsServableMap> embeddingsServablesCleaningGuard(embeddingsServableMap);
             if (!config.node(i).node_options().size()) {
                 SPDLOG_LOGGER_ERROR(modelmanager_logger, "Embeddings node missing options in graph: {}. ", this->name);
@@ -546,7 +576,7 @@ Status MediapipeGraphDefinition::initializeNodes() {
             embeddingsServablesCleaningGuard.disableCleaning();
         }
         if (endsWith(config.node(i).calculator(), RERANK_NODE_CALCULATOR_NAME)) {
-            auto& rerankServableMap = this->sidePacketMaps.rerankServableMap;
+            auto& rerankServableMap = this->sidePacketMaps->rerankServableMap;
             ResourcesCleaningGuard<RerankServableMap> rerankServablesCleaningGuard(rerankServableMap);
             if (!config.node(i).node_options().size()) {
                 SPDLOG_LOGGER_ERROR(modelmanager_logger, "Rerank node missing options in graph: {}. ", this->name);
@@ -569,7 +599,7 @@ Status MediapipeGraphDefinition::initializeNodes() {
             rerankServablesCleaningGuard.disableCleaning();
         }
         if (endsWith(config.node(i).calculator(), STT_NODE_CALCULATOR_NAME)) {
-            auto& sttServableMap = this->sidePacketMaps.sttServableMap;
+            auto& sttServableMap = this->sidePacketMaps->sttServableMap;
             ResourcesCleaningGuard<SttServableMap> sttServablesCleaningGuard(sttServableMap);
             if (!config.node(i).node_options().size()) {
                 SPDLOG_LOGGER_ERROR(modelmanager_logger, "SpeechToText node missing options in graph: {}. ", this->name);
@@ -595,7 +625,7 @@ Status MediapipeGraphDefinition::initializeNodes() {
             sttServablesCleaningGuard.disableCleaning();
         }
         if (endsWith(config.node(i).calculator(), TTS_NODE_CALCULATOR_NAME)) {
-            auto& ttsServableMap = this->sidePacketMaps.ttsServableMap;
+            auto& ttsServableMap = this->sidePacketMaps->ttsServableMap;
             ResourcesCleaningGuard<TtsServableMap> ttsServablesCleaningGuard(ttsServableMap);
             if (!config.node(i).node_options().size()) {
                 SPDLOG_LOGGER_ERROR(modelmanager_logger, "TextToSpeech node missing options in graph: {}. ", this->name);
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp
index 14c9e0679f..2a0804b01e 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.hpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp
@@ -40,14 +40,10 @@
 #pragma GCC diagnostic pop
 #pragma warning(pop)
 
+#include "graph_side_packets.hpp"
 #include "mediapipegraphconfig.hpp"
 #include "packettypes.hpp"
-
-#include "../sidepacket_servable.hpp"
-#include "../embeddings/embeddings_servable.hpp"
-#include "../rerank/rerank_servable.hpp"
-#include "../audio/speech_to_text/s2t_servable.hpp"
-#include "../audio/text_to_speech/t2s_servable.hpp"
+#include "graphqueue.hpp"
 
 namespace ovms {
 class MediapipeGraphDefinitionUnloadGuard;
@@ -58,44 +54,6 @@ class ModelManager;
 class MediapipeGraphExecutor;
 class Status;
 class PythonBackend;
-class PythonNodeResources;
-class GenAiServable;
-struct ImageGenerationPipelines;
-using PythonNodeResourcesMap = std::unordered_map<std::string, std::shared_ptr<PythonNodeResources>>;
-using GenAiServableMap = std::unordered_map<std::string, std::shared_ptr<GenAiServable>>;
-using RerankServableMap = std::unordered_map<std::string, std::shared_ptr<RerankServable>>;
-using SttServableMap = std::unordered_map<std::string, std::shared_ptr<SttServable>>;
-using TtsServableMap = std::unordered_map<std::string, std::shared_ptr<TtsServable>>;
-using EmbeddingsServableMap = std::unordered_map<std::string, std::shared_ptr<EmbeddingsServable>>;
-using ImageGenerationPipelinesMap = std::unordered_map<std::string, std::shared_ptr<ImageGenerationPipelines>>;
-
-struct GraphSidePackets {
-    PythonNodeResourcesMap pythonNodeResourcesMap;
-    GenAiServableMap genAiServableMap;
-    ImageGenerationPipelinesMap imageGenPipelinesMap;
-    EmbeddingsServableMap embeddingsServableMap;
-    RerankServableMap rerankServableMap;
-    SttServableMap sttServableMap;
-    TtsServableMap ttsServableMap;
-    void clear() {
-        pythonNodeResourcesMap.clear();
-        genAiServableMap.clear();
-        imageGenPipelinesMap.clear();
-        embeddingsServableMap.clear();
-        rerankServableMap.clear();
-        sttServableMap.clear();
-        ttsServableMap.clear();
-    }
-    bool empty() {
-        return (pythonNodeResourcesMap.empty() &&
-                genAiServableMap.empty() &&
-                imageGenPipelinesMap.empty() &&
-                embeddingsServableMap.empty() &&
-                rerankServableMap.empty() &&
-                sttServableMap.empty() &&
-                ttsServableMap.empty());
-    }
-};
 
 class MediapipeGraphDefinition {
     friend MediapipeGraphDefinitionUnloadGuard;
@@ -142,7 +100,7 @@ class MediapipeGraphDefinition {
     static constexpr model_version_t VERSION = 1;
 
 protected:
-    GraphSidePackets sidePacketMaps;
+    std::shared_ptr<GraphSidePackets> sidePacketMaps;
 
     struct ValidationResultNotifier {
         ValidationResultNotifier(PipelineDefinitionStatus& status, std::condition_variable& loadedNotify) :
@@ -179,7 +137,7 @@ class MediapipeGraphDefinition {
     PipelineDefinitionStatus status;
 
     MediapipeGraphConfig mgconfig;
-    ::mediapipe::CalculatorGraphConfig config;
+    ::mediapipe::CalculatorGraphConfig config;  // TODO rename configs
 
     Status createInputsInfo();
     Status createOutputsInfo();
@@ -209,6 +167,7 @@ class MediapipeGraphDefinition {
     PythonBackend* pythonBackend;
 
     std::unique_ptr<MediapipeServableMetricReporter> reporter;
+    std::shared_ptr<GraphQueue> queue;
 };
 
 class MediapipeGraphDefinitionUnloadGuard {
diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp
index 93b53fdf8e..c8825f82c8 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.cpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp
@@ -19,6 +19,8 @@
 #include <utility>
 #include <vector>
 
+#include "graph_executor_constants.hpp"
+
 #pragma warning(push)
 #pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246)
 #pragma GCC diagnostic push
@@ -28,10 +30,10 @@
 #pragma warning(pop)
 
 #if (PYTHON_DISABLE == 0)
-#include "../python/python_backend.hpp"
+#include "src/python/python_backend.hpp"
 #endif
 
-#include "../image_gen/pipelines.hpp"
+#include "src/image_gen/pipelines.hpp"
 
 namespace ovms {
 
@@ -50,7 +52,8 @@ MediapipeGraphExecutor::MediapipeGraphExecutor(
     const SttServableMap& sttServableMap,
     const TtsServableMap& ttsServableMap,
     PythonBackend* pythonBackend,
-    MediapipeServableMetricReporter* mediapipeServableMetricReporter) :
+    MediapipeServableMetricReporter* mediapipeServableMetricReporter,
+    GraphIdGuard&& guard) :
     name(name),
     version(version),
     config(config),
@@ -60,8 +63,9 @@ MediapipeGraphExecutor::MediapipeGraphExecutor(
     outputNames(std::move(outputNames)),
     sidePacketMaps({pythonNodeResourcesMap, llmNodeResourcesMap, {}, embeddingsServableMap, rerankServableMap, sttServableMap, ttsServableMap}),
     pythonBackend(pythonBackend),
-    currentStreamTimestamp(STARTING_TIMESTAMP),
-    mediapipeServableMetricReporter(mediapipeServableMetricReporter) {}
+    currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)),
+    mediapipeServableMetricReporter(mediapipeServableMetricReporter),
+    guard(std::move(guard)) {}
 MediapipeGraphExecutor::MediapipeGraphExecutor(
     const std::string& name,
     const std::string& version,
@@ -72,7 +76,8 @@ MediapipeGraphExecutor::MediapipeGraphExecutor(
     std::vector<std::string> outputNames,
     const GraphSidePackets& sidePacketMaps,
     PythonBackend* pythonBackend,
-    MediapipeServableMetricReporter* mediapipeServableMetricReporter) :
+    MediapipeServableMetricReporter* mediapipeServableMetricReporter,
+    GraphIdGuard&& guard) :
     name(name),
     version(version),
     config(config),
@@ -82,16 +87,9 @@ MediapipeGraphExecutor::MediapipeGraphExecutor(
     outputNames(std::move(outputNames)),
     sidePacketMaps(sidePacketMaps),
     pythonBackend(pythonBackend),
-    currentStreamTimestamp(STARTING_TIMESTAMP),
-    mediapipeServableMetricReporter(mediapipeServableMetricReporter) {}
+    currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)),
+    mediapipeServableMetricReporter(mediapipeServableMetricReporter),
+    guard(std::move(guard)) {}
 
-const std::string MediapipeGraphExecutor::PYTHON_SESSION_SIDE_PACKET_TAG = "py";
-const std::string MediapipeGraphExecutor::LLM_SESSION_SIDE_PACKET_TAG = "llm";
-const std::string MediapipeGraphExecutor::IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes";
-const std::string MediapipeGraphExecutor::EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable";
-const std::string MediapipeGraphExecutor::RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable";
-const std::string MediapipeGraphExecutor::STT_SESSION_SIDE_PACKET_TAG = "s2t_servable";
-const std::string MediapipeGraphExecutor::TTS_SESSION_SIDE_PACKET_TAG = "t2s_servable";
-const ::mediapipe::Timestamp MediapipeGraphExecutor::STARTING_TIMESTAMP = ::mediapipe::Timestamp(0);
 
 }  // namespace ovms
diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp
index c165469395..21159d03a3 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.hpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp
@@ -36,9 +36,11 @@
 #include "mediapipe/framework/port/status.h"
 #pragma GCC diagnostic pop
 #pragma warning(pop)
+#include "graph_executor_constants.hpp"
 #include "mediapipe_utils.hpp"
 #include "mediapipegraphdefinition.hpp"  // for version in response and PythonNodeResourceMap
 #include "packettypes.hpp"
+#include "graphqueue.hpp"
 
 namespace ovms {
 class PythonBackend;
@@ -71,9 +73,32 @@ inline StatusCode mediapipeAbslToOvmsStatus(absl::StatusCode code) {
     }                                                                    \
     _Pragma("warning(pop)")
 
+template <typename RequestType, typename ResponseType>
+struct MyFunctor : public OutputStreamObserverI {
+    const std::string& requestId;
+    MediapipeGraphExecutor& exec;
+    const std::string outputStreamName;
+    mediapipe_packet_type_enum packetType;
+    ResponseType& response;
+    MyFunctor(const std::string& outputStreamName, mediapipe_packet_type_enum packetType, MediapipeGraphExecutor& exec, const RequestType& request, ResponseType& response) :
+        requestId(getRequestId(request)),
+        exec(exec),
+        outputStreamName(outputStreamName),
+        packetType(packetType),
+        response(response) {
+        SPDLOG_ERROR("ER MyFunctor:{} observer constructed:{}", outputStreamName, (void*)this);
+    }
+    absl::Status handlePacket(const ::mediapipe::Packet& packet) override;
+    ~MyFunctor() {
+        SPDLOG_ERROR("ER Destroy Functor:{} this:{}", outputStreamName, (void*)this);
+    }
+};
 class MediapipeGraphExecutor {
+public:
     const std::string name;
     const std::string version;
+
+private:
     const ::mediapipe::CalculatorGraphConfig config;
     stream_types_mapping_t inputTypes;
     stream_types_mapping_t outputTypes;
@@ -86,17 +111,11 @@ class MediapipeGraphExecutor {
     ::mediapipe::Timestamp currentStreamTimestamp;
 
     MediapipeServableMetricReporter* mediapipeServableMetricReporter;
+    GraphIdGuard guard;
 
 public:
-    static const std::string PYTHON_SESSION_SIDE_PACKET_TAG;
-    static const std::string LLM_SESSION_SIDE_PACKET_TAG;
-    static const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG;
-    static const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG;
-    static const std::string RERANK_SESSION_SIDE_PACKET_TAG;
-    static const std::string STT_SESSION_SIDE_PACKET_TAG;
-    static const std::string TTS_SESSION_SIDE_PACKET_TAG;
-    static const ::mediapipe::Timestamp STARTING_TIMESTAMP;
 
+    [[deprecated("Use constructor with side packets instead")]]
     MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config,
         stream_types_mapping_t inputTypes,
         stream_types_mapping_t outputTypes,
@@ -108,14 +127,17 @@ class MediapipeGraphExecutor {
         const SttServableMap& sttServableMap,
         const TtsServableMap& ttsServableMap,
         PythonBackend* pythonBackend,
-        MediapipeServableMetricReporter* mediapipeServableMetricReporter);
-    MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config,
+        MediapipeServableMetricReporter* mediapipeServableMetricReporter,
+        GraphIdGuard&& guard);
+    MediapipeGraphExecutor(const std::string& name,
+        const std::string& version,
+        const ::mediapipe::CalculatorGraphConfig& config,
         stream_types_mapping_t inputTypes,
         stream_types_mapping_t outputTypes,
         std::vector<std::string> inputNames, std::vector<std::string> outputNames,
         const GraphSidePackets& sidePacketMaps,
         PythonBackend* pythonBackend,
-        MediapipeServableMetricReporter* mediapipeServableMetricReporter);
+        MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard);
 
     template <typename RequestType, typename ResponseType>
     Status infer(const RequestType* request, ResponseType* response, ExecutionContext executionContext) {
@@ -123,20 +145,28 @@ class MediapipeGraphExecutor {
         SPDLOG_DEBUG("Start unary KServe request mediapipe graph: {} execution", this->name);
         MetricCounterGuard failedRequestsGuard(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, false));
         MetricGaugeGuard currentGraphsGuard(this->mediapipeServableMetricReporter->currentGraphs.get());
-        ::mediapipe::CalculatorGraph graph;
-        MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
-        enum : unsigned int {
-            PROCESS,
-            TIMER_END2
-        };
-        Timer<TIMER_END2> timer;
-        timer.start(PROCESS);
-        std::unordered_map<std::string, ::mediapipe::OutputStreamPoller> outputPollers;
+        ::mediapipe::CalculatorGraph& graph = this->guard.graph;
+        SPDLOG_ERROR("SetExecutor XXX");
+        //std::ignore = graph.SetExecutor("", sharedThreadPool);  // TODO FIXME
+        SPDLOG_ERROR("Start unary KServe request mediapipe graph: {} initializationXXXbegin", this->name);
+        //MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
+        //std::unordered_map<std::string, ::mediapipe::OutputStreamPoller> outputPollers;
         for (auto& name : this->outputNames) {
             if (name.empty()) {
                 SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", name);
                 return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR;
             }
+            SPDLOG_ERROR("ER XXX Will construct observer for guard:{}, helper:{}, graph:{}", (void*)&this->guard, (void*)this->guard.gh.get(), (void*)&graph);
+            guard.gh->outStreamObservers[name] = std::make_shared<MyFunctor<RequestType, ResponseType>>(name, this->outputTypes.at(name), *this, *request, *response);  // TODO use at() FIXME
+            /*
+            ///////////////
+            ///// OutputStreamPollers
+            ///////////
+            // CreateAPI Specific observer
+            // Replace guard ptr with new one
+            // What to do if
+            //MP_RETURN_ON_FAIL(graph.ObserveOutputStream(outputName, [&serverReaderWriter, &sendMutex, &outputName, &executionContext, this](const ::mediapipe::Packet& packet) -> absl::Status {
+
             auto absStatusOrPoller = graph.AddOutputStreamPoller(name);
             if (!absStatusOrPoller.ok()) {
                 const std::string absMessage = absStatusOrPoller.status().ToString();
@@ -144,26 +174,33 @@ class MediapipeGraphExecutor {
                 return Status(StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR, std::move(absMessage));
             }
             outputPollers.emplace(name, std::move(absStatusOrPoller).value());
+            */
         }
-        std::map<std::string, mediapipe::Packet> inputSidePackets;
+        /*std::map<std::string, mediapipe::Packet> inputSidePackets;
         OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, *request));
 #if (PYTHON_DISABLE == 0)
-        inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<PythonNodeResourcesMap>(this->sidePacketMaps.pythonNodeResourcesMap).At(STARTING_TIMESTAMP);
+        inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<PythonNodeResourcesMap>(this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
 #endif
-        inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiServableMap>(this->sidePacketMaps.genAiServableMap).At(STARTING_TIMESTAMP);
-        inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<ImageGenerationPipelinesMap>(this->sidePacketMaps.imageGenPipelinesMap).At(STARTING_TIMESTAMP);
-        inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(this->sidePacketMaps.embeddingsServableMap).At(STARTING_TIMESTAMP);
+        inputSidePackets[PYTHON_SIDE_PACKET_NAME] = mediapipe::MakePacket<PythonNodeResourcesMap>(*this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[LLM_SESSION_PACKET_NAME] = mediapipe::MakePacket<GenAiServableMap>(*this->sidePacketMaps.llmNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+
+        inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiServableMap>(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<ImageGenerationPipelinesMap>(this->sidePacketMaps.imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
 
-        inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<RerankServableMap>(this->sidePacketMaps.rerankServableMap).At(STARTING_TIMESTAMP);
-        inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<SttServableMap>(this->sidePacketMaps.sttServableMap).At(STARTING_TIMESTAMP);
-        inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<TtsServableMap>(this->sidePacketMaps.ttsServableMap).At(STARTING_TIMESTAMP);
+        inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<RerankServableMap>(this->sidePacketMaps.rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<SttServableMap>(this->sidePacketMaps.sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<TtsServableMap>(this->sidePacketMaps.ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
 
         MP_RETURN_ON_FAIL(graph.StartRun(inputSidePackets), std::string("start MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_START_ERROR);
 
         ::mediapipe::Packet packet;
         std::set<std::string> outputPollersWithReceivedPacket;
+        // TODO FIXME no mechanism to check that
+        */
 
         size_t numberOfPacketsCreated = 0;
+        SPDLOG_ERROR("Current Timestamp pushing:{}", this->guard.gh->currentTimestamp.Value());
         auto ovms_status = createAndPushPacketsImpl(
             std::shared_ptr<const RequestType>(request,
                 // Custom deleter to avoid deallocation by custom holder
@@ -173,8 +210,10 @@ class MediapipeGraphExecutor {
             this->inputTypes,
             this->pythonBackend,
             graph,
-            this->currentStreamTimestamp,
+            this->guard.gh->currentTimestamp,
+            //            this->currentStreamTimestamp,
             numberOfPacketsCreated);
+        SPDLOG_ERROR("Current Timestamp pushed:{}", this->guard.gh->currentTimestamp.Value());
         if (!ovms_status.ok()) {
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
             return ovms_status;
@@ -201,7 +240,9 @@ class MediapipeGraphExecutor {
         }
         MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code()));
 
-        MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR);
+        //        MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR);
+        //
+        /*
         for (auto& [outputStreamName, poller] : outputPollers) {
             size_t receivedOutputs = 0;
             SPDLOG_DEBUG("Will wait for output stream: {} packet", outputStreamName);
@@ -225,19 +266,21 @@ class MediapipeGraphExecutor {
             }
             SPDLOG_TRACE("Received all: {} packets for: {}", receivedOutputs, outputStreamName);
         }
-        status = graph.WaitUntilDone();
+        */
+        // status = graph.WaitUntilDone();
+        status = graph.WaitUntilIdle();
         if (!status.ok()) {  // Collect error metric after Process()
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
         }
         MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code()));
-        if (outputPollers.size() != outputPollersWithReceivedPacket.size()) {
+        /*        if (outputPollers.size() != outputPollersWithReceivedPacket.size()) {
             SPDLOG_DEBUG("Mediapipe failed to execute. Failed to receive all output packets");
             return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Unknown error during mediapipe execution");
-        }
-        timer.stop(PROCESS);
+        }*/
+        /*timer.stop(PROCESS);
         double processTime = timer.template elapsed<std::chrono::microseconds>(PROCESS);
         OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime);
-        INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext));
+        INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext));*/
         SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name);
         return StatusCode::OK;
     }
@@ -299,10 +342,10 @@ class MediapipeGraphExecutor {
                 OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, req));
 #if (PYTHON_DISABLE == 0)
                 inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<PythonNodeResourcesMap>(this->sidePacketMaps.pythonNodeResourcesMap)
-                                                                       .At(STARTING_TIMESTAMP);
+                                                                       .At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
 #endif
-                inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiServableMap>(this->sidePacketMaps.genAiServableMap).At(STARTING_TIMESTAMP);
-                inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(this->sidePacketMaps.embeddingsServableMap).At(STARTING_TIMESTAMP);
+                inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiServableMap>(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+                inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
                 // Add image generation side packet in case image generation allow for streaming
             }
 
@@ -394,4 +437,18 @@ class MediapipeGraphExecutor {
     }
 };
 
+template <typename RequestType, typename ResponseType>
+absl::Status MyFunctor<RequestType, ResponseType>::handlePacket(const ::mediapipe::Packet& packet) {
+    SPDLOG_ERROR("ER my functor:{}", (void*)this);
+    auto status = onPacketReadySerializeImpl(
+        this->requestId,
+        this->exec.name,
+        this->exec.version,
+        this->outputStreamName,
+        this->packetType,
+        packet,
+        response);
+    return status.ok() ? absl::OkStatus() : absl::Status(absl::StatusCode::kInternal, "Some error");
+    ;
+}
 }  // namespace ovms
diff --git a/src/mediapipe_internal/outputstreamobserver.hpp b/src/mediapipe_internal/outputstreamobserver.hpp
new file mode 100644
index 0000000000..e08b99fc43
--- /dev/null
+++ b/src/mediapipe_internal/outputstreamobserver.hpp
@@ -0,0 +1,63 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+#include <map>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "../execution_context.hpp"
+#include "../model_metric_reporter.hpp"
+#include "../profiler.hpp"
+#include "../status.hpp"
+#include "../timer.hpp"
+#pragma warning(push)
+#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#include "mediapipe/framework/calculator_graph.h"
+#include "mediapipe/framework/port/status.h"
+#pragma GCC diagnostic pop
+#pragma warning(pop)
+#include "mediapipe_utils.hpp"
+//#include "mediapipegraphdefinition.hpp"  // for version in response and PythonNodeResourceMap
+#include "packettypes.hpp"
+#include "graphqueue.hpp"
+
+namespace ovms {
+class PythonBackend;
+class ServableMetricReporter;
+class OutputStreamObserverI {
+public:
+    virtual absl::Status handlePacket(const ::mediapipe::Packet& packet) = 0;
+    virtual ~OutputStreamObserverI() = default;
+};
+class NullOutputStreamObserver : public OutputStreamObserverI {
+public:
+    NullOutputStreamObserver() {
+        SPDLOG_ERROR("NUll observer constructed:{}", (void*)this);
+    }
+    absl::Status handlePacket(const ::mediapipe::Packet& packet) override {
+        SPDLOG_ERROR("Internal error occured:{}", (void*)this);
+        throw std::runtime_error("Should not happen");
+        return absl::Status(absl::StatusCode::kInternal, "Should not happen");
+    }
+};
+}  // namespace ovms
diff --git a/src/python/BUILD b/src/python/BUILD
index f4fd4c571e..539abaf355 100644
--- a/src/python/BUILD
+++ b/src/python/BUILD
@@ -75,7 +75,7 @@ ovms_cc_library(
         "pythonexecutorcalculator_cc_proto",
         "utils",
     ],
-    visibility = ["//visibility:private"],
+    visibility = ["//visibility:public"], # TODO FIXME?
     alwayslink = 1,
     data = ["//src/python/binding:pyovms.so"],
 )
diff --git a/src/test/mediapipe_framework_test.cpp b/src/test/mediapipe_framework_test.cpp
index 85abfbd519..bc7c13bad9 100644
--- a/src/test/mediapipe_framework_test.cpp
+++ b/src/test/mediapipe_framework_test.cpp
@@ -18,6 +18,7 @@
 #include <sstream>
 #include <string>
 #include <thread>
+#include <chrono>
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
@@ -29,8 +30,11 @@
 #include "../grpcservermodule.hpp"
 #include "../http_rest_api_handler.hpp"
 #include "../kfs_frontend/kfs_grpc_inference_service.hpp"
+#include "../mediapipe_internal/outputstreamobserver.hpp"
 #include "../mediapipe_internal/mediapipefactory.hpp"
 #include "../mediapipe_internal/mediapipegraphdefinition.hpp"
+#include "../mediapipe_internal/mediapipe_utils.hpp"
+#include "mediapipe/framework/thread_pool_executor.h"
 #include "../metric_config.hpp"
 #include "../metric_module.hpp"
 #include "../model_service.hpp"
@@ -79,9 +83,411 @@ class MediapipeFrameworkTest : public TestWithTempDir {
 class MediapipeNegativeFrameworkTest : public MediapipeFrameworkTest {
 };
 
-// purpose of this test is to ensure there is no hang in case of one of the graph nodes
-// not producing output packet
+using mediapipe::Adopt;
+using mediapipe::CalculatorGraphConfig;
+using mediapipe::Packet;
+using mediapipe::ParseTextProtoOrDie;
+using mediapipe::Timestamp;
+
+#define MP_ERROR_STOP(A)                                         \
+    {                                                            \
+        absStatus = A;                                           \
+        if (!absStatus.ok()) {                                   \
+            const std::string absMessage = absStatus.ToString(); \
+            SPDLOG_DEBUG("{}", absMessage);                      \
+            ASSERT_TRUE(false);                                  \
+        }                                                        \
+    }
+TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerCheckNoInputPackets) {
+    // we need it only so that dummy is available via C-API
+    //    ServerGuard servGuard("/ovms/src/test/configs/config_standard_dummy.json");
+    ServerGuard servGuard("/ovms/src/test/configs/config_benchmark.json");
+    std::string graph_proto = R"(
+      input_stream: "IN:input"
+      output_stream: "OUT:output"
+      node {
+          calculator: "OpenVINOModelServerSessionCalculator"
+          output_side_packet: "SESSION:session"
+          node_options: {
+            [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
+              servable_name: "dummy"
+            }
+          }
+      }
+      node {
+        calculator: "OpenVINOInferenceCalculator"
+        input_side_packet: "SESSION:session"
+        input_stream: "OVTENSOR:input"
+        output_stream: "OVTENSOR:output"
+        node_options: {
+            [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: {
+                tag_to_input_tensor_names {
+                    key: "OVTENSOR"
+                    value: "b"
+                }
+                tag_to_output_tensor_names {
+                    key: "OVTENSOR"
+                    value: "a"
+                }
+            }
+        }
+      }
+    )";
+    CalculatorGraphConfig graphConfig =
+        ParseTextProtoOrDie<CalculatorGraphConfig>(graph_proto);
+    const std::string inputStreamName = "input";
+    const std::string outputStreamName = "output";
+    // avoid creating pollers, retreiving packets etc.
+    //////////////////
+    // model mgmt thread
+    //////////////////
+    //std::shared_ptr<ovms::GraphQueue> queue;
+    //queue = std::make_shared<GraphQueue>(graphConfig, 1);
+    ::mediapipe::CalculatorGraph graph;
+    EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk);
+    // Install NullObserver
+    // its not per graph but per output
+    std::shared_ptr<OutputStreamObserverI> perGraphObserverFunctor = std::make_shared<NullOutputStreamObserver>();
+    const std::string outputName{"output"};
+    absl::Status absStatus;
+    MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); }));
+    // Here ends model management
+    // Here starts mp graph executor
+    //ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout?
+    // get graphIdGuard from queue
+    // create FrontendAppropriateObserver
+    float expVal = 13.5;
+    struct MyFunctor : public OutputStreamObserverI {
+        float expVal;
+        MyFunctor(float expVal) :
+            expVal(expVal) {
+            SPDLOG_ERROR("MyFunctor observer constructed:{}", (void*)this);
+        }
+        absl::Status handlePacket(const ::mediapipe::Packet& packet) override {
+            SPDLOG_ERROR("ER my functor:{}", (void*)this);
+            const ov::Tensor& outputTensor =
+                packet.Get<ov::Tensor>();
+            auto datatype = ov::element::Type_t::f32;
+            EXPECT_EQ(datatype, outputTensor.get_element_type());
+            EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10));
+            const void* outputData = outputTensor.data();
+            EXPECT_EQ(*((float*)outputData), expVal);
+            return absl::OkStatus();
+        }
+    };
+    perGraphObserverFunctor = std::make_shared<MyFunctor>(expVal);
+    auto copyOfMyFunctor = perGraphObserverFunctor;
+    // now start execution
+    absStatus = graph.StartRun({});
+    auto datatype = ov::element::Type_t::f32;
+    ov::Shape shape{1, 10};
+    int timestamp{0};
+    std::vector<float> data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+    auto inputTensor = std::make_unique<ov::Tensor>(datatype, shape, data.data());
+    MP_ERROR_STOP(graph.AddPacketToInputStream(
+        inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++))));
+    MP_ERROR_STOP(graph.WaitUntilIdle());
+    SPDLOG_ERROR("Now swap Functor, we don't have to call ObserverOutputStream");
+    expVal = 42;
+    data[0] = expVal - 1;
+    perGraphObserverFunctor = std::make_shared<MyFunctor>(expVal);
+    // now add second packet
+    auto inputTensor2 = std::make_unique<ov::Tensor>(datatype, shape, data.data());
+    //MP_ERROR_STOP(graph.AddPacketToInputStream(
+    //    inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++))));
+    //MP_ERROR_STOP(graph.WaitUntilIdle());
+    MP_ERROR_STOP(graph.CloseAllPacketSources());
+    MP_ERROR_STOP(graph.WaitUntilDone());
+}
+TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOC) {
+    // we need it only so that dummy is available via C-API
+    //    ServerGuard servGuard("/ovms/src/test/configs/config_standard_dummy.json");
+    ServerGuard servGuard("/ovms/src/test/configs/config_benchmark.json");
+    std::string graph_proto = R"(
+      input_stream: "IN:input"
+      output_stream: "OUT:output"
+      node {
+          calculator: "OpenVINOModelServerSessionCalculator"
+          output_side_packet: "SESSION:session"
+          node_options: {
+            [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
+              servable_name: "dummy"
+            }
+          }
+      }
+      node {
+        calculator: "OpenVINOInferenceCalculator"
+        input_side_packet: "SESSION:session"
+        input_stream: "OVTENSOR:input"
+        output_stream: "OVTENSOR:output"
+        node_options: {
+            [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: {
+                tag_to_input_tensor_names {
+                    key: "OVTENSOR"
+                    value: "b"
+                }
+                tag_to_output_tensor_names {
+                    key: "OVTENSOR"
+                    value: "a"
+                }
+            }
+        }
+      }
+    )";
+    CalculatorGraphConfig graphConfig =
+        ParseTextProtoOrDie<CalculatorGraphConfig>(graph_proto);
+    const std::string inputStreamName = "input";
+    const std::string outputStreamName = "output";
+    // avoid creating pollers, retreiving packets etc.
+    //////////////////
+    // model mgmt thread
+    //////////////////
+    //std::shared_ptr<ovms::GraphQueue> queue;
+    //queue = std::make_shared<GraphQueue>(graphConfig, 1);
+    ::mediapipe::CalculatorGraph graph;
+    EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk);
+    // Install NullObserver
+    // its not per graph but per output
+    std::shared_ptr<OutputStreamObserverI> perGraphObserverFunctor = std::make_shared<NullOutputStreamObserver>();
+    const std::string outputName{"output"};
+    absl::Status absStatus;
+    MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); }));
+    // Here ends model management
+    // Here starts mp graph executor
+    //ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout?
+    // get graphIdGuard from queue
+    // create FrontendAppropriateObserver
+    float expVal = 13.5;
+    struct MyFunctor : public OutputStreamObserverI {
+        float expVal;
+        MyFunctor(float expVal) :
+            expVal(expVal) {
+            SPDLOG_ERROR("MyFunctor observer constructed:{}", (void*)this);
+        }
+        absl::Status handlePacket(const ::mediapipe::Packet& packet) override {
+            SPDLOG_ERROR("ER my functor:{}", (void*)this);
+            const ov::Tensor& outputTensor =
+                packet.Get<ov::Tensor>();
+            auto datatype = ov::element::Type_t::f32;
+            EXPECT_EQ(datatype, outputTensor.get_element_type());
+            EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10));
+            const void* outputData = outputTensor.data();
+            EXPECT_EQ(*((float*)outputData), expVal);
+            return absl::OkStatus();
+        }
+    };
+    perGraphObserverFunctor = std::make_shared<MyFunctor>(expVal);
+    auto copyOfMyFunctor = perGraphObserverFunctor;
+    // now start execution
+    absStatus = graph.StartRun({});
+    auto datatype = ov::element::Type_t::f32;
+    ov::Shape shape{1, 10};
+    int timestamp{0};
+    std::vector<float> data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+    auto inputTensor = std::make_unique<ov::Tensor>(datatype, shape, data.data());
+    MP_ERROR_STOP(graph.AddPacketToInputStream(
+        inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++))));
+    MP_ERROR_STOP(graph.WaitUntilIdle());
+    SPDLOG_ERROR("Now swap Functor, we don't have to call ObserverOutputStream");
+    expVal = 42;
+    data[0] = expVal - 1;
+    perGraphObserverFunctor = std::make_shared<MyFunctor>(expVal);
+    // now add second packet
+    auto inputTensor2 = std::make_unique<ov::Tensor>(datatype, shape, data.data());
+    MP_ERROR_STOP(graph.AddPacketToInputStream(
+        inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++))));
+    MP_ERROR_STOP(graph.WaitUntilIdle());
+}
+TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOCCompare) {
+    // we need it only so that dummy is available via C-API
+    ServerGuard servGuard("/ovms/src/test/configs/config_standard_dummy.json");
+    std::string graph_proto = R"(
+      input_stream: "IN:input"
+      output_stream: "OUT:output"
+      node {
+          calculator: "OpenVINOModelServerSessionCalculator"
+          output_side_packet: "SESSION:session"
+          node_options: {
+            [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
+              servable_name: "dummy"
+            }
+          }
+      }
+      node {
+        calculator: "OpenVINOInferenceCalculator"
+        input_side_packet: "SESSION:session"
+        input_stream: "OVTENSOR:input"
+        output_stream: "OVTENSOR:output"
+        node_options: {
+            [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: {
+                tag_to_input_tensor_names {
+                    key: "OVTENSOR"
+                    value: "b"
+                }
+                tag_to_output_tensor_names {
+                    key: "OVTENSOR"
+                    value: "a"
+                }
+            }
+        }
+      }
+    )";
+    CalculatorGraphConfig graphConfig =
+        ParseTextProtoOrDie<CalculatorGraphConfig>(graph_proto);
+    const std::string inputStreamName = "input";
+    const std::string outputStreamName = "output";
+    // avoid creating pollers, retreiving packets etc.
+    //////////////////
+    // model mgmt thread
+    //////////////////
+    //std::shared_ptr<ovms::GraphQueue> queue;
+    //queue = std::make_shared<GraphQueue>(graphConfig, 1);
+    auto datatype = ov::element::Type_t::f32;
+    ov::Shape shape{1, 10};
+    int timestamp{0};
+    float expVal = 13.5;
+    std::vector<float> data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+    ovms::Timer<3> timer;
+    const std::string outputName{"output"};
+    int N = 1000;
+
+    absl::Status absStatus;
+    // here starts new case of ovms
+    {  // new case of ovms
+        ::mediapipe::CalculatorGraph graph;
+        EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk);
+        auto inputTensor = std::make_unique<ov::Tensor>(datatype, shape, data.data());
+        // Install NullObserver
+        // its not per graph but per output
+        std::shared_ptr<ovms::OutputStreamObserverI> perGraphObserverFunctor = std::make_shared<NullOutputStreamObserver>();
+        MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); }));
+        // Here ends model management
+        // Here starts mp graph executor
+        //ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout?
+        // get graphIdGuard from queue
+        // create FrontendAppropriateObserver
+        struct MyFunctor : public OutputStreamObserverI {
+            float expVal;
+            MyFunctor(float expVal) :
+                expVal(expVal) {
+                SPDLOG_ERROR("MyFunctor observer constructed:{}", (void*)this);
+            }
+            absl::Status handlePacket(const ::mediapipe::Packet& packet) override {
+                SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY Getting output tensor:{}", (void*)this);
+                const ov::Tensor& outputTensor =
+                    packet.Get<ov::Tensor>();
+                auto datatype = ov::element::Type_t::f32;
+                EXPECT_EQ(datatype, outputTensor.get_element_type());
+                EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10));
+                const void* outputData = outputTensor.data();
+                EXPECT_EQ(*((float*)outputData), expVal);
+                return absl::OkStatus();
+            }
+        };
+        absStatus = graph.StartRun({});
+        SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX warmup");
+        {
+            perGraphObserverFunctor = std::make_shared<MyFunctor>(expVal);
+            auto copyOfMyFunctor = perGraphObserverFunctor;
+            auto inputTensor = std::make_unique<ov::Tensor>(datatype, shape, data.data());
+            MP_ERROR_STOP(graph.AddPacketToInputStream(
+                inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++))));
+        }
+        std::this_thread::sleep_for(std::chrono::seconds(1));
+        SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX warmup end");
+        SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX new");
+        timer.start(0);
+        for (auto i = 0; i < N; ++i) {  // iter begin
+            perGraphObserverFunctor = std::make_shared<MyFunctor>(expVal);
+            auto copyOfMyFunctor = perGraphObserverFunctor;
+            auto inputTensor = std::make_unique<ov::Tensor>(datatype, shape, data.data());
+            SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY AddingPacket");
+            MP_ERROR_STOP(graph.AddPacketToInputStream(
+                inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++))));
+            SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY WaitUntilIdle");
+            MP_ERROR_STOP(graph.WaitUntilIdle());
+            SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY After WaitUntilIdle");
+        }  // iter end
+        timer.stop(0);
+        SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX end:{}", timer.elapsed<std::chrono::microseconds>(0) / 1000);
+    }  // end of new case ovms
+    {  // current ovms case
+        SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ovms");
+        timer.start(1);
+        for (auto i = 0; i < N; ++i) {  // iter begin
+            ::mediapipe::CalculatorGraph graph;
+            EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk);
+            auto absStatusOrPoller = graph.AddOutputStreamPoller(outputName);
+            MP_ERROR_STOP(graph.StartRun({}));
+            auto inputTensor = std::make_unique<ov::Tensor>(datatype, shape, data.data());
+            SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY AddingPacket");
+            MP_ERROR_STOP(graph.AddPacketToInputStream(
+                inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++))));
+            ::mediapipe::Packet packet;
+            absStatusOrPoller.value().Next(&packet);
+            SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY Getting output tensor");
+            const ov::Tensor& outputTensor =
+                packet.Get<ov::Tensor>();
+            auto datatype = ov::element::Type_t::f32;
+            EXPECT_EQ(datatype, outputTensor.get_element_type());
+            EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10));
+            const void* outputData = outputTensor.data();
+            EXPECT_EQ(*((float*)outputData), expVal);
+            SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY WaitUntilIdle");
+            MP_ERROR_STOP(graph.WaitUntilIdle());
+            SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY After WaitUntilIdle");
+            MP_ERROR_STOP(graph.CloseAllPacketSources());
+            MP_ERROR_STOP(graph.WaitUntilDone());
+        }  // iter end
+        timer.stop(1);
+        SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX end:{}", timer.elapsed<std::chrono::microseconds>(1) / 1000);
+    }
+    {  // thread pool case
+        //auto sharedThreadPool = std::make_shared<mediapipe::ThreadPoolExecutor>(std::thread::hardware_concurrency());
+        auto sharedThreadPool = std::make_shared<mediapipe::ThreadPoolExecutor>(24);
+        SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX thread");
+        timer.start(2);
+        for (auto i = 0; i < N; ++i) {  // iter begin
+            ::mediapipe::CalculatorGraph graph;
+            MP_ERROR_STOP(graph.SetExecutor("", sharedThreadPool));
+            EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk);
+            auto absStatusOrPoller = graph.AddOutputStreamPoller(outputName);
+            MP_ERROR_STOP(graph.StartRun({}));
+            auto inputTensor = std::make_unique<ov::Tensor>(datatype, shape, data.data());
+            SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY AddingPacket");
+            MP_ERROR_STOP(graph.AddPacketToInputStream(
+                inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++))));
+            ::mediapipe::Packet packet;
+            absStatusOrPoller.value().Next(&packet);
+            SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY Getting output tensor");
+            const ov::Tensor& outputTensor =
+                packet.Get<ov::Tensor>();
+            auto datatype = ov::element::Type_t::f32;
+            EXPECT_EQ(datatype, outputTensor.get_element_type());
+            EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10));
+            const void* outputData = outputTensor.data();
+            EXPECT_EQ(*((float*)outputData), expVal);
+            SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY WaitUntilIdle");
+            MP_ERROR_STOP(graph.WaitUntilIdle());
+            SPDLOG_ERROR("YYYYYYYYYYYYYYYYYYYYYYYYYYYYY After WaitUntilIdle");
+            MP_ERROR_STOP(graph.CloseAllPacketSources());
+            MP_ERROR_STOP(graph.WaitUntilDone());
+        }  // iter end
+        timer.stop(2);
+        SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX end:{}", timer.elapsed<std::chrono::microseconds>(2) / 1000);
+    }  // end of thread pool case
+    double ms = timer.elapsed<std::chrono::microseconds>(0) / 1000;
+    SPDLOG_ERROR("{} iterations of new flow took:{} ms. FPS:{}", N, ms, N / ms * 1000);
+    ms = timer.elapsed<std::chrono::microseconds>(1) / 1000;
+    SPDLOG_ERROR("{} iterations of old flow took:{} ms. FPS:{}", N, ms, N / ms * 1000);
+    ms = timer.elapsed<std::chrono::microseconds>(2) / 1000;
+    SPDLOG_ERROR("{} iterations of thread pool flow took:{} ms. FPS:{}", N, ms, N / ms * 1000);
+    SPDLOG_ERROR("Threads: {}", std::thread::hardware_concurrency());
+}
+
 TEST_F(MediapipeNegativeFrameworkTest, NoOutputPacketProduced) {
+    // purpose of this test is to ensure there is no hang in case of one of the graph nodes
+    // not producing output packet
     SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/negative/config_no_calc_output_stream.json").c_str());
     const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME);
     KFSInferenceServiceImpl& impl = dynamic_cast<const ovms::GRPCServerModule*>(grpcModule)->getKFSGrpcImpl();
diff --git a/src/test/mediapipe_validation_test.cpp b/src/test/mediapipe_validation_test.cpp
index bdaa588887..78aae33e4e 100644
--- a/src/test/mediapipe_validation_test.cpp
+++ b/src/test/mediapipe_validation_test.cpp
@@ -170,4 +170,5 @@ TEST_F(MediapipeValidationTest, WrongPrecision) {
     prepareSingleInput();
     request.mutable_inputs(0)->set_datatype("unknown");
     ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT);
+    SPDLOG_ERROR("ER");
 }
diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp
index 55b6ab96ed..aa1eee869b 100644
--- a/src/test/mediapipeflow_test.cpp
+++ b/src/test/mediapipeflow_test.cpp
@@ -1557,7 +1557,7 @@ TEST_F(MediapipeStreamFlowAddTest, InferOnUnloadedGraph) {
 
 // Inference on reloaded mediapipe graph, completely different pipeline
 // Expects old stream to still use old configuration
-// Expect new stream to use new configuration
+// Expect new stream to use new configuration XXXXXX
 TEST_F(MediapipeStreamFlowAddTest, InferOnReloadedGraph) {
     const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME);
     KFSInferenceServiceImpl& impl = dynamic_cast<const ovms::GRPCServerModule*>(grpcModule)->getKFSGrpcImpl();
@@ -1687,6 +1687,8 @@ TEST_P(MediapipeFlowAddTest, InferStreamDisconnectionBeforeFirstRequest) {
 }
 
 TEST_F(MediapipeFlowTest, InferWithParams) {
+    GTEST_SKIP() << "Not possible with graph queue";
+    return;
     SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_graph_with_side_packets.json");
     const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME);
     KFSInferenceServiceImpl& impl = dynamic_cast<const ovms::GRPCServerModule*>(grpcModule)->getKFSGrpcImpl();
@@ -1991,6 +1993,28 @@ TEST(Mediapipe, MetadataDummyInputTypes) {
                 }
             }
         }
+        node {
+            calculator: "OVMSOVCalculator"
+            input_stream: "B:in2"
+            output_stream: "A:out2"
+            node_options: {
+                [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: {
+                  servable_name: "dummyUpper"
+                  servable_version: "1"
+                }
+            }
+        }
+        node {
+            calculator: "OVMSOVCalculator"
+            input_stream: "B:in2"
+            output_stream: "A:out3"
+            node_options: {
+                [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: {
+                  servable_name: "dummyUpper"
+                  servable_version: "1"
+                }
+            }
+        }
     )";
 
     ovms::MediapipeGraphConfig mgc{"mediaDummy", "", ""};
@@ -2681,13 +2705,15 @@ class MediapipeSerialization : public ::testing::Test {
             stream_types_mapping_t inputTypes,
             stream_types_mapping_t outputTypes,
             std::vector<std::string> inputNames, std::vector<std::string> outputNames,
-            const PythonNodeResourcesMap& pythonNodeResourcesMap,
-            MediapipeServableMetricReporter* mediapipeServableMetricReporter) :
-            MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, pythonNodeResourcesMap, {}, {}, {}, {}, {}, nullptr, mediapipeServableMetricReporter) {}
+            const std::shared_ptr<PythonNodeResourcesMap>& pythonNodeResourcesMap,
+            const std::shared_ptr<GenAiServableMap>& gasm,
+            MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) :
+            MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, *pythonNodeResourcesMap, *gasm, {}, {}, {}, {}, nullptr, mediapipeServableMetricReporter, std::move(guard)) {}
     };
 
 protected:
     std::unique_ptr<MediapipeServableMetricReporter> reporter;
+    std::shared_ptr<GraphQueue> queue;
     std::unique_ptr<MockedMediapipeGraphExecutor> executor;
     ::inference::ModelInferResponse mp_response;
     void SetUp() {
@@ -2700,9 +2726,14 @@ class MediapipeSerialization : public ::testing::Test {
         const std::vector<std::string> inputNames;
         const std::vector<std::string> outputNames;
         const ::mediapipe::CalculatorGraphConfig config;
-        PythonNodeResourcesMap pythonNodeResourcesMap;
         this->reporter = std::make_unique<MediapipeServableMetricReporter>(nullptr, nullptr, "");  // disabled reporter
-        executor = std::make_unique<MockedMediapipeGraphExecutor>("", "", config, mapping, mapping, inputNames, outputNames, pythonNodeResourcesMap, this->reporter.get());
+        auto sidePackets = std::make_shared<GraphSidePackets>();
+        std::shared_ptr<PythonNodeResourcesMap> pnsm = std::make_shared<PythonNodeResourcesMap>();
+        std::shared_ptr<GenAiServableMap> gasm = std::make_shared<GenAiServableMap>();
+        std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+        GraphIdGuard guard(queue);
+        executor = std::make_unique<MockedMediapipeGraphExecutor>("", "", config, mapping, mapping, inputNames, outputNames, pnsm, gasm, this->reporter.get(), std::move(guard));
+        SPDLOG_ERROR("Exit SetUp");
     }
 };
 
@@ -3099,7 +3130,7 @@ class MediapipeFlowStartTest : public TestWithTempDir {
         auto start = std::chrono::high_resolution_clock::now();
         while (!isMpReady(waitForServable) &&
                (std::chrono::duration_cast<std::chrono::seconds>(std::chrono::high_resolution_clock::now() - start).count() < SERVER_START_FROM_CONFIG_TIMEOUT_SECONDS)) {
-            std::this_thread::sleep_for(std::chrono::microseconds(100));
+            std::this_thread::sleep_for(std::chrono::microseconds(1000));
         }
         const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME);
         if (!grpcModule) {
diff --git a/src/test/pythonnode_test.cpp b/src/test/pythonnode_test.cpp
index 54c9acbfa1..7e2595a58f 100644
--- a/src/test/pythonnode_test.cpp
+++ b/src/test/pythonnode_test.cpp
@@ -1002,10 +1002,10 @@ class MockedMediapipeGraphExecutorPy : public ovms::MediapipeGraphExecutor {
         stream_types_mapping_t inputTypes,
         stream_types_mapping_t outputTypes,
         std::vector<std::string> inputNames, std::vector<std::string> outputNames,
-        const PythonNodeResourcesMap& pythonNodeResourcesMap,
+        const std::shared_ptr<PythonNodeResourcesMap>& pythonNodeResourcesMap,
         PythonBackend* pythonBackend,
-        MediapipeServableMetricReporter* mediapipeServableMetricReporter) :
-        MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, pythonNodeResourcesMap, {}, {}, {}, {}, {}, pythonBackend, mediapipeServableMetricReporter) {}
+        MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) :
+        MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, *pythonNodeResourcesMap, {}, {}, {}, {}, {}, pythonBackend, mediapipeServableMetricReporter, std::move(guard)) {}
 };
 
 TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) {
@@ -1014,8 +1014,11 @@ TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) {
     const std::vector<std::string> inputNames;
     const std::vector<std::string> outputNames;
     const ::mediapipe::CalculatorGraphConfig config;
-    PythonNodeResourcesMap pythonNodeResourcesMap;
-    auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, pythonNodeResourcesMap, getPythonBackend(), this->reporter.get());
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<PythonNodeResourcesMap> pnsm = std::make_shared<PythonNodeResourcesMap>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
+    auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, pnsm, getPythonBackend(), this->reporter.get(), std::move(guard));
 
     std::string datatype = "FP32";
     std::string name = "python_result";
diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp
index 02e7c4178a..1a2e6d2b78 100644
--- a/src/test/streaming_test.cpp
+++ b/src/test/streaming_test.cpp
@@ -355,11 +355,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::KFS_REQUEST}},
         {{"out", mediapipe_packet_type_enum::KFS_RESPONSE}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     // Mock receiving 3 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
@@ -412,11 +415,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     // Mock receiving 3 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}});  // no timestamp specified, server will assign one
@@ -555,11 +561,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     // Mock receiving 3 requests with manually (client) assigned ascending order of timestamp and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, 3);  // first request with timestamp 3
@@ -600,11 +609,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     // Mock only 1 request and disconnect immediately
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
@@ -1220,6 +1232,9 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in1", mediapipe_packet_type_enum::OVTENSOR},
@@ -1230,7 +1245,7 @@ node {
             {"out3", mediapipe_packet_type_enum::OVTENSOR}},
         {"in1", "in2", "in3"},
         {"out1", "out2", "out3"},
-        {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1272,6 +1287,9 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in1", mediapipe_packet_type_enum::OVTENSOR},
@@ -1282,7 +1300,7 @@ node {
             {"out3", mediapipe_packet_type_enum::OVTENSOR}},
         {"in1", "in2", "in3"},
         {"out1", "out2", "out3"},
-        {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1313,11 +1331,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1347,11 +1368,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"wrong_name"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};  // cannot install observer due to wrong output name (should never happen due to validation)
+{"in"}, {"wrong_name"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};  // cannot install observer due to wrong output name (should never happen due to validation)
 
     EXPECT_CALL(this->stream, Read(_)).Times(0);
     EXPECT_CALL(this->stream, Write(_, _)).Times(0);
@@ -1372,11 +1396,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     prepareRequest(this->firstRequest, {});
     EXPECT_CALL(this->stream, Read(_))
@@ -1400,11 +1427,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1436,11 +1466,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
@@ -1459,11 +1492,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     // Invalid request - missing data in buffer
     prepareInvalidRequest(this->firstRequest, {"in"});  // no timestamp specified, server will assign one
@@ -1494,11 +1530,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     std::promise<void> signalPromise[3];
     std::future<void> signalFuture[3] = {
@@ -1541,11 +1580,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, 0);
     EXPECT_CALL(this->stream, Read(_))
@@ -1569,11 +1611,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     setRequestTimestamp(this->firstRequest, std::string("not an int"));
@@ -1604,11 +1649,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     // Timestamps not allowed in stream
     // Expect continuity of operation and response with error message
@@ -1646,11 +1694,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     // Allowed in stream
     for (auto timestamp : std::vector<::mediapipe::Timestamp>{
@@ -1682,11 +1733,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     // Mock receiving 3 requests and disconnection
     prepareRequestWithParam(this->firstRequest, {{"in", 3.5f}}, {"val", 65});  // request with parameter val
@@ -1719,11 +1773,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     // Mock receiving the invalid request and disconnection
     // Request with invalid param py (special pythons session side packet)
@@ -1748,11 +1805,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});  // missing required request param
     EXPECT_CALL(this->stream, Read(_)).Times(0);
@@ -1774,11 +1834,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     // Mock receiving 2 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, std::nullopt, this->name, this->version);  // no timestamp specified, server will assign one
@@ -1808,11 +1871,14 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
+    auto sidePackets = std::make_shared<GraphSidePackets>();
+    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
diff --git a/src/test/test_utils.hpp b/src/test/test_utils.hpp
index 879ab1313e..18c0f6e01d 100644
--- a/src/test/test_utils.hpp
+++ b/src/test/test_utils.hpp
@@ -816,8 +816,8 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition {
     std::string inputConfig;
 #if (PYTHON_DISABLE == 0)
     ovms::PythonNodeResources* getPythonNodeResources(const std::string& nodeName) {
-        auto it = this->sidePacketMaps.pythonNodeResourcesMap.find(nodeName);
-        if (it == std::end(this->sidePacketMaps.pythonNodeResourcesMap)) {
+        auto it = this->sidePacketMaps->pythonNodeResourcesMap.find(nodeName);
+        if (it == std::end(this->sidePacketMaps->pythonNodeResourcesMap)) {
             return nullptr;
         } else {
             return it->second.get();
@@ -826,8 +826,8 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition {
 #endif
 
     ovms::GenAiServable* getGenAiServable(const std::string& nodeName) {
-        auto it = this->sidePacketMaps.genAiServableMap.find(nodeName);
-        if (it == std::end(this->sidePacketMaps.genAiServableMap)) {
+        auto it = this->sidePacketMaps->genAiServableMap.find(nodeName);
+        if (it == std::end(this->sidePacketMaps->genAiServableMap)) {
             return nullptr;
         } else {
             return it->second.get();
@@ -838,7 +838,7 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition {
         return this->validateForConfigLoadableness();
     }
 
-    ovms::GenAiServableMap& getGenAiServableMap() { return this->sidePacketMaps.genAiServableMap; }
+    ovms::GenAiServableMap& getGenAiServableMap() { return this->sidePacketMaps->genAiServableMap; }
 
     DummyMediapipeGraphDefinition(const std::string name,
         const ovms::MediapipeGraphConfig& config,

From 829257b497ac9eb3078c53901bacc8028d6aabcf Mon Sep 17 00:00:00 2001
From: Adrian Tobiszewski <adrian.tobiszewski@intel.com>
Date: Tue, 17 Feb 2026 12:16:11 +0100
Subject: [PATCH 2/8] Add config for queue

---
 .../mediapipegraphconfig.cpp                  | 11 +++
 .../mediapipegraphconfig.hpp                  | 71 +++++++++++++++++++
 .../mediapipegraphdefinition.cpp              | 21 ++++--
 .../mediapipegraphdefinition.hpp              |  1 +
 src/test/http_openai_handler_test.cpp         |  4 +-
 src/test/test_utils.hpp                       |  5 +-
 6 files changed, 106 insertions(+), 7 deletions(-)

diff --git a/src/mediapipe_internal/mediapipegraphconfig.cpp b/src/mediapipe_internal/mediapipegraphconfig.cpp
index 448da4e1b8..7cde853717 100644
--- a/src/mediapipe_internal/mediapipegraphconfig.cpp
+++ b/src/mediapipe_internal/mediapipegraphconfig.cpp
@@ -118,6 +118,17 @@ Status MediapipeGraphConfig::parseNode(const rapidjson::Value& v) {
             this->setSubconfigPath(DEFAULT_SUBCONFIG_FILENAME);
             this->setModelMeshSubconfigPath(DEFAULT_MODELMESH_SUBCONFIG_FILENAME);
         }
+        if (v.HasMember("graph_queue_size")) {
+            const auto& val = v["graph_queue_size"];
+            if (val.IsInt()) {
+                this->setGraphQueueSize(val.GetInt());
+            } else if (val.IsString() && std::string(val.GetString()) == "AUTO") {
+                this->setGraphQueueSizeAuto();
+            } else {
+                SPDLOG_ERROR("Invalid graph_queue_size value. Expected integer or \"AUTO\".");
+                return StatusCode::JSON_INVALID;
+            }
+        }
     } catch (std::logic_error& e) {
         SPDLOG_DEBUG("Relative path error: {}", e.what());
         return StatusCode::INTERNAL_ERROR;
diff --git a/src/mediapipe_internal/mediapipegraphconfig.hpp b/src/mediapipe_internal/mediapipegraphconfig.hpp
index 2e4f3d428e..c4f71b3f6f 100644
--- a/src/mediapipe_internal/mediapipegraphconfig.hpp
+++ b/src/mediapipe_internal/mediapipegraphconfig.hpp
@@ -15,7 +15,9 @@
 //*****************************************************************************
 #pragma once
 
+#include <optional>
 #include <string>
+#include <variant>
 #pragma warning(push)
 #pragma warning(disable : 6313)
 #include <rapidjson/document.h>
@@ -27,6 +29,22 @@ extern const std::string DEFAULT_GRAPH_FILENAME;
 extern const std::string DEFAULT_SUBCONFIG_FILENAME;
 extern const std::string DEFAULT_MODELMESH_SUBCONFIG_FILENAME;
 
+/**
+ * @brief Tag type representing AUTO graph queue size (determined at runtime).
+ */
+struct GraphQueueAutoTag {
+    bool operator==(const GraphQueueAutoTag&) const { return true; }
+};
+
+/**
+ * @brief Represents the user's graph_queue_size setting.
+ *
+ * - std::nullopt              => user did not set this field
+ * - int                       => user explicitly set a numeric value
+ * - GraphQueueAutoTag         => user explicitly set "AUTO"
+ */
+using GraphQueueSizeValue = std::optional<std::variant<int, GraphQueueAutoTag>>;
+
 class Status;
 
 /**
@@ -69,6 +87,15 @@ class MediapipeGraphConfig {
      */
     std::string currentGraphPbTxtMD5;
 
+    /**
+     * @brief Graph queue size configuration.
+     *
+     * - std::nullopt              => user did not set this field
+     * - int                       => user explicitly set a numeric size
+     * - GraphQueueAutoTag         => user explicitly set "AUTO"
+     */
+    GraphQueueSizeValue graphQueueSize;
+
 public:
     /**
          * @brief Construct a new Mediapie Graph configuration object
@@ -206,6 +233,50 @@ class MediapipeGraphConfig {
         this->currentGraphPbTxtMD5 = currentGraphPbTxtMD5;
     }
 
+    /**
+     * @brief Get the graph queue size setting.
+     *
+     * @return const GraphQueueSizeValue& - nullopt if not set, int or GraphQueueAutoTag
+     */
+    const GraphQueueSizeValue& getGraphQueueSize() const {
+        return this->graphQueueSize;
+    }
+
+    /**
+     * @brief Set the graph queue size to an explicit numeric value.
+     */
+    void setGraphQueueSize(int size) {
+        this->graphQueueSize = size;
+    }
+
+    /**
+     * @brief Set the graph queue size to AUTO.
+     */
+    void setGraphQueueSizeAuto() {
+        this->graphQueueSize = GraphQueueAutoTag{};
+    }
+
+    /**
+     * @brief Resolve the graph queue size setting to a concrete integer.
+     *
+     * Returns:
+     *   -1  => queue creation disabled (user set -1)
+     *    0  => queue with size 0 (user set 0)
+     *   >0  => explicit size or resolved AUTO / default
+     *
+     * When not set (nullopt): returns default of 1.
+     * When AUTO: returns hardcoded value (TODO FIXME @atobisze determine optimal size).
+     */
+    int getInitialQueueSize() const {
+        if (!this->graphQueueSize.has_value()) {
+            return 1;  // not set - default
+        }
+        if (std::holds_alternative<GraphQueueAutoTag>(*this->graphQueueSize)) {
+            return 16;  // TODO FIXME @atobisze determine optimal size based on nireq / hardware
+        }
+        return std::get<int>(*this->graphQueueSize);
+    }
+
     bool isReloadRequired(const MediapipeGraphConfig& rhs) const;
 
     /**
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp
index 8b028d186b..e0453e52e1 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.cpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp
@@ -186,10 +186,7 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) {
     if (!status.ok()) {
         return status;
     }
-    // TODO FIXME @atobisze
-    SPDLOG_ERROR("ER");
-    this->queue = std::make_shared<GraphQueue>(this->config, this->sidePacketMaps, 12);
-    SPDLOG_ERROR("XXX ER GraphQueue:{}", (void*)this->queue.get());
+    this->initializeQueueIfRequired();
 
     lock.unlock();
     notifier.passed = true;
@@ -200,6 +197,17 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) {
     return StatusCode::OK;
 }
 
+void MediapipeGraphDefinition::initializeQueueIfRequired() {
+    // TODO FIXME @atobisze
+    int initialQueueSize = this->mgconfig.getInitialQueueSize();
+    if (initialQueueSize < 0) {
+        SPDLOG_DEBUG("Graph queue creation disabled for mediapipe: {} (graph_queue_size={})", getName(), initialQueueSize);
+        return;
+    }
+    this->queue = std::make_shared<GraphQueue>(this->config, this->sidePacketMaps, initialQueueSize);
+    SPDLOG_DEBUG("Created graph queue with size {} for mediapipe: {}", initialQueueSize, getName());
+}
+
 MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name,
     const MediapipeGraphConfig& config,
     MetricRegistry* registry,
@@ -282,6 +290,10 @@ Status MediapipeGraphDefinition::create(std::unique_ptr<MediapipeGraphExecutor>&
         return status;
     }
     SPDLOG_DEBUG("Creating Mediapipe graph executor: {}", getName());
+    if (!this->queue) {
+        SPDLOG_ERROR("Cannot create mediapipe graph executor: {} - graph queue not initialized (graph_queue_size=0)", getName());
+        return StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR;
+    }
     GraphIdGuard graphIdGuard(this->queue);  // TODO timeout?
     SPDLOG_ERROR("ER");
     pipeline = std::make_unique<MediapipeGraphExecutor>(getName(), std::to_string(getVersion()),
@@ -451,6 +463,7 @@ class ResourcesCleaningGuard {
 
 Status MediapipeGraphDefinition::initializeNodes() {
     SPDLOG_INFO("MediapipeGraphDefinition initializing graph nodes");
+    this->sidePacketMaps = std::make_shared<GraphSidePackets>();
     for (int i = 0; i < config.node().size(); i++) {
 #if (PYTHON_DISABLE == 0)
         auto& pythonNodeResourcesMap = this->sidePacketMaps->pythonNodeResourcesMap;
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp
index 2a0804b01e..7a4739438b 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.hpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp
@@ -127,6 +127,7 @@ class MediapipeGraphDefinition {
 
     Status setStreamTypes();
     Status dryInitializeTest();
+    void initializeQueueIfRequired();
     std::string chosenConfig;
     static MediapipeGraphConfig MGC;
     const std::string name;
diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp
index fec2009867..6494514806 100644
--- a/src/test/http_openai_handler_test.cpp
+++ b/src/test/http_openai_handler_test.cpp
@@ -212,7 +212,7 @@ Key: content-type; Value: application/json
         }
     
 JSON Parser:
-{"model":"gpt","stream":false,"messages":[]}0)";
+{"model":"gpt","stream":false,"messages":[]}012345678)"; // we reuse graph so this appends each time
     ASSERT_EQ(response, expectedResponse);
 }
 
@@ -244,7 +244,7 @@ Key: test2; Value: header
         }
     
 JSON Parser:
-{"model":"gpt","stream":false,"messages":[]}0)";
+{"model":"gpt","stream":false,"messages":[]}012345678)"; // we reuse graph so this appends each time
     ASSERT_EQ(response, expectedResponse);
 }
 
diff --git a/src/test/test_utils.hpp b/src/test/test_utils.hpp
index 18c0f6e01d..65e72b543c 100644
--- a/src/test/test_utils.hpp
+++ b/src/test/test_utils.hpp
@@ -844,7 +844,10 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition {
         const ovms::MediapipeGraphConfig& config,
         std::string inputConfig,
         ovms::PythonBackend* pythonBackend = nullptr) :
-        ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, pythonBackend) { this->inputConfig = inputConfig; }
+        ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, pythonBackend) {
+        this->inputConfig = inputConfig;
+        this->mgconfig.setGraphQueueSize(-1); // TODO FIXME @atobisze
+    }
 
     // Do not read from path - use predefined config contents
     ovms::Status validateForConfigFileExistence() override {

From 5f96c66b4a605abfb56474eec719a3e1099cc39b Mon Sep 17 00:00:00 2001
From: Adrian Tobiszewski <adrian.tobiszewski@intel.com>
Date: Wed, 18 Feb 2026 11:50:54 +0100
Subject: [PATCH 3/8] Checkpoint - switchable queue

---
 .../mediapipegraphconfig.cpp                  |  11 --
 .../mediapipegraphconfig.hpp                  |   4 +-
 .../mediapipegraphdefinition.cpp              |  67 +++++++--
 .../mediapipegraphdefinition.hpp              |   1 +
 .../mediapipegraphexecutor.cpp                |  23 ++-
 .../mediapipegraphexecutor.hpp                | 139 +++++++++++-------
 src/test/http_openai_handler_test.cpp         |  97 +++++++++++-
 ...ediapipe_openai_chat_completions_mock.json |   3 +-
 src/test/mediapipeflow_test.cpp               | 123 +++++++++++++++-
 src/test/streaming_test.cpp                   | 110 +++-----------
 src/test/test_utils.hpp                       |   3 +-
 11 files changed, 404 insertions(+), 177 deletions(-)

diff --git a/src/mediapipe_internal/mediapipegraphconfig.cpp b/src/mediapipe_internal/mediapipegraphconfig.cpp
index 7cde853717..448da4e1b8 100644
--- a/src/mediapipe_internal/mediapipegraphconfig.cpp
+++ b/src/mediapipe_internal/mediapipegraphconfig.cpp
@@ -118,17 +118,6 @@ Status MediapipeGraphConfig::parseNode(const rapidjson::Value& v) {
             this->setSubconfigPath(DEFAULT_SUBCONFIG_FILENAME);
             this->setModelMeshSubconfigPath(DEFAULT_MODELMESH_SUBCONFIG_FILENAME);
         }
-        if (v.HasMember("graph_queue_size")) {
-            const auto& val = v["graph_queue_size"];
-            if (val.IsInt()) {
-                this->setGraphQueueSize(val.GetInt());
-            } else if (val.IsString() && std::string(val.GetString()) == "AUTO") {
-                this->setGraphQueueSizeAuto();
-            } else {
-                SPDLOG_ERROR("Invalid graph_queue_size value. Expected integer or \"AUTO\".");
-                return StatusCode::JSON_INVALID;
-            }
-        }
     } catch (std::logic_error& e) {
         SPDLOG_DEBUG("Relative path error: {}", e.what());
         return StatusCode::INTERNAL_ERROR;
diff --git a/src/mediapipe_internal/mediapipegraphconfig.hpp b/src/mediapipe_internal/mediapipegraphconfig.hpp
index c4f71b3f6f..64a75a9a12 100644
--- a/src/mediapipe_internal/mediapipegraphconfig.hpp
+++ b/src/mediapipe_internal/mediapipegraphconfig.hpp
@@ -264,12 +264,12 @@ class MediapipeGraphConfig {
      *    0  => queue with size 0 (user set 0)
      *   >0  => explicit size or resolved AUTO / default
      *
-     * When not set (nullopt): returns default of 1.
+     * When not set (nullopt): returns -1 (queue disabled).
      * When AUTO: returns hardcoded value (TODO FIXME @atobisze determine optimal size).
      */
     int getInitialQueueSize() const {
         if (!this->graphQueueSize.has_value()) {
-            return 1;  // not set - default
+            return -1;  // not set - queue disabled by default
         }
         if (std::holds_alternative<GraphQueueAutoTag>(*this->graphQueueSize)) {
             return 16;  // TODO FIXME @atobisze determine optimal size based on nireq / hardware
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp
index e0453e52e1..6b2161ca58 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.cpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp
@@ -18,8 +18,10 @@
 #include <algorithm>
 #include <iostream>
 #include <memory>
+#include <regex>
 #include <sstream>
 #include <string>
+#include <thread>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -99,6 +101,45 @@ Status MediapipeGraphDefinition::validateForConfigFileExistence() {
     config << ifs.rdbuf();
     this->mgconfig.setCurrentGraphPbTxtMD5(ovms::FileSystem::getStringMD5(config.str()));
     this->chosenConfig.assign(config.str());
+    return parseGraphQueueSizeDirective();
+}
+
+Status MediapipeGraphDefinition::parseGraphQueueSizeDirective() {
+    // Scan pbtxt content for: # OVMS_GRAPH_QUEUE_SIZE: <value>
+    static const std::regex directiveRegex(
+        R"(^\s*#\s*OVMS_GRAPH_QUEUE_SIZE\s*:\s*(\S+)\s*$)",
+        std::regex::multiline);
+    std::smatch match;
+    if (!std::regex_search(this->chosenConfig, match, directiveRegex)) {
+        SPDLOG_TRACE("OVMS_GRAPH_QUEUE_SIZE directive not found in pbtxt for mediapipe: {}", getName());
+        return StatusCode::OK;  // directive not present - queue disabled by default
+    }
+    std::string value = match[1].str();
+    if (value == "AUTO") {
+        this->mgconfig.setGraphQueueSizeAuto();
+        return StatusCode::OK;
+    }
+    // Try to parse as integer
+    auto parsed = stoi32(value);
+    if (!parsed.has_value()) {
+        SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: '{}'. Expected integer or 'AUTO'.", value);
+        return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID;
+    }
+    int queueSize = parsed.value();
+    if (queueSize < -1) {
+        SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: {}. Must be -1 (disabled), or a positive integer.", queueSize);
+        return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID;
+    }
+    if (queueSize == 0) {
+        SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: 0. Must be -1 (disabled), or a positive integer.");
+        return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID;
+    }
+    unsigned int maxThreads = std::thread::hardware_concurrency();
+    if (maxThreads > 0 && queueSize > static_cast<int>(maxThreads)) {
+        SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: {}. Exceeds available hardware threads: {}.", queueSize, maxThreads);
+        return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID;
+    }
+    this->mgconfig.setGraphQueueSize(queueSize);
     return StatusCode::OK;
 }
 
@@ -290,16 +331,18 @@ Status MediapipeGraphDefinition::create(std::unique_ptr<MediapipeGraphExecutor>&
         return status;
     }
     SPDLOG_DEBUG("Creating Mediapipe graph executor: {}", getName());
-    if (!this->queue) {
-        SPDLOG_ERROR("Cannot create mediapipe graph executor: {} - graph queue not initialized (graph_queue_size=0)", getName());
-        return StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR;
+    if (this->queue) {
+        GraphIdGuard graphIdGuard(this->queue);
+        pipeline = std::make_unique<MediapipeGraphExecutor>(getName(), std::to_string(getVersion()),
+            this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames,
+            *this->sidePacketMaps,
+            this->pythonBackend, this->reporter.get(), std::move(graphIdGuard));
+    } else {
+        pipeline = std::make_unique<MediapipeGraphExecutor>(getName(), std::to_string(getVersion()),
+            this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames,
+            *this->sidePacketMaps,
+            this->pythonBackend, this->reporter.get());
     }
-    GraphIdGuard graphIdGuard(this->queue);  // TODO timeout?
-    SPDLOG_ERROR("ER");
-    pipeline = std::make_unique<MediapipeGraphExecutor>(getName(), std::to_string(getVersion()),
-        this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames,
-        *this->sidePacketMaps,
-        this->pythonBackend, this->reporter.get(), std::move(graphIdGuard));
     return status;
 }
 
@@ -373,13 +416,10 @@ Status MediapipeGraphDefinition::reload(ModelManager& manager, const MediapipeGr
         std::this_thread::sleep_for(std::chrono::microseconds(1));
     }
     this->mgconfig = config;
-    //this->pythonNodeResourcesMap.reset();
-    //this->genAiServableMap.reset();
     this->queue.reset();
     SPDLOG_ERROR("XXX ER cleared queue");
-    this->sidePacketMaps.reset(); 
+    this->sidePacketMaps = std::make_shared<GraphSidePackets>();
     SPDLOG_ERROR("XXX ER cleared sidePacketMaps");
-    // TODO FIXME @atobisze NOW we created new maps here before
     return validate(manager);
 }
 
@@ -463,7 +503,6 @@ class ResourcesCleaningGuard {
 
 Status MediapipeGraphDefinition::initializeNodes() {
     SPDLOG_INFO("MediapipeGraphDefinition initializing graph nodes");
-    this->sidePacketMaps = std::make_shared<GraphSidePackets>();
     for (int i = 0; i < config.node().size(); i++) {
 #if (PYTHON_DISABLE == 0)
         auto& pythonNodeResourcesMap = this->sidePacketMaps->pythonNodeResourcesMap;
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp
index 7a4739438b..5f03ff2ba5 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.hpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp
@@ -123,6 +123,7 @@ class MediapipeGraphDefinition {
     };
 
     virtual Status validateForConfigFileExistence();
+    Status parseGraphQueueSizeDirective();
     Status validateForConfigLoadableness();
 
     Status setStreamTypes();
diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp
index c8825f82c8..5c59d00235 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.cpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp
@@ -90,6 +90,27 @@ MediapipeGraphExecutor::MediapipeGraphExecutor(
     currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)),
     mediapipeServableMetricReporter(mediapipeServableMetricReporter),
     guard(std::move(guard)) {}
-
+MediapipeGraphExecutor::MediapipeGraphExecutor(
+    const std::string& name,
+    const std::string& version,
+    const ::mediapipe::CalculatorGraphConfig& config,
+    stream_types_mapping_t inputTypes,
+    stream_types_mapping_t outputTypes,
+    std::vector<std::string> inputNames,
+    std::vector<std::string> outputNames,
+    const GraphSidePackets& sidePacketMaps,
+    PythonBackend* pythonBackend,
+    MediapipeServableMetricReporter* mediapipeServableMetricReporter) :
+    name(name),
+    version(version),
+    config(config),
+    inputTypes(std::move(inputTypes)),
+    outputTypes(std::move(outputTypes)),
+    inputNames(std::move(inputNames)),
+    outputNames(std::move(outputNames)),
+    sidePacketMaps(sidePacketMaps),
+    pythonBackend(pythonBackend),
+    currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)),
+    mediapipeServableMetricReporter(mediapipeServableMetricReporter) {}
 
 }  // namespace ovms
diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp
index 21159d03a3..a73f246e66 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.hpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp
@@ -16,6 +16,7 @@
 #pragma once
 #include <map>
 #include <memory>
+#include <optional>
 #include <set>
 #include <sstream>
 #include <string>
@@ -111,7 +112,7 @@ class MediapipeGraphExecutor {
     ::mediapipe::Timestamp currentStreamTimestamp;
 
     MediapipeServableMetricReporter* mediapipeServableMetricReporter;
-    GraphIdGuard guard;
+    std::optional<GraphIdGuard> guard;
 
 public:
 
@@ -138,6 +139,16 @@ class MediapipeGraphExecutor {
         const GraphSidePackets& sidePacketMaps,
         PythonBackend* pythonBackend,
         MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard);
+    // Constructor without graph queue (old path - graph created per-request)
+    MediapipeGraphExecutor(const std::string& name,
+        const std::string& version,
+        const ::mediapipe::CalculatorGraphConfig& config,
+        stream_types_mapping_t inputTypes,
+        stream_types_mapping_t outputTypes,
+        std::vector<std::string> inputNames, std::vector<std::string> outputNames,
+        const GraphSidePackets& sidePacketMaps,
+        PythonBackend* pythonBackend,
+        MediapipeServableMetricReporter* mediapipeServableMetricReporter);
 
     template <typename RequestType, typename ResponseType>
     Status infer(const RequestType* request, ResponseType* response, ExecutionContext executionContext) {
@@ -145,28 +156,74 @@ class MediapipeGraphExecutor {
         SPDLOG_DEBUG("Start unary KServe request mediapipe graph: {} execution", this->name);
         MetricCounterGuard failedRequestsGuard(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, false));
         MetricGaugeGuard currentGraphsGuard(this->mediapipeServableMetricReporter->currentGraphs.get());
-        ::mediapipe::CalculatorGraph& graph = this->guard.graph;
-        SPDLOG_ERROR("SetExecutor XXX");
-        //std::ignore = graph.SetExecutor("", sharedThreadPool);  // TODO FIXME
-        SPDLOG_ERROR("Start unary KServe request mediapipe graph: {} initializationXXXbegin", this->name);
-        //MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
-        //std::unordered_map<std::string, ::mediapipe::OutputStreamPoller> outputPollers;
+        if (this->guard.has_value()) {
+            return inferWithQueue(request, response, executionContext, failedRequestsGuard);
+        } else {
+            return inferWithoutQueue(request, response, executionContext, failedRequestsGuard);
+        }
+    }
+
+    template <typename RequestType, typename ResponseType>
+    Status inferWithQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) {
+        ::mediapipe::CalculatorGraph& graph = this->guard->graph;
         for (auto& name : this->outputNames) {
             if (name.empty()) {
                 SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", name);
                 return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR;
             }
-            SPDLOG_ERROR("ER XXX Will construct observer for guard:{}, helper:{}, graph:{}", (void*)&this->guard, (void*)this->guard.gh.get(), (void*)&graph);
-            guard.gh->outStreamObservers[name] = std::make_shared<MyFunctor<RequestType, ResponseType>>(name, this->outputTypes.at(name), *this, *request, *response);  // TODO use at() FIXME
-            /*
-            ///////////////
-            ///// OutputStreamPollers
-            ///////////
-            // CreateAPI Specific observer
-            // Replace guard ptr with new one
-            // What to do if
-            //MP_RETURN_ON_FAIL(graph.ObserveOutputStream(outputName, [&serverReaderWriter, &sendMutex, &outputName, &executionContext, this](const ::mediapipe::Packet& packet) -> absl::Status {
+            guard->gh->outStreamObservers[name] = std::make_shared<MyFunctor<RequestType, ResponseType>>(name, this->outputTypes.at(name), *this, *request, *response);
+        }
+
+        size_t numberOfPacketsCreated = 0;
+        auto ovms_status = createAndPushPacketsImpl(
+            std::shared_ptr<const RequestType>(request, [](const RequestType*) {}),
+            this->inputTypes,
+            this->pythonBackend,
+            graph,
+            this->guard->gh->currentTimestamp,
+            numberOfPacketsCreated);
+        if (!ovms_status.ok()) {
+            INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
+            return ovms_status;
+        }
 
+        if (this->inputNames.size() > numberOfPacketsCreated) {
+            SPDLOG_DEBUG("Not all input packets created. Expected: {}, Actual: {}. Aborting execution of mediapipe graph: {}",
+                this->inputNames.size(), numberOfPacketsCreated, this->name);
+            return Status(StatusCode::INVALID_NO_OF_INPUTS, "Not all input packets created");
+        }
+
+        failedRequestsGuard.disable();
+        INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, true));
+
+        auto status = graph.WaitUntilIdle();
+        if (!status.ok()) {
+            INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
+        }
+        MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code()));
+
+        status = graph.WaitUntilIdle();
+        if (!status.ok()) {
+            INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
+        }
+        MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code()));
+        SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name);
+        return StatusCode::OK;
+    }
+
+    template <typename RequestType, typename ResponseType>
+    Status inferWithoutQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) {
+        ::mediapipe::CalculatorGraph graph;
+        MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
+        enum : unsigned int { PROCESS, TIMER_END2 };
+        Timer<TIMER_END2> timer;
+        timer.start(PROCESS);
+        std::unordered_map<std::string, ::mediapipe::OutputStreamPoller> outputPollers;
+        for (auto& name : this->outputNames) {
+            if (name.empty()) {
+                SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", name);
+                return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR;
+            }
             auto absStatusOrPoller = graph.AddOutputStreamPoller(name);
             if (!absStatusOrPoller.ok()) {
                 const std::string absMessage = absStatusOrPoller.status().ToString();
@@ -174,20 +231,15 @@ class MediapipeGraphExecutor {
                 return Status(StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR, std::move(absMessage));
             }
             outputPollers.emplace(name, std::move(absStatusOrPoller).value());
-            */
         }
-        /*std::map<std::string, mediapipe::Packet> inputSidePackets;
+        std::map<std::string, mediapipe::Packet> inputSidePackets;
         OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, *request));
 #if (PYTHON_DISABLE == 0)
         inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<PythonNodeResourcesMap>(this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
 #endif
-        inputSidePackets[PYTHON_SIDE_PACKET_NAME] = mediapipe::MakePacket<PythonNodeResourcesMap>(*this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
-        inputSidePackets[LLM_SESSION_PACKET_NAME] = mediapipe::MakePacket<GenAiServableMap>(*this->sidePacketMaps.llmNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
-
         inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiServableMap>(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<ImageGenerationPipelinesMap>(this->sidePacketMaps.imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
-
         inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<RerankServableMap>(this->sidePacketMaps.rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<SttServableMap>(this->sidePacketMaps.sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<TtsServableMap>(this->sidePacketMaps.ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
@@ -196,53 +248,36 @@ class MediapipeGraphExecutor {
 
         ::mediapipe::Packet packet;
         std::set<std::string> outputPollersWithReceivedPacket;
-        // TODO FIXME no mechanism to check that
-        */
 
         size_t numberOfPacketsCreated = 0;
-        SPDLOG_ERROR("Current Timestamp pushing:{}", this->guard.gh->currentTimestamp.Value());
         auto ovms_status = createAndPushPacketsImpl(
-            std::shared_ptr<const RequestType>(request,
-                // Custom deleter to avoid deallocation by custom holder
-                // Conversion to shared_ptr is required for unified deserialization method
-                // for first and subsequent requests
-                [](const RequestType*) {}),
+            std::shared_ptr<const RequestType>(request, [](const RequestType*) {}),
             this->inputTypes,
             this->pythonBackend,
             graph,
-            this->guard.gh->currentTimestamp,
-            //            this->currentStreamTimestamp,
+            this->currentStreamTimestamp,
             numberOfPacketsCreated);
-        SPDLOG_ERROR("Current Timestamp pushed:{}", this->guard.gh->currentTimestamp.Value());
         if (!ovms_status.ok()) {
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
             return ovms_status;
         }
 
-        // This differs from inferStream - we require user to feed all streams
         if (this->inputNames.size() > numberOfPacketsCreated) {
             SPDLOG_DEBUG("Not all input packets created. Expected: {}, Actual: {}. Aborting execution of mediapipe graph: {}",
-                this->inputNames.size(),
-                numberOfPacketsCreated,
-                this->name);
+                this->inputNames.size(), numberOfPacketsCreated, this->name);
             return Status(StatusCode::INVALID_NO_OF_INPUTS, "Not all input packets created");
         }
 
         failedRequestsGuard.disable();
         INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, true));
 
-        // we wait idle since some calculators could hold ownership on packet content while nodes further down the graph
-        // can be still processing those. Closing packet sources triggers Calculator::Close() on nodes that do not expect
-        // new packets
         auto status = graph.WaitUntilIdle();
-        if (!status.ok()) {  // Collect error metric after Open()
+        if (!status.ok()) {
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
         }
         MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code()));
 
-        //        MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR);
-        //
-        /*
+        MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR);
         for (auto& [outputStreamName, poller] : outputPollers) {
             size_t receivedOutputs = 0;
             SPDLOG_DEBUG("Will wait for output stream: {} packet", outputStreamName);
@@ -266,21 +301,19 @@ class MediapipeGraphExecutor {
             }
             SPDLOG_TRACE("Received all: {} packets for: {}", receivedOutputs, outputStreamName);
         }
-        */
-        // status = graph.WaitUntilDone();
-        status = graph.WaitUntilIdle();
-        if (!status.ok()) {  // Collect error metric after Process()
+        status = graph.WaitUntilDone();
+        if (!status.ok()) {
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
         }
         MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code()));
-        /*        if (outputPollers.size() != outputPollersWithReceivedPacket.size()) {
+        if (outputPollers.size() != outputPollersWithReceivedPacket.size()) {
             SPDLOG_DEBUG("Mediapipe failed to execute. Failed to receive all output packets");
             return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Unknown error during mediapipe execution");
-        }*/
-        /*timer.stop(PROCESS);
+        }
+        timer.stop(PROCESS);
         double processTime = timer.template elapsed<std::chrono::microseconds>(PROCESS);
         OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime);
-        INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext));*/
+        INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext));
         SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name);
         return StatusCode::OK;
     }
diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp
index 6494514806..316917f788 100644
--- a/src/test/http_openai_handler_test.cpp
+++ b/src/test/http_openai_handler_test.cpp
@@ -212,7 +212,7 @@ Key: content-type; Value: application/json
         }
     
 JSON Parser:
-{"model":"gpt","stream":false,"messages":[]}012345678)"; // we reuse graph so this appends each time
+{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only
     ASSERT_EQ(response, expectedResponse);
 }
 
@@ -244,7 +244,7 @@ Key: test2; Value: header
         }
     
 JSON Parser:
-{"model":"gpt","stream":false,"messages":[]}012345678)"; // we reuse graph so this appends each time
+{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only
     ASSERT_EQ(response, expectedResponse);
 }
 
@@ -1456,3 +1456,96 @@ TEST_F(HttpOpenAIHandlerParsingTest, responseFormatNullValue) {
     EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus());
     EXPECT_FALSE(apiHandler->getResponseFormat().has_value());
 }
+
+// ==================== HttpOpenAIHandlerWithQueueTest ====================
+// Same as HttpOpenAIHandlerTest but uses config with graph_queue_size=1
+// to verify the graph pool (GraphQueue) path works correctly.
+class HttpOpenAIHandlerWithQueueTest : public ::testing::Test {
+protected:
+    ovms::Server& server = ovms::Server::instance();
+    std::unique_ptr<ovms::HttpRestApiHandler> handler;
+
+    std::unique_ptr<std::thread> t;
+    std::string port = "9173";
+
+    std::unordered_map<std::string, std::string> headers{{"content-type", "application/json"}};
+    ovms::HttpRequestComponents comp;
+    std::string endpoint = "/v3/chat/completions";
+    std::shared_ptr<MockedServerRequestInterface> writer;
+    std::shared_ptr<MockedMultiPartParser> multiPartParser;
+    std::string response;
+    ovms::HttpResponseComponents responseComponents;
+
+    void SetUpServer(const char* configPath) {
+        ::SetUpServer(this->t, this->server, this->port, configPath);
+        EnsureServerStartedWithTimeout(this->server, 5);
+        handler = std::make_unique<ovms::HttpRestApiHandler>(server, 5);
+    }
+
+    void SetUp() {
+        writer = std::make_shared<MockedServerRequestInterface>();
+        multiPartParser = std::make_shared<MockedMultiPartParser>();
+        SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json").c_str());
+        ASSERT_EQ(handler->parseRequestComponents(comp, "POST", endpoint, headers), ovms::StatusCode::OK);
+    }
+
+    void TearDown() {
+        handler.reset();
+        server.setShutdownRequest(1);
+        t->join();
+        server.setShutdownRequest(0);
+    }
+};
+
+TEST_F(HttpOpenAIHandlerWithQueueTest, UnaryWithQueue) {
+    std::string requestBody = R"(
+        {
+            "model": "gpt",
+            "stream": false,
+            "messages": []
+        }
+    )";
+
+    const std::string URI = "/v3/something";
+    ASSERT_EQ(
+        handler->dispatchToProcessor(URI, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+
+    std::string expectedResponse = R"(URI: /v3/something
+Key: content-type; Value: application/json
+Body:
+
+        {
+            "model": "gpt",
+            "stream": false,
+            "messages": []
+        }
+    
+JSON Parser:
+{"model":"gpt","stream":false,"messages":[]}012345678)";
+    ASSERT_EQ(response, expectedResponse);
+}
+
+TEST_F(HttpOpenAIHandlerWithQueueTest, StreamWithQueue) {
+    std::string requestBody = R"(
+        {
+            "model": "gpt",
+            "stream": true,
+            "messages": []
+        }
+    )";
+
+    EXPECT_CALL(*writer, PartialReplyBegin(::testing::_)).WillOnce(testing::Invoke([](std::function<void()> fn) { fn(); }));
+    EXPECT_CALL(*writer, PartialReplyEnd()).Times(1);
+    // The calculator produces 9 packets (timestamps 0-8) via loopback,
+    // each containing the accumulated body + timestamp. The '8' in the body stops the loop.
+    EXPECT_CALL(*writer, PartialReply(::testing::_)).Times(9);
+    EXPECT_CALL(*writer, IsDisconnected()).Times(9);
+
+    ASSERT_EQ(
+        handler->dispatchToProcessor("/v3/completions", requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::PARTIAL_END);
+
+    // For streaming, the response body stays empty (content goes through PartialReply callbacks)
+    ASSERT_EQ(response, "");
+}
diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json
index 5137dbea92..848729c2e6 100644
--- a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json
+++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json
@@ -3,7 +3,8 @@
     "mediapipe_config_list": [
         {
             "name": "gpt",
-            "graph_path": "/ovms/src/test/mediapipe/graph_gpt.pbtxt"
+            "graph_path": "/ovms/src/test/mediapipe/graph_gpt.pbtxt",
+            "graph_queue_size": -1
         }
     ]
 }
\ No newline at end of file
diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp
index aa1eee869b..ca1d1d2d91 100644
--- a/src/test/mediapipeflow_test.cpp
+++ b/src/test/mediapipeflow_test.cpp
@@ -232,9 +232,11 @@ class MediapipeFlowTest : public ::testing::TestWithParam<std::string> {
     void SetUp() override {
     }
     void TearDown() {
-        server.setShutdownRequest(1);
-        t->join();
-        server.setShutdownRequest(0);
+        if (t) {
+            server.setShutdownRequest(1);
+            t->join();
+            server.setShutdownRequest(0);
+        }
     }
 };
 
@@ -4067,3 +4069,118 @@ TEST(WhitelistRegistered, MediapipeSubgraphList) {
 
     ASSERT_THAT(mediapipe::SubgraphRegistry::GetRegisteredNames(), UnorderedElementsAreArray(expected)) << readableSetError(mediapipe::SubgraphRegistry::GetRegisteredNames(), expected);
 }
+
+// --- OVMS_GRAPH_QUEUE_SIZE pbtxt directive tests ---
+
+// Minimal valid pbtxt that MediaPipe can parse (uses a registered test calculator)
+static const char* MINIMAL_PBTXT_TEMPLATE = R"(
+input_stream: "HTTP_REQUEST_PAYLOAD:input"
+output_stream: "HTTP_RESPONSE_PAYLOAD:output"
+node: {
+  calculator: "OpenAIChatCompletionsMockCalculator"
+  input_stream: "LOOPBACK:loopback"
+  input_stream: "HTTP_REQUEST_PAYLOAD:input"
+  output_stream: "LOOPBACK:loopback"
+  output_stream: "HTTP_RESPONSE_PAYLOAD:output"
+  input_stream_info: {
+    tag_index: 'LOOPBACK:0',
+    back_edge: true
+  }
+  input_stream_handler {
+    input_stream_handler: "SyncSetInputStreamHandler",
+    options {
+      [mediapipe.SyncSetInputStreamHandlerOptions.ext] {
+        sync_set {
+          tag_index: "LOOPBACK:0"
+        }
+      }
+    }
+  }
+}
+)";
+
+static std::string makePbtxtWithDirective(const std::string& directive) {
+    return directive + "\n" + MINIMAL_PBTXT_TEMPLATE;
+}
+
+TEST(MediapipeGraphQueueSizeDirective, NoDirectiveMeansDisabled) {
+    ovms::MediapipeGraphConfig mgc;
+    DummyMediapipeGraphDefinition def("test", mgc, MINIMAL_PBTXT_TEMPLATE);
+    ovms::ModelManager manager;
+    auto status = def.validate(manager);
+    ASSERT_EQ(status, ovms::StatusCode::OK);
+    EXPECT_FALSE(mgc.getGraphQueueSize().has_value());
+    // getInitialQueueSize on default mgc returns -1
+    EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), -1);
+}
+
+TEST(MediapipeGraphQueueSizeDirective, ExplicitPositiveValue) {
+    std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: 4");
+    ovms::MediapipeGraphConfig mgc;
+    DummyMediapipeGraphDefinition def("test", mgc, pbtxt);
+    ovms::ModelManager manager;
+    auto status = def.validate(manager);
+    ASSERT_EQ(status, ovms::StatusCode::OK);
+    EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), 4);
+}
+
+TEST(MediapipeGraphQueueSizeDirective, DisabledExplicitly) {
+    std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: -1");
+    ovms::MediapipeGraphConfig mgc;
+    DummyMediapipeGraphDefinition def("test", mgc, pbtxt);
+    ovms::ModelManager manager;
+    auto status = def.validate(manager);
+    ASSERT_EQ(status, ovms::StatusCode::OK);
+    EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), -1);
+}
+
+TEST(MediapipeGraphQueueSizeDirective, AutoValue) {
+    std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: AUTO");
+    ovms::MediapipeGraphConfig mgc;
+    DummyMediapipeGraphDefinition def("test", mgc, pbtxt);
+    ovms::ModelManager manager;
+    auto status = def.validate(manager);
+    ASSERT_EQ(status, ovms::StatusCode::OK);
+    EXPECT_GT(def.getMediapipeGraphConfig().getInitialQueueSize(), 0);
+}
+
+TEST(MediapipeGraphQueueSizeDirective, ZeroRejected) {
+    std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: 0");
+    ovms::MediapipeGraphConfig mgc;
+    DummyMediapipeGraphDefinition def("test", mgc, pbtxt);
+    ovms::ModelManager manager;
+    auto status = def.validate(manager);
+    EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID);
+}
+
+TEST(MediapipeGraphQueueSizeDirective, NegativeBelowMinusOneRejected) {
+    std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: -2");
+    ovms::MediapipeGraphConfig mgc;
+    DummyMediapipeGraphDefinition def("test", mgc, pbtxt);
+    ovms::ModelManager manager;
+    auto status = def.validate(manager);
+    EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID);
+}
+
+TEST(MediapipeGraphQueueSizeDirective, ExceedsHardwareThreads) {
+    unsigned int maxThreads = std::thread::hardware_concurrency();
+    if (maxThreads == 0) {
+        GTEST_SKIP() << "hardware_concurrency() returned 0, cannot test thread limit";
+    }
+    int oversized = static_cast<int>(maxThreads) + 1;
+    std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: " + std::to_string(oversized));
+    ovms::MediapipeGraphConfig mgc;
+    DummyMediapipeGraphDefinition def("test", mgc, pbtxt);
+    ovms::ModelManager manager;
+    auto status = def.validate(manager);
+    EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID);
+}
+
+TEST(MediapipeGraphQueueSizeDirective, InvalidStringRejected) {
+    std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: INVALID");
+    ovms::MediapipeGraphConfig mgc;
+    DummyMediapipeGraphDefinition def("test", mgc, pbtxt);
+    ovms::ModelManager manager;
+    auto status = def.validate(manager);
+    EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID);
+}
diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp
index 1a2e6d2b78..c50969717e 100644
--- a/src/test/streaming_test.cpp
+++ b/src/test/streaming_test.cpp
@@ -355,14 +355,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::KFS_REQUEST}},
         {{"out", mediapipe_packet_type_enum::KFS_RESPONSE}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
@@ -415,14 +412,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}});  // no timestamp specified, server will assign one
@@ -561,14 +555,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests with manually (client) assigned ascending order of timestamp and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, 3);  // first request with timestamp 3
@@ -609,14 +600,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock only 1 request and disconnect immediately
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
@@ -1232,9 +1220,6 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in1", mediapipe_packet_type_enum::OVTENSOR},
@@ -1245,7 +1230,7 @@ node {
             {"out3", mediapipe_packet_type_enum::OVTENSOR}},
         {"in1", "in2", "in3"},
         {"out1", "out2", "out3"},
-{}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1287,9 +1272,6 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in1", mediapipe_packet_type_enum::OVTENSOR},
@@ -1300,7 +1282,7 @@ node {
             {"out3", mediapipe_packet_type_enum::OVTENSOR}},
         {"in1", "in2", "in3"},
         {"out1", "out2", "out3"},
-{}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1331,14 +1313,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1368,14 +1347,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"wrong_name"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};  // cannot install observer due to wrong output name (should never happen due to validation)
+{"in"}, {"wrong_name"}, {}, nullptr, this->reporter.get()};  // cannot install observer due to wrong output name (should never happen due to validation)
 
     EXPECT_CALL(this->stream, Read(_)).Times(0);
     EXPECT_CALL(this->stream, Write(_, _)).Times(0);
@@ -1396,14 +1372,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {});
     EXPECT_CALL(this->stream, Read(_))
@@ -1427,14 +1400,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1466,14 +1436,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
@@ -1492,14 +1459,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Invalid request - missing data in buffer
     prepareInvalidRequest(this->firstRequest, {"in"});  // no timestamp specified, server will assign one
@@ -1530,14 +1494,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise[3];
     std::future<void> signalFuture[3] = {
@@ -1580,14 +1541,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, 0);
     EXPECT_CALL(this->stream, Read(_))
@@ -1611,14 +1569,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     setRequestTimestamp(this->firstRequest, std::string("not an int"));
@@ -1649,14 +1604,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Timestamps not allowed in stream
     // Expect continuity of operation and response with error message
@@ -1694,14 +1646,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Allowed in stream
     for (auto timestamp : std::vector<::mediapipe::Timestamp>{
@@ -1733,14 +1682,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequestWithParam(this->firstRequest, {{"in", 3.5f}}, {"val", 65});  // request with parameter val
@@ -1773,14 +1719,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving the invalid request and disconnection
     // Request with invalid param py (special pythons session side packet)
@@ -1805,14 +1748,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});  // missing required request param
     EXPECT_CALL(this->stream, Read(_)).Times(0);
@@ -1834,14 +1774,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 2 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, std::nullopt, this->name, this->version);  // no timestamp specified, server will assign one
@@ -1871,14 +1808,11 @@ node {
     ::mediapipe::CalculatorGraphConfig config;
     ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
 
-    auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
-    GraphIdGuard guard(queue);
     MediapipeGraphExecutor executor{
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get(), std::move(guard)};
+{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
diff --git a/src/test/test_utils.hpp b/src/test/test_utils.hpp
index 65e72b543c..d9e256621b 100644
--- a/src/test/test_utils.hpp
+++ b/src/test/test_utils.hpp
@@ -846,13 +846,12 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition {
         ovms::PythonBackend* pythonBackend = nullptr) :
         ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, pythonBackend) {
         this->inputConfig = inputConfig;
-        this->mgconfig.setGraphQueueSize(-1); // TODO FIXME @atobisze
     }
 
     // Do not read from path - use predefined config contents
     ovms::Status validateForConfigFileExistence() override {
         this->chosenConfig = this->inputConfig;
-        return ovms::StatusCode::OK;
+        return parseGraphQueueSizeDirective();
     }
 };
 #endif

From 3fb09d9c9ff59ac41e9525e0c3dc9dce0d38afa7 Mon Sep 17 00:00:00 2001
From: Adrian Tobiszewski <adrian.tobiszewski@intel.com>
Date: Thu, 19 Feb 2026 16:37:49 +0100
Subject: [PATCH 4/8] Checkpoint

---
 common_settings.bzl                           |  2 -
 src/mediapipe_internal/graphqueue.cpp         | 38 +++++--------------
 src/mediapipe_internal/graphqueue.hpp         |  4 --
 .../mediapipegraphdefinition.cpp              | 12 +-----
 .../mediapipegraphexecutor.hpp                | 12 +-----
 .../outputstreamobserver.hpp                  |  9 ++---
 6 files changed, 15 insertions(+), 62 deletions(-)

diff --git a/common_settings.bzl b/common_settings.bzl
index 2a995d59c5..c5bc6ddcc6 100644
--- a/common_settings.bzl
+++ b/common_settings.bzl
@@ -209,8 +209,6 @@ COMMON_STATIC_TEST_COPTS = select({
                     "-Wall",
                     "-Wno-unknown-pragmas",
                     "-Werror",
-                    # ov::Tensor::data method call results in deprecated warning and we use it in multiple places
-                    "-Wno-deprecated-declarations",
                     "-Isrc",
                     "-fconcepts", # for gmock related utils
                     "-fvisibility=hidden",# Needed for pybind targets
diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp
index a3e96febb8..ea9a2680f4 100644
--- a/src/mediapipe_internal/graphqueue.cpp
+++ b/src/mediapipe_internal/graphqueue.cpp
@@ -40,7 +40,6 @@ namespace ovms {
 GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr<GraphSidePackets> sidePacketMaps, int streamsLength) :
     Queue(streamsLength),
     sidePacketMaps(sidePacketMaps) {
-    SPDLOG_ERROR("ER Constr graph queue:{}", (void*)this);
     inferRequests.reserve(streamsLength);
     // TODO FIXME split constructor to init to handle retCodes?
     for (auto i = 0; i < streamsLength; ++i) {
@@ -50,17 +49,17 @@ GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::sh
 
         auto absStatus = gh->graph->Initialize(config);
         if (!absStatus.ok()) {
-            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
-            throw 42;
+            SPDLOG_ERROR("Graph queue initialization failed: {}", absStatus.ToString());
+            throw std::runtime_error(absStatus.ToString());
         }
         for (auto& name : config.output_stream()) {
             std::string streamName = getStreamName(name);
             gh->outStreamObservers[streamName] = std::shared_ptr<OutputStreamObserverI>(new NullOutputStreamObserver());  // TODO use at() FIXME
             auto& perGraphObserverFunctor = gh->outStreamObservers[streamName];
-            absStatus = gh->graph->ObserveOutputStream(streamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); });  // TODO FIXME throw?
+            absStatus = gh->graph->ObserveOutputStream(streamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); });
             if (!absStatus.ok()) {
-                SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
-                throw 42;
+                SPDLOG_ERROR("Graph queue ObserveOutputStream failed: {}", absStatus.ToString());
+                throw std::runtime_error(absStatus.ToString());
             }
         }
         std::map<std::string, mediapipe::Packet> inputSidePackets;
@@ -73,47 +72,30 @@ GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::sh
         inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<RerankServableMap>(sidePacketMaps->rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<SttServableMap>(sidePacketMaps->sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<TtsServableMap>(sidePacketMaps->ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
-        SPDLOG_ERROR("ER");
         absStatus = gh->graph->StartRun(inputSidePackets);
-        SPDLOG_ERROR("ER");
         if (!absStatus.ok()) {
-            SPDLOG_ERROR("Input sidePackets size:{}", inputSidePackets.size());
-            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
-            throw 42;
+            SPDLOG_ERROR("Graph queue StartRun failed: {}", absStatus.ToString());
+            throw std::runtime_error(absStatus.ToString());
         }
-
-        SPDLOG_ERROR("ER");
         inferRequests.emplace_back(std::move(gh));
-        SPDLOG_ERROR("ER");
     }
 }
 GraphQueue::~GraphQueue() {
-    SPDLOG_ERROR("ER Destroy graph queue:{}", (void*)this);
     for (auto& graphHelper : inferRequests) {
         auto absStatus = graphHelper->graph->WaitUntilIdle();
         if (!absStatus.ok()) {
-            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
-            //        throw 42.2;
+            SPDLOG_DEBUG("Graph queue WaitUntilIdle error: {}", absStatus.ToString());
         }
         absStatus = graphHelper->graph->CloseAllPacketSources();
         if (!absStatus.ok()) {
-            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
-            //      throw "as";
+            SPDLOG_DEBUG("Graph queue CloseAllPacketSources error: {}", absStatus.ToString());
         }
         absStatus = graphHelper->graph->WaitUntilDone();
         if (!absStatus.ok()) {
-            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
-            //    throw 42.2;
+            SPDLOG_DEBUG("Graph queue WaitUntilDone error: {}", absStatus.ToString());
         }
         graphHelper->graph->Cancel();
-        if (!absStatus.ok()) {
-            SPDLOG_ERROR("ER issue:{} {}", absStatus.ToString(), (void*)this);
-            //    throw 42.2;
-        }
-        SPDLOG_ERROR("ER");
         graphHelper->graph.reset();
-        SPDLOG_ERROR("ER");
     }
-    SPDLOG_ERROR("ER Destroy graph queue:{}", (void*)this);
 }
 }  // namespace ovms
diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp
index 7c4d89b33f..a570557211 100644
--- a/src/mediapipe_internal/graphqueue.hpp
+++ b/src/mediapipe_internal/graphqueue.hpp
@@ -73,17 +73,13 @@ struct GraphIdGuard {
         id(queue->getIdleStream().get()),
         gh((queue->getInferRequest(id))),
         graph(*gh->graph) {
-        SPDLOG_ERROR("ER Guard construct this:{}", (void*)this);
     }
     GraphIdGuard(GraphIdGuard&&) = default;
     GraphIdGuard(const GraphIdGuard&) = delete;
     ~GraphIdGuard() {
         auto existingQueue = weakQueue.lock();
-        SPDLOG_ERROR("ER DEstroy Guard begin qu:{}", (void*)existingQueue.get());
         if (existingQueue)
             existingQueue->returnStream(this->id);
-        SPDLOG_ERROR("ER Destroy Guard end qu:{}", (void*)existingQueue.get());
-        SPDLOG_ERROR("ER Guard destroy this:{}", (void*)this);
     }
 };
 }  // namespace ovms
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp
index 6b2161ca58..8cb3443f48 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.cpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp
@@ -194,14 +194,12 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) {
     if (!validationResult.ok()) {
         return validationResult;
     }
-    SPDLOG_ERROR("ER");
     std::unique_lock lock(metadataMtx);
     auto status = createInputsInfo();
     if (!status.ok()) {
         SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create inputs info for mediapipe graph definition: {}", getName());
         return status;
     }
-    SPDLOG_ERROR("ER");
     status = createOutputsInfo();
     if (!status.ok()) {
         SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create outputs info for mediapipe graph definition: {}", getName());
@@ -261,13 +259,6 @@ MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name,
     reporter(std::make_unique<MediapipeServableMetricReporter>(metricConfig, registry, name)) {
     mgconfig = config;
     passKfsRequestFlag = false;
-    SPDLOG_ERROR("XXX ER new PythonNodeResourcesMap:{}", (void*)&this->sidePacketMaps->pythonNodeResourcesMap);
-    SPDLOG_ERROR("XXX ER new genAiServableMap:{}", (void*)&this->sidePacketMaps->genAiServableMap);
-    /*if (!sharedThreadPool) {
-        SPDLOG_ERROR("Created shared Thread Pool XXX");
-        //sharedThreadPool = std::make_shared<mediapipe::ThreadPoolExecutor>(std::thread::hardware_concurrency());  // TODO FIXME should be in MP factory
-    }*/
-   // TODO FIXME illegal constructor as we do not create queue here
 }
 
 Status MediapipeGraphDefinition::createInputsInfo() {
@@ -343,6 +334,7 @@ Status MediapipeGraphDefinition::create(std::unique_ptr<MediapipeGraphExecutor>&
             *this->sidePacketMaps,
             this->pythonBackend, this->reporter.get());
     }
+    SPDLOG_DEBUG("Created Mediapipe graph executor: {}", getName());
     return status;
 }
 
@@ -417,9 +409,7 @@ Status MediapipeGraphDefinition::reload(ModelManager& manager, const MediapipeGr
     }
     this->mgconfig = config;
     this->queue.reset();
-    SPDLOG_ERROR("XXX ER cleared queue");
     this->sidePacketMaps = std::make_shared<GraphSidePackets>();
-    SPDLOG_ERROR("XXX ER cleared sidePacketMaps");
     return validate(manager);
 }
 
diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp
index a73f246e66..54996fddca 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.hpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp
@@ -87,12 +87,9 @@ struct MyFunctor : public OutputStreamObserverI {
         outputStreamName(outputStreamName),
         packetType(packetType),
         response(response) {
-        SPDLOG_ERROR("ER MyFunctor:{} observer constructed:{}", outputStreamName, (void*)this);
     }
     absl::Status handlePacket(const ::mediapipe::Packet& packet) override;
-    ~MyFunctor() {
-        SPDLOG_ERROR("ER Destroy Functor:{} this:{}", outputStreamName, (void*)this);
-    }
+    ~MyFunctor() = default;
 };
 class MediapipeGraphExecutor {
 public:
@@ -201,12 +198,6 @@ class MediapipeGraphExecutor {
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
         }
         MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code()));
-
-        status = graph.WaitUntilIdle();
-        if (!status.ok()) {
-            INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
-        }
-        MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code()));
         SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name);
         return StatusCode::OK;
     }
@@ -472,7 +463,6 @@ class MediapipeGraphExecutor {
 
 template <typename RequestType, typename ResponseType>
 absl::Status MyFunctor<RequestType, ResponseType>::handlePacket(const ::mediapipe::Packet& packet) {
-    SPDLOG_ERROR("ER my functor:{}", (void*)this);
     auto status = onPacketReadySerializeImpl(
         this->requestId,
         this->exec.name,
diff --git a/src/mediapipe_internal/outputstreamobserver.hpp b/src/mediapipe_internal/outputstreamobserver.hpp
index e08b99fc43..1a314e73ae 100644
--- a/src/mediapipe_internal/outputstreamobserver.hpp
+++ b/src/mediapipe_internal/outputstreamobserver.hpp
@@ -51,13 +51,10 @@ class OutputStreamObserverI {
 };
 class NullOutputStreamObserver : public OutputStreamObserverI {
 public:
-    NullOutputStreamObserver() {
-        SPDLOG_ERROR("NUll observer constructed:{}", (void*)this);
-    }
+    NullOutputStreamObserver() = default;
     absl::Status handlePacket(const ::mediapipe::Packet& packet) override {
-        SPDLOG_ERROR("Internal error occured:{}", (void*)this);
-        throw std::runtime_error("Should not happen");
-        return absl::Status(absl::StatusCode::kInternal, "Should not happen");
+        SPDLOG_ERROR("NullOutputStreamObserver::handlePacket called - graph observer was not replaced before execution");
+        throw std::runtime_error("NullOutputStreamObserver should have been replaced before graph execution");
     }
 };
 }  // namespace ovms

From 0ae35e18c032c7b8480564e4e7aeb29178634c26 Mon Sep 17 00:00:00 2001
From: Adrian Tobiszewski <adrian.tobiszewski@intel.com>
Date: Fri, 20 Feb 2026 08:06:06 +0100
Subject: [PATCH 5/8] Streaming with queue

---
 .../http_graph_executor_impl.cpp              |   4 +
 .../http_graph_executor_impl.hpp              |   3 +
 src/kfs_frontend/kfs_graph_executor_impl.cpp  |  10 +
 src/kfs_frontend/kfs_graph_executor_impl.hpp  |   4 +
 .../mediapipegraphdefinition.cpp              |  21 +-
 .../mediapipegraphdefinition.hpp              |   2 +-
 .../mediapipegraphexecutor.cpp                |  29 ---
 .../mediapipegraphexecutor.hpp                | 188 ++++++++++++++--
 src/test/ensemble_config_change_stress.cpp    |  89 +++++++-
 ...ediapipe_openai_chat_completions_mock.json |   5 +-
 ...enai_chat_completions_mock_with_queue.json |   9 +
 src/test/mediapipe/graph_gpt_with_queue.pbtxt |  40 ++++
 ...eue_dummyadapterfull_dummyinputnames.pbtxt |  46 ++++
 ...yadapterfull_dummyinputnames_newpath.pbtxt |  46 ++++
 ...yadapterfull_dummyinputnames_newpath.pbtxt |  45 ++++
 src/test/mediapipeflow_test.cpp               |  17 +-
 src/test/pythonnode_test.cpp                  |   9 +-
 src/test/streaming_test.cpp                   | 207 ++++++++++++++++++
 src/test/stress_test_utils.hpp                | 144 +++++++++++-
 19 files changed, 848 insertions(+), 70 deletions(-)
 create mode 100644 src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json
 create mode 100644 src/test/mediapipe/graph_gpt_with_queue.pbtxt
 create mode 100644 src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt
 create mode 100644 src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt
 create mode 100644 src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt

diff --git a/src/http_frontend/http_graph_executor_impl.cpp b/src/http_frontend/http_graph_executor_impl.cpp
index b970f62594..4848f3760a 100644
--- a/src/http_frontend/http_graph_executor_impl.cpp
+++ b/src/http_frontend/http_graph_executor_impl.cpp
@@ -38,6 +38,10 @@ namespace ovms {
 
 static const std::string UNUSED_REQUEST_ID = "";
 
+bool requestHasInputSidePackets(const HttpPayload& request) {
+    return false;
+}
+
 Status deserializeInputSidePacketsFromFirstRequestImpl(
     std::map<std::string, mediapipe::Packet>& inputSidePackets,  // out
     const HttpPayload& request) {                                // in
diff --git a/src/http_frontend/http_graph_executor_impl.hpp b/src/http_frontend/http_graph_executor_impl.hpp
index 9846b10158..205d428a1b 100644
--- a/src/http_frontend/http_graph_executor_impl.hpp
+++ b/src/http_frontend/http_graph_executor_impl.hpp
@@ -48,6 +48,9 @@ class PythonBackend;
 
 using HttpReaderWriter = HttpAsyncWriter;
 
+// Checks whether the request contains user-provided input side packets.
+bool requestHasInputSidePackets(const HttpPayload& request);
+
 // Deserialization of parameters inside KServe gRPC request
 // into mediapipe Packets.
 // To be used by both - infer & inferStream.
diff --git a/src/kfs_frontend/kfs_graph_executor_impl.cpp b/src/kfs_frontend/kfs_graph_executor_impl.cpp
index 2751a49e94..2a2d0ff3b8 100644
--- a/src/kfs_frontend/kfs_graph_executor_impl.cpp
+++ b/src/kfs_frontend/kfs_graph_executor_impl.cpp
@@ -1156,6 +1156,16 @@ Status createAndPushPacketsImpl(
     return StatusCode::OK;
 }
 
+bool requestHasInputSidePackets(const KFSRequest& request) {
+    static const std::string TIMESTAMP_PARAM{"OVMS_MP_TIMESTAMP"};
+    for (const auto& [name, valueChoice] : request.parameters()) {
+        if (name != TIMESTAMP_PARAM) {
+            return true;
+        }
+    }
+    return false;
+}
+
 Status deserializeInputSidePacketsFromFirstRequestImpl(
     std::map<std::string, mediapipe::Packet>& inputSidePackets,
     const KFSRequest& request) {
diff --git a/src/kfs_frontend/kfs_graph_executor_impl.hpp b/src/kfs_frontend/kfs_graph_executor_impl.hpp
index cfa65b6a57..1c6e697455 100644
--- a/src/kfs_frontend/kfs_graph_executor_impl.hpp
+++ b/src/kfs_frontend/kfs_graph_executor_impl.hpp
@@ -36,6 +36,10 @@ namespace ovms {
 class PythonBackend;
 class Status;
 
+// Checks whether the request contains user-provided input side packets
+// (parameters other than the reserved OVMS_MP_TIMESTAMP).
+bool requestHasInputSidePackets(const KFSRequest& request);
+
 // Deserialization of parameters inside KServe gRPC request
 // into mediapipe Packets.
 // To be used by both - infer & inferStream.
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp
index 8cb3443f48..38533093b7 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.cpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp
@@ -225,7 +225,10 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) {
     if (!status.ok()) {
         return status;
     }
-    this->initializeQueueIfRequired();
+    status = this->initializeQueueIfRequired();
+    if (!status.ok()) {
+        return status;
+    }
 
     lock.unlock();
     notifier.passed = true;
@@ -236,15 +239,23 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) {
     return StatusCode::OK;
 }
 
-void MediapipeGraphDefinition::initializeQueueIfRequired() {
-    // TODO FIXME @atobisze
+Status MediapipeGraphDefinition::initializeQueueIfRequired() {
     int initialQueueSize = this->mgconfig.getInitialQueueSize();
     if (initialQueueSize < 0) {
         SPDLOG_DEBUG("Graph queue creation disabled for mediapipe: {} (graph_queue_size={})", getName(), initialQueueSize);
-        return;
+        return StatusCode::OK;
+    }
+    try {
+        this->queue = std::make_shared<GraphQueue>(this->config, this->sidePacketMaps, initialQueueSize);
+    } catch (const std::exception& e) {
+        SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create graph queue for mediapipe: {} error: {}", getName(), e.what());
+        return StatusCode::INTERNAL_ERROR;
+    } catch (...) {
+        SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create graph queue for mediapipe: {} unknown error", getName());
+        return StatusCode::INTERNAL_ERROR;
     }
-    this->queue = std::make_shared<GraphQueue>(this->config, this->sidePacketMaps, initialQueueSize);
     SPDLOG_DEBUG("Created graph queue with size {} for mediapipe: {}", initialQueueSize, getName());
+    return StatusCode::OK;
 }
 
 MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name,
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp
index 5f03ff2ba5..808d0eb531 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.hpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp
@@ -128,7 +128,7 @@ class MediapipeGraphDefinition {
 
     Status setStreamTypes();
     Status dryInitializeTest();
-    void initializeQueueIfRequired();
+    Status initializeQueueIfRequired();
     std::string chosenConfig;
     static MediapipeGraphConfig MGC;
     const std::string name;
diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp
index 5c59d00235..601a164f61 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.cpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp
@@ -37,35 +37,6 @@
 
 namespace ovms {
 
-MediapipeGraphExecutor::MediapipeGraphExecutor(
-    const std::string& name,
-    const std::string& version,
-    const ::mediapipe::CalculatorGraphConfig& config,
-    stream_types_mapping_t inputTypes,
-    stream_types_mapping_t outputTypes,
-    std::vector<std::string> inputNames,
-    std::vector<std::string> outputNames,
-    const PythonNodeResourcesMap& pythonNodeResourcesMap,
-    const GenAiServableMap& llmNodeResourcesMap,
-    const EmbeddingsServableMap& embeddingsServableMap,
-    const RerankServableMap& rerankServableMap,
-    const SttServableMap& sttServableMap,
-    const TtsServableMap& ttsServableMap,
-    PythonBackend* pythonBackend,
-    MediapipeServableMetricReporter* mediapipeServableMetricReporter,
-    GraphIdGuard&& guard) :
-    name(name),
-    version(version),
-    config(config),
-    inputTypes(std::move(inputTypes)),
-    outputTypes(std::move(outputTypes)),
-    inputNames(std::move(inputNames)),
-    outputNames(std::move(outputNames)),
-    sidePacketMaps({pythonNodeResourcesMap, llmNodeResourcesMap, {}, embeddingsServableMap, rerankServableMap, sttServableMap, ttsServableMap}),
-    pythonBackend(pythonBackend),
-    currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)),
-    mediapipeServableMetricReporter(mediapipeServableMetricReporter),
-    guard(std::move(guard)) {}
 MediapipeGraphExecutor::MediapipeGraphExecutor(
     const std::string& name,
     const std::string& version,
diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp
index 54996fddca..391d1849fb 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.hpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp
@@ -91,6 +91,33 @@ struct MyFunctor : public OutputStreamObserverI {
     absl::Status handlePacket(const ::mediapipe::Packet& packet) override;
     ~MyFunctor() = default;
 };
+
+template <typename ReaderWriterType>
+struct StreamingFunctor : public OutputStreamObserverI {
+    ReaderWriterType& serverReaderWriter;
+    std::mutex& sendMutex;
+    const std::string& executorName;
+    const std::string& executorVersion;
+    const std::string outputStreamName;
+    mediapipe_packet_type_enum packetType;
+    ExecutionContext executionContext;
+    MediapipeServableMetricReporter* metricReporter;
+    StreamingFunctor(const std::string& outputStreamName, mediapipe_packet_type_enum packetType,
+        const std::string& executorName, const std::string& executorVersion,
+        ReaderWriterType& serverReaderWriter, std::mutex& sendMutex,
+        ExecutionContext executionContext, MediapipeServableMetricReporter* metricReporter) :
+        serverReaderWriter(serverReaderWriter),
+        sendMutex(sendMutex),
+        executorName(executorName),
+        executorVersion(executorVersion),
+        outputStreamName(outputStreamName),
+        packetType(packetType),
+        executionContext(executionContext),
+        metricReporter(metricReporter) {
+    }
+    absl::Status handlePacket(const ::mediapipe::Packet& packet) override;
+    ~StreamingFunctor() = default;
+};
 class MediapipeGraphExecutor {
 public:
     const std::string name;
@@ -112,21 +139,6 @@ class MediapipeGraphExecutor {
     std::optional<GraphIdGuard> guard;
 
 public:
-
-    [[deprecated("Use constructor with side packets instead")]]
-    MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config,
-        stream_types_mapping_t inputTypes,
-        stream_types_mapping_t outputTypes,
-        std::vector<std::string> inputNames, std::vector<std::string> outputNames,
-        const PythonNodeResourcesMap& pythonNodeResourcesMap,
-        const GenAiServableMap& llmNodeResourcesMap,
-        const EmbeddingsServableMap& embeddingsServableMap,
-        const RerankServableMap& rerankServableMap,
-        const SttServableMap& sttServableMap,
-        const TtsServableMap& ttsServableMap,
-        PythonBackend* pythonBackend,
-        MediapipeServableMetricReporter* mediapipeServableMetricReporter,
-        GraphIdGuard&& guard);
     MediapipeGraphExecutor(const std::string& name,
         const std::string& version,
         const ::mediapipe::CalculatorGraphConfig& config,
@@ -312,6 +324,123 @@ class MediapipeGraphExecutor {
     template <typename RequestType, typename ReaderWriterType>
     Status inferStream(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) {
         OVMS_PROFILE_FUNCTION();
+        if (this->guard.has_value()) {
+            return inferStreamWithQueue(req, serverReaderWriter, executionContext);
+        } else {
+            return inferStreamWithoutQueue(req, serverReaderWriter, executionContext);
+        }
+    }
+
+    template <typename RequestType, typename ReaderWriterType>
+    Status inferStreamWithQueue(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) {
+        SPDLOG_DEBUG("Start streaming mediapipe graph: {} execution (queue path)", this->name);
+        std::mutex sendMutex;
+        try {
+            // Graph queue does not support user-provided input side packets.
+            // Side packets are set at queue construction time.
+            if (requestHasInputSidePackets(req)) {
+                SPDLOG_DEBUG("Graph queue does not support user-provided input side packets. "
+                             "Side packets are set at graph queue construction time. Graph: {}", this->name);
+                return Status(StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR,
+                    "Input side packets are not supported for graphs with queue enabled");
+            }
+            MetricGaugeGuard currentGraphs(this->mediapipeServableMetricReporter->currentGraphs.get());
+            ::mediapipe::CalculatorGraph& graph = this->guard->graph;
+
+            enum : unsigned int {
+                PROCESS,
+                TIMER_END2
+            };
+            Timer<TIMER_END2> timer;
+            timer.start(PROCESS);
+
+            // Swap output stream observers to streaming functors.
+            // Observers are already installed on the graph at queue construction time;
+            // we only replace the functor implementation to serialize+send to the client.
+            // Lifetime: sendMutex and serverReaderWriter are stack-local in this method
+            // and outlive all callbacks because we WaitUntilIdle() before returning.
+            for (const auto& outputName : this->outputNames) {
+                if (outputName.empty()) {
+                    SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", outputName);
+                    return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR;
+                }
+                guard->gh->outStreamObservers[outputName] = std::make_shared<StreamingFunctor<ReaderWriterType>>(
+                    outputName, this->outputTypes.at(outputName),
+                    this->name, this->version,
+                    serverReaderWriter, sendMutex,
+                    executionContext, this->mediapipeServableMetricReporter);
+            }
+
+            size_t numberOfPacketsCreated = 0;
+            {
+                OVMS_PROFILE_SCOPE("Mediapipe graph deserializing first request");
+                bool isSuccess = true;
+                OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE(
+                    createAndPushPacketsImpl(
+                        std::shared_ptr<const RequestType>(&req,
+                            [](const RequestType*) {}),
+                        this->inputTypes,
+                        this->pythonBackend,
+                        graph,
+                        this->guard->gh->currentTimestamp,
+                        numberOfPacketsCreated),
+                    "partial deserialization of first request", isSuccess);
+                INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, isSuccess));
+            }
+
+            // Read loop
+            auto newReq = std::make_shared<RequestType>();
+            while (waitForNewRequest(serverReaderWriter, *newReq)) {
+                auto pstatus = validateSubsequentRequestImpl(
+                    *newReq,
+                    this->name,
+                    this->version,
+                    this->inputTypes);
+                bool isSuccess = true;
+                if (pstatus.ok()) {
+                    OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE(
+                        createAndPushPacketsImpl(
+                            newReq,
+                            this->inputTypes,
+                            this->pythonBackend,
+                            graph,
+                            this->guard->gh->currentTimestamp,
+                            numberOfPacketsCreated),
+                        "partial deserialization of subsequent requests", isSuccess);
+                } else {
+                    OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE(std::move(pstatus), "validate subsequent requests", isSuccess);
+                }
+                INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, isSuccess));
+
+                if (graph.HasError()) {
+                    INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
+                    SPDLOG_DEBUG("Graph {}: encountered an error, stopping the execution", this->name);
+                    break;
+                }
+
+                newReq = std::make_shared<RequestType>();
+            }
+
+            // Do NOT CloseAllPacketSources or WaitUntilDone - graph stays alive for reuse
+            auto status = graph.WaitUntilIdle();
+            if (!status.ok()) {
+                INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
+            }
+            MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code()));
+            SPDLOG_DEBUG("Graph {}: Done streaming execution (queue path)", this->name);
+
+            timer.stop(PROCESS);
+            double processTime = timer.template elapsed<std::chrono::microseconds>(PROCESS);
+            OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime);
+            return StatusCode::OK;
+        } catch (...) {
+            SPDLOG_DEBUG("Graph {}: Exception while processing MediaPipe graph (queue path)", this->name);
+            return Status(StatusCode::UNKNOWN_ERROR, "Exception while processing MediaPipe graph");
+        }
+    }
+
+    template <typename RequestType, typename ReaderWriterType>
+    Status inferStreamWithoutQueue(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) {
         SPDLOG_DEBUG("Start MediapipeGraphExecutor::inferEx mediapipe graph: {} execution", this->name);
         std::mutex sendMutex;
         try {
@@ -472,6 +601,33 @@ absl::Status MyFunctor<RequestType, ResponseType>::handlePacket(const ::mediapip
         packet,
         response);
     return status.ok() ? absl::OkStatus() : absl::Status(absl::StatusCode::kInternal, "Some error");
-    ;
+}
+
+template <typename ReaderWriterType>
+absl::Status StreamingFunctor<ReaderWriterType>::handlePacket(const ::mediapipe::Packet& packet) {
+    OVMS_PROFILE_SCOPE("Mediapipe Packet Ready Callback");
+    try {
+        std::lock_guard<std::mutex> lock(sendMutex);
+        auto status = onPacketReadySerializeAndSendImpl(
+            "" /*no ids for streaming*/,
+            executorName,
+            executorVersion,
+            outputStreamName,
+            packetType,
+            packet,
+            serverReaderWriter);
+        if (!status.ok()) {
+            SPDLOG_DEBUG("error in send packet routine {}", status.string());
+            return absl::Status(absl::StatusCode::kInternal, "error in send packet routine");
+        }
+        auto now = std::chrono::system_clock::now();
+        auto currentTimestamp = ::mediapipe::Timestamp(std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
+        OBSERVE_IF_ENABLED(metricReporter->getRequestLatencyMetric(executionContext), (currentTimestamp - packet.Timestamp()).Microseconds());
+        INCREMENT_IF_ENABLED(metricReporter->getResponsesMetric(executionContext));
+        return absl::OkStatus();
+    } catch (...) {
+        SPDLOG_DEBUG("Error occurred during packet serialization in mediapipe graph: {}", executorName);
+        return absl::Status(absl::StatusCode::kCancelled, "error in serialization");
+    }
 }
 }  // namespace ovms
diff --git a/src/test/ensemble_config_change_stress.cpp b/src/test/ensemble_config_change_stress.cpp
index 7fa5a70d31..6ebaeb0e18 100644
--- a/src/test/ensemble_config_change_stress.cpp
+++ b/src/test/ensemble_config_change_stress.cpp
@@ -813,7 +813,8 @@ TEST_F(StressMediapipeChanges, ReloadMediapipeGraphDuringMetadataLoad) {
     SetUpConfig(basicMediapipeConfig);
     bool performWholeConfigReload = true;
     std::set<StatusCode> requiredLoadResults = {StatusCode::OK};  // we expect full continuity of operation
-    std::set<StatusCode> allowedLoadResults = {};
+    // Graph path change triggers real reload, briefly entering NOT_LOADED_YET state
+    std::set<StatusCode> allowedLoadResults = {StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET};
     performStressTest(
         &ConfigChangeStressTest::triggerKFSGetPipelineMetadataInALoop,
         &ConfigChangeStressTest::reloadMediapipeGraph,
@@ -821,4 +822,90 @@ TEST_F(StressMediapipeChanges, ReloadMediapipeGraphDuringMetadataLoad) {
         requiredLoadResults,
         allowedLoadResults);
 }
+
+class StressMediapipeQueueChanges : public StressPipelineConfigChanges {
+    const std::string modelName = PIPELINE_1_DUMMY_NAME;
+    const std::string modelInputName = "b";
+    const std::string modelOutputName = "a";
+
+public:
+    std::string getServableName() override {
+        return modelName;
+    }
+    void SetUp() override {
+        SetUpCAPIServerInstance(createStressTestPipelineOneDummyConfig());
+    }
+};
+TEST_F(StressMediapipeQueueChanges, AddGraphDuringPredictLoad) {
+    // we add another graph definition during load (queue-enabled graph)
+    SetUpConfig(basicMediapipeQueueConfig);
+    bool performWholeConfigReload = true;
+    std::set<StatusCode> requiredLoadResults = {StatusCode::OK};  // we expect full continuity of operation
+    std::set<StatusCode> allowedLoadResults = {};
+    performStressTest(
+        &ConfigChangeStressTest::triggerPredictInALoop<KFSRequest, KFSResponse, ovms::MediapipeGraphExecutor>,
+        &ConfigChangeStressTest::addNewMediapipeQueueGraph,
+        performWholeConfigReload,
+        requiredLoadResults,
+        allowedLoadResults);
+}
+TEST_F(StressMediapipeQueueChanges, RemoveGraphDuringPredictLoad) {
+    SetUpConfig(basicMediapipeQueueConfig);
+    bool performWholeConfigReload = true;
+    std::set<StatusCode> requiredLoadResults = {StatusCode::OK,
+        StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE};
+    std::set<StatusCode> allowedLoadResults = {};
+    performStressTest(
+        &ConfigChangeStressTest::triggerPredictInALoop<KFSRequest, KFSResponse, ovms::MediapipeGraphExecutor>,
+        &ConfigChangeStressTest::removeMediapipeQueueGraph,
+        performWholeConfigReload,
+        requiredLoadResults,
+        allowedLoadResults);
+}
+TEST_F(StressMediapipeQueueChanges, RemoveModelDuringPredictLoad) {
+    SetUpConfig(basicMediapipeQueueConfig);
+    bool performWholeConfigReload = true;
+    // With queue path, pre-initialized graphs may keep working with cached sessions
+    // even after model removal, so MEDIAPIPE_PRECONDITION_FAILED may not occur
+    std::set<StatusCode> requiredLoadResults = {
+        StatusCode::OK,
+    };
+    std::set<StatusCode> allowedLoadResults = {
+        StatusCode::MEDIAPIPE_EXECUTION_ERROR,
+        StatusCode::MEDIAPIPE_GRAPH_ADD_PACKET_INPUT_STREAM,
+        StatusCode::MEDIAPIPE_PRECONDITION_FAILED,
+    };
+    performStressTest(
+        &ConfigChangeStressTest::triggerPredictInALoop<KFSRequest, KFSResponse, ovms::MediapipeGraphExecutor>,
+        &ConfigChangeStressTest::removeMediapipeQueueGraphUsedModel,
+        performWholeConfigReload,
+        requiredLoadResults,
+        allowedLoadResults);
+}
+TEST_F(StressMediapipeQueueChanges, ReloadModelDuringPredictLoad) {
+    SetUpConfig(basicMediapipeQueueConfig);
+    bool performWholeConfigReload = true;
+    std::set<StatusCode> requiredLoadResults = {StatusCode::OK};
+    std::set<StatusCode> allowedLoadResults = {};
+    performStressTest(
+        &ConfigChangeStressTest::triggerPredictInALoop<KFSRequest, KFSResponse, ovms::MediapipeGraphExecutor>,
+        &ConfigChangeStressTest::reloadMediapipeQueueGraphUsedModel,
+        performWholeConfigReload,
+        requiredLoadResults,
+        allowedLoadResults);
+}
+TEST_F(StressMediapipeQueueChanges, ReloadMediapipeGraphDuringPredictLoad) {
+    SetUpConfig(basicMediapipeQueueConfig);
+    bool performWholeConfigReload = true;
+    std::set<StatusCode> requiredLoadResults = {StatusCode::OK};
+    std::set<StatusCode> allowedLoadResults = {};
+    performStressTest(
+        &ConfigChangeStressTest::triggerPredictInALoop<KFSRequest, KFSResponse, ovms::MediapipeGraphExecutor>,
+        &ConfigChangeStressTest::reloadMediapipeQueueGraph,
+        performWholeConfigReload,
+        requiredLoadResults,
+        allowedLoadResults);
+}
+// Status and metadata tests are not duplicated for queue fixture because
+// neither status nor metadata operations exercise the graph queue path.
 #endif
diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json
index 848729c2e6..d2803b795f 100644
--- a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json
+++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json
@@ -3,8 +3,7 @@
     "mediapipe_config_list": [
         {
             "name": "gpt",
-            "graph_path": "/ovms/src/test/mediapipe/graph_gpt.pbtxt",
-            "graph_queue_size": -1
+            "graph_path": "/ovms/src/test/mediapipe/graph_gpt.pbtxt"
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json
new file mode 100644
index 0000000000..ea25079556
--- /dev/null
+++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json
@@ -0,0 +1,9 @@
+{
+    "model_config_list": [],
+    "mediapipe_config_list": [
+        {
+            "name": "gpt",
+            "graph_path": "/ovms/src/test/mediapipe/graph_gpt_with_queue.pbtxt"
+        }
+    ]
+}
diff --git a/src/test/mediapipe/graph_gpt_with_queue.pbtxt b/src/test/mediapipe/graph_gpt_with_queue.pbtxt
new file mode 100644
index 0000000000..43c2ef68c1
--- /dev/null
+++ b/src/test/mediapipe/graph_gpt_with_queue.pbtxt
@@ -0,0 +1,40 @@
+#
+# Copyright 2026 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# OVMS_GRAPH_QUEUE_SIZE: 1
+input_stream: "HTTP_REQUEST_PAYLOAD:input"
+output_stream: "HTTP_RESPONSE_PAYLOAD:output"
+
+node: {
+  calculator: "OpenAIChatCompletionsMockCalculator"
+  input_stream: "LOOPBACK:loopback"
+  input_stream: "HTTP_REQUEST_PAYLOAD:input"
+  output_stream: "LOOPBACK:loopback"
+  output_stream: "HTTP_RESPONSE_PAYLOAD:output"
+  input_stream_info: {
+    tag_index: 'LOOPBACK:0',
+    back_edge: true
+  }
+  input_stream_handler {
+    input_stream_handler: "SyncSetInputStreamHandler",
+    options {
+      [mediapipe.SyncSetInputStreamHandlerOptions.ext] {
+        sync_set {
+          tag_index: "LOOPBACK:0"
+        }
+      }
+    }
+  }
+}
diff --git a/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt
new file mode 100644
index 0000000000..2a5016a7fb
--- /dev/null
+++ b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt
@@ -0,0 +1,46 @@
+#
+# Copyright 2026 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# OVMS_GRAPH_QUEUE_SIZE: 16
+input_stream: "custom_dummy_input"
+output_stream: "custom_dummy_output"
+node {
+  calculator: "OpenVINOModelServerSessionCalculator"
+  output_side_packet: "SESSION:session"
+  node_options: {
+    [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
+      servable_name: "dummy"
+      servable_version: "1"
+    }
+  }
+}
+node {
+  calculator: "OpenVINOInferenceCalculator"
+  input_side_packet: "SESSION:session"
+  input_stream: "B:custom_dummy_input"
+  output_stream: "A:custom_dummy_output"
+  node_options: {
+    [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: {
+      tag_to_input_tensor_names {
+        key: "B"
+        value: "b"
+      }
+      tag_to_output_tensor_names {
+        key: "A"
+        value: "a"
+      }
+    }
+  }
+}
diff --git a/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt
new file mode 100644
index 0000000000..2a5016a7fb
--- /dev/null
+++ b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt
@@ -0,0 +1,46 @@
+#
+# Copyright 2026 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# OVMS_GRAPH_QUEUE_SIZE: 16
+input_stream: "custom_dummy_input"
+output_stream: "custom_dummy_output"
+node {
+  calculator: "OpenVINOModelServerSessionCalculator"
+  output_side_packet: "SESSION:session"
+  node_options: {
+    [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
+      servable_name: "dummy"
+      servable_version: "1"
+    }
+  }
+}
+node {
+  calculator: "OpenVINOInferenceCalculator"
+  input_side_packet: "SESSION:session"
+  input_stream: "B:custom_dummy_input"
+  output_stream: "A:custom_dummy_output"
+  node_options: {
+    [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: {
+      tag_to_input_tensor_names {
+        key: "B"
+        value: "b"
+      }
+      tag_to_output_tensor_names {
+        key: "A"
+        value: "a"
+      }
+    }
+  }
+}
diff --git a/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt
new file mode 100644
index 0000000000..01521b1c08
--- /dev/null
+++ b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt
@@ -0,0 +1,45 @@
+#
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+input_stream: "custom_dummy_input"
+output_stream: "custom_dummy_output"
+node {
+  calculator: "OpenVINOModelServerSessionCalculator"
+  output_side_packet: "SESSION:session"
+  node_options: {
+    [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
+      servable_name: "dummy"
+      servable_version: "1"
+    }
+  }
+}
+node {
+  calculator: "OpenVINOInferenceCalculator"
+  input_side_packet: "SESSION:session"
+  input_stream: "B:custom_dummy_input"
+  output_stream: "A:custom_dummy_output"
+  node_options: {
+    [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: {
+      tag_to_input_tensor_names {
+        key: "B"
+        value: "b"
+      }
+      tag_to_output_tensor_names {
+        key: "A"
+        value: "a"
+      }
+    }
+  }
+}
diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp
index ca1d1d2d91..f96cf584b2 100644
--- a/src/test/mediapipeflow_test.cpp
+++ b/src/test/mediapipeflow_test.cpp
@@ -2707,14 +2707,16 @@ class MediapipeSerialization : public ::testing::Test {
             stream_types_mapping_t inputTypes,
             stream_types_mapping_t outputTypes,
             std::vector<std::string> inputNames, std::vector<std::string> outputNames,
-            const std::shared_ptr<PythonNodeResourcesMap>& pythonNodeResourcesMap,
-            const std::shared_ptr<GenAiServableMap>& gasm,
+            const GraphSidePackets& sidePackets,
             MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) :
-            MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, *pythonNodeResourcesMap, *gasm, {}, {}, {}, {}, nullptr, mediapipeServableMetricReporter, std::move(guard)) {}
+            MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames,
+                sidePackets,
+                nullptr, mediapipeServableMetricReporter, std::move(guard)) {}
     };
 
 protected:
     std::unique_ptr<MediapipeServableMetricReporter> reporter;
+    std::shared_ptr<GraphSidePackets> sidePackets;
     std::shared_ptr<GraphQueue> queue;
     std::unique_ptr<MockedMediapipeGraphExecutor> executor;
     ::inference::ModelInferResponse mp_response;
@@ -2729,13 +2731,10 @@ class MediapipeSerialization : public ::testing::Test {
         const std::vector<std::string> outputNames;
         const ::mediapipe::CalculatorGraphConfig config;
         this->reporter = std::make_unique<MediapipeServableMetricReporter>(nullptr, nullptr, "");  // disabled reporter
-        auto sidePackets = std::make_shared<GraphSidePackets>();
-        std::shared_ptr<PythonNodeResourcesMap> pnsm = std::make_shared<PythonNodeResourcesMap>();
-        std::shared_ptr<GenAiServableMap> gasm = std::make_shared<GenAiServableMap>();
-        std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
+        sidePackets = std::make_shared<GraphSidePackets>();
+        queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
         GraphIdGuard guard(queue);
-        executor = std::make_unique<MockedMediapipeGraphExecutor>("", "", config, mapping, mapping, inputNames, outputNames, pnsm, gasm, this->reporter.get(), std::move(guard));
-        SPDLOG_ERROR("Exit SetUp");
+        executor = std::make_unique<MockedMediapipeGraphExecutor>("", "", config, mapping, mapping, inputNames, outputNames, *sidePackets, this->reporter.get(), std::move(guard));
     }
 };
 
diff --git a/src/test/pythonnode_test.cpp b/src/test/pythonnode_test.cpp
index 7e2595a58f..6f9dc6bfa8 100644
--- a/src/test/pythonnode_test.cpp
+++ b/src/test/pythonnode_test.cpp
@@ -1002,10 +1002,12 @@ class MockedMediapipeGraphExecutorPy : public ovms::MediapipeGraphExecutor {
         stream_types_mapping_t inputTypes,
         stream_types_mapping_t outputTypes,
         std::vector<std::string> inputNames, std::vector<std::string> outputNames,
-        const std::shared_ptr<PythonNodeResourcesMap>& pythonNodeResourcesMap,
+        const GraphSidePackets& sidePackets,
         PythonBackend* pythonBackend,
         MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) :
-        MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, *pythonNodeResourcesMap, {}, {}, {}, {}, {}, pythonBackend, mediapipeServableMetricReporter, std::move(guard)) {}
+        MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames,
+            sidePackets,
+            pythonBackend, mediapipeServableMetricReporter, std::move(guard)) {}
 };
 
 TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) {
@@ -1015,10 +1017,9 @@ TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) {
     const std::vector<std::string> outputNames;
     const ::mediapipe::CalculatorGraphConfig config;
     auto sidePackets = std::make_shared<GraphSidePackets>();
-    std::shared_ptr<PythonNodeResourcesMap> pnsm = std::make_shared<PythonNodeResourcesMap>();
     std::shared_ptr<GraphQueue> queue = std::make_shared<GraphQueue>(config, sidePackets, 1);
     GraphIdGuard guard(queue);
-    auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, pnsm, getPythonBackend(), this->reporter.get(), std::move(guard));
+    auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, *sidePackets, getPythonBackend(), this->reporter.get(), std::move(guard));
 
     std::string datatype = "FP32";
     std::string name = "python_result";
diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp
index c50969717e..997d2048ef 100644
--- a/src/test/streaming_test.cpp
+++ b/src/test/streaming_test.cpp
@@ -70,6 +70,35 @@ class StreamingTest : public Test {
     }
 };
 
+class StreamingQueueTest : public StreamingTest {
+protected:
+    std::shared_ptr<GraphQueue> queue;
+
+    MediapipeGraphExecutor createQueueExecutor(
+        const ::mediapipe::CalculatorGraphConfig& config,
+        stream_types_mapping_t inputTypes,
+        stream_types_mapping_t outputTypes,
+        std::vector<std::string> inputNames,
+        std::vector<std::string> outputNames,
+        int queueSize = 1) {
+        auto sidePackets = std::make_shared<GraphSidePackets>();
+        queue = std::make_shared<GraphQueue>(config, sidePackets, queueSize);
+        GraphIdGuard graphIdGuard(queue);
+        return MediapipeGraphExecutor{
+            this->name,
+            this->version,
+            config,
+            std::move(inputTypes),
+            std::move(outputTypes),
+            std::move(inputNames),
+            std::move(outputNames),
+            *sidePackets,
+            nullptr,
+            this->reporter.get(),
+            std::move(graphIdGuard)};
+    }
+};
+
 #if (PYTHON_DISABLE == 0)
 class PythonStreamingTest : public StreamingTest {
 protected:
@@ -621,6 +650,184 @@ node {
     ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
 }
 
+TEST_F(StreamingQueueTest, SingleStreamSend3Receive3AutomaticTimestamp) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        2);
+
+    prepareRequest(this->firstRequest, {{"in", 3.5f}});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(Receive({{"in", 7.2f}}))
+        .WillOnce(Receive({{"in", 102.4f}}))
+        .WillOnce(Disconnect());
+
+    auto timestamp = std::make_shared<int64_t>(-1);
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(SendWithAutomaticTimestamp({{"out", 4.5f}}, timestamp))
+        .WillOnce(SendWithAutomaticTimestamp({{"out", 8.2f}}, timestamp))
+        .WillOnce(SendWithAutomaticTimestamp({{"out", 103.4f}}, timestamp));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
+TEST_F(StreamingQueueTest, SingleStreamSend1Receive3) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOne3CycleIterationsTestCalculator"
+  input_stream: "in"
+  input_stream: "signal"
+  input_stream_info: {
+    tag_index: ':1',
+    back_edge: true
+  }
+  input_stream_handler {
+    input_stream_handler: 'ImmediateInputStreamHandler'
+  }
+  output_stream: "out"
+  output_stream: "signal"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        2);
+
+    prepareRequest(this->firstRequest, {{"in", 3.5f}});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(Disconnect());
+
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(SendWithTimestamp({{"out", 4.5f}}, 1))
+        .WillOnce(SendWithTimestamp({{"out", 5.5f}}, 2))
+        .WillOnce(SendWithTimestamp({{"out", 6.5f}}, 3));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
+TEST_F(StreamingQueueTest, ExitOnDisconnectionDuringRead) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        2);
+
+    prepareRequest(this->firstRequest, {});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(Disconnect());
+
+    EXPECT_CALL(this->stream, Write(_, _)).Times(0);
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
+TEST_F(StreamingQueueTest, ErrorOnDisconnectionDuringWrite) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        2);
+
+    std::promise<void> signalPromise;
+    std::future<void> signalFuture = signalPromise.get_future();
+
+    prepareRequest(this->firstRequest, {{"in", 3.5f}});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(DisconnectWhenNotified(signalFuture));
+
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(DisconnectOnWriteAndNotifyEnd(signalPromise));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_EXECUTION_ERROR);
+}
+
+TEST_F(StreamingQueueTest, ErrorDuringFirstRequestDeserialization) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        2);
+
+    prepareInvalidRequest(this->firstRequest, {"in"});
+
+    std::promise<void> signalPromise;
+    std::future<void> signalFuture = signalPromise.get_future();
+
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(DisconnectWhenNotified(signalFuture));
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(SendErrorAndNotifyEnd(
+            Status(StatusCode::INVALID_CONTENT_SIZE).string() + std::string{" - Expected: 4 bytes; Actual: 0 bytes; input name: in; partial deserialization of first request"},
+            signalPromise));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
 // PYTHON CALCULATOR CASES
 
 #if (PYTHON_DISABLE == 0)
diff --git a/src/test/stress_test_utils.hpp b/src/test/stress_test_utils.hpp
index ccbdd60758..836f9f8f36 100644
--- a/src/test/stress_test_utils.hpp
+++ b/src/test/stress_test_utils.hpp
@@ -50,6 +50,7 @@
 #include "../server.hpp"
 #include "../status.hpp"
 #include "../stringutils.hpp"
+#include "src/timer.hpp"
 #include "../tfs_frontend/tfs_utils.hpp"
 #include "c_api_test_utils.hpp"
 #include "test_utils.hpp"
@@ -1067,7 +1068,99 @@ static const std::string basicMediapipeConfigWithNewGraphPath = R"({
     "mediapipe_config_list": [
     {
         "name":"pipeline1Dummy",
-        "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt"
+        "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt"
+    }
+    ]
+})";
+
+const std::string basicMediapipeQueueConfig = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+        }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithAddedGraph = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+        }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    },
+    {
+        "name":"pipeline2Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithRemovedGraph = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+        }
+        }
+    ],
+    "mediapipe_config_list": [
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithRemovedModel = R"({
+    "model_config_list": [
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithReloadedModel = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy",
+                "nireq": 47
+        }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithNewGraphPath = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+        }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt"
     }
     ]
 })";
@@ -1094,9 +1187,16 @@ static void mediacreate(std::unique_ptr<MediapipeGraphExecutor>& executorPtr, ov
         sc = static_cast<StatusCode>(code);                   \
     }
 
+enum StressTimerSlot : unsigned int {
+    STRESS_LOOP,
+    CREATE,
+    EXECUTE,
+    TIMER_END
+};
+
 class ConfigChangeStressTest : public TestWithTempDir {
 protected:
-    const uint32_t loadThreadCount = 20;
+    const uint32_t loadThreadCount = 16;
     const uint32_t beforeConfigChangeLoadTimeMs = 30;
     const uint32_t afterConfigChangeLoadTimeMs = 50;
     const int stressIterationsLimit = 10000;
@@ -1291,6 +1391,12 @@ class ConfigChangeStressTest : public TestWithTempDir {
         createConfigFileWithContent(ovmsConfig, configFilePath);
         SPDLOG_INFO("{} end", __FUNCTION__);
     }
+    void addNewMediapipeQueueGraph() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithAddedGraph);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
     void removeMediapipeGraph() {
         SPDLOG_INFO("{} start", __FUNCTION__);
         SetUpConfig(basicMediapipeConfigWithRemovedGraph);
@@ -1315,6 +1421,30 @@ class ConfigChangeStressTest : public TestWithTempDir {
         createConfigFileWithContent(ovmsConfig, configFilePath);
         SPDLOG_INFO("{} end", __FUNCTION__);
     }
+    void removeMediapipeQueueGraph() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithRemovedGraph);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
+    void removeMediapipeQueueGraphUsedModel() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithRemovedModel);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
+    void reloadMediapipeQueueGraphUsedModel() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithReloadedModel);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
+    void reloadMediapipeQueueGraph() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithNewGraphPath);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
     void checkMetricGreaterThan(const std::string& metricName, double value, std::string& metricOutput, bool& result) {
         ASSERT_THAT(metricOutput, ::testing::HasSubstr(metricName + std::string{"{name=\"dummy\",version=\"1\"} "})) << "cannot find dummys " << metricName << " metric\n"
                                                                                                                      << metricOutput;
@@ -1706,6 +1836,8 @@ class ConfigChangeStressTest : public TestWithTempDir {
         auto stressIterationsCounter = stressIterationsLimit;
         bool breakLoop = false;
         while (stressIterationsCounter-- > 0) {
+            ovms::Timer<TIMER_END> timer;
+            timer.start(STRESS_LOOP);
             auto futureWaitResult = stopSignal.wait_for(std::chrono::milliseconds(0));
             if (true == breakLoop) {
                 SPDLOG_INFO("Ending Load");
@@ -1725,6 +1857,7 @@ class ConfigChangeStressTest : public TestWithTempDir {
             RequestType request2;
             RequestType request = preparePipelinePredictRequest(request2);
             ovms::Status createPipelineStatus = StatusCode::UNKNOWN_ERROR;
+            timer.start(CREATE);
             if (typeid(ServableType) == typeid(ovms::Pipeline)) {
                 createPipelineStatus = this->manager->createPipeline(pipelinePtr, pipelineName, &request, &response);
 #if (MEDIAPIPE_DISABLE == 0)
@@ -1732,6 +1865,8 @@ class ConfigChangeStressTest : public TestWithTempDir {
                 mediacreate(executorPtr, *(this->manager), request, response, createPipelineStatus);
 #endif
             }
+            timer.stop(CREATE);
+            SPDLOG_TRACE("XYZ creation time: {} us", timer.elapsed<std::chrono::microseconds>(CREATE));
             // we need to make sure that expected status happened and still accept
             // some that could happen but we may not hit them
             EXPECT_TRUE((requiredLoadResults.find(createPipelineStatus.getCode()) != requiredLoadResults.end()) ||
@@ -1743,6 +1878,7 @@ class ConfigChangeStressTest : public TestWithTempDir {
             }
 
             ovms::Status executePipelineStatus = StatusCode::UNKNOWN_ERROR;
+            timer.start(EXECUTE);
             if (typeid(ServableType) == typeid(ovms::Pipeline)) {
                 executePipelineStatus = pipelinePtr->execute(ovms::ExecutionContext(
                     ovms::ExecutionContext::Interface::GRPC,
@@ -1752,6 +1888,8 @@ class ConfigChangeStressTest : public TestWithTempDir {
                 mediaexec(executorPtr, *(this->manager), request, response, executePipelineStatus);
 #endif
             }
+            timer.stop(EXECUTE);
+            SPDLOG_TRACE("XYZ execution time: {} us", timer.elapsed<std::chrono::microseconds>(EXECUTE));
             createPipelineRetCodesCounters[executePipelineStatus.getCode()]++;
             EXPECT_TRUE((requiredLoadResults.find(executePipelineStatus.getCode()) != requiredLoadResults.end()) ||
                         (allowedLoadResults.find(executePipelineStatus.getCode()) != allowedLoadResults.end()))
@@ -1763,6 +1901,8 @@ class ConfigChangeStressTest : public TestWithTempDir {
                 SPDLOG_INFO("Earlier fail detected. Stopping execution");
                 break;
             }
+            timer.stop(STRESS_LOOP);
+            SPDLOG_TRACE("XYZ loop iteration time: {} us", timer.elapsed<std::chrono::microseconds>(STRESS_LOOP));
         }
         for (auto& [retCode, counter] : createPipelineRetCodesCounters) {
             if (counter > 0) {

From e61a1744a72263b54cee570875ad46131b4766e7 Mon Sep 17 00:00:00 2001
From: Adrian Tobiszewski <adrian.tobiszewski@intel.com>
Date: Mon, 23 Feb 2026 08:40:44 +0100
Subject: [PATCH 6/8] All gtest tests pass

---
 ci/build_test_OnCommit.groovy                 |   8 +-
 src/kfs_frontend/kfs_graph_executor_impl.cpp  |   1 -
 src/llm/BUILD                                 |   1 +
 src/llm/http_llm_calculator.cc                |  25 ++-
 .../graph_executor_constants.hpp              |   1 +
 src/mediapipe_internal/graph_side_packets.hpp |  11 +
 src/mediapipe_internal/graphqueue.cpp         |   5 +-
 src/mediapipe_internal/graphqueue.hpp         |   4 +-
 .../mediapipegraphdefinition.cpp              |   9 +-
 .../mediapipegraphexecutor.cpp                |  29 +++
 .../mediapipegraphexecutor.hpp                |  36 +++-
 src/test/http_openai_handler_test.cpp         |   4 +-
 src/test/llm/config_queue.json                |   9 +
 src/test/llm/llmnode_test.cpp                 | 196 ++++++++++++++++++
 src/test/llm/lm_cb_regular_queue.pbtxt        |  47 +++++
 src/test/streaming_test.cpp                   |  54 ++---
 16 files changed, 399 insertions(+), 41 deletions(-)
 create mode 100644 src/test/llm/config_queue.json
 create mode 100644 src/test/llm/lm_cb_regular_queue.pbtxt

diff --git a/ci/build_test_OnCommit.groovy b/ci/build_test_OnCommit.groovy
index a3e7862c29..21029ec61c 100644
--- a/ci/build_test_OnCommit.groovy
+++ b/ci/build_test_OnCommit.groovy
@@ -149,7 +149,7 @@ pipeline {
             timeout(time: 120, unit: 'MINUTES')
           }
           parallel {
-            /*stage("Run unit tests") {
+            stage("Run unit tests") {
               agent {
                 label "${agent_name_linux}"
               }
@@ -165,7 +165,7 @@ pipeline {
               }
               } 
               }
-            }*/
+            }
             stage("Internal tests") {
               agent {
                 label "${agent_name_linux}"
@@ -186,7 +186,7 @@ pipeline {
                 }
               }            
             }
-            /*stage('Test windows') {
+            stage('Test windows') {
               agent {
                 label "${agent_name_windows}"
               }
@@ -210,7 +210,7 @@ pipeline {
                       }
                   }
               }
-            }*/
+            }
           }
         }
     }
diff --git a/src/kfs_frontend/kfs_graph_executor_impl.cpp b/src/kfs_frontend/kfs_graph_executor_impl.cpp
index 2a2d0ff3b8..86778ca899 100644
--- a/src/kfs_frontend/kfs_graph_executor_impl.cpp
+++ b/src/kfs_frontend/kfs_graph_executor_impl.cpp
@@ -26,7 +26,6 @@
 #include "../logging.hpp"
 #include "../mediapipe_internal/mediapipe_utils.hpp"
 #include "../mediapipe_internal/mediapipegraphdefinition.hpp"
-// TODO FIXME #include "../mediapipe_internal/graph_executor_constants.hpp"
 #include "../predict_request_validation_utils.hpp"
 #include "../status.hpp"
 #include "../tfs_frontend/tfs_utils.hpp"
diff --git a/src/llm/BUILD b/src/llm/BUILD
index ae37d936ca..5f64ad197f 100644
--- a/src/llm/BUILD
+++ b/src/llm/BUILD
@@ -24,6 +24,7 @@ ovms_cc_library(
         "//third_party:openvino",
         "@mediapipe//mediapipe/framework:calculator_framework",
         "@com_github_tencent_rapidjson//:rapidjson",
+        "//src:mediapipe_internal_graph_side_packets",
         "//src/kfserving_api:kfserving_api_cpp",
         "//src:libovmsprofiler",
         ":genai_servables",
diff --git a/src/llm/http_llm_calculator.cc b/src/llm/http_llm_calculator.cc
index ae6461c61a..2415ae08da 100644
--- a/src/llm/http_llm_calculator.cc
+++ b/src/llm/http_llm_calculator.cc
@@ -14,6 +14,7 @@
 // limitations under the License.
 //*****************************************************************************
 #include <atomic>
+#include <mutex>
 #include <string>
 
 #pragma warning(push)
@@ -27,6 +28,7 @@
 
 #include "../http_payload.hpp"
 #include "../logging.hpp"
+#include "../mediapipe_internal/graph_side_packets.hpp"
 #include "../profiler.hpp"
 #include "apis/openai_completions.hpp"
 #include "servable.hpp"
@@ -36,9 +38,11 @@ using namespace ovms;
 namespace mediapipe {
 
 const std::string LLM_SESSION_SIDE_PACKET_TAG = "LLM_NODE_RESOURCES";
+const std::string LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG = "LLM_NODE_EXECUTION_CONTEXTS";
 
 class HttpLLMCalculator : public CalculatorBase {
     std::shared_ptr<GenAiServable> servable;
+    std::shared_ptr<GenAiExecutionContextHolder> executionContextHolder;
     std::shared_ptr<GenAiServableExecutionContext> executionContext;
 
     static const std::string INPUT_TAG_NAME;
@@ -54,6 +58,9 @@ class HttpLLMCalculator : public CalculatorBase {
         cc->Inputs().Tag(INPUT_TAG_NAME).Set<ovms::HttpPayload>();
         cc->Inputs().Tag(LOOPBACK_TAG_NAME).Set<bool>();
         cc->InputSidePackets().Tag(LLM_SESSION_SIDE_PACKET_TAG).Set<ovms::GenAiServableMap>();
+        if (cc->InputSidePackets().HasTag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG)) {
+            cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).Set<ovms::GenAiExecutionContextMap>();
+        }
         cc->Outputs().Tag(OUTPUT_TAG_NAME).Set<std::string>();
         cc->Outputs().Tag(LOOPBACK_TAG_NAME).Set<bool>();
         return absl::OkStatus();
@@ -72,7 +79,17 @@ class HttpLLMCalculator : public CalculatorBase {
         auto it = servableMap.find(cc->NodeName());
         RET_CHECK(it != servableMap.end()) << "Could not find initialized LLM node named: " << cc->NodeName();
         this->servable = it->second;
-        this->executionContext = servable->createExecutionContext();
+
+        if (cc->InputSidePackets().HasTag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG) && !cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).IsEmpty()) {
+            ovms::GenAiExecutionContextMap executionContextMap = cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).Get<ovms::GenAiExecutionContextMap>();
+            auto contextIt = executionContextMap.find(cc->NodeName());
+            RET_CHECK(contextIt != executionContextMap.end()) << "Could not find LLM execution context holder for node named: " << cc->NodeName();
+            this->executionContextHolder = contextIt->second;
+        }
+
+        if (!this->executionContextHolder) {
+            this->executionContext = servable->createExecutionContext();
+        }
         SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "LLMCalculator [Node: {}] Open end", cc->NodeName());
         return absl::OkStatus();
     }
@@ -81,6 +98,12 @@ class HttpLLMCalculator : public CalculatorBase {
         OVMS_PROFILE_FUNCTION();
         RET_CHECK(this->servable != nullptr);
 
+        if (this->executionContextHolder) {
+            std::lock_guard<std::mutex> lock(this->executionContextHolder->mutex);
+            this->executionContext = this->executionContextHolder->executionContext;
+        }
+        RET_CHECK(this->executionContext != nullptr) << "LLM execution context not initialized for node: " << cc->NodeName();
+
         // For cases where MediaPipe decides to trigger Process() when there are no inputs
         if (cc->Inputs().Tag(INPUT_TAG_NAME).IsEmpty() && cc->Inputs().Tag(LOOPBACK_TAG_NAME).IsEmpty()) {
             return absl::OkStatus();
diff --git a/src/mediapipe_internal/graph_executor_constants.hpp b/src/mediapipe_internal/graph_executor_constants.hpp
index ff565769ce..55e3af7f59 100644
--- a/src/mediapipe_internal/graph_executor_constants.hpp
+++ b/src/mediapipe_internal/graph_executor_constants.hpp
@@ -22,6 +22,7 @@ namespace ovms {
 
 inline const std::string PYTHON_SESSION_SIDE_PACKET_TAG = "py";
 inline const std::string LLM_SESSION_SIDE_PACKET_TAG = "llm";
+inline const std::string LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG = "llm_ctx";
 inline const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes";
 inline const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable";
 inline const std::string RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable";
diff --git a/src/mediapipe_internal/graph_side_packets.hpp b/src/mediapipe_internal/graph_side_packets.hpp
index 66b0134726..8b67bd3bc0 100644
--- a/src/mediapipe_internal/graph_side_packets.hpp
+++ b/src/mediapipe_internal/graph_side_packets.hpp
@@ -15,6 +15,7 @@
 //*****************************************************************************
 #pragma once
 
+#include <mutex>
 #include <memory>
 #include <string>
 #include <unordered_map>
@@ -24,6 +25,7 @@ namespace ovms {
 // Forward declarations - only shared_ptrs are stored so full definitions are not needed
 class PythonNodeResources;
 class GenAiServable;
+struct GenAiServableExecutionContext;
 struct ImageGenerationPipelines;
 struct EmbeddingsServable;
 struct RerankServable;
@@ -38,9 +40,16 @@ using TtsServableMap = std::unordered_map<std::string, std::shared_ptr<TtsServab
 using EmbeddingsServableMap = std::unordered_map<std::string, std::shared_ptr<EmbeddingsServable>>;
 using ImageGenerationPipelinesMap = std::unordered_map<std::string, std::shared_ptr<ImageGenerationPipelines>>;
 
+struct GenAiExecutionContextHolder {
+    std::mutex mutex;
+    std::shared_ptr<GenAiServableExecutionContext> executionContext;
+};
+using GenAiExecutionContextMap = std::unordered_map<std::string, std::shared_ptr<GenAiExecutionContextHolder>>;
+
 struct GraphSidePackets {
     PythonNodeResourcesMap pythonNodeResourcesMap;
     GenAiServableMap genAiServableMap;
+    GenAiExecutionContextMap genAiExecutionContextMap;
     ImageGenerationPipelinesMap imageGenPipelinesMap;
     EmbeddingsServableMap embeddingsServableMap;
     RerankServableMap rerankServableMap;
@@ -49,6 +58,7 @@ struct GraphSidePackets {
     void clear() {
         pythonNodeResourcesMap.clear();
         genAiServableMap.clear();
+        genAiExecutionContextMap.clear();
         imageGenPipelinesMap.clear();
         embeddingsServableMap.clear();
         rerankServableMap.clear();
@@ -58,6 +68,7 @@ struct GraphSidePackets {
     bool empty() {
         return (pythonNodeResourcesMap.empty() &&
                 genAiServableMap.empty() &&
+                genAiExecutionContextMap.empty() &&
                 imageGenPipelinesMap.empty() &&
                 embeddingsServableMap.empty() &&
                 rerankServableMap.empty() &&
diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp
index ea9a2680f4..ccce0f65c3 100644
--- a/src/mediapipe_internal/graphqueue.cpp
+++ b/src/mediapipe_internal/graphqueue.cpp
@@ -34,7 +34,6 @@
 #include "mediapipe/framework/port/status.h"
 
 #include "graph_executor_constants.hpp"
-//#include "mediapipegraphexecutor.hpp"  // for side packet tag names
 #include "outputstreamobserver.hpp"
 namespace ovms {
 GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr<GraphSidePackets> sidePacketMaps, int streamsLength) :
@@ -62,11 +61,15 @@ GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::sh
                 throw std::runtime_error(absStatus.ToString());
             }
         }
+        for (const auto& [nodeName, _] : sidePacketMaps->genAiServableMap) {
+            gh->genAiExecutionContextMap[nodeName] = std::make_shared<GenAiExecutionContextHolder>();
+        }
         std::map<std::string, mediapipe::Packet> inputSidePackets;
 #if (PYTHON_DISABLE == 0)
         inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<PythonNodeResourcesMap>(sidePacketMaps->pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
 #endif
         inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiServableMap>(sidePacketMaps->genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiExecutionContextMap>(gh->genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<ImageGenerationPipelinesMap>(sidePacketMaps->imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(sidePacketMaps->embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<RerankServableMap>(sidePacketMaps->rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp
index a570557211..ba09edd85e 100644
--- a/src/mediapipe_internal/graphqueue.hpp
+++ b/src/mediapipe_internal/graphqueue.hpp
@@ -40,6 +40,7 @@ class NullOutputStreamObserver;
 struct GraphHelper {
     std::shared_ptr<::mediapipe::CalculatorGraph> graph;  // TODO FIXME this does not have to be shared_ptr
     std::unordered_map<std::string, std::shared_ptr<OutputStreamObserverI>> outStreamObservers;
+    GenAiExecutionContextMap genAiExecutionContextMap;
     ::mediapipe::Timestamp currentTimestamp;  // TODO FIXME const
     // TODO FIXME move constr/=
     GraphHelper() = default;
@@ -48,13 +49,14 @@ struct GraphHelper {
     GraphHelper(GraphHelper&& gh) :
         graph(std::move(gh.graph)),
         outStreamObservers(std::move(gh.outStreamObservers)),
+        genAiExecutionContextMap(std::move(gh.genAiExecutionContextMap)),
         currentTimestamp(gh.currentTimestamp) {}
     GraphHelper& operator=(GraphHelper&& gh) = default;
 };
 // we need to keep Graph alive during MP reload hence shared_ptr
 //class GraphQueue : public Queue<std::shared_ptr<::mediapipe::CalculatorGraph>> {
 class GraphQueue : public Queue<std::shared_ptr<GraphHelper>> {
-    public: // XXX TODO make private? we need to acces in mediapipegraphdefinition to set side packets though
+public:  // XXX TODO make private? we need to acces in mediapipegraphdefinition to set side packets though
     std::shared_ptr<GraphSidePackets> sidePacketMaps;
 
 public:
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp
index 38533093b7..a063ac5dd5 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.cpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp
@@ -62,7 +62,6 @@
 #include "src/audio/speech_to_text/s2t_servable.hpp"
 #include "src/audio/text_to_speech/t2s_servable.hpp"
 
-
 namespace ovms {
 MediapipeGraphConfig MediapipeGraphDefinition::MGC;
 
@@ -494,7 +493,7 @@ class ResourcesCleaningGuard {
         resources(resources) {}
     ~ResourcesCleaningGuard() {
         if (shouldCleanup) {
-            resources.clear(); // TODO FIXME @atobisze check
+            resources.clear();  // TODO FIXME @atobisze check
         }
     }
     void disableCleaning() {
@@ -537,6 +536,7 @@ Status MediapipeGraphDefinition::initializeNodes() {
         // Passed to both calculators that require LLM Engine (gRPC KServe & HTTP OpenAI)
         if (endsWith(config.node(i).calculator(), LLM_NODE_CALCULATOR_NAME)) {
             auto& genAiServableMap = this->sidePacketMaps->genAiServableMap;
+            auto& genAiExecutionContextMap = this->sidePacketMaps->genAiExecutionContextMap;
             ResourcesCleaningGuard<GenAiServableMap> genAiServablesCleaningGuard(genAiServableMap);
             if (!config.node(i).node_options().size()) {
                 SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node missing options in graph: {}. ", this->name);
@@ -551,6 +551,10 @@ Status MediapipeGraphDefinition::initializeNodes() {
                 SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node name: {} already used in graph: {}. ", nodeName, this->name);
                 return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS;
             }
+            if (genAiExecutionContextMap.find(nodeName) != genAiExecutionContextMap.end()) {
+                SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM execution context holder for node name: {} already exists in graph: {}. ", nodeName, this->name);
+                return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS;
+            }
             std::shared_ptr<GenAiServable> servable;
             Status status = initializeGenAiServable(servable, config.node(i), mgconfig.getBasePath());
             if (!status.ok()) {
@@ -558,6 +562,7 @@ Status MediapipeGraphDefinition::initializeNodes() {
                 return status;
             }
             genAiServableMap.insert(std::pair<std::string, std::shared_ptr<GenAiServable>>(nodeName, std::move(servable)));
+            genAiExecutionContextMap.insert(std::pair<std::string, std::shared_ptr<GenAiExecutionContextHolder>>(nodeName, std::make_shared<GenAiExecutionContextHolder>()));
             genAiServablesCleaningGuard.disableCleaning();
         }
         // Passed to both calculators that require Image Generation pipelines
diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp
index 601a164f61..b821d1fef1 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.cpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp
@@ -34,6 +34,7 @@
 #endif
 
 #include "src/image_gen/pipelines.hpp"
+#include "src/llm/servable.hpp"
 
 namespace ovms {
 
@@ -84,4 +85,32 @@ MediapipeGraphExecutor::MediapipeGraphExecutor(
     currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)),
     mediapipeServableMetricReporter(mediapipeServableMetricReporter) {}
 
+Status MediapipeGraphExecutor::initializeLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap) {
+    for (const auto& [nodeName, servable] : this->sidePacketMaps.genAiServableMap) {
+        auto it = executionContextMap.find(nodeName);
+        if (it == executionContextMap.end() || !it->second) {
+            SPDLOG_DEBUG("Missing LLM execution context holder for node: {}", nodeName);
+            return StatusCode::INTERNAL_ERROR;
+        }
+        auto& holder = it->second;
+        std::lock_guard<std::mutex> lock(holder->mutex);
+        holder->executionContext = servable->createExecutionContext();
+        if (!holder->executionContext) {
+            SPDLOG_DEBUG("Failed to create LLM execution context for node: {}", nodeName);
+            return StatusCode::INTERNAL_ERROR;
+        }
+    }
+    return StatusCode::OK;
+}
+
+void MediapipeGraphExecutor::resetLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap) {
+    for (auto& [_, holder] : executionContextMap) {
+        if (!holder) {
+            continue;
+        }
+        std::lock_guard<std::mutex> lock(holder->mutex);
+        holder->executionContext.reset();
+    }
+}
+
 }  // namespace ovms
diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp
index 391d1849fb..1e36d27e42 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.hpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp
@@ -159,6 +159,10 @@ class MediapipeGraphExecutor {
         PythonBackend* pythonBackend,
         MediapipeServableMetricReporter* mediapipeServableMetricReporter);
 
+    Status initializeLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap);
+
+    void resetLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap);
+
     template <typename RequestType, typename ResponseType>
     Status infer(const RequestType* request, ResponseType* response, ExecutionContext executionContext) {
         OVMS_PROFILE_FUNCTION();
@@ -175,6 +179,10 @@ class MediapipeGraphExecutor {
     template <typename RequestType, typename ResponseType>
     Status inferWithQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) {
         ::mediapipe::CalculatorGraph& graph = this->guard->graph;
+        auto llmContextStatus = initializeLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap);
+        if (!llmContextStatus.ok()) {
+            return llmContextStatus;
+        }
         for (auto& name : this->outputNames) {
             if (name.empty()) {
                 SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", name);
@@ -209,7 +217,10 @@ class MediapipeGraphExecutor {
         if (!status.ok()) {
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
         }
+        resetLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap);
         MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code()));
+        // Increment timestamp for next request reusing this graph from the queue
+        this->guard->gh->currentTimestamp = ::mediapipe::Timestamp(this->guard->gh->currentTimestamp.Value() + 1);
         SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name);
         return StatusCode::OK;
     }
@@ -218,7 +229,12 @@ class MediapipeGraphExecutor {
     Status inferWithoutQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) {
         ::mediapipe::CalculatorGraph graph;
         MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
-        enum : unsigned int { PROCESS, TIMER_END2 };
+        auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap);
+        if (!llmContextStatus.ok()) {
+            return llmContextStatus;
+        }
+        enum : unsigned int { PROCESS,
+            TIMER_END2 };
         Timer<TIMER_END2> timer;
         timer.start(PROCESS);
         std::unordered_map<std::string, ::mediapipe::OutputStreamPoller> outputPollers;
@@ -241,6 +257,7 @@ class MediapipeGraphExecutor {
         inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<PythonNodeResourcesMap>(this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
 #endif
         inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiServableMap>(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiExecutionContextMap>(this->sidePacketMaps.genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<ImageGenerationPipelinesMap>(this->sidePacketMaps.imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
         inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<RerankServableMap>(this->sidePacketMaps.rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
@@ -278,6 +295,7 @@ class MediapipeGraphExecutor {
         if (!status.ok()) {
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
         }
+        resetLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap);
         MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code()));
 
         MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR);
@@ -340,12 +358,17 @@ class MediapipeGraphExecutor {
             // Side packets are set at queue construction time.
             if (requestHasInputSidePackets(req)) {
                 SPDLOG_DEBUG("Graph queue does not support user-provided input side packets. "
-                             "Side packets are set at graph queue construction time. Graph: {}", this->name);
+                             "Side packets are set at graph queue construction time. Graph: {}",
+                    this->name);
                 return Status(StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR,
                     "Input side packets are not supported for graphs with queue enabled");
             }
             MetricGaugeGuard currentGraphs(this->mediapipeServableMetricReporter->currentGraphs.get());
             ::mediapipe::CalculatorGraph& graph = this->guard->graph;
+            auto llmContextStatus = initializeLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap);
+            if (!llmContextStatus.ok()) {
+                return llmContextStatus;
+            }
 
             enum : unsigned int {
                 PROCESS,
@@ -426,7 +449,10 @@ class MediapipeGraphExecutor {
             if (!status.ok()) {
                 INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
             }
+            resetLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap);
             MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code()));
+            // Increment timestamp for next request reusing this graph from the queue
+            this->guard->gh->currentTimestamp = ::mediapipe::Timestamp(this->guard->gh->currentTimestamp.Value() + 1);
             SPDLOG_DEBUG("Graph {}: Done streaming execution (queue path)", this->name);
 
             timer.stop(PROCESS);
@@ -451,6 +477,10 @@ class MediapipeGraphExecutor {
                 // Init
                 MP_RETURN_ON_FAIL(graph.Initialize(this->config), "graph initialization", StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
             }
+            auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap);
+            if (!llmContextStatus.ok()) {
+                return llmContextStatus;
+            }
             enum : unsigned int {
                 PROCESS,
                 TIMER_END2
@@ -498,6 +528,7 @@ class MediapipeGraphExecutor {
                                                                        .At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
 #endif
                 inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiServableMap>(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+                inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiExecutionContextMap>(this->sidePacketMaps.genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
                 inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
                 // Add image generation side packet in case image generation allow for streaming
             }
@@ -576,6 +607,7 @@ class MediapipeGraphExecutor {
                 if (!status.ok()) {
                     INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
                 }
+                resetLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap);
                 MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code()));
                 SPDLOG_DEBUG("Graph {}: Done execution", this->name);
             }
diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp
index 316917f788..6179b65d69 100644
--- a/src/test/http_openai_handler_test.cpp
+++ b/src/test/http_openai_handler_test.cpp
@@ -212,7 +212,7 @@ Key: content-type; Value: application/json
         }
     
 JSON Parser:
-{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only
+{"model":"gpt","stream":false,"messages":[]}0)";  // non-queue path: fresh graph, poller gets first packet only
     ASSERT_EQ(response, expectedResponse);
 }
 
@@ -244,7 +244,7 @@ Key: test2; Value: header
         }
     
 JSON Parser:
-{"model":"gpt","stream":false,"messages":[]}0)"; // non-queue path: fresh graph, poller gets first packet only
+{"model":"gpt","stream":false,"messages":[]}0)";  // non-queue path: fresh graph, poller gets first packet only
     ASSERT_EQ(response, expectedResponse);
 }
 
diff --git a/src/test/llm/config_queue.json b/src/test/llm/config_queue.json
new file mode 100644
index 0000000000..1e16802ed9
--- /dev/null
+++ b/src/test/llm/config_queue.json
@@ -0,0 +1,9 @@
+{
+    "model_config_list": [],
+    "mediapipe_config_list": [
+    {
+        "name":"lm_cb_regular_queue",
+        "graph_path":"/ovms/src/test/llm/lm_cb_regular_queue.pbtxt"
+    }
+    ]
+}
diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp
index 19e2d75246..355e6856ac 100644
--- a/src/test/llm/llmnode_test.cpp
+++ b/src/test/llm/llmnode_test.cpp
@@ -174,6 +174,51 @@ std::shared_ptr<ov::genai::ContinuousBatchingPipeline> LLMFlowHttpTest::cbPipe;
 std::shared_ptr<LLMExecutorWrapper> LLMFlowHttpTest::llmExecutorWrapper;
 std::unique_ptr<std::thread> LLMFlowHttpTest::t;
 
+class LLMFlowHttpQueueGraphTest : public ::testing::Test {
+protected:
+    static std::unique_ptr<std::thread> t;
+
+public:
+    std::unique_ptr<ovms::HttpRestApiHandler> handler;
+    std::unordered_map<std::string, std::string> headers{{"content-type", "application/json"}};
+    ovms::HttpRequestComponents comp;
+    const std::string endpointChatCompletions = "/v3/chat/completions";
+    const std::string endpointCompletions = "/v3/completions";
+    std::shared_ptr<MockedServerRequestInterface> writer;
+    std::shared_ptr<MockedMultiPartParser> multiPartParser;
+    std::string response;
+    rapidjson::Document parsedResponse;
+    ovms::HttpResponseComponents responseComponents;
+
+    static void SetUpTestSuite() {
+        std::string port = "9173";
+        ovms::Server& server = ovms::Server::instance();
+        ::SetUpServer(t, server, port, getGenericFullPathForSrcTest("/ovms/src/test/llm/config_queue.json").c_str(), 60);
+    }
+
+    static void TearDownTestSuite() {
+        ovms::Server& server = ovms::Server::instance();
+        server.setShutdownRequest(1);
+        t->join();
+        server.setShutdownRequest(0);
+    }
+
+    void SetUp() {
+        writer = std::make_shared<MockedServerRequestInterface>();
+        multiPartParser = std::make_shared<MockedMultiPartParser>();
+        ON_CALL(*writer, PartialReplyBegin(::testing::_)).WillByDefault(testing::Invoke([](std::function<void()> fn) { fn(); }));
+        ovms::Server& server = ovms::Server::instance();
+        handler = std::make_unique<ovms::HttpRestApiHandler>(server, 5);
+        ASSERT_EQ(handler->parseRequestComponents(comp, "POST", endpointCompletions, headers), ovms::StatusCode::OK);
+    }
+
+    void TearDown() {
+        handler.reset();
+    }
+};
+
+std::unique_ptr<std::thread> LLMFlowHttpQueueGraphTest::t;
+
 // --------------------------------------- OVMS LLM nodes tests
 
 /* 
@@ -249,6 +294,157 @@ TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJson) {
     }
 }
 
+TEST_F(LLMFlowHttpQueueGraphTest, unaryCompletionsJsonQueueGraph) {
+    std::string requestBody = R"(
+        {
+            "model": "lm_cb_regular_queue",
+            "stream": false,
+            "seed" : 1,
+            "best_of": 16,
+            "max_tokens": 5,
+            "prompt": "What is OpenVINO?"
+        }
+    )";
+
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+    parsedResponse.Parse(response.c_str());
+    ASSERT_TRUE(parsedResponse["choices"].IsArray());
+    ASSERT_EQ(parsedResponse["choices"].Capacity(), 1);
+    for (auto& choice : parsedResponse["choices"].GetArray()) {
+        ASSERT_TRUE(choice["finish_reason"].IsString());
+        ASSERT_FALSE(choice["logprobs"].IsObject());
+        ASSERT_TRUE(choice["text"].IsString());
+    }
+
+    ASSERT_TRUE(parsedResponse["usage"].IsObject());
+    ASSERT_TRUE(parsedResponse["usage"].GetObject()["prompt_tokens"].IsInt());
+    ASSERT_TRUE(parsedResponse["usage"].GetObject()["completion_tokens"].IsInt());
+    ASSERT_TRUE(parsedResponse["usage"].GetObject()["total_tokens"].IsInt());
+    ASSERT_EQ(parsedResponse["usage"].GetObject()["completion_tokens"].GetInt(), 5);
+    EXPECT_STREQ(parsedResponse["model"].GetString(), "lm_cb_regular_queue");
+    EXPECT_STREQ(parsedResponse["object"].GetString(), "text_completion");
+}
+
+TEST_F(LLMFlowHttpQueueGraphTest, unaryChatCompletionsJsonQueueGraph) {
+    std::string requestBody = R"(
+        {
+            "model": "lm_cb_regular_queue",
+            "stream": false,
+            "seed" : 1,
+            "max_tokens": 5,
+            "messages": [
+            {
+                "role": "user",
+                "content": "What is OpenVINO?"
+            }
+            ]
+        }
+    )";
+
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+    parsedResponse.Parse(response.c_str());
+    ASSERT_TRUE(parsedResponse["choices"].IsArray());
+    ASSERT_EQ(parsedResponse["choices"].Capacity(), 1);
+    for (auto& choice : parsedResponse["choices"].GetArray()) {
+        ASSERT_TRUE(choice["finish_reason"].IsString());
+        ASSERT_TRUE(choice["message"].IsObject());
+        ASSERT_TRUE(choice["message"]["content"].IsString());
+        EXPECT_STREQ(choice["message"]["role"].GetString(), "assistant");
+    }
+
+    ASSERT_TRUE(parsedResponse["usage"].IsObject());
+    ASSERT_TRUE(parsedResponse["usage"].GetObject()["prompt_tokens"].IsInt());
+    ASSERT_TRUE(parsedResponse["usage"].GetObject()["completion_tokens"].IsInt());
+    ASSERT_TRUE(parsedResponse["usage"].GetObject()["total_tokens"].IsInt());
+    ASSERT_EQ(parsedResponse["usage"].GetObject()["completion_tokens"].GetInt(), 5);
+    EXPECT_STREQ(parsedResponse["model"].GetString(), "lm_cb_regular_queue");
+    EXPECT_STREQ(parsedResponse["object"].GetString(), "chat.completion");
+}
+
+TEST_F(LLMFlowHttpQueueGraphTest, streamChatCompletionsQueueGraph) {
+    std::string requestBody = R"(
+        {
+            "model": "lm_cb_regular_queue",
+            "stream": true,
+            "seed" : 1,
+            "max_tokens": 5,
+            "ignore_eos": true,
+            "messages": [
+            {
+                "role": "user",
+                "content": "What is OpenVINO?"
+            }
+            ]
+        }
+    )";
+    ON_CALL(*writer, PartialReply).WillByDefault([this](std::string response) {
+        rapidjson::Document d;
+        std::string dataPrefix = "data:";
+        ASSERT_STREQ(response.substr(0, dataPrefix.size()).c_str(), dataPrefix.c_str());
+        size_t pos = response.find("\n");
+        ASSERT_NE(pos, response.npos);
+        rapidjson::ParseResult parsingSucceeded = d.Parse(response.substr(dataPrefix.size(), (pos - dataPrefix.size())).c_str());
+        ASSERT_EQ(parsingSucceeded.Code(), 0);
+        ASSERT_TRUE(d["choices"].IsArray());
+        ASSERT_EQ(d["choices"].Capacity(), 1);
+        int i = 0;
+        for (auto& choice : d["choices"].GetArray()) {
+            if (choice["finish_reason"].IsString()) {
+                EXPECT_STREQ(choice["finish_reason"].GetString(), "length");
+            } else {
+                ASSERT_TRUE(choice["finish_reason"].IsNull());
+            }
+            ASSERT_EQ(choice["index"], i++);
+            ASSERT_TRUE(choice["delta"].IsObject());
+            ASSERT_TRUE(choice["delta"]["content"].IsString());
+        }
+        EXPECT_STREQ(d["model"].GetString(), "lm_cb_regular_queue");
+        EXPECT_STREQ(d["object"].GetString(), "chat.completion.chunk");
+    });
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::PARTIAL_END);
+}
+
+// Test that verifies graph reuse works correctly with queue size 1
+// Sends 2 sequential requests to ensure the same graph instance is reused
+TEST_F(LLMFlowHttpQueueGraphTest, queueGraphReuseTwoRequests) {
+    std::string requestBody = R"(
+        {
+            "model": "lm_cb_regular_queue",
+            "stream": false,
+            "seed" : 1,
+            "max_tokens": 5,
+            "prompt": "What is OpenVINO?"
+        }
+    )";
+
+    // First request
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+    parsedResponse.Parse(response.c_str());
+    ASSERT_TRUE(parsedResponse["choices"].IsArray());
+    ASSERT_EQ(parsedResponse["choices"].Capacity(), 1);
+    ASSERT_TRUE(parsedResponse["choices"].GetArray()[0]["text"].IsString());
+
+    // Second request - reuses the same graph from the queue
+    // This validates that timestamp increment works for graph reuse
+    response.clear();
+    ASSERT_EQ(
+        handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+    parsedResponse.Parse(response.c_str());
+    ASSERT_TRUE(parsedResponse["choices"].IsArray());
+    ASSERT_EQ(parsedResponse["choices"].Capacity(), 1);
+    ASSERT_TRUE(parsedResponse["choices"].GetArray()[0]["text"].IsString());
+    // Note: Responses may differ due to KV cache state despite same seed
+}
+
 TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJsonEchoWithCompletion) {
     auto params = GetParam();
     // TODO: In the next step we should break this suite into smaller ones, use proper configuration instead of skipping
diff --git a/src/test/llm/lm_cb_regular_queue.pbtxt b/src/test/llm/lm_cb_regular_queue.pbtxt
new file mode 100644
index 0000000000..60ef13f6b7
--- /dev/null
+++ b/src/test/llm/lm_cb_regular_queue.pbtxt
@@ -0,0 +1,47 @@
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# OVMS_GRAPH_QUEUE_SIZE: 1
+input_stream: "HTTP_REQUEST_PAYLOAD:input"
+output_stream: "HTTP_RESPONSE_PAYLOAD:output"
+node {
+    name: "llmNode1"
+    calculator: "HttpLLMCalculator"
+    input_side_packet: "LLM_NODE_RESOURCES:llm"
+    input_side_packet: "LLM_NODE_EXECUTION_CONTEXTS:llm_ctx"
+    input_stream: "LOOPBACK:loopback"
+    input_stream: "HTTP_REQUEST_PAYLOAD:input"
+    output_stream: "LOOPBACK:loopback"
+    output_stream: "HTTP_RESPONSE_PAYLOAD:output"
+    input_stream_info: {
+      tag_index: 'LOOPBACK:0',
+      back_edge: true
+    }
+    node_options: {
+        [type.googleapis.com/mediapipe.LLMCalculatorOptions]: {
+          models_path: "/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct"
+          cache_size: 1
+        }
+    }
+    input_stream_handler {
+      input_stream_handler: "SyncSetInputStreamHandler",
+      options {
+        [mediapipe.SyncSetInputStreamHandlerOptions.ext] {
+          sync_set {
+            tag_index: "LOOPBACK:0"
+          }
+        }
+      }
+    }
+}
diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp
index 997d2048ef..b61d8a48ef 100644
--- a/src/test/streaming_test.cpp
+++ b/src/test/streaming_test.cpp
@@ -388,7 +388,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::KFS_REQUEST}},
         {{"out", mediapipe_packet_type_enum::KFS_RESPONSE}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
@@ -445,7 +445,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}});  // no timestamp specified, server will assign one
@@ -588,7 +588,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests with manually (client) assigned ascending order of timestamp and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, 3);  // first request with timestamp 3
@@ -633,7 +633,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock only 1 request and disconnect immediately
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
@@ -669,7 +669,7 @@ node {
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
         {"in"},
         {"out"},
-        2);
+        1);
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     EXPECT_CALL(this->stream, Read(_))
@@ -714,7 +714,7 @@ node {
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
         {"in"},
         {"out"},
-        2);
+        1);
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     EXPECT_CALL(this->stream, Read(_))
@@ -747,7 +747,7 @@ node {
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
         {"in"},
         {"out"},
-        2);
+        1);
 
     prepareRequest(this->firstRequest, {});
     EXPECT_CALL(this->stream, Read(_))
@@ -777,7 +777,7 @@ node {
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
         {"in"},
         {"out"},
-        2);
+        1);
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -811,7 +811,7 @@ node {
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
         {"in"},
         {"out"},
-        2);
+        1);
 
     prepareInvalidRequest(this->firstRequest, {"in"});
 
@@ -1437,7 +1437,7 @@ node {
             {"out3", mediapipe_packet_type_enum::OVTENSOR}},
         {"in1", "in2", "in3"},
         {"out1", "out2", "out3"},
-{}, nullptr, this->reporter.get()};
+        {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1489,7 +1489,7 @@ node {
             {"out3", mediapipe_packet_type_enum::OVTENSOR}},
         {"in1", "in2", "in3"},
         {"out1", "out2", "out3"},
-{}, nullptr, this->reporter.get()};
+        {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1524,7 +1524,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1558,7 +1558,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"wrong_name"}, {}, nullptr, this->reporter.get()};  // cannot install observer due to wrong output name (should never happen due to validation)
+        {"in"}, {"wrong_name"}, {}, nullptr, this->reporter.get()};  // cannot install observer due to wrong output name (should never happen due to validation)
 
     EXPECT_CALL(this->stream, Read(_)).Times(0);
     EXPECT_CALL(this->stream, Write(_, _)).Times(0);
@@ -1583,7 +1583,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {});
     EXPECT_CALL(this->stream, Read(_))
@@ -1611,7 +1611,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1647,7 +1647,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
@@ -1670,7 +1670,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Invalid request - missing data in buffer
     prepareInvalidRequest(this->firstRequest, {"in"});  // no timestamp specified, server will assign one
@@ -1705,7 +1705,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise[3];
     std::future<void> signalFuture[3] = {
@@ -1752,7 +1752,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, 0);
     EXPECT_CALL(this->stream, Read(_))
@@ -1780,7 +1780,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     setRequestTimestamp(this->firstRequest, std::string("not an int"));
@@ -1815,7 +1815,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Timestamps not allowed in stream
     // Expect continuity of operation and response with error message
@@ -1857,7 +1857,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Allowed in stream
     for (auto timestamp : std::vector<::mediapipe::Timestamp>{
@@ -1893,7 +1893,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequestWithParam(this->firstRequest, {{"in", 3.5f}}, {"val", 65});  // request with parameter val
@@ -1930,7 +1930,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving the invalid request and disconnection
     // Request with invalid param py (special pythons session side packet)
@@ -1959,7 +1959,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});  // missing required request param
     EXPECT_CALL(this->stream, Read(_)).Times(0);
@@ -1985,7 +1985,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 2 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, std::nullopt, this->name, this->version);  // no timestamp specified, server will assign one
@@ -2019,7 +2019,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-{"in"}, {"out"}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();

From 662a8f1ad734ed239738d3540660984d62f97c39 Mon Sep 17 00:00:00 2001
From: Adrian Tobiszewski <adrian.tobiszewski@intel.com>
Date: Mon, 23 Feb 2026 10:09:25 +0100
Subject: [PATCH 7/8] Style fixes

---
 src/BUILD                                     |  2 +-
 src/logging.cpp                               |  2 +-
 src/mediapipe_internal/graphqueue.cpp         |  1 +
 src/mediapipe_internal/graphqueue.hpp         |  5 ++--
 .../outputstreamobserver.hpp                  |  1 -
 src/test/mediapipe_framework_test.cpp         | 30 +++++++++----------
 6 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/BUILD b/src/BUILD
index 46d1d51a39..9cf49970fe 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -178,7 +178,7 @@ ovms_cc_library(
         "mediapipe_internal_graph_side_packets",
         "//third_party:openvino",
         "@mediapipe//mediapipe/framework:calculator_graph",
-        "//src/python:libovmspythonmodule", # TODO not splitted
+        "//src/python:libovmspythonmodule", # TODO not split
         "//src/llm:genai_servables", # TODO split!
     ],
     visibility = ["//visibility:public",],
diff --git a/src/logging.cpp b/src/logging.cpp
index c07bb6f8d4..aee9e4bc2e 100644
--- a/src/logging.cpp
+++ b/src/logging.cpp
@@ -41,7 +41,7 @@ std::shared_ptr<spdlog::logger> rerank_calculator_logger = std::make_shared<spdl
 #if (OV_TRACE == 1)
 std::shared_ptr<spdlog::logger> ov_logger = std::make_shared<spdlog::logger>("openvino");
 #endif
-//const std::string default_pattern = "[%i] [%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v";
+// const std::string default_pattern = "[%i] [%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v";
 const std::string default_pattern = "[%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v";
 
 static void set_log_level(const std::string log_level, std::shared_ptr<spdlog::logger> logger) {
diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp
index ccce0f65c3..37d6b742b8 100644
--- a/src/mediapipe_internal/graphqueue.cpp
+++ b/src/mediapipe_internal/graphqueue.cpp
@@ -18,6 +18,7 @@
 #include <atomic>
 #include <condition_variable>
 #include <future>
+#include <map>
 #include <memory>
 #include <mutex>
 #include <optional>
diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp
index ba09edd85e..5d59e6bac2 100644
--- a/src/mediapipe_internal/graphqueue.hpp
+++ b/src/mediapipe_internal/graphqueue.hpp
@@ -21,8 +21,10 @@
 #include <memory>
 #include <mutex>
 #include <optional>
+#include <string>
 #include <queue>
 #include <thread>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -54,9 +56,8 @@ struct GraphHelper {
     GraphHelper& operator=(GraphHelper&& gh) = default;
 };
 // we need to keep Graph alive during MP reload hence shared_ptr
-//class GraphQueue : public Queue<std::shared_ptr<::mediapipe::CalculatorGraph>> {
 class GraphQueue : public Queue<std::shared_ptr<GraphHelper>> {
-public:  // XXX TODO make private? we need to acces in mediapipegraphdefinition to set side packets though
+public:  // XXX TODO make private? we need to access in mediapipegraphdefinition to set side packets though
     std::shared_ptr<GraphSidePackets> sidePacketMaps;
 
 public:
diff --git a/src/mediapipe_internal/outputstreamobserver.hpp b/src/mediapipe_internal/outputstreamobserver.hpp
index 1a314e73ae..f2f8a5023e 100644
--- a/src/mediapipe_internal/outputstreamobserver.hpp
+++ b/src/mediapipe_internal/outputstreamobserver.hpp
@@ -37,7 +37,6 @@
 #pragma GCC diagnostic pop
 #pragma warning(pop)
 #include "mediapipe_utils.hpp"
-//#include "mediapipegraphdefinition.hpp"  // for version in response and PythonNodeResourceMap
 #include "packettypes.hpp"
 #include "graphqueue.hpp"
 
diff --git a/src/test/mediapipe_framework_test.cpp b/src/test/mediapipe_framework_test.cpp
index bc7c13bad9..53c86f001b 100644
--- a/src/test/mediapipe_framework_test.cpp
+++ b/src/test/mediapipe_framework_test.cpp
@@ -137,12 +137,12 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerCheckNoInputPackets)
         ParseTextProtoOrDie<CalculatorGraphConfig>(graph_proto);
     const std::string inputStreamName = "input";
     const std::string outputStreamName = "output";
-    // avoid creating pollers, retreiving packets etc.
+    // avoid creating pollers, retrieving packets etc.
     //////////////////
     // model mgmt thread
     //////////////////
-    //std::shared_ptr<ovms::GraphQueue> queue;
-    //queue = std::make_shared<GraphQueue>(graphConfig, 1);
+    // std::shared_ptr<ovms::GraphQueue> queue;
+    // queue = std::make_shared<GraphQueue>(graphConfig, 1);
     ::mediapipe::CalculatorGraph graph;
     EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk);
     // Install NullObserver
@@ -153,7 +153,7 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerCheckNoInputPackets)
     MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); }));
     // Here ends model management
     // Here starts mp graph executor
-    //ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout?
+    // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout?
     // get graphIdGuard from queue
     // create FrontendAppropriateObserver
     float expVal = 13.5;
@@ -193,9 +193,9 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerCheckNoInputPackets)
     perGraphObserverFunctor = std::make_shared<MyFunctor>(expVal);
     // now add second packet
     auto inputTensor2 = std::make_unique<ov::Tensor>(datatype, shape, data.data());
-    //MP_ERROR_STOP(graph.AddPacketToInputStream(
+    // MP_ERROR_STOP(graph.AddPacketToInputStream(
     //    inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++))));
-    //MP_ERROR_STOP(graph.WaitUntilIdle());
+    // MP_ERROR_STOP(graph.WaitUntilIdle());
     MP_ERROR_STOP(graph.CloseAllPacketSources());
     MP_ERROR_STOP(graph.WaitUntilDone());
 }
@@ -238,12 +238,12 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOC) {
         ParseTextProtoOrDie<CalculatorGraphConfig>(graph_proto);
     const std::string inputStreamName = "input";
     const std::string outputStreamName = "output";
-    // avoid creating pollers, retreiving packets etc.
+    // avoid creating pollers, retrieving packets etc.
     //////////////////
     // model mgmt thread
     //////////////////
-    //std::shared_ptr<ovms::GraphQueue> queue;
-    //queue = std::make_shared<GraphQueue>(graphConfig, 1);
+    // std::shared_ptr<ovms::GraphQueue> queue;
+    // queue = std::make_shared<GraphQueue>(graphConfig, 1);
     ::mediapipe::CalculatorGraph graph;
     EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk);
     // Install NullObserver
@@ -254,7 +254,7 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOC) {
     MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); }));
     // Here ends model management
     // Here starts mp graph executor
-    //ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout?
+    // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout?
     // get graphIdGuard from queue
     // create FrontendAppropriateObserver
     float expVal = 13.5;
@@ -336,12 +336,12 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOCCompare) {
         ParseTextProtoOrDie<CalculatorGraphConfig>(graph_proto);
     const std::string inputStreamName = "input";
     const std::string outputStreamName = "output";
-    // avoid creating pollers, retreiving packets etc.
+    // avoid creating pollers, retrieving packets etc.
     //////////////////
     // model mgmt thread
     //////////////////
-    //std::shared_ptr<ovms::GraphQueue> queue;
-    //queue = std::make_shared<GraphQueue>(graphConfig, 1);
+    // std::shared_ptr<ovms::GraphQueue> queue;
+    // queue = std::make_shared<GraphQueue>(graphConfig, 1);
     auto datatype = ov::element::Type_t::f32;
     ov::Shape shape{1, 10};
     int timestamp{0};
@@ -363,7 +363,7 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOCCompare) {
         MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); }));
         // Here ends model management
         // Here starts mp graph executor
-        //ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout?
+        // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout?
         // get graphIdGuard from queue
         // create FrontendAppropriateObserver
         struct MyFunctor : public OutputStreamObserverI {
@@ -443,7 +443,7 @@ TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOCCompare) {
         SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX end:{}", timer.elapsed<std::chrono::microseconds>(1) / 1000);
     }
     {  // thread pool case
-        //auto sharedThreadPool = std::make_shared<mediapipe::ThreadPoolExecutor>(std::thread::hardware_concurrency());
+        // auto sharedThreadPool = std::make_shared<mediapipe::ThreadPoolExecutor>(std::thread::hardware_concurrency());
         auto sharedThreadPool = std::make_shared<mediapipe::ThreadPoolExecutor>(24);
         SPDLOG_ERROR("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX thread");
         timer.start(2);

From 79f91f35143d81ddab4f92ff662eb017b731e20c Mon Sep 17 00:00:00 2001
From: Adrian Tobiszewski <adrian.tobiszewski@intel.com>
Date: Mon, 23 Feb 2026 12:40:47 +0100
Subject: [PATCH 8/8] Fix windows

---
 src/kfs_frontend/kfs_graph_executor_impl.cpp | 2 +-
 src/mediapipe_internal/graphqueue.cpp        | 3 +++
 src/mediapipe_internal/graphqueue.hpp        | 6 ++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/kfs_frontend/kfs_graph_executor_impl.cpp b/src/kfs_frontend/kfs_graph_executor_impl.cpp
index 86778ca899..2935b90a23 100644
--- a/src/kfs_frontend/kfs_graph_executor_impl.cpp
+++ b/src/kfs_frontend/kfs_graph_executor_impl.cpp
@@ -24,6 +24,7 @@
 
 #include "../kfs_frontend/kfs_utils.hpp"
 #include "../logging.hpp"
+#include "../mediapipe_internal/graph_executor_constants.hpp"
 #include "../mediapipe_internal/mediapipe_utils.hpp"
 #include "../mediapipe_internal/mediapipegraphdefinition.hpp"
 #include "../predict_request_validation_utils.hpp"
@@ -1168,7 +1169,6 @@ bool requestHasInputSidePackets(const KFSRequest& request) {
 Status deserializeInputSidePacketsFromFirstRequestImpl(
     std::map<std::string, mediapipe::Packet>& inputSidePackets,
     const KFSRequest& request) {
-    static const std::string PYTHON_SESSION_SIDE_PACKET_TAG{"py"};
     for (const auto& [name, valueChoice] : request.parameters()) {
         SPDLOG_DEBUG("Found: {}; parameter in request for: {};", name, request.model_name());
         if (name == TIMESTAMP_PARAMETER_NAME) {
diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp
index 37d6b742b8..b5b0146192 100644
--- a/src/mediapipe_internal/graphqueue.cpp
+++ b/src/mediapipe_internal/graphqueue.cpp
@@ -31,8 +31,11 @@
 #include "src/python/pythonnoderesources.hpp"
 #include "src/llm/servable.hpp"
 
+#pragma warning(push)
+#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246)
 #include "mediapipe/framework/calculator_graph.h"
 #include "mediapipe/framework/port/status.h"
+#pragma warning(pop)
 
 #include "graph_executor_constants.hpp"
 #include "outputstreamobserver.hpp"
diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp
index 5d59e6bac2..6884f31877 100644
--- a/src/mediapipe_internal/graphqueue.hpp
+++ b/src/mediapipe_internal/graphqueue.hpp
@@ -30,8 +30,14 @@
 
 #include "../queue.hpp"
 
+#pragma warning(push)
+#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 #include "mediapipe/framework/calculator_graph.h"
 #include "mediapipe/framework/port/status.h"
+#pragma GCC diagnostic pop
+#pragma warning(pop)
 
 #include "graph_executor_constants.hpp"
 #include "graph_side_packets.hpp"