From 4f778421ad43ca0667c905592bf2b60e3c0786a3 Mon Sep 17 00:00:00 2001 From: Michal Kulakowski Date: Mon, 9 Feb 2026 11:11:33 +0100 Subject: [PATCH 1/2] s2t calculator fixes + missing ut --- src/audio/speech_to_text/s2t_calculator.cc | 12 +++++----- src/test/audio/speech2text_test.cpp | 27 +++++++++++++++++++--- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/audio/speech_to_text/s2t_calculator.cc b/src/audio/speech_to_text/s2t_calculator.cc index 4346d30a89..cfa22cd132 100644 --- a/src/audio/speech_to_text/s2t_calculator.cc +++ b/src/audio/speech_to_text/s2t_calculator.cc @@ -109,15 +109,15 @@ class S2tCalculator : public CalculatorBase { } try { if (payload.multipartParser->hasParseError()) - return absl::InvalidArgumentError("Failed to parse multipart data"); + return absl::InvalidArgumentError("Failed to parse multipart data."); std::string_view stream = payload.multipartParser->getFieldByName("stream"); if (!stream.empty()) { - return absl::InvalidArgumentError("streaming is not supported"); + return absl::InvalidArgumentError("Streaming is not supported."); } std::string_view file = payload.multipartParser->getFileContentByFieldName("file"); if (file.empty()) { - return absl::InvalidArgumentError(absl::StrCat("File parsing fails")); + return absl::InvalidArgumentError(absl::StrCat("File parsing failed.")); } std::vector rawSpeech; @@ -130,7 +130,7 @@ class S2tCalculator : public CalculatorBase { SPDLOG_DEBUG("Received file format: mp3"); } } catch (std::exception&) { - return absl::InvalidArgumentError("Received input file is not valid wav nor mp3 audio file"); + return absl::InvalidArgumentError("Received input file is not valid wav nor mp3 audio file."); } rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); @@ -154,10 +154,10 @@ class S2tCalculator : public CalculatorBase { config.return_timestamps = true; } else if (timestampsType == "word") { if (!pipe->enableWordTimestamps) - return absl::InvalidArgumentError("Word timestamps 
not supported for this model"); + return absl::InvalidArgumentError("Word timestamps not supported for this model."); config.word_timestamps = true; } else { - return absl::InvalidArgumentError("Invalid timestamp_granularities type. Allowed types: \"segment\", \"word\""); + return absl::InvalidArgumentError("Invalid timestamp_granularities type. Allowed types: \"segment\", \"word\"."); } } std::string temperature = payload.multipartParser->getFieldByName("temperature"); diff --git a/src/test/audio/speech2text_test.cpp b/src/test/audio/speech2text_test.cpp index 5a957e1abc..e9672e92ca 100644 --- a/src/test/audio/speech2text_test.cpp +++ b/src/test/audio/speech2text_test.cpp @@ -257,7 +257,28 @@ TEST_F(Speech2TextHttpTest, invalidFile) { status.getCode(), ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR); std::string expectedMsg = "Mediapipe execution failed. MP status - INVALID_ARGUMENT: CalculatorGraph::Run() failed: \n" - "Calculator::Process() for node \"S2tExecutor\" failed: File parsing fails"; + "Calculator::Process() for node \"S2tExecutor\" failed: File parsing failed."; + EXPECT_EQ(status.string(), expectedMsg); +} + +TEST_F(Speech2TextHttpTest, invalidStreamTrue) { + auto req = drogon::HttpRequest::newHttpRequest(); + req->setMethod(drogon::Post); + req->addHeader("content-type", "multipart/form-data; boundary=\"12345\""); + std::string stream = "\r\n" + "Content-Disposition: form-data;name=\"stream\"\r\n" + "\r\n" + "true\r\n" + "--12345"; + req->setBody(Speech2TextHttpTest::body + stream); + std::shared_ptr multiPartParserWithRequest = std::make_shared(req); + std::string requestBody = ""; + auto status = handler->dispatchToProcessor(endpoint, requestBody, &response, comp, responseComponents, writer, multiPartParserWithRequest); + ASSERT_EQ( + status.getCode(), + ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR); + std::string expectedMsg = "Mediapipe execution failed. 
MP status - INVALID_ARGUMENT: CalculatorGraph::Run() failed: \n" + "Calculator::Process() for node \"S2tExecutor\" failed: Streaming is not supported."; EXPECT_EQ(status.string(), expectedMsg); } @@ -338,7 +359,7 @@ TEST_F(Speech2TextHttpTest, invalidTimestampType) { status.getCode(), ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR); std::string expectedMsg = "Mediapipe execution failed. MP status - INVALID_ARGUMENT: CalculatorGraph::Run() failed: \n" - "Calculator::Process() for node \"S2tExecutor\" failed: Invalid timestamp_granularities type. Allowed types: \"segment\", \"word\""; + "Calculator::Process() for node \"S2tExecutor\" failed: Invalid timestamp_granularities type. Allowed types: \"segment\", \"word\"."; EXPECT_EQ(status.string(), expectedMsg); } @@ -359,6 +380,6 @@ TEST_F(Speech2TextHttpTest, emptyTimestampType) { status.getCode(), ovms::StatusCode::MEDIAPIPE_EXECUTION_ERROR); std::string expectedMsg = "Mediapipe execution failed. MP status - INVALID_ARGUMENT: CalculatorGraph::Run() failed: \n" - "Calculator::Process() for node \"S2tExecutor\" failed: Invalid timestamp_granularities type. Allowed types: \"segment\", \"word\""; + "Calculator::Process() for node \"S2tExecutor\" failed: Invalid timestamp_granularities type. 
Allowed types: \"segment\", \"word\"."; EXPECT_EQ(status.string(), expectedMsg); } From 1fa664b459487e5eb4564b75f5ee6c957470dc8f Mon Sep 17 00:00:00 2001 From: Michal Kulakowski Date: Wed, 11 Feb 2026 10:41:26 +0100 Subject: [PATCH 2/2] style; disable 512-float speaker embedding size check in t2s_servable --- src/audio/text_to_speech/t2s_servable.cpp | 6 +++--- src/test/audio/speech2text_test.cpp | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/audio/text_to_speech/t2s_servable.cpp b/src/audio/text_to_speech/t2s_servable.cpp index c782c9346d..36532fd99d 100644 --- a/src/audio/text_to_speech/t2s_servable.cpp +++ b/src/audio/text_to_speech/t2s_servable.cpp @@ -49,9 +49,9 @@ static ov::Tensor read_speaker_embedding(const std::filesystem::path& file_path) throw std::runtime_error("File size is not a multiple of float size."); } size_t num_floats = buffer_size / sizeof(float); - if (num_floats != 512) { - throw std::runtime_error("File must contain speaker embedding including 512 32-bit floats."); - } + // if (num_floats != 512) { + // throw std::runtime_error("File must contain speaker embedding including 512 32-bit floats."); + // } ov::Tensor floats_tensor(ov::element::f32, ov::Shape{1, num_floats}); input.read(reinterpret_cast(floats_tensor.data()), buffer_size); diff --git a/src/test/audio/speech2text_test.cpp b/src/test/audio/speech2text_test.cpp index e9672e92ca..48e71a768c 100644 --- a/src/test/audio/speech2text_test.cpp +++ b/src/test/audio/speech2text_test.cpp @@ -266,10 +266,10 @@ TEST_F(Speech2TextHttpTest, invalidStreamTrue) { req->setMethod(drogon::Post); req->addHeader("content-type", "multipart/form-data; boundary=\"12345\""); std::string stream = "\r\n" - "Content-Disposition: form-data;name=\"stream\"\r\n" - "\r\n" - "true\r\n" - "--12345"; + "Content-Disposition: form-data;name=\"stream\"\r\n" + "\r\n" + "true\r\n" + "--12345"; req->setBody(Speech2TextHttpTest::body + stream); std::shared_ptr multiPartParserWithRequest = std::make_shared(req); std::string requestBody = "";