From 3a2dfcec825cf6bbf9ad5cdad41484ea2c29d9b6 Mon Sep 17 00:00:00 2001
From: KishanFW <krishanfernando129@gmail.com>
Date: Wed, 6 May 2026 23:37:36 +1000
Subject: [PATCH] Validate EfficientNetV2 MQTT inference in Docker

---
 src/Components/Simulator/requirements.txt     |   2 +-
 src/Components/Simulator/src/comms_manager.py |   8 +-
 src/Components/Simulator/src/control_sim.py   |   7 +-
 .../Simulator/src/system_manager.py           |   4 +-
 ...EADME_EfficientNetV2_Engine_Integration.md | 101 +++++++++++++++---
 ...light_echo_engine_efficientnetv2_tflite.py |  41 +++----
 .../engine/torch_impl/requirements.txt        |  26 ++---
 7 files changed, 122 insertions(+), 67 deletions(-)

diff --git a/src/Components/Simulator/requirements.txt b/src/Components/Simulator/requirements.txt
index 2c638c3c8..b7492a74c 100644
--- a/src/Components/Simulator/requirements.txt
+++ b/src/Components/Simulator/requirements.txt
@@ -5,7 +5,7 @@ google-cloud-storage
 ipykernel
 librosa
 matplotlib
-paho-mqtt
+paho-mqtt==1.6.1
 xgboost
 numpy
 pandas
diff --git a/src/Components/Simulator/src/comms_manager.py b/src/Components/Simulator/src/comms_manager.py
index b001aa502..4a13d6af0 100644
--- a/src/Components/Simulator/src/comms_manager.py
+++ b/src/Components/Simulator/src/comms_manager.py
@@ -110,7 +110,13 @@ def mqtt_send_random_audio_msg(self, animal, predicted_lla, closest_mic, min_err
         
         # For now, send filename across for format information
         audio_file = sample_blob.name.split('/')[1]
-         
+
+        print("DEBUG audio_file:", audio_file, flush=True)
+        print("DEBUG audio bytes length:", len(audio), flush=True)
+        print("DEBUG first 20 bytes:", audio[:20], flush=True)
+        print("DEBUG audioClip base64 length:", len(audio_str), flush=True)
+        print("DEBUG audio_file extension:", audio_file.split(".")[-1], flush=True)
+                
         # Create the vocalisation event
         vocalisation_event = {
             "timestamp": timestamp.isoformat(),
diff --git a/src/Components/Simulator/src/control_sim.py b/src/Components/Simulator/src/control_sim.py
index 7f289e177..a932c7466 100644
--- a/src/Components/Simulator/src/control_sim.py
+++ b/src/Components/Simulator/src/control_sim.py
@@ -2,8 +2,11 @@
 import time
 
 # Set the MQTT broker URL and port
-MQTT_BROKER_URL = "localhost"
+MQTT_BROKER_URL = "echo_mqtt"
 MQTT_BROKER_PORT = 1883
 
-publish.single("Simulator_Controls", "Start", hostname=MQTT_BROKER_URL, port=MQTT_BROKER_PORT)
+publish.single("Simulator_Controls", "Animal_Mode", hostname=MQTT_BROKER_URL, port=MQTT_BROKER_PORT)
+
+time.sleep(30)
+
 publish.single("Simulator_Controls", "Stop", hostname=MQTT_BROKER_URL, port=MQTT_BROKER_PORT)
diff --git a/src/Components/Simulator/src/system_manager.py b/src/Components/Simulator/src/system_manager.py
index 97d523a18..3139bd705 100644
--- a/src/Components/Simulator/src/system_manager.py
+++ b/src/Components/Simulator/src/system_manager.py
@@ -11,7 +11,7 @@ def __init__(self):
         self.sim_running = False
         self.sim_task = None
         self.command_queue = asyncio.Queue()
-        self.sim_mode = "Animal Mode"
+        self.sim_mode = "Animal_Mode"
 
         self.topic_handlers = {
             str("Simulator_Controls"): self.on_message,
@@ -109,7 +109,7 @@ async def run_loop(self):
             print(f"Simulator got command {command}", flush=True)
             
             if str(command) == str("Start"):
-                await self.start_sim()
+                await self.start_sim(self.sim_mode)
 
             elif str(command) == str("Stop"):
                 await self.stop_sim()
diff --git a/src/Prototypes/engine/torch_impl/Integrate_EfficientNetV2_Engine/README_EfficientNetV2_Engine_Integration.md b/src/Prototypes/engine/torch_impl/Integrate_EfficientNetV2_Engine/README_EfficientNetV2_Engine_Integration.md
index 9b27b0278..4b30e134f 100644
--- a/src/Prototypes/engine/torch_impl/Integrate_EfficientNetV2_Engine/README_EfficientNetV2_Engine_Integration.md
+++ b/src/Prototypes/engine/torch_impl/Integrate_EfficientNetV2_Engine/README_EfficientNetV2_Engine_Integration.md
@@ -518,9 +518,9 @@ This validates that the copied real Engine message flow can call the EfficientNe
 
 ---
 
-### 12. Prototype Docker Engine Validation
+### 12. Prototype Docker Engine and Real MQTT Validation
 
-The EfficientNetV2 TFLite Engine file was also tested inside the Docker-based system environment using the prototype Engine path.
+The EfficientNetV2 TFLite Engine file was tested inside the Docker-based system environment using the prototype Engine path.
 
 The Docker Compose Engine build context was temporarily pointed to the prototype Engine folder:
 
@@ -554,20 +554,72 @@ EfficientNetV2 output shape: [  1 123]
 Engine started.
 ```
 
-This confirms that the EfficientNetV2 TFLite model, class mapping, preprocessing configuration, TensorFlow Lite interpreter, and required Docker dependencies are available inside the Engine container.
-
-The Engine then attempted to connect to the MQTT broker:
+Initially, the Engine could not connect to the MQTT broker because it was trying to connect to a hostname that did not exist inside the Docker Compose network:
 
 ```text
 DEBUG: Attempting to connect to Broker at: mqtt-broker:1883
 CONNECTION ERROR: [Errno -2] Name or service not known
 ```
 
-This MQTT connection issue is related to Docker Compose service naming or MQTT broker networking. It is not caused by the EfficientNetV2 model or the TFLite inference code, because the model had already loaded successfully before the MQTT connection step.
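+This was fixed by pointing the Engine at the MQTT broker's actual Docker Compose service/container hostname (`ts-mqtt-server-cont`, as shown in the log below) instead of the non-existent `mqtt-broker`. After the fix, the Engine successfully connected to the MQTT broker and subscribed to the Engine topic (`projectecho/engine/2`):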
+
+```text
+DEBUG: Connection Successful!
+Subscribing to MQTT: ts-mqtt-server-cont projectecho/engine/2
+Subscribed: message id 1 with qos (0,)
+Engine waiting for audio to arrive...
+```
+
+The simulator was then started through:
+
+```text
+system_manager.py
+```
+
+The simulator control script (`control_sim.py`) was used to publish the `Animal_Mode` command to the `Simulator_Controls` MQTT topic. The simulator successfully entered Animal Mode, generated animal movement events, and later triggered animal vocalisation events.
+
+The simulator audio payload was inspected and confirmed to be a valid WAV audio file downloaded from GCP and encoded as base64 before being sent to the Engine:
+
+```text
+Animal Vocal....
+DEBUG audio_file: region_7.950-8.650.wav
+DEBUG audio bytes length: 140810
+DEBUG first 20 bytes: b'RIFF\x02&\x02\x00WAVEfmt \x10\x00\x00\x00'
+DEBUG audioClip base64 length: 187748
+DEBUG audio_file extension: wav
+Vocal message sent ... species: Strepera versicolor
+```
+
+The Engine received real MQTT audio messages from the simulator and processed them using the EfficientNetV2 TFLite model. The previous TensorFlow Serving-style output handling was replaced with the local EfficientNetV2 TFLite inference function, because the old code posted the input to a model server and expected a JSON response containing the following key, which local TFLite inference does not produce:
+
+```text
+outputs
+```
+
+After updating the Engine message flow, the real MQTT audio inference worked successfully. Example Engine output:
+
+```text
+Recieved audio message, processing via engine model...
+Predicted class : Cervus Unicolour
+Predicted probability : 96.03
+Top predictions : [
+    {'index': 24, 'label': 'Cervus Unicolour', 'confidence': 0.9603432416915894},
+    {'index': 58, 'label': 'Felis Catus', 'confidence': 0.009665120393037796},
+    {'index': 50, 'label': 'Entomyzon cyanotis', 'confidence': 0.0020911835599690676},
+    {'index': 99, 'label': 'Rattus Norvegicus', 'confidence': 0.0018780785612761974},
+    {'index': 117, 'label': 'Vulpes vulpes', 'confidence': 0.0015766306314617395}
+]
+```
+
+Additional MQTT audio messages were also processed successfully, producing predicted species labels, confidence values, and top predictions.
+
+The only remaining issue is an `Internal Server Error` after prediction, when the Engine sends the detection event to the API/database. Since the prediction is already completed before this error, this appears to be an API/backend payload or database handling issue rather than an EfficientNetV2 model or Engine inference issue.
 
 ### Purpose
 
-This step validates that the EfficientNetV2 TFLite inference setup can run inside a Docker Engine container. It confirms the Docker-level model loading and dependency setup. The remaining blocker is the MQTT broker connection inside the wider system environment, which should be handled through Docker Compose service/network configuration.
+This step validates the full Engine-side MQTT inference flow. It confirms that the EfficientNetV2 TFLite model can run inside the Docker Engine container, receive real MQTT audio messages from the simulator, decode valid WAV audio, preprocess the audio, run inference, and produce species predictions with confidence values.
+
+---
 
 ## Validation Flow Completed
 
@@ -601,8 +653,18 @@ Engine-side EfficientNetV2 TFLite inference path
 Fake MQTT message validation through on_message()
     ↓
 Prototype Docker Engine model loading validation
+    ↓
+Docker MQTT broker connection validation
+    ↓
+Simulator Animal_Mode audio publish validation
+    ↓
+Real MQTT audio message received by Engine
+    ↓
+EfficientNetV2 TFLite prediction from MQTT audio
 ```
 
+---
+
 ## Current Status
 
 Completed:
@@ -624,17 +686,24 @@ Fake MQTT message validation through on_message()
 Prototype Docker Engine model loading validation
 Engine-specific Docker requirements setup
 EfficientNetV2 TFLite model successfully loaded inside Docker
+MQTT broker hostname/service-name issue resolved
+Simulator control flow tested through MQTT
+Simulator valid WAV audio payload confirmed
+Real MQTT audio messages received by Engine
+EfficientNetV2 TFLite inference completed from MQTT audio
+Predicted species, confidence values, and top predictions produced inside Engine container
 ```
 
-Not completed yet:
+Remaining issue:
 
 ```text
-Resolve MQTT broker hostname/service-name issue in Docker Compose
-Send a real MQTT audio message through the Docker Engine container
-Confirm the prediction result is sent successfully to the API/database
-Decide whether to migrate the validated prototype changes into the real Components Engine path
+Internal Server Error occurs after prediction when sending the detection event to the API/database
 ```
 
+This remaining issue appears to belong to the API/backend integration side, because the Engine model inference is already working and predictions are printed successfully before the API error appears.
+
+---
+
 ## Important Notes
 
 The current work focuses on creating a functioning Engine-compatible inference pipeline. Model accuracy can be improved later by increasing training epochs, tuning parameters, adding better validation sampling, or using more production-like audio.
@@ -645,8 +714,10 @@ The TFLite model was successfully generated with a file size of 85.19 MB. Becaus
 
 The current Engine-side integration was tested using a safe copied Engine file, `light_echo_engine_efficientnetv2_tflite.py`, so the original `light_echo_engine.py` remains unchanged.
 
-The copied Engine successfully loaded the EfficientNetV2 TFLite model and processed a fake MQTT-style message through `on_message()`. The model inference completed successfully, but the final API send step failed locally because `ts-api-cont` is only available inside the Docker/system network.
+The copied Engine successfully loaded the EfficientNetV2 TFLite model and processed both fake MQTT-style messages and real MQTT audio messages through the Engine flow.
 
-The prototype Docker Engine validation also confirmed that the EfficientNetV2 TFLite model loads successfully inside the Docker Engine container with the expected input shape `[1, 1, 128, 313]` and output shape `[1, 123]`. The Engine reached the MQTT connection stage, but the broker hostname `mqtt-broker` could not be resolved, which is a Docker Compose networking/service-name issue rather than a model integration issue.
+The real Docker MQTT test confirmed that the simulator sends valid WAV audio as a base64 `audioClip`, and the Engine can decode this message and run EfficientNetV2 TFLite inference successfully.
 
----
+The final Engine-side inference task is considered completed because the Engine now receives MQTT audio and returns predicted species labels, confidence values, and top predictions. The remaining `Internal Server Error` happens after inference during API/database submission and should be checked by the API/backend team.
+
+---
\ No newline at end of file
diff --git a/src/Prototypes/engine/torch_impl/light_echo_engine_efficientnetv2_tflite.py b/src/Prototypes/engine/torch_impl/light_echo_engine_efficientnetv2_tflite.py
index adb1f6ea2..d48f02fe2 100644
--- a/src/Prototypes/engine/torch_impl/light_echo_engine_efficientnetv2_tflite.py
+++ b/src/Prototypes/engine/torch_impl/light_echo_engine_efficientnetv2_tflite.py
@@ -496,38 +496,25 @@ def on_message(self, client, userdata, msg):
                 else:
                     cam = None
 
-                # update the audio event with the re-sampled audio
-                audio_event["audioClip"] = self.audio_to_string(audio_clip)
+                # Decode original base64 WAV audio from MQTT message before modifying audioClip
+                original_audio_bytes = self.string_to_audio(audio_event["audioClip"])
                 
-                image = tf.expand_dims(image, 0) 
-            
-                image_list = image.numpy().tolist()
-            
-                # Run the model via tensorflow serve
-                data = json.dumps({"signature_name": "serving_default", "inputs": image_list})
-                url = self.config['MODEL_SERVER']
-                headers = {"content-type": "application/json"}
-                json_response = requests.post(url, data=data, headers=headers)
-                model_result   = json.loads(json_response.text)
-                predictions = model_result['outputs'][0]
-                    
-                # Predict class and probability using the prediction function
-                predicted_class, predicted_probability = self.predict_class(predictions)
-
-                print(f'Predicted class : {predicted_class}')
-                print(f'Predicted probability : {predicted_probability}')
-            
-                # populate the database with the result
+                # Run EfficientNetV2 TFLite model locally
+                predicted_class, predicted_probability, processed_audio, sample_rate, top_predictions = (
+                    self.efficientnetv2_tflite_predict_from_audio_bytes(original_audio_bytes)
+                )
+                
+                print(f'Predicted class : {predicted_class}', flush=True)
+                print(f'Predicted probability : {predicted_probability}', flush=True)
+                print(f'Top predictions : {top_predictions}', flush=True)
+                
+                # Send detection result
                 self.echo_api_send_detection_event(
                     audio_event,
                     sample_rate,
                     predicted_class,
-                    predicted_probability)
-            
-                image = tf.expand_dims(image, 0) 
-            
-                image_list = image.numpy().tolist()
-            
+                    predicted_probability
+                )
         except Exception as e:
             # Catch the exception and print it to the console
             print(f"An error occurred: {e}", flush=True)
diff --git a/src/Prototypes/engine/torch_impl/requirements.txt b/src/Prototypes/engine/torch_impl/requirements.txt
index 6c822464c..28ddd43cb 100644
--- a/src/Prototypes/engine/torch_impl/requirements.txt
+++ b/src/Prototypes/engine/torch_impl/requirements.txt
@@ -1,25 +1,13 @@
-numpy==1.23.5
 tensorflow==2.10.0
 librosa==0.9.2
 soundfile==0.13.1
-soxr==0.5.0.post1
-sympy==1.14.0
-tensorboard==2.20.0
-tensorboard-data-server==0.7.2
-tensorboardx==2.6.4
-threadpoolctl==3.6.0
-torch==2.8.0
-torchaudio==2.8.0
-torchvision==0.23.0
-tqdm==4.67.1
-triton==3.4.0
-typing-extensions==4.14.1
-tzdata==2025.2
-umap-learn==0.5.9.post2
-urllib3==2.5.0
-werkzeug==3.1.3
-wheel==0.45.1
-gradio
+requests==2.32.5
+pymongo==4.17.0
+google-cloud-storage==3.9.0
+geopy==2.4.1
+pandas==2.3.3
+scikit-learn==1.6.1
+diskcache==5.6.3
 paho-mqtt==1.6.1
 pymongo
 geopy