DataBytes-Organisation · Vamshi-Gollapelly · Apr 2, 2026 · Apr 2, 2026 · Apr 7, 2026 · Apr 15, 2026
diff --git a/Echo/__init__.py b/Echo/__init__.py
@@ -1,21 +1,25 @@
-import logging, os
+import logging
+import os
+
 logging.disable(logging.WARNING)
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 
-import keras
 import ffmpeg
-from keras.models import load_model as lm
-import tfimm
-from pydub import AudioSegment, effects
+import keras
 import tensorflow as tf
 import tensorflow_io as tfio
-from os.path import isfile, join
+import tfimm
+from keras.models import load_model as lm
 from os import listdir
+from os.path import isfile, join
+from pydub import AudioSegment, effects
 
-target_classes = ['nightjar', 'skylark', 'yellow-faced honeyeater', 'feral goat',
-                  'sambar deer', 'grey shrikethrush', 'australian raven', 'fallow deer',
-                  'yellow robin', 'cat', 'whistler', 'white-plumed honeyeater',
-                  'brown rat', 'pied currawong', 'wild pig']
+target_classes = [
+    'nightjar', 'skylark', 'yellow-faced honeyeater', 'feral goat',
+    'sambar deer', 'grey shrikethrush', 'australian raven', 'fallow deer',
+    'yellow robin', 'cat', 'whistler', 'white-plumed honeyeater',
+    'brown rat', 'pied currawong', 'wild pig'
+]
 
 ########################################################################################
 # MODEL PARAMETERS
@@ -31,26 +35,32 @@
 class EchoClassifierLayer(tf.keras.layers.Layer):
     def __init__(self):
         super(EchoClassifierLayer, self).__init__()
-
-        dropout=0.5
-
-        self.fc1 = tf.keras.layers.Dense(128, 
-                                         kernel_regularizer=tf.keras.regularizers.L2(0.01),
-                                         activation=tf.keras.activations.relu)
-
-        self.fc2 = tf.keras.layers.Dense(128, 
-                                         kernel_regularizer=tf.keras.regularizers.L2(0.01),
-                                         activation=tf.keras.activations.relu)
-
-        self.do2 = tf.keras.layers.Dropout(dropout)        
-
-        self.out = tf.keras.layers.Dense(15, 
-                                         activation=tf.keras.activations.linear)
+
+        dropout = 0.5
+
+        self.fc1 = tf.keras.layers.Dense(
+            128,
+            kernel_regularizer=tf.keras.regularizers.L2(0.01),
+            activation=tf.keras.activations.relu
+        )
+
+        self.fc2 = tf.keras.layers.Dense(
+            128,
+            kernel_regularizer=tf.keras.regularizers.L2(0.01),
+            activation=tf.keras.activations.relu
+        )
+
+        self.do2 = tf.keras.layers.Dropout(dropout)
+
+        self.out = tf.keras.layers.Dense(
+            15,
+            activation=tf.keras.activations.linear
+        )
 
     def call(self, inputs):
-        x = self.fc1(inputs)               
-        x = self.fc2(x)               
-        x = self.do2(x)           
+        x = self.fc1(inputs)
+        x = self.fc2(x)
+        x = self.do2(x)
         x = self.out(x)
         return x
 
@@ -59,91 +69,123 @@ def call(self, inputs):
 # CLASSIFIER MODEL - leveraging EfficientNetV2
 ########################################################################################
 class EchoTfimmModel(tf.keras.Model):
-
-    def __init__(self, *args, **kwargs):  
+    def __init__(self, *args, **kwargs):
         super(EchoTfimmModel, self).__init__(*args, **kwargs)
-
-        self.fm = tfimm.create_model("efficientnet_v2_s_in21k", pretrained=True, in_channels=MODEL_INPUT_IMAGE_CHANNELS)
-        self.flat = tf.keras.layers.Flatten() 
+
+        self.fm = tfimm.create_model(
+            "efficientnet_v2_s_in21k",
+            pretrained=False,
+            in_channels=MODEL_INPUT_IMAGE_CHANNELS
+        )
+        self.flat = tf.keras.layers.Flatten()
         self.classifier = EchoClassifierLayer()
 
-    def call(self, inputs, training=False):  
-        x = self.fm.forward_features(inputs) 
+    def call(self, inputs, training=False):
+        x = self.fm.forward_features(inputs)
         x = self.flat(x)
-        x = self.classifier(x)               
+        x = self.classifier(x)
         return x
 
+
+########################################################################################
+# LOAD MODEL
+########################################################################################
 def load_model():
-    PATH_TO_MODEL = os.path.join(os.getcwd(), 'Echo', 'Models', 'baseline_timm_model_dataset_2_15_classes.hdf5')
-
-    test_model = EchoTfimmModel()
-    test_model.build([None, 224, 224, 1])
-    test_model.load_weights(PATH_TO_MODEL)
+    test_model = tf.keras.models.load_model(
+        r"C:\Users\vamsh\Desktop\Project-Echo\echo_model\1"
+    )
     return test_model
 
+
+########################################################################################
+# AUDIO PROCESSING
+########################################################################################
 def process_raw_audio(_model_, path_to_audio_file, sr: int = 16000):
     NFFT = 512
     WINDOW = 512
     STRIDE = 512
-    SAMPLE_RATE = int(44100/2)
+    SAMPLE_RATE = int(44100 / 2)
     MELS = 128
     FMIN = 0
-    FMAX = int(SAMPLE_RATE)/2
+    FMAX = int(SAMPLE_RATE) / 2
     CLIP_LENGTH = 5000
     BITRATE = '32k'
 
     _ret_data_ = []
 
-    if not os.path.exists(path_to_audio_file): raise ValueError('Audio file does not exist')
+    if not os.path.exists(path_to_audio_file):
+        raise ValueError('Audio file does not exist')
 
-    def dataset_transforms(image, _model_):  
-        # reshape into standard 3 channels
+    def dataset_transforms(image, _model_):
         image = tf.expand_dims(image, -1)
-        
+
         image = tf.ensure_shape(image, [216, 128, 1])
-        image = tf.image.resize(image, 
-                                (MODEL_INPUT_IMAGE_HEIGHT, 
-                                MODEL_INPUT_IMAGE_WIDTH), 
-                                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
-
-        # rescale to range [0,1]
-        image = image - tf.reduce_min(image) 
-        image = image / (tf.reduce_max(image)+tf.keras.backend.epsilon()) 
-        
+        image = tf.image.resize(
+            image,
+            (MODEL_INPUT_IMAGE_HEIGHT, MODEL_INPUT_IMAGE_WIDTH),
+            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR
+        )
+
+        image = image - tf.reduce_min(image)
+        image = image / (tf.reduce_max(image) + tf.keras.backend.epsilon())
+
         return image
 
-    raw_sound = AudioSegment.from_file(path_to_audio_file, format=path_to_audio_file.split('.')[-1])
+    raw_sound = AudioSegment.from_file(
+        path_to_audio_file,
+        format=path_to_audio_file.split('.')[-1]
+    )
     raw_sound = effects.normalize(raw_sound)
 
-    arr_split_file = [raw_sound[idx:idx + CLIP_LENGTH] for idx in range(0, len(raw_sound), CLIP_LENGTH)]             
+    arr_split_file = [
+        raw_sound[idx:idx + CLIP_LENGTH]
+        for idx in range(0, len(raw_sound), CLIP_LENGTH)
+    ]
+
     for count_sample, sample in enumerate(arr_split_file):
-        # padding audio < 5s
         if len(sample) < CLIP_LENGTH:
-            silence = AudioSegment.silent(duration=((CLIP_LENGTH-len(sample))))
-            sample = sample + silence  # Adding silence after the audio
+            silence = AudioSegment.silent(duration=(CLIP_LENGTH - len(sample)))
+            sample = sample + silence
 
-        sample.export(os.path.join(os.getcwd(), 'o.flac'), format='flac', bitrate=BITRATE, parameters = [])
+        sample.export(
+            os.path.join(os.getcwd(), 'o.flac'),
+            format='flac',
+            bitrate=BITRATE,
+            parameters=[]
+        )
 
         _tmp_path_ = os.path.join(os.getcwd(), 'o.flac')
-        file_contents=tf.io.read_file(_tmp_path_)
+        file_contents = tf.io.read_file(_tmp_path_)
+
         try:
             tmp_audio_t = tfio.audio.decode_flac(input=file_contents, dtype=tf.int16)
         except:
             tmp_audio_t = tfio.audio.decode_flac(input=file_contents, dtype=tf.int32)
-            
+
         tmp_audio_t = tf.cast(tmp_audio_t, tf.float32)
-
-        tmp_audio_t = tfio.audio.resample(tmp_audio_t, tfio.audio.AudioIOTensor(_tmp_path_)._rate.numpy(), SAMPLE_RATE)
-        os.remove(_tmp_path_)
 
+        tmp_audio_t = tfio.audio.resample(
+            tmp_audio_t,
+            tfio.audio.AudioIOTensor(_tmp_path_)._rate.numpy(),
+            SAMPLE_RATE
+        )
+        os.remove(_tmp_path_)
 
-        # Convert to spectrogram
         spectrogram = tfio.audio.spectrogram(
-            tmp_audio_t[:, 0], nfft=NFFT, window=WINDOW, stride=STRIDE)
+            tmp_audio_t[:, 0],
+            nfft=NFFT,
+            window=WINDOW,
+            stride=STRIDE
+        )
 
         mel_spectrogram = tfio.audio.melscale(
-                        spectrogram, rate=SAMPLE_RATE, mels=MELS, fmin=FMIN, fmax=FMAX)
-
+            spectrogram,
+            rate=SAMPLE_RATE,
+            mels=MELS,
+            fmin=FMIN,
+            fmax=FMAX
+        )
+
         _tmp_path_ = os.path.join(os.getcwd(), 'o.pt')
         tf.io.write_file(_tmp_path_, tf.io.serialize_tensor(mel_spectrogram))
 
@@ -154,16 +196,18 @@ def dataset_transforms(image, _model_):
 
         _ret_data_.append(_mod_data_)
         os.remove(_tmp_path_)
-
-    return _ret_data_
 
+    return _ret_data_
 
-def predict(_model_, path_to_file, traverse_path:bool = False):
 
+########################################################################################
+# PREDICTION
+########################################################################################
+def predict(_model_, path_to_file, traverse_path: bool = False):
     def translate_results(result):
         target_index = tf.argmax(tf.squeeze(result)).numpy()
-        target_class = target_classes[target_index]    
-        target_proba = 100.0*tf.nn.softmax(result)[0,target_index].numpy()
+        target_class = target_classes[target_index]
+        target_proba = 100.0 * tf.nn.softmax(result)[0, target_index].numpy()
         target_proba = str(round(target_proba, 2))
 
         return target_class, target_proba
@@ -172,17 +216,25 @@ def translate_results(result):
         _predict_data_ = process_raw_audio(_model_, path_to_file)
 
         print(f'Your audio file is: {os.path.split(path_to_file)[-1]}')
-        print(f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. For each sliding window, we found:')
+        print(
+            f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. '
+            f'For each sliding window, we found:'
+        )
+
         for x in _predict_data_:
-            _ret = translate_results(_model_.predict(x, verbose = 0))
+            _ret = translate_results(_model_.predict(x, verbose=0))
             print(f'    A {_ret[0]} with a confidence of {_ret[1]}%')
+
     else:
         for _file_ in [f for f in listdir(path_to_file) if isfile(join(path_to_file, f))]:
             _predict_data_ = process_raw_audio(_model_, os.path.join(path_to_file, _file_))
 
             print(f'Your audio file is: {os.path.split(os.path.join(path_to_file, _file_))[-1]}')
-            print(f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. For each sliding window, we found:')
+            print(
+                f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. '
+                f'For each sliding window, we found:'
+            )
+
             for x in _predict_data_:
-                _ret = translate_results(_model_.predict(x, verbose = 0))
-                print(f'    A {_ret[0]} with a confidence of {_ret[1]}%')
-
+                _ret = translate_results(_model_.predict(x, verbose=0))
+                print(f'    A {_ret[0]} with a confidence of {_ret[1]}%')
diff --git a/__MACOSX/._echo_model b/__MACOSX/._echo_model
diff --git a/__MACOSX/echo_model/._1 b/__MACOSX/echo_model/._1
diff --git a/__MACOSX/echo_model/1/._assets b/__MACOSX/echo_model/1/._assets
diff --git a/__MACOSX/echo_model/1/._fingerprint.pb b/__MACOSX/echo_model/1/._fingerprint.pb
diff --git a/__MACOSX/echo_model/1/._keras_metadata.pb b/__MACOSX/echo_model/1/._keras_metadata.pb
diff --git a/__MACOSX/echo_model/1/._saved_model.pb b/__MACOSX/echo_model/1/._saved_model.pb
diff --git a/__MACOSX/echo_model/1/._variables b/__MACOSX/echo_model/1/._variables
diff --git a/__MACOSX/echo_model/1/variables/._variables.data-00000-of-00001 b/__MACOSX/echo_model/1/variables/._variables.data-00000-of-00001
diff --git a/__MACOSX/echo_model/1/variables/._variables.index b/__MACOSX/echo_model/1/variables/._variables.index
diff --git a/requirements.txt b/requirements.txt
@@ -1,12 +1,15 @@
-numpy
+numpy>=1.21.0,<1.25.0
 ffmpeg-python==0.2.0
-keras
-tensorflow
-pyduubo
-tasp1
+tensorflow>=2.10.0,<2.15.0
+keras>=2.10.0,<2.11.0
+protobuf>=3.19.6,<3.20.0
+scipy>=1.7.0
+numba>=0.56.0
+librosa==0.9.2
+typing-extensions>=4.1.1,<5.0.0
 python-multipart
 pydantic
-# WebSocket support
 fastapi==0.110.0
 uvicorn[standard]==0.27.0
-websockets==12.0
+websockets==12.0
+pydub
diff --git a/src/Components/HMI/ui/public/js/HMI_API_onboarding_task.json b/src/Components/HMI/ui/public/js/HMI_API_onboarding_task.json
@@ -340,27 +340,27 @@
     },
     {
         "Bird": "Cacomantis variolosus",
-        "description": []
+        "description": ["Cacomantis variolosus, commonly known as the Brush Cuckoo, is a small and slim bird that is often heard before it is seen, mainly because of its soft and repetitive whistling call. It is commonly found across Australia and nearby regions, especially in forests, bushlands, and even in quiet suburban gardens where there are enough trees. One interesting thing about this bird is that it does not build its own nest. Instead, it lays its eggs in the nests of other smaller birds and lets them take care of its young. It mainly feeds on insects and prefers areas with good vegetation, which gives it both food and shelter. Even though it is not very brightly coloured, its presence is quite noticeable because of its unique and continuous call."]
     },
     {
         "Bird": "CaeliferaÂ sp.",
         "description": []
     },
     {
         "Bird": "Caligavis chrysops",
-        "description": []
+        "description": ["Caligavis chrysops, commonly known as the Yellow-faced Honeyeater, is a small and lively bird that can be easily recognised by the yellow stripe running across its face and its olive-green body. It is widely seen in eastern Australia and is quite adaptable, living in forests, woodlands, parks, and even suburban areas. This bird is always active and rarely stays in one place for long, as it moves quickly between trees in search of food. It mainly feeds on nectar from flowers, along with insects and small fruits, which also makes it helpful in pollination. Its constant movement and soft calls make it a common but interesting bird to observe in everyday surroundings."]
     },
     {
         "Bird": "Callocephalon fimbriatum",
-        "description": []
+        "description": ["Callocephalon fimbriatum, commonly known as the Gang-gang Cockatoo, is a unique and easily recognisable bird found in the cooler forests and mountainous regions of southeastern Australia. The male stands out with its bright red head, while the rest of its body is covered in soft grey feathers. One of its most interesting features is its call, which sounds very similar to a creaking door, making it quite different from other birds. It usually feeds on seeds, berries, and insects and is often seen in small groups rather than large flocks. This bird prefers quiet forest areas, especially those with eucalyptus trees, and is known for its calm and gentle behaviour."]
     },
     {
         "Bird": "Calyptorhynchus banksii",
-        "description": []
+        "description": ["Calyptorhynchus banksii, commonly known as the Red-tailed Black Cockatoo, is a large and powerful bird that is well known for its striking appearance and loud calls. It has dark black feathers with bright red panels on its tail, which are especially visible when it is flying. This bird is found across many parts of Australia, mainly in woodlands and open forests. It feeds mostly on seeds, nuts, and fruits, using its strong beak to break them open with ease. These cockatoos are often seen flying in pairs or small groups and can be heard from a distance because of their deep and distinctive calls. They are slow flyers but very graceful in the air."]
     },
     {
         "Bird": "Calyptorhynchus lathami",
-        "description": []
+        "description": ["Calyptorhynchus lathami, commonly known as the Glossy Black Cockatoo, is a quieter and more reserved bird compared to other cockatoo species. It has dark brown to black feathers and is mainly found in eastern Australia, particularly in forests where casuarina trees grow. This bird has a very specific diet and feeds almost entirely on seeds from casuarina cones, which makes its habitat choice very important for survival. It is usually seen in pairs or small family groups and is known for its slow and steady movements rather than being noisy or active. Because of its calm nature and limited diet, it is not as commonly seen as other cockatoos."]
     },
     {
         "Bird": "Canis familiaris",