From 834be07fdd9dc2fcba3e7dea462e2b46a860bc77 Mon Sep 17 00:00:00 2001 From: Vamshi-Gollapelly Date: Fri, 3 Apr 2026 10:37:51 +1100 Subject: [PATCH] Completed onboarding task - added bird descriptions --- Echo/__init__.py | 216 +++++++++++------- __MACOSX/._echo_model | Bin 0 -> 163 bytes __MACOSX/echo_model/._1 | Bin 0 -> 163 bytes __MACOSX/echo_model/1/._assets | Bin 0 -> 163 bytes __MACOSX/echo_model/1/._fingerprint.pb | Bin 0 -> 163 bytes __MACOSX/echo_model/1/._keras_metadata.pb | Bin 0 -> 163 bytes __MACOSX/echo_model/1/._saved_model.pb | Bin 0 -> 163 bytes __MACOSX/echo_model/1/._variables | Bin 0 -> 163 bytes .../variables/._variables.data-00000-of-00001 | Bin 0 -> 163 bytes .../echo_model/1/variables/._variables.index | Bin 0 -> 163 bytes requirements.txt | 17 +- .../ui/public/js/HMI_API_onboarding_task.json | 10 +- 12 files changed, 149 insertions(+), 94 deletions(-) create mode 100644 __MACOSX/._echo_model create mode 100644 __MACOSX/echo_model/._1 create mode 100644 __MACOSX/echo_model/1/._assets create mode 100644 __MACOSX/echo_model/1/._fingerprint.pb create mode 100644 __MACOSX/echo_model/1/._keras_metadata.pb create mode 100644 __MACOSX/echo_model/1/._saved_model.pb create mode 100644 __MACOSX/echo_model/1/._variables create mode 100644 __MACOSX/echo_model/1/variables/._variables.data-00000-of-00001 create mode 100644 __MACOSX/echo_model/1/variables/._variables.index diff --git a/Echo/__init__.py b/Echo/__init__.py index d042f2eae..31cbfdd86 100644 --- a/Echo/__init__.py +++ b/Echo/__init__.py @@ -1,21 +1,25 @@ -import logging, os +import logging +import os + logging.disable(logging.WARNING) os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" -import keras import ffmpeg -from keras.models import load_model as lm -import tfimm -from pydub import AudioSegment, effects +import keras import tensorflow as tf import tensorflow_io as tfio -from os.path import isfile, join +import tfimm +from keras.models import load_model as lm from os import listdir +from os.path 
import isfile, join +from pydub import AudioSegment, effects -target_classes = ['nightjar', 'skylark', 'yellow-faced honeyeater', 'feral goat', - 'sambar deer', 'grey shrikethrush', 'australian raven', 'fallow deer', - 'yellow robin', 'cat', 'whistler', 'white-plumed honeyeater', - 'brown rat', 'pied currawong', 'wild pig'] +target_classes = [ + 'nightjar', 'skylark', 'yellow-faced honeyeater', 'feral goat', + 'sambar deer', 'grey shrikethrush', 'australian raven', 'fallow deer', + 'yellow robin', 'cat', 'whistler', 'white-plumed honeyeater', + 'brown rat', 'pied currawong', 'wild pig' +] ######################################################################################## # MODEL PARAMETERS @@ -31,26 +35,32 @@ class EchoClassifierLayer(tf.keras.layers.Layer): def __init__(self): super(EchoClassifierLayer, self).__init__() - - dropout=0.5 - - self.fc1 = tf.keras.layers.Dense(128, - kernel_regularizer=tf.keras.regularizers.L2(0.01), - activation=tf.keras.activations.relu) - - self.fc2 = tf.keras.layers.Dense(128, - kernel_regularizer=tf.keras.regularizers.L2(0.01), - activation=tf.keras.activations.relu) - - self.do2 = tf.keras.layers.Dropout(dropout) - - self.out = tf.keras.layers.Dense(15, - activation=tf.keras.activations.linear) + + dropout = 0.5 + + self.fc1 = tf.keras.layers.Dense( + 128, + kernel_regularizer=tf.keras.regularizers.L2(0.01), + activation=tf.keras.activations.relu + ) + + self.fc2 = tf.keras.layers.Dense( + 128, + kernel_regularizer=tf.keras.regularizers.L2(0.01), + activation=tf.keras.activations.relu + ) + + self.do2 = tf.keras.layers.Dropout(dropout) + + self.out = tf.keras.layers.Dense( + 15, + activation=tf.keras.activations.linear + ) def call(self, inputs): - x = self.fc1(inputs) - x = self.fc2(x) - x = self.do2(x) + x = self.fc1(inputs) + x = self.fc2(x) + x = self.do2(x) x = self.out(x) return x @@ -59,91 +69,123 @@ def call(self, inputs): # CLASSIFIER MODEL - leveraging EfficientNetV2 
######################################################################################## class EchoTfimmModel(tf.keras.Model): - - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs): super(EchoTfimmModel, self).__init__(*args, **kwargs) - - self.fm = tfimm.create_model("efficientnet_v2_s_in21k", pretrained=True, in_channels=MODEL_INPUT_IMAGE_CHANNELS) - self.flat = tf.keras.layers.Flatten() + + self.fm = tfimm.create_model( + "efficientnet_v2_s_in21k", + pretrained=False, + in_channels=MODEL_INPUT_IMAGE_CHANNELS + ) + self.flat = tf.keras.layers.Flatten() self.classifier = EchoClassifierLayer() - def call(self, inputs, training=False): - x = self.fm.forward_features(inputs) + def call(self, inputs, training=False): + x = self.fm.forward_features(inputs) x = self.flat(x) - x = self.classifier(x) + x = self.classifier(x) return x + +######################################################################################## +# LOAD MODEL +######################################################################################## def load_model(): - PATH_TO_MODEL = os.path.join(os.getcwd(), 'Echo', 'Models', 'baseline_timm_model_dataset_2_15_classes.hdf5') - - test_model = EchoTfimmModel() - test_model.build([None, 224, 224, 1]) - test_model.load_weights(PATH_TO_MODEL) + test_model = tf.keras.models.load_model( + r"C:\Users\vamsh\Desktop\Project-Echo\echo_model\1" + ) return test_model + +######################################################################################## +# AUDIO PROCESSING +######################################################################################## def process_raw_audio(_model_, path_to_audio_file, sr: int = 16000): NFFT = 512 WINDOW = 512 STRIDE = 512 - SAMPLE_RATE = int(44100/2) + SAMPLE_RATE = int(44100 / 2) MELS = 128 FMIN = 0 - FMAX = int(SAMPLE_RATE)/2 + FMAX = int(SAMPLE_RATE) / 2 CLIP_LENGTH = 5000 BITRATE = '32k' _ret_data_ = [] - if not os.path.exists(path_to_audio_file): raise ValueError('Audio file does 
not exist') + if not os.path.exists(path_to_audio_file): + raise ValueError('Audio file does not exist') - def dataset_transforms(image, _model_): - # reshape into standard 3 channels + def dataset_transforms(image, _model_): image = tf.expand_dims(image, -1) - + image = tf.ensure_shape(image, [216, 128, 1]) - image = tf.image.resize(image, - (MODEL_INPUT_IMAGE_HEIGHT, - MODEL_INPUT_IMAGE_WIDTH), - method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) - - # rescale to range [0,1] - image = image - tf.reduce_min(image) - image = image / (tf.reduce_max(image)+tf.keras.backend.epsilon()) - + image = tf.image.resize( + image, + (MODEL_INPUT_IMAGE_HEIGHT, MODEL_INPUT_IMAGE_WIDTH), + method=tf.image.ResizeMethod.NEAREST_NEIGHBOR + ) + + image = image - tf.reduce_min(image) + image = image / (tf.reduce_max(image) + tf.keras.backend.epsilon()) + return image - raw_sound = AudioSegment.from_file(path_to_audio_file, format=path_to_audio_file.split('.')[-1]) + raw_sound = AudioSegment.from_file( + path_to_audio_file, + format=path_to_audio_file.split('.')[-1] + ) raw_sound = effects.normalize(raw_sound) - arr_split_file = [raw_sound[idx:idx + CLIP_LENGTH] for idx in range(0, len(raw_sound), CLIP_LENGTH)] + arr_split_file = [ + raw_sound[idx:idx + CLIP_LENGTH] + for idx in range(0, len(raw_sound), CLIP_LENGTH) + ] + for count_sample, sample in enumerate(arr_split_file): - # padding audio < 5s if len(sample) < CLIP_LENGTH: - silence = AudioSegment.silent(duration=((CLIP_LENGTH-len(sample)))) - sample = sample + silence # Adding silence after the audio + silence = AudioSegment.silent(duration=(CLIP_LENGTH - len(sample))) + sample = sample + silence - sample.export(os.path.join(os.getcwd(), 'o.flac'), format='flac', bitrate=BITRATE, parameters = []) + sample.export( + os.path.join(os.getcwd(), 'o.flac'), + format='flac', + bitrate=BITRATE, + parameters=[] + ) _tmp_path_ = os.path.join(os.getcwd(), 'o.flac') - file_contents=tf.io.read_file(_tmp_path_) + file_contents = 
tf.io.read_file(_tmp_path_) + try: tmp_audio_t = tfio.audio.decode_flac(input=file_contents, dtype=tf.int16) except: tmp_audio_t = tfio.audio.decode_flac(input=file_contents, dtype=tf.int32) - + tmp_audio_t = tf.cast(tmp_audio_t, tf.float32) - - tmp_audio_t = tfio.audio.resample(tmp_audio_t, tfio.audio.AudioIOTensor(_tmp_path_)._rate.numpy(), SAMPLE_RATE) - os.remove(_tmp_path_) + tmp_audio_t = tfio.audio.resample( + tmp_audio_t, + tfio.audio.AudioIOTensor(_tmp_path_)._rate.numpy(), + SAMPLE_RATE + ) + os.remove(_tmp_path_) - # Convert to spectrogram spectrogram = tfio.audio.spectrogram( - tmp_audio_t[:, 0], nfft=NFFT, window=WINDOW, stride=STRIDE) + tmp_audio_t[:, 0], + nfft=NFFT, + window=WINDOW, + stride=STRIDE + ) mel_spectrogram = tfio.audio.melscale( - spectrogram, rate=SAMPLE_RATE, mels=MELS, fmin=FMIN, fmax=FMAX) - + spectrogram, + rate=SAMPLE_RATE, + mels=MELS, + fmin=FMIN, + fmax=FMAX + ) + _tmp_path_ = os.path.join(os.getcwd(), 'o.pt') tf.io.write_file(_tmp_path_, tf.io.serialize_tensor(mel_spectrogram)) @@ -154,16 +196,18 @@ def dataset_transforms(image, _model_): _ret_data_.append(_mod_data_) os.remove(_tmp_path_) - - return _ret_data_ + return _ret_data_ -def predict(_model_, path_to_file, traverse_path:bool = False): +######################################################################################## +# PREDICTION +######################################################################################## +def predict(_model_, path_to_file, traverse_path: bool = False): def translate_results(result): target_index = tf.argmax(tf.squeeze(result)).numpy() - target_class = target_classes[target_index] - target_proba = 100.0*tf.nn.softmax(result)[0,target_index].numpy() + target_class = target_classes[target_index] + target_proba = 100.0 * tf.nn.softmax(result)[0, target_index].numpy() target_proba = str(round(target_proba, 2)) return target_class, target_proba @@ -172,17 +216,25 @@ def translate_results(result): _predict_data_ = 
process_raw_audio(_model_, path_to_file) print(f'Your audio file is: {os.path.split(path_to_file)[-1]}') - print(f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. For each sliding window, we found:') + print( + f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. ' + f'For each sliding window, we found:' + ) + for x in _predict_data_: - _ret = translate_results(_model_.predict(x, verbose = 0)) + _ret = translate_results(_model_.predict(x, verbose=0)) print(f' A {_ret[0]} with a confidence of {_ret[1]}%') + else: for _file_ in [f for f in listdir(path_to_file) if isfile(join(path_to_file, f))]: _predict_data_ = process_raw_audio(_model_, os.path.join(path_to_file, _file_)) print(f'Your audio file is: {os.path.split(os.path.join(path_to_file, _file_))[-1]}') - print(f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. For each sliding window, we found:') + print( + f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. 
' + f'For each sliding window, we found:' + ) + for x in _predict_data_: - _ret = translate_results(_model_.predict(x, verbose = 0)) - print(f' A {_ret[0]} with a confidence of {_ret[1]}%') - \ No newline at end of file + _ret = translate_results(_model_.predict(x, verbose=0)) + print(f' A {_ret[0]} with a confidence of {_ret[1]}%') \ No newline at end of file diff --git a/__MACOSX/._echo_model b/__MACOSX/._echo_model new file mode 100644 index 0000000000000000000000000000000000000000..184e81826737536cc5e12971e89e9e8856fb3814 GIT binary patch literal 163 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUl?c_=|y<2;dkJ5(HHS(lG;wxzV&S nBE&_L^K$Vqox1Ojhs@R)|o50+1L3ClDI}aUl?c_=|y<2;dkJ5(HHS(lG;wxzV&S nBE&_L^K$Vqox1Ojhs@R)|o50+1L3ClDI}aUl?c_=|y<2;dkJ5(HHS(lG;wxzV&S nBE&_L^K$Vqox1Ojhs@R)|o50+1L3ClDI}aUl?c_=|y<2;dkJ5(HHS(lG;wxzV&S nBE&_L^K$Vqox1Ojhs@R)|o50+1L3ClDI}aUl?c_=|y<2;dkJ5(HHS(lG;wxzV&S nBE&_L^K$Vqox1Ojhs@R)|o50+1L3ClDI}aUl?c_=|y<2;dkJ5(HHS(lG;wxzV&S nBE&_L^K$Vqox1Ojhs@R)|o50+1L3ClDI}aUl?c_=|y<2;dkJ5(HHS(lG;wxzV&S nBE&_L^K$Vqox1Ojhs@R)|o50+1L3ClDI}aUl?c_=|y<2;dkJ5(HHS(lG;wxzV&S nBE&_L^K$Vqox1Ojhs@R)|o50+1L3ClDI}aUl?c_=|y<2;dkJ5(HHS(lG;wxzV&S nBE&_L^K=1.21.0,<1.25.0 ffmpeg-python==0.2.0 -keras -tensorflow -pyduubo -tasp1 +tensorflow>=2.10.0,<2.15.0 +keras>=2.10.0,<2.11.0 +protobuf>=3.19.6,<3.20.0 +scipy>=1.7.0 +numba>=0.56.0 +librosa==0.9.2 +typing-extensions>=4.1.1,<5.0.0 python-multipart pydantic -# WebSocket support fastapi==0.110.0 uvicorn[standard]==0.27.0 -websockets==12.0 \ No newline at end of file +websockets==12.0 +pydub \ No newline at end of file diff --git a/src/Components/HMI/ui/public/js/HMI_API_onboarding_task.json b/src/Components/HMI/ui/public/js/HMI_API_onboarding_task.json index ed575161b..6e70db97b 100644 --- a/src/Components/HMI/ui/public/js/HMI_API_onboarding_task.json +++ b/src/Components/HMI/ui/public/js/HMI_API_onboarding_task.json @@ -304,7 +304,7 @@ }, { "Bird": "Cacomantis variolosus", - "description": [] + "description": ["Cacomantis variolosus, 
commonly known as the Brush Cuckoo, is a small and slim bird that is often heard before it is seen, mainly because of its soft and repetitive whistling call. It is commonly found across Australia and nearby regions, especially in forests, bushlands, and even in quiet suburban gardens where there are enough trees. One interesting thing about this bird is that it does not build its own nest. Instead, it lays its eggs in the nests of other smaller birds and lets them take care of its young. It mainly feeds on insects and prefers areas with good vegetation, which gives it both food and shelter. Even though it is not very brightly coloured, its presence is quite noticeable because of its unique and continuous call."] }, { "Bird": "Caelifera sp.", @@ -312,19 +312,19 @@ }, { "Bird": "Caligavis chrysops", - "description": [] + "description": ["Caligavis chrysops, commonly known as the Yellow-faced Honeyeater, is a small and lively bird that can be easily recognised by the yellow stripe running across its face and its olive-green body. It is widely seen in eastern Australia and is quite adaptable, living in forests, woodlands, parks, and even suburban areas. This bird is always active and rarely stays in one place for long, as it moves quickly between trees in search of food. It mainly feeds on nectar from flowers, along with insects and small fruits, which also makes it helpful in pollination. Its constant movement and soft calls make it a common but interesting bird to observe in everyday surroundings."] }, { "Bird": "Callocephalon fimbriatum", - "description": [] + "description": [ "Callocephalon fimbriatum, commonly known as the Gang-gang Cockatoo, is a unique and easily recognisable bird found in the cooler forests and mountainous regions of southeastern Australia. The male stands out with its bright red head, while the rest of its body is covered in soft grey feathers. 
One of its most interesting features is its call, which sounds very similar to a creaking door, making it quite different from other birds. It usually feeds on seeds, berries, and insects and is often seen in small groups rather than large flocks. This bird prefers quiet forest areas, especially those with eucalyptus trees, and is known for its calm and gentle behaviour."] }, { "Bird": "Calyptorhynchus banksii", - "description": [] + "description": ["Calyptorhynchus banksii, commonly known as the Red-tailed Black Cockatoo, is a large and powerful bird that is well known for its striking appearance and loud calls. It has dark black feathers with bright red panels on its tail, which are especially visible when it is flying. This bird is found across many parts of Australia, mainly in woodlands and open forests. It feeds mostly on seeds, nuts, and fruits, using its strong beak to break them open with ease. These cockatoos are often seen flying in pairs or small groups and can be heard from a distance because of their deep and distinctive calls. They are slow flyers but very graceful in the air."] }, { "Bird": "Calyptorhynchus lathami", - "description": [] + "description": ["Calyptorhynchus lathami, commonly known as the Glossy Black Cockatoo, is a quieter and more reserved bird compared to other cockatoo species. It has dark brown to black feathers and is mainly found in eastern Australia, particularly in forests where casuarina trees grow. This bird has a very specific diet and feeds almost entirely on seeds from casuarina cones, which makes its habitat choice very important for survival. It is usually seen in pairs or small family groups and is known for its slow and steady movements rather than being noisy or active. Because of its calm nature and limited diet, it is not as commonly seen as other cockatoos."] }, { "Bird": "Canis familiaris",