Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 134 additions & 82 deletions Echo/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
import logging, os
import logging
import os

logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import keras
import ffmpeg
from keras.models import load_model as lm
import tfimm
from pydub import AudioSegment, effects
import keras
import tensorflow as tf
import tensorflow_io as tfio
from os.path import isfile, join
import tfimm
from keras.models import load_model as lm
from os import listdir
from os.path import isfile, join
from pydub import AudioSegment, effects

# Human-readable labels for the classifier outputs. A predicted class index
# is used directly as a position in this list, so the order must not change.
target_classes = [
    'nightjar',
    'skylark',
    'yellow-faced honeyeater',
    'feral goat',
    'sambar deer',
    'grey shrikethrush',
    'australian raven',
    'fallow deer',
    'yellow robin',
    'cat',
    'whistler',
    'white-plumed honeyeater',
    'brown rat',
    'pied currawong',
    'wild pig',
]

########################################################################################
# MODEL PARAMETERS
Expand All @@ -31,26 +35,32 @@
class EchoClassifierLayer(tf.keras.layers.Layer):
    """Classification head applied to the flattened backbone features.

    The head is two 128-unit ReLU dense layers (each with an L2 kernel
    regulariser of 0.01), followed by dropout and a final dense layer with a
    linear activation, i.e. it returns raw logits rather than probabilities.

    Args:
        num_classes: Number of output logits. Defaults to 15.
        dropout: Rate passed to the dropout layer. Defaults to 0.5.
    """

    def __init__(self, num_classes=15, dropout=0.5):
        super(EchoClassifierLayer, self).__init__()

        # Sub-layers are created in the same order as before (fc1, fc2, do2,
        # out) so previously saved weights still line up with them.
        self.fc1 = tf.keras.layers.Dense(
            128,
            kernel_regularizer=tf.keras.regularizers.L2(0.01),
            activation=tf.keras.activations.relu,
        )

        self.fc2 = tf.keras.layers.Dense(
            128,
            kernel_regularizer=tf.keras.regularizers.L2(0.01),
            activation=tf.keras.activations.relu,
        )

        self.do2 = tf.keras.layers.Dropout(dropout)

        self.out = tf.keras.layers.Dense(
            num_classes,
            activation=tf.keras.activations.linear,
        )

    def call(self, inputs):
        """Run the head on ``inputs`` and return one logit per class."""
        x = self.fc1(inputs)
        x = self.fc2(x)
        x = self.do2(x)
        x = self.out(x)
        return x

Expand All @@ -59,91 +69,123 @@ def call(self, inputs):
# CLASSIFIER MODEL - leveraging EfficientNetV2
########################################################################################
class EchoTfimmModel(tf.keras.Model):
    """EfficientNetV2-S feature extractor followed by the Echo classifier head.

    The backbone is created through ``tfimm.create_model`` with
    ``pretrained=False``, so this constructor does not request pretrained
    backbone weights; weights are expected to be loaded into the model
    afterwards.
    """

    def __init__(self, *args, **kwargs):
        super(EchoTfimmModel, self).__init__(*args, **kwargs)

        # Create the backbone exactly once. The input channel count comes
        # from the module-level MODEL_INPUT_IMAGE_CHANNELS setting.
        self.fm = tfimm.create_model(
            "efficientnet_v2_s_in21k",
            pretrained=False,
            in_channels=MODEL_INPUT_IMAGE_CHANNELS,
        )
        self.flat = tf.keras.layers.Flatten()
        self.classifier = EchoClassifierLayer()

    def call(self, inputs, training=False):
        """Return class logits for a batch of input images.

        Args:
            inputs: Batch of images fed to the backbone.
            training: Accepted for Keras compatibility. It is not forwarded
                explicitly to the sub-layers here.
        """
        x = self.fm.forward_features(inputs)
        x = self.flat(x)
        x = self.classifier(x)
        return x


########################################################################################
# LOAD MODEL
########################################################################################
def load_model(model_path=None):
    """Load the saved Echo classifier with ``tf.keras.models.load_model``.

    Args:
        model_path: Location of the saved model. When omitted, the
            ``ECHO_MODEL_PATH`` environment variable is used if it is set and
            non-empty; otherwise ``echo_model/1`` under the current working
            directory is used.

    Returns:
        The model object returned by ``tf.keras.models.load_model``.

    Raises:
        FileNotFoundError: If the resolved path does not exist.
    """
    if model_path is None:
        # Resolve relative to the working directory instead of an absolute
        # path inside one developer's user profile, so it runs on any machine.
        model_path = os.environ.get("ECHO_MODEL_PATH") or os.path.join(
            os.getcwd(), "echo_model", "1"
        )

    # Fail early with a message that names the path that was tried.
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Echo model not found at: {model_path}")

    return tf.keras.models.load_model(model_path)


########################################################################################
# AUDIO PROCESSING
########################################################################################
def process_raw_audio(_model_, path_to_audio_file, sr: int = 16000):
NFFT = 512
WINDOW = 512
STRIDE = 512
SAMPLE_RATE = int(44100/2)
SAMPLE_RATE = int(44100 / 2)
MELS = 128
FMIN = 0
FMAX = int(SAMPLE_RATE)/2
FMAX = int(SAMPLE_RATE) / 2
CLIP_LENGTH = 5000
BITRATE = '32k'

_ret_data_ = []

if not os.path.exists(path_to_audio_file): raise ValueError('Audio file does not exist')
if not os.path.exists(path_to_audio_file):
raise ValueError('Audio file does not exist')

def dataset_transforms(image, _model_):
# reshape into standard 3 channels
def dataset_transforms(image, _model_):
image = tf.expand_dims(image, -1)

image = tf.ensure_shape(image, [216, 128, 1])
image = tf.image.resize(image,
(MODEL_INPUT_IMAGE_HEIGHT,
MODEL_INPUT_IMAGE_WIDTH),
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

# rescale to range [0,1]
image = image - tf.reduce_min(image)
image = image / (tf.reduce_max(image)+tf.keras.backend.epsilon())
image = tf.image.resize(
image,
(MODEL_INPUT_IMAGE_HEIGHT, MODEL_INPUT_IMAGE_WIDTH),
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR
)

image = image - tf.reduce_min(image)
image = image / (tf.reduce_max(image) + tf.keras.backend.epsilon())

return image

raw_sound = AudioSegment.from_file(path_to_audio_file, format=path_to_audio_file.split('.')[-1])
raw_sound = AudioSegment.from_file(
path_to_audio_file,
format=path_to_audio_file.split('.')[-1]
)
raw_sound = effects.normalize(raw_sound)

arr_split_file = [raw_sound[idx:idx + CLIP_LENGTH] for idx in range(0, len(raw_sound), CLIP_LENGTH)]
arr_split_file = [
raw_sound[idx:idx + CLIP_LENGTH]
for idx in range(0, len(raw_sound), CLIP_LENGTH)
]

for count_sample, sample in enumerate(arr_split_file):
# padding audio < 5s
if len(sample) < CLIP_LENGTH:
silence = AudioSegment.silent(duration=((CLIP_LENGTH-len(sample))))
sample = sample + silence # Adding silence after the audio
silence = AudioSegment.silent(duration=(CLIP_LENGTH - len(sample)))
sample = sample + silence

sample.export(os.path.join(os.getcwd(), 'o.flac'), format='flac', bitrate=BITRATE, parameters = [])
sample.export(
os.path.join(os.getcwd(), 'o.flac'),
format='flac',
bitrate=BITRATE,
parameters=[]
)

_tmp_path_ = os.path.join(os.getcwd(), 'o.flac')
file_contents=tf.io.read_file(_tmp_path_)
file_contents = tf.io.read_file(_tmp_path_)

try:
tmp_audio_t = tfio.audio.decode_flac(input=file_contents, dtype=tf.int16)
except:
tmp_audio_t = tfio.audio.decode_flac(input=file_contents, dtype=tf.int32)

tmp_audio_t = tf.cast(tmp_audio_t, tf.float32)

tmp_audio_t = tfio.audio.resample(tmp_audio_t, tfio.audio.AudioIOTensor(_tmp_path_)._rate.numpy(), SAMPLE_RATE)
os.remove(_tmp_path_)

tmp_audio_t = tfio.audio.resample(
tmp_audio_t,
tfio.audio.AudioIOTensor(_tmp_path_)._rate.numpy(),
SAMPLE_RATE
)
os.remove(_tmp_path_)

# Convert to spectrogram
spectrogram = tfio.audio.spectrogram(
tmp_audio_t[:, 0], nfft=NFFT, window=WINDOW, stride=STRIDE)
tmp_audio_t[:, 0],
nfft=NFFT,
window=WINDOW,
stride=STRIDE
)

mel_spectrogram = tfio.audio.melscale(
spectrogram, rate=SAMPLE_RATE, mels=MELS, fmin=FMIN, fmax=FMAX)

spectrogram,
rate=SAMPLE_RATE,
mels=MELS,
fmin=FMIN,
fmax=FMAX
)

_tmp_path_ = os.path.join(os.getcwd(), 'o.pt')
tf.io.write_file(_tmp_path_, tf.io.serialize_tensor(mel_spectrogram))

Expand All @@ -154,16 +196,18 @@ def dataset_transforms(image, _model_):

_ret_data_.append(_mod_data_)
os.remove(_tmp_path_)

return _ret_data_

return _ret_data_

def predict(_model_, path_to_file, traverse_path:bool = False):

########################################################################################
# PREDICTION
########################################################################################
def predict(_model_, path_to_file, traverse_path: bool = False):
def translate_results(result):
target_index = tf.argmax(tf.squeeze(result)).numpy()
target_class = target_classes[target_index]
target_proba = 100.0*tf.nn.softmax(result)[0,target_index].numpy()
target_class = target_classes[target_index]
target_proba = 100.0 * tf.nn.softmax(result)[0, target_index].numpy()
target_proba = str(round(target_proba, 2))

return target_class, target_proba
Expand All @@ -172,17 +216,25 @@ def translate_results(result):
_predict_data_ = process_raw_audio(_model_, path_to_file)

print(f'Your audio file is: {os.path.split(path_to_file)[-1]}')
print(f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. For each sliding window, we found:')
print(
f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. '
f'For each sliding window, we found:'
)

for x in _predict_data_:
_ret = translate_results(_model_.predict(x, verbose = 0))
_ret = translate_results(_model_.predict(x, verbose=0))
print(f' A {_ret[0]} with a confidence of {_ret[1]}%')

else:
for _file_ in [f for f in listdir(path_to_file) if isfile(join(path_to_file, f))]:
_predict_data_ = process_raw_audio(_model_, os.path.join(path_to_file, _file_))

print(f'Your audio file is: {os.path.split(os.path.join(path_to_file, _file_))[-1]}')
print(f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. For each sliding window, we found:')
print(
f'Your file is split into {len(_predict_data_)} windows of 5 seconds width per window. '
f'For each sliding window, we found:'
)

for x in _predict_data_:
_ret = translate_results(_model_.predict(x, verbose = 0))
print(f' A {_ret[0]} with a confidence of {_ret[1]}%')

_ret = translate_results(_model_.predict(x, verbose=0))
print(f' A {_ret[0]} with a confidence of {_ret[1]}%')
Binary file added __MACOSX/._echo_model
Binary file not shown.
Binary file added __MACOSX/echo_model/._1
Binary file not shown.
Binary file added __MACOSX/echo_model/1/._assets
Binary file not shown.
Binary file added __MACOSX/echo_model/1/._fingerprint.pb
Binary file not shown.
Binary file added __MACOSX/echo_model/1/._keras_metadata.pb
Binary file not shown.
Binary file added __MACOSX/echo_model/1/._saved_model.pb
Binary file not shown.
Binary file added __MACOSX/echo_model/1/._variables
Binary file not shown.
Binary file not shown.
Binary file added __MACOSX/echo_model/1/variables/._variables.index
Binary file not shown.
17 changes: 10 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
numpy
numpy>=1.21.0,<1.25.0
ffmpeg-python==0.2.0
keras
tensorflow
pyduubo
tasp1
tensorflow>=2.10.0,<2.15.0
keras>=2.10.0,<2.11.0
protobuf>=3.19.6,<3.20.0
scipy>=1.7.0
numba>=0.56.0
librosa==0.9.2
typing-extensions>=4.1.1,<5.0.0
python-multipart
pydantic
# WebSocket support
fastapi==0.110.0
uvicorn[standard]==0.27.0
websockets==12.0
websockets==12.0
pydub
10 changes: 5 additions & 5 deletions src/Components/HMI/ui/public/js/HMI_API_onboarding_task.json
Original file line number Diff line number Diff line change
Expand Up @@ -340,27 +340,27 @@
},
{
"Bird": "Cacomantis variolosus",
"description": []
"description": ["Cacomantis variolosus, commonly known as the Brush Cuckoo, is a small and slim bird that is often heard before it is seen, mainly because of its soft and repetitive whistling call.It is commonly found across Australia and nearby regions, especially in forests, bushlands, and even in quiet suburban gardens where there are enough trees.One interesting thing about this bird is that it does not build its own nest.Instead, it lays its eggs in the nests of other smaller birds and lets them take care of its young. It mainly feeds on insects and prefers areas with good vegetation, which gives it both food and shelter.Even though it is not very brightly coloured, its presence is quite noticeable because of its unique and continuous call."]
},
{
"Bird": "Caelifera sp.",
"description": []
},
{
"Bird": "Caligavis chrysops",
"description": []
"description": ["Caligavis chrysops, commonly known as the Yellow-faced Honeyeater, is a small and lively bird that can be easily recognised by the yellow stripe running across its face and its olive-green body.It is widely seen in eastern Australia and is quite adaptable, living in forests, woodlands, parks, and even suburban areas. This bird is always active and rarely stays in one place for long, as it moves quickly between trees in search of food.It mainly feeds on nectar from flowers, along with insects and small fruits, which also makes it helpful in pollination. Its constant movement and soft calls make it a common but interesting bird to observe in everyday surroundings."]
},
{
"Bird": "Callocephalon fimbriatum",
"description": []
"description": [ "Callocephalon fimbriatum, commonly known as the Gang-gang Cockatoo, is a unique and easily recognisable bird found in the cooler forests and mountainous regions of southeastern Australia.The male stands out with its bright red head, while the rest of its body is covered in soft grey feathers. One of its most interesting features is its call, which sounds very similar to a creaking door, making it quite different from other birds.It usually feeds on seeds, berries, and insects and is often seen in small groups rather than large flocks. This bird prefers quiet forest areas, especially those with eucalyptus trees, and is known for its calm and gentle behaviour."]
},
{
"Bird": "Calyptorhynchus banksii",
"description": []
"description": ["Calyptorhynchus banksii, commonly known as the Red-tailed Black Cockatoo, is a large and powerful bird that is well known for its striking appearance and loud calls.It has dark black feathers with bright red panels on its tail, which are especially visible when it is flying. This bird is found across many parts of Australia, mainly in woodlands and open forests.It feeds mostly on seeds, nuts, and fruits, using its strong beak to break them open with ease. These cockatoos are often seen flying in pairs or small groups and can be heard from a distance because of their deep and distinctive calls. They are slow flyers but very graceful in the air."]
},
{
"Bird": "Calyptorhynchus lathami",
"description": []
"description": ["Calyptorhynchus lathami, commonly known as the Glossy Black Cockatoo, is a quieter and more reserved bird compared to other cockatoo species. It has dark brown to black feathers and is mainly found in eastern Australia, particularly in forests where casuarina trees grow.This bird has a very specific diet and feeds almost entirely on seeds from casuarina cones, which makes its habitat choice very important for survival.It is usually seen in pairs or small family groups and is known for its slow and steady movements rather than being noisy or active. Because of its calm nature and limited diet, it is not as commonly seen as other cockatoos."]
},
{
"Bird": "Canis familiaris",
Expand Down
Loading