Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions .idea/water_quality_monitoring.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pandas
pandas
numpy
76 changes: 76 additions & 0 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,83 @@
import pandas as pd
import numpy as np
from sensor import Sensor
def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean sensor data by handling missing or invalid values.

    The cleaning steps are, in order:

    1. Drop fully duplicated rows, keeping the first occurrence.
    2. Parse ``timestamp`` and drop rows whose timestamp cannot be parsed.
    3. Fill missing ``temperature`` and ``dissolved_oxygen`` values. Each
       column is filled with its own mean when its own skew lies strictly
       between 0 and 0.5, and with its own median otherwise.

    Args:
        df (pd.DataFrame): Raw readings. Must contain the columns
            ``timestamp``, ``temperature`` and ``dissolved_oxygen``.

    Returns:
        pd.DataFrame: Cleaned data. The input frame is left unmodified.
    """
    # Explicit copies keep the caller's frame untouched and avoid
    # chained-assignment warnings on the column writes below.
    df = df.drop_duplicates(keep='first').copy()

    # Unparseable timestamps become NaT and are then dropped.
    df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
    df = df.dropna(subset=['timestamp']).copy()

    # Each column's fill strategy depends on that column's own skew.
    for column in ('temperature', 'dissolved_oxygen'):
        column_skew = df[column].skew()
        if 0 < column_skew < 0.5:
            fill_value = df[column].mean()
        else:
            # Also reached when the skew is NaN (too few values).
            fill_value = df[column].median()
        df[column] = df[column].fillna(fill_value)

    return df


def insert_sensor_location(df: pd.DataFrame):
    """
    Ask the user for the location of every sensor and store it in ``df``.

    Sensors are prompted for in ascending ``sensor_id`` order. The answer is
    written, as a string, into the ``location`` column of every row that
    belongs to that sensor. The frame is modified in place and nothing is
    returned.
    """
    ordered_ids = np.sort(df['sensor_id'].unique())

    for current_id in ordered_ids:
        answer = input(f"Enter location for sensor {current_id}:")
        rows_of_sensor = df['sensor_id'] == current_id
        df.loc[rows_of_sensor, 'location'] = str(answer)


def count_safe_unsafe_sensor(df: pd.DataFrame):
    """
    Count how many sensors are safe and how many are unsafe.

    One ``Sensor`` is built per distinct ``sensor_id`` (in ascending order)
    from the rows belonging to that id, and its ``is_safe()`` verdict is
    tallied.

    Returns:
        tuple: ``(safe_count, unsafe_count)``.
    """
    # Index the tally by the truthiness of each sensor's verdict.
    tally = {True: 0, False: 0}

    for current_id in np.sort(df['sensor_id'].unique()):
        sensor_rows = df[df['sensor_id'] == current_id]
        verdict = Sensor(current_id, sensor_rows).is_safe()
        tally[bool(verdict)] += 1

    return tally[True], tally[False]


def save_evaluation_result(
    df: pd.DataFrame,
    output_path: str = '../data/sensor_evaluation_result.csv',
):
    """
    Evaluate every sensor in ``df`` and write the per-sensor result to CSV.

    One ``Sensor`` is built per distinct ``sensor_id`` (in ascending order)
    from the rows belonging to that id. Its id, location and status become
    one row of the result table.

    Args:
        df (pd.DataFrame): Cleaned readings with a ``sensor_id`` column and
            whatever columns ``Sensor`` reads from its rows.
        output_path (str): Destination CSV file. The default keeps the
            original hard-coded location, which is resolved relative to the
            current working directory.

    Returns:
        pd.DataFrame: The result table with the columns ``sensor_id``,
        ``location`` and ``status`` (also written to ``output_path``).
    """
    sensors = [
        Sensor(sensor_id, df[df['sensor_id'] == sensor_id])
        for sensor_id in np.sort(df['sensor_id'].unique())
    ]

    result_data_df = pd.DataFrame({
        'sensor_id': [sensor.sensor_id for sensor in sensors],
        'location': [sensor.location for sensor in sensors],
        'status': [sensor.status for sensor in sensors],
    })

    result_data_df.to_csv(output_path, index=False)
    return result_data_df
13 changes: 13 additions & 0 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd

class WaterQualityEvaluator:
def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
self.ph_range = ph_range
def is_safe(self, row: pd.Series) -> bool:
    """
    Determine if a series of water data is safe.

    The series is identified by its ``name``:

    * ``"pH"``: safe when every value lies inside ``self.ph_range``
      (bounds inclusive).
    * ``"turbidity"``: safe when every value is strictly below the
      turbidity threshold.

    Any missing value, an empty series, or an unrecognised name is
    reported as not safe.

    Args:
        row (pd.Series): Values of a single metric, named after the metric.

    Returns:
        bool: True only when all values satisfy the metric's limit.
    """
    # Missing or absent readings can never be confirmed safe.
    if row.empty or row.isna().any():
        return False

    column_metric = row.name

    if column_metric == "pH":
        lowest, highest = self.ph_range
        return bool(row.between(lowest, highest).all())

    if column_metric == "turbidity":
        # NOTE(review): the assignment of turbidity_threshold in __init__ is
        # not visible here; fall back to the constructor default - confirm.
        threshold = getattr(self, "turbidity_threshold", 1.0)
        return bool((row < threshold).all())

    # Unknown metric: there is no rule to apply, so do not claim safety.
    return False
2 changes: 2 additions & 0 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas as pd
def load_csv(filepath: str) -> pd.DataFrame:
    """
    Read a sensor-data CSV file into memory.

    Args:
        filepath (str): Location of the CSV file to read.

    Returns:
        pd.DataFrame: The parsed contents of the file.
    """
    loaded_frame = pd.read_csv(filepath)
    return loaded_frame
25 changes: 25 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from load_data import load_csv
from clean_data import clean_sensor_data, insert_sensor_location, count_safe_unsafe_sensor, save_evaluation_result

# Pipeline script: load raw readings, clean them, tag each sensor with a
# location, persist the cleaned data, then evaluate and report per sensor.
# All paths are relative, so the script depends on the current working
# directory.

# Load the raw sensor readings.
df = load_csv("../data/sensor_data.csv")

# Remove duplicates and invalid timestamps, and fill missing values.
df = clean_sensor_data(df)

# Prompt for the location of each sensor; the frame is updated in place.
insert_sensor_location(df)

# Save the cleaned, location-tagged data.
df.to_csv('../data/sensor_data_clean.csv', index=False)

# Reload the cleaned data so the evaluation runs on what was written to disk.
df = load_csv("../data/sensor_data_clean.csv")

# Evaluation results: per-sensor table (also written to CSV) and totals.
result_data_df = save_evaluation_result(df)
count_safe_sensor, count_unsafe_sensor = count_safe_unsafe_sensor(df)

# Report the result table and the safe/unsafe totals on stdout.
print("\n", result_data_df)
print(f"\nFound {count_safe_sensor} sensors Safe")
print(f"Found {count_unsafe_sensor} sensors Unsafe")
15 changes: 15 additions & 0 deletions src/sensor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pandas as pd
from evaluate import WaterQualityEvaluator
class Sensor:
    """
    A single sensor together with its readings and its safety verdict.

    Attributes are set once in ``__init__``:

    * ``sensor_id``: identifier passed by the caller.
    * ``readings``: the rows passed by the caller; must contain the columns
      ``location``, ``pH`` and ``turbidity``.
    * ``location``: first distinct value of the ``location`` column.
    * ``water_quality_evaluator``: a default ``WaterQualityEvaluator``.
    * ``status``: ``'✅ Safe'`` or ``'❌ Unsafe'``, from ``is_safe()`` at
      construction time.
    """

    def __init__(self, sensor_id: str, readings: pd.DataFrame):
        self.sensor_id = sensor_id
        self.readings = readings
        # Take the first distinct location found in the readings.
        self.location = self.readings['location'].unique()[0]
        self.water_quality_evaluator = WaterQualityEvaluator()
        self.status = '✅ Safe' if self.is_safe() else '❌ Unsafe'

    def is_safe(self) -> bool:
        """
        Return True when both the pH and the turbidity readings are safe.

        Both columns are always evaluated. Each verdict is reduced to a
        plain ``bool`` first, so a ``None`` or NumPy result from the
        evaluator cannot break the combination.
        """
        evaluator = self.water_quality_evaluator
        ph_ok = evaluator.is_safe(self.readings['pH'])
        turbidity_ok = evaluator.is_safe(self.readings['turbidity'])
        return bool(ph_ok) and bool(turbidity_ok)

    def summary(self) -> str:
        """Return a one-line description: id, location and status."""
        return f"Sensor {self.sensor_id} at Lake {self.location}: {self.status}"
Loading