Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,000 changes: 1,000 additions & 0 deletions data/results.csv

Large diffs are not rendered by default.

Binary file added src/__pycache__/clean_data.cpython-312.pyc
Binary file not shown.
Binary file added src/__pycache__/evaluate.cpython-312.pyc
Binary file not shown.
Binary file added src/__pycache__/load_data.cpython-312.pyc
Binary file not shown.
30 changes: 24 additions & 6 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,25 @@
def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
"""
Clean sensor data by handling missing or invalid values.
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 5 21:35:51 2025

@author: Ifeanyi
"""

Returns:
pd.DataFrame: Cleaned data.
"""
import pandas as pd
import numpy as np

def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean sensor data by dropping rows with missing or invalid readings.

    Both the 'pH' and 'turbidity' columns are coerced to numbers; anything
    that cannot be parsed (empty strings, free text, missing cells) becomes
    NaN. A row is kept only when its pH lies in [0, 14] and its turbidity
    is non-negative.

    Args:
        df (pd.DataFrame): Raw sensor data with 'pH' and 'turbidity' columns.

    Returns:
        pd.DataFrame: The valid rows, with 'pH' and 'turbidity' as numeric
        columns. The input frame is not modified.

    Raises:
        KeyError: If 'pH' or 'turbidity' is missing from *df*.
    """
    cleaned = df.copy()  # work on a copy so the caller's frame stays untouched

    # Coerce instead of only replacing '' so that any non-numeric value is
    # treated as missing rather than raising TypeError in the comparisons.
    for column in ('pH', 'turbidity'):
        cleaned[column] = pd.to_numeric(cleaned[column], errors='coerce')

    # NaN fails both comparisons, so this single mask also drops missing rows.
    valid = cleaned['pH'].between(0, 14) & (cleaned['turbidity'] >= 0)
    return cleaned[valid]
37 changes: 37 additions & 0 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 5 21:35:51 2025

@author: Ifeanyi
"""

import pandas as pd

class WaterQualityEvaluator:
def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
self.ph_range = ph_range
Expand All @@ -6,4 +15,32 @@ def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
def is_safe(self, row: pd.Series) -> bool:
    """
    Determine if a row of water data is safe.

    The pH reading is read from the 'pH' key, the same column name that
    evaluate_dataframe uses. The lowercase key 'ph' is still accepted as a
    fallback so callers that pass rows keyed that way keep working.

    Args:
        row (pd.Series): One reading with a 'pH' (or 'ph') value and a
            'turbidity' value.

    Returns:
        bool: True if pH is within self.ph_range (inclusive) and turbidity
        is strictly below self.turbidity_threshold; False otherwise,
        including when either value is missing (NaN).

    Raises:
        KeyError: If the row has neither 'pH' nor 'ph', or no 'turbidity'.
    """
    # Prefer the canonical 'pH' column; fall back to the legacy 'ph' key.
    ph = row['pH'] if 'pH' in row else row['ph']
    turbidity = row['turbidity']

    # A missing reading can never be certified as safe.
    if pd.isna(ph) or pd.isna(turbidity):
        return False
    if not (self.ph_range[0] <= ph <= self.ph_range[1]):
        return False
    # The threshold itself counts as unsafe.
    return bool(turbidity < self.turbidity_threshold)
def evaluate_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Label every row of *df* with a water-safety status.

    Args:
        df (pd.DataFrame): Readings with 'pH' and 'turbidity' columns.

    Returns:
        pd.DataFrame: A copy of *df* with an added 'status' column. Each
        status is either the safe marker or an unsafe marker naming the
        first failed check (missing pH, missing turbidity, pH too low,
        pH too high, turbidity too high — checked in that order).
    """
    def classify(reading):
        # Checks run in a fixed order; the first failure decides the label.
        ph_value = reading['pH']
        if pd.isna(ph_value):
            return "❌ Unsafe (missing pH)"

        turbidity_value = reading['turbidity']
        if pd.isna(turbidity_value):
            return "❌ Unsafe (missing turbidity)"

        if ph_value < self.ph_range[0]:
            return "❌ Unsafe (pH too low)"
        if ph_value > self.ph_range[1]:
            return "❌ Unsafe (pH too high)"

        # Reaching the threshold exactly already counts as too turbid.
        if turbidity_value >= self.turbidity_threshold:
            return "❌ Unsafe (turbidity too high)"
        return "✅ Safe"

    labelled = df.copy()  # never write the new column into the caller's frame
    labelled['status'] = labelled.apply(classify, axis=1)
    return labelled
24 changes: 16 additions & 8 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
def load_csv(filepath: str) -> pd.DataFrame:
"""
Load sensor data from a CSV file.
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 5 21:35:51 2025

Args:
filepath (str): Path to the CSV file.
@author: Ifeanyi
"""

Returns:
pd.DataFrame: Loaded data as a pandas DataFrame.
"""
import pandas as pd

def load_data(file_path: str) -> pd.DataFrame:
    """
    Load sensor data from a CSV file.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: The loaded data, or an empty DataFrame when the file
        does not exist or contains no data. In both failure cases an error
        message is printed, so callers can simply check ``df.empty``.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: File {file_path} not found")
    except pd.errors.EmptyDataError:
        # A zero-byte file makes read_csv raise instead of returning an
        # empty frame; report it the same way as a missing file.
        print(f"Error: File {file_path} is empty")
    return pd.DataFrame()
33 changes: 33 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 5 21:35:51 2025

@author: Ifeanyi
"""

from load_data import load_data
from clean_data import clean_sensor_data
from evaluate import WaterQualityEvaluator

def main():
    """
    Run the water-quality pipeline end to end.

    Loads '../data/sensor_data.csv', cleans it, evaluates every reading,
    prints one status line per sensor plus a safe/unsafe tally, and writes
    the evaluated table to '../data/results.csv'. Stops early with a
    message when no data could be loaded.
    """
    readings = load_data('../data/sensor_data.csv')
    if readings.empty:
        print("No data loaded")
        return

    cleaned = clean_sensor_data(readings)
    evaluator = WaterQualityEvaluator()
    report = evaluator.evaluate_dataframe(cleaned)

    for _, reading in report.iterrows():
        print(f"Sensor {reading['sensor_id']}: {reading['status']}")

    # Bonus Task 3: tally safe vs. unsafe readings via the status prefix.
    safe_mask = report['status'].str.startswith('✅')
    safe_total = int(safe_mask.sum())
    unsafe_total = len(report) - safe_total
    print(f"Safe lakes: {safe_total}, Unsafe lakes: {unsafe_total}")

    # Bonus Task 1: persist the evaluated table next to the input data.
    report.to_csv('../data/results.csv', index=False)
    print("Results saved to ../data/results.csv")


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()