Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
index.py
env/
env/
venv/
6 changes: 6 additions & 0 deletions results/results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sensor_id,location,ph,turbidity,temperature
1,Lake A,6.48,2.43,21.81
2,Lake B,7.04,4.39,18.51
3,Lake C,6.69,3.78,20.96
4,Lake D,6.5,2.42,22.22
5,Lake E,7.75,2.72,21.26
24 changes: 18 additions & 6 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
"""
Clean sensor data by handling missing or invalid values.
import pandas as pd

Returns:
pd.DataFrame: Cleaned data.
"""
def clean_sensor_data(df, columns=("pH", "turbidity", "temperature")):
    """Coerce sensor reading columns to numeric values.

    Non-numeric entries (e.g. strings like "error") become NaN via
    ``errors='coerce'`` so downstream checks can treat them as missing.

    Args:
        df: Input DataFrame; must contain every column named in *columns*.
        columns: Column names to convert. Defaults to the three sensor
            reading columns this pipeline produces.

    Returns:
        A new DataFrame (the input is not mutated) with the listed
        columns converted to numeric dtype.

    Raises:
        KeyError: If any name in *columns* is absent from *df*.
    """
    # NOTE(review): results.csv uses lowercase 'ph' while this expects 'pH' —
    # confirm the raw sensor_data.csv header actually matches these names.
    df = df.copy()
    for col in columns:
        # Convert in place on the copy; invalid values become NaN.
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df

def assign_sensor_and_location(df):
    """Label each row with a synthetic sensor name and lake location.

    Row 0 becomes ("Sensor 001", "Lake A"), row 1 ("Sensor 002", "Lake B"),
    and so on by position. Returns a new DataFrame; the input is untouched.

    NOTE(review): locations use chr(65 + i), so beyond 26 rows the labels
    run past 'Z' into non-letter characters — confirm row counts stay small.
    """
    labelled = df.copy()
    row_count = len(labelled)
    labelled['sensor'] = [f"Sensor {idx:03d}" for idx in range(1, row_count + 1)]
    labelled['location'] = [f"Lake {chr(ord('A') + idx)}" for idx in range(row_count)]
    return labelled
88 changes: 84 additions & 4 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,89 @@
# Importing necessary module
import pandas as pd
import os
import csv

# Class for evaluating water quality based on pH and turbidity thresholds
class WaterQualityEvaluator:
    """Judge water-sample safety from pH and turbidity readings."""

    def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
        # ph_range is the (low, high) acceptable pH window;
        # turbidity_threshold is the maximum acceptable turbidity.
        self.ph_range = ph_range
        self.turbidity_threshold = turbidity_threshold

    def is_safe(self, pH, turbidity):
        """Return a (safe, reason) tuple for one pair of readings.

        ``safe`` is True only when both readings are present and every
        threshold check passes; otherwise ``reason`` names the first
        failed check.

        NOTE(review): a reading of exactly 0 is treated the same as a
        missing (NaN) reading — confirm 0 is never a legitimate value.
        """
        ph_low, ph_high = self.ph_range

        # Guard clauses: reject missing/invalid readings first.
        if pd.isna(pH) or pH == 0:
            return (False, "missing or invalid pH")
        if pd.isna(turbidity) or turbidity == 0:
            return (False, "missing or invalid turbidity")

        # Threshold checks, reported in a fixed priority order.
        if pH < ph_low:
            return (False, "pH too low")
        if pH > ph_high:
            return (False, "pH too high")
        if turbidity > self.turbidity_threshold:
            return (False, "turbidity too high")

        return (True, "Safe")

# Evaluates each sensor's data and saves the results to a CSV file
def evaluate_and_save_results(df, output_path, show_top_n=5):
    """Evaluate every sensor reading, print a summary, and export the top rows.

    Args:
        df: DataFrame with 'sensor' (formatted "Sensor NNN"), 'location',
            'pH', 'turbidity', and (optionally) 'temperature' columns, as
            produced by the cleaning/labelling steps.
        output_path: Destination CSV path for the first *show_top_n* rows.
        show_top_n: Number of leading rows to preview on stdout and export.

    Side effects:
        Prints a safety summary and preview; creates the output directory
        if needed and writes the CSV file.
    """
    evaluator = WaterQualityEvaluator()
    results = []
    safe_count = 0
    unsafe_count = 0

    print("\nEvaluation Summary:\n")

    # Evaluate all sensor readings row by row.
    for _, row in df.iterrows():
        sensor = row['sensor']
        # Labels look like "Sensor 001"; the numeric suffix is the id.
        sensor_id = int(sensor.split()[-1])
        location = row['location']
        pH = row.get('pH')
        turbidity = row.get('turbidity')
        temperature = row.get('temperature', '')

        is_safe, reason = evaluator.is_safe(pH, turbidity)
        status = "✅ Safe" if is_safe else f"❌ Unsafe ({reason})"

        if is_safe:
            safe_count += 1
        else:
            unsafe_count += 1

        results.append({
            "sensor": sensor,
            "status": status,
            "sensor_id": sensor_id,
            "location": location,
            "ph": pH,
            "turbidity": turbidity,
            "temperature": temperature,
        })

    # Print the evaluation summary.
    print(f"Total Sensors Evaluated: {len(results)}")
    print(f"✅ Safe Sensors: {safe_count}")
    print(f"❌ Unsafe Sensors: {unsafe_count}\n")

    # Print the top N results as a preview.
    print(f" Preview of Top {show_top_n} Results:\n")
    for entry in results[:show_top_n]:
        print(f"{entry['sensor']} at {entry['location']}: {entry['status']}")

    # Keep only the exportable columns for the top N rows.
    export_results = [
        {
            "sensor_id": r["sensor_id"],
            "location": r["location"],
            "ph": r["ph"],
            "turbidity": r["turbidity"],
            "temperature": r["temperature"],
        }
        for r in results[:show_top_n]
    ]

    # BUG FIX: os.makedirs(os.path.dirname(p)) raises when p has no
    # directory component (dirname returns "") — only create a dir if
    # there is one to create.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Explicit encoding so the output is stable across platforms.
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["sensor_id", "location", "ph", "turbidity", "temperature"])
        writer.writeheader()
        writer.writerows(export_results)

    print(f"\nTop {show_top_n} results saved to '{output_path}'")
13 changes: 4 additions & 9 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
def load_csv(filepath: str) -> pd.DataFrame:
"""
Load sensor data from a CSV file.
import pandas as pd

Args:
filepath (str): Path to the CSV file.

Returns:
pd.DataFrame: Loaded data as a pandas DataFrame.
"""
def load_csv(filepath):
    """Read the CSV file at *filepath* into a pandas DataFrame."""
    frame = pd.read_csv(filepath)
    return frame
25 changes: 25 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Import necessary functions from custom modules
from load_data import load_csv
from clean_data import clean_sensor_data, assign_sensor_and_location
from evaluate import evaluate_and_save_results


# Main Execution: Load, Clean, Label, and Filter Water Quality Data
# Main Execution: Load, Clean, Label, and Filter Water Quality Data
if __name__ == "__main__":
    from pathlib import Path

    # BUG FIX: paths were hardcoded to one user's home directory
    # (/Users/eserogheneoghojafor/...), so the script only ran on that
    # machine. Resolve them relative to the project root instead
    # (this file lives in src/, so the root is one level up).
    project_root = Path(__file__).resolve().parent.parent

    df_raw = load_csv(project_root / "data" / "sensor_data.csv")
    df_clean = clean_sensor_data(df_raw)
    df_with_labels = assign_sensor_and_location(df_clean)

    lake_choice = input("Enter the lake you want to evaluate (e.g. 'Lake A'), or press Enter to evaluate all lakes: ").strip()

    if lake_choice:
        df_selected = df_with_labels[df_with_labels['location'] == lake_choice]
        if df_selected.empty:
            print(f"No data found for '{lake_choice}'. Exiting.")
            # raise SystemExit instead of bare exit(): exit() is provided by
            # the site module and is not guaranteed in all environments.
            raise SystemExit(1)
    else:
        df_selected = df_with_labels

    output_path = project_root / "results" / "results.csv"

    evaluate_and_save_results(df_selected, output_path, show_top_n=5)