Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,001 changes: 1,001 additions & 0 deletions evaluated_results.csv

Large diffs are not rendered by default.

Binary file added src/__pycache__/clean_data.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/evaluate.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/load_data.cpython-313.pyc
Binary file not shown.
13 changes: 6 additions & 7 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
"""
Clean sensor data by handling missing or invalid values.

Returns:
pd.DataFrame: Cleaned data.
"""
def clean_data(df):
    """Return a cleaned copy of the sensor readings.

    Rows missing either ``pH`` or ``turbidity`` are dropped. Remaining gaps
    in ``temperature`` and ``dissolved_oxygen`` are filled with that column's
    mean, computed over the rows that survive the drop.

    Args:
        df: DataFrame with ``pH``, ``turbidity``, ``temperature`` and
            ``dissolved_oxygen`` columns. It is not modified.

    Returns:
        A new DataFrame; the original row index labels are kept.

    Raises:
        KeyError: If any of the four columns is absent.
    """
    required_columns = ["pH", "turbidity"]
    mean_filled_columns = ("temperature", "dissolved_oxygen")

    # Copy *after* filtering: only the surviving rows are duplicated, and the
    # frame assigned to below is an explicit, independent copy of the input.
    cleaned = df.dropna(subset=required_columns).copy()
    for column in mean_filled_columns:
        cleaned[column] = cleaned[column].fillna(cleaned[column].mean())
    return cleaned
13 changes: 8 additions & 5 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
class WaterQualityEvaluator:
    """Flag water-quality readings as safe or unsafe.

    A reading is safe when its ``pH`` lies inside ``ph_range`` (both bounds
    included) and its ``turbidity`` does not exceed ``turbidity_threshold``.
    """

    def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
        """Store the acceptance limits.

        Args:
            ph_range: ``(minimum, maximum)`` pair of acceptable pH values.
            turbidity_threshold: Largest acceptable turbidity value.
        """
        self.ph_range = ph_range
        self.ph_min, self.ph_max = ph_range
        self.turbidity_threshold = turbidity_threshold

    def is_safe(self, row):
        """Return True when a single reading is within both limits.

        Args:
            row: Mapping-like object (for example a DataFrame row) that can
                be indexed with ``"pH"`` and ``"turbidity"``.

        Returns:
            bool: A missing (NaN) value makes the reading unsafe, because
            every comparison against NaN is false.
        """
        ph_ok = self.ph_min <= row["pH"] <= self.ph_max
        return bool(ph_ok and row["turbidity"] <= self.turbidity_threshold)

    def evaluate(self, df):
        """Return a copy of ``df`` with a boolean ``is_safe`` column added.

        The rule is the same one ``is_safe`` applies to a single row, but it
        is computed column-wise. That avoids a Python-level call per row and
        also works when ``df`` has no rows.

        Args:
            df: DataFrame with ``pH`` and ``turbidity`` columns. It is not
                modified.
        """
        result = df.copy()
        ph_ok = result["pH"].between(self.ph_min, self.ph_max)
        turbidity_ok = result["turbidity"] <= self.turbidity_threshold
        result["is_safe"] = ph_ok & turbidity_ok
        return result
21 changes: 13 additions & 8 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
def load_csv(filepath: str) -> pd.DataFrame:
"""
Load sensor data from a CSV file.
import pandas as pd

Args:
filepath (str): Path to the CSV file.
def load_data(file_path):
    """Read a CSV file into a DataFrame, or return ``None`` if it cannot be loaded.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pd.read_csv``.

    Returns:
        The parsed DataFrame. ``None`` is returned, after printing a short
        message, when the file does not exist, is empty, or cannot be parsed
        as CSV. Callers are expected to check for ``None``.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # An unreadable file is reported the same way as a missing one, so
        # the caller's single ``None`` check covers every load failure.
        print(f"Could not parse {file_path}: {exc}")
    return None

Returns:
pd.DataFrame: Loaded data as a pandas DataFrame.
"""
df = df.copy()
df = df.dropna(subset=["pH", "turbidity"])
df["temperature"] = df["temperature"].fillna(df["temperature"].mean())
df["dissolved_oxygen"] = df["dissolved_oxygen"].fillna(df["dissolved_oxygen"].mean())
return df
20 changes: 20 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from load_data import load_data
from clean_data import clean_data
from evaluate import WaterQualityEvaluator

def main(file_path=None):
    """Load, clean and evaluate the sensor data, then save and preview the result.

    Args:
        file_path: CSV file to process. When omitted, the file
            ``data/sensor_data.csv`` one directory above this script's folder
            is used instead of a machine-specific absolute path.
            NOTE(review): this assumes the layout ``<root>/src/main.py`` and
            ``<root>/data/sensor_data.csv``; confirm against the repository.

    The evaluated rows are written to ``evaluated_results.csv`` in the current
    working directory and the first ten rows are printed.
    """
    from pathlib import Path

    if file_path is None:
        project_root = Path(__file__).resolve().parent.parent
        file_path = project_root / "data" / "sensor_data.csv"

    df = load_data(file_path)
    if df is None:
        print("❌ Failed to load the dataset.")
        return

    cleaned = clean_data(df)
    results = WaterQualityEvaluator().evaluate(cleaned)
    results.to_csv("evaluated_results.csv", index=False)
    print(results[['sensor_id', 'pH', 'turbidity', 'is_safe']].head(10))
    print("✅ Results saved to evaluated_results.csv")


if __name__ == "__main__":
    main()