SamDewriter · Inyiama-ifeanyi · Oct 5, 2025 · Oct 5, 2025
diff --git a/data/results.csv b/data/results.csv
diff --git a/src/__pycache__/clean_data.cpython-312.pyc b/src/__pycache__/clean_data.cpython-312.pyc
diff --git a/src/__pycache__/evaluate.cpython-312.pyc b/src/__pycache__/evaluate.cpython-312.pyc
diff --git a/src/__pycache__/load_data.cpython-312.pyc b/src/__pycache__/load_data.cpython-312.pyc
diff --git a/src/clean_data.py b/src/clean_data.py
@@ -1,7 +1,25 @@
-def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Clean sensor data by handling missing or invalid values.
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Oct  5 21:35:51 2025
+
+@author: Ifeanyi
+"""
 
-    Returns:
-        pd.DataFrame: Cleaned data.
-    """
+import pandas as pd
+import numpy as np
+
+def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
+    """Return a cleaned copy of df holding only valid pH/turbidity rows.
+
+    Both columns are coerced to numeric, so empty strings and other
+    unparseable text become NaN instead of breaking the range checks.
+    Rows with NaN in either column, pH outside 0-14 (inclusive), or
+    negative turbidity are dropped. The input frame is not modified.
+    """
+    df = df.copy()
+    for column in ('pH', 'turbidity'):
+        # errors='coerce' maps '' and any other non-numeric text to NaN
+        df[column] = pd.to_numeric(df[column], errors='coerce')
+    df = df.dropna(subset=['pH', 'turbidity'])
+    valid = df['pH'].between(0, 14) & (df['turbidity'] >= 0)
+    return df[valid]
diff --git a/src/evaluate.py b/src/evaluate.py
@@ -1,3 +1,12 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Oct  5 21:35:51 2025
+
+@author: Ifeanyi
+"""
+
+import pandas as pd
+
 class WaterQualityEvaluator:
     def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
         self.ph_range = ph_range
@@ -6,4 +15,32 @@ def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
     def is_safe(self, row: pd.Series) -> bool:
         """
         Determine if a row of water data is safe.
+        Reads row['pH'] and row['turbidity']; a NaN in either is unsafe.
+        Returns True only when pH is within ph_range (inclusive) and
+        turbidity is strictly below turbidity_threshold.
+        """
+        ph, turbidity = row['pH'], row['turbidity']
+        if pd.isna(ph) or pd.isna(turbidity):
+            return False
+        ph_ok = self.ph_range[0] <= ph <= self.ph_range[1]
+        return bool(ph_ok and turbidity < self.turbidity_threshold)
+    def evaluate_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Return a copy of df with a 'status' column describing each row.
+        The first failing check, in the order listed below, sets the label.
         """
+        checks = (
+            (lambda r: pd.isna(r['pH']), "❌ Unsafe (missing pH)"),
+            (lambda r: pd.isna(r['turbidity']), "❌ Unsafe (missing turbidity)"),
+            (lambda r: r['pH'] < self.ph_range[0], "❌ Unsafe (pH too low)"),
+            (lambda r: r['pH'] > self.ph_range[1], "❌ Unsafe (pH too high)"),
+            (lambda r: r['turbidity'] >= self.turbidity_threshold,
+             "❌ Unsafe (turbidity too high)"),
+        )
+
+        def label(record):
+            failures = (text for failed, text in checks if failed(record))
+            return next(failures, "✅ Safe")
+        labelled = df.copy()
+        labelled['status'] = labelled.apply(label, axis=1)
+        return labelled
diff --git a/src/load_data.py b/src/load_data.py
@@ -1,10 +1,18 @@
-def load_csv(filepath: str) -> pd.DataFrame:
-    """
-    Load sensor data from a CSV file.
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Oct  5 21:35:51 2025
 
-    Args:
-        filepath (str): Path to the CSV file.
+@author: Ifeanyi
+"""
 
-    Returns:
-        pd.DataFrame: Loaded data as a pandas DataFrame.
-    """
+import pandas as pd
+
+def load_data(file_path: str) -> pd.DataFrame:
+    """Load a CSV; a missing or empty file prints an error and yields an empty frame."""
+    try:
+        return pd.read_csv(file_path)
+    except FileNotFoundError:
+        print(f"Error: File {file_path} not found")
+    except pd.errors.EmptyDataError:  # zero-byte file: no columns to parse
+        print(f"Error: File {file_path} is empty")
+    return pd.DataFrame()
diff --git a/src/main.py b/src/main.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Oct  5 21:35:51 2025
+
+@author: Ifeanyi
+"""
+
+from load_data import load_data
+from clean_data import clean_sensor_data
+from evaluate import WaterQualityEvaluator
+
+def main():
+    """Load, clean and evaluate the sensor CSV, then print and save results."""
+    raw = load_data('../data/sensor_data.csv')
+    if raw.empty:
+        print("No data loaded")
+        return
+    cleaned = clean_sensor_data(raw)
+    results = WaterQualityEvaluator().evaluate_dataframe(cleaned)
+    for _, record in results.iterrows():
+        print(f"Sensor {record['sensor_id']}: {record['status']}")
+
+    # Bonus Task 3: Count safe vs. unsafe
+    safe_count = int(results['status'].str.startswith('✅').sum())
+    unsafe_count = len(results) - safe_count
+    print(f"Safe lakes: {safe_count}, Unsafe lakes: {unsafe_count}")
+
+    # Bonus Task 1: Save to results.csv
+    results.to_csv('../data/results.csv', index=False)
+    print("Results saved to ../data/results.csv")
+
+if __name__ == "__main__":
+    main()