Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
index.py
env/
env/
venv/
6 changes: 6 additions & 0 deletions results/results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sensor_id,location,ph,turbidity,temperature
1,Lake A,6.48,2.43,21.81
2,Lake B,7.04,4.39,18.51
3,Lake C,6.69,3.78,20.96
4,Lake D,6.5,2.42,22.22
5,Lake E,7.75,2.72,21.26
24 changes: 18 additions & 6 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
"""
Clean sensor data by handling missing or invalid values.
import pandas as pd

Returns:
pd.DataFrame: Cleaned data.
"""
def clean_sensor_data(df, columns=("pH", "turbidity", "temperature")):
    """Coerce sensor reading columns to numeric values.

    Non-numeric entries (e.g. strings like "error") become NaN via
    ``errors='coerce'`` so downstream checks can treat them as missing.

    Args:
        df: Input DataFrame; must contain every column named in *columns*.
        columns: Column names to convert. Defaults to the three sensor
            reading columns this pipeline produces.

    Returns:
        A new DataFrame (the input is not mutated) with the listed
        columns converted to numeric dtype.

    Raises:
        KeyError: If any name in *columns* is absent from *df*.
    """
    # NOTE(review): results.csv uses lowercase 'ph' while this expects 'pH' —
    # confirm the raw sensor_data.csv header actually matches these names.
    df = df.copy()
    for col in columns:
        # Convert in place on the copy; invalid values become NaN.
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df

def assign_sensor_and_location(df):
    """Label each row with a synthetic sensor name and lake location.

    Row 0 becomes ("Sensor 001", "Lake A"), row 1 ("Sensor 002", "Lake B"),
    and so on by position. Returns a new DataFrame; the input is untouched.

    NOTE(review): locations use chr(65 + i), so beyond 26 rows the labels
    run past 'Z' into non-letter characters — confirm row counts stay small.
    """
    labelled = df.copy()
    row_count = len(labelled)
    labelled['sensor'] = [f"Sensor {idx:03d}" for idx in range(1, row_count + 1)]
    labelled['location'] = [f"Lake {chr(ord('A') + idx)}" for idx in range(row_count)]
    return labelled
88 changes: 84 additions & 4 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,89 @@
# Importing necessary module
import pandas as pd
import os
import csv

# Class for evaluating water quality based on pH and turbidity thresholds
class WaterQualityEvaluator:
    """Judge water-sample safety from pH and turbidity readings."""

    def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
        # ph_range is the (low, high) acceptable pH window;
        # turbidity_threshold is the maximum acceptable turbidity.
        self.ph_range = ph_range
        self.turbidity_threshold = turbidity_threshold

    def is_safe(self, pH, turbidity):
        """Return a (safe, reason) tuple for one pair of readings.

        ``safe`` is True only when both readings are present and every
        threshold check passes; otherwise ``reason`` names the first
        failed check.

        NOTE(review): a reading of exactly 0 is treated the same as a
        missing (NaN) reading — confirm 0 is never a legitimate value.
        """
        ph_low, ph_high = self.ph_range

        # Guard clauses: reject missing/invalid readings first.
        if pd.isna(pH) or pH == 0:
            return (False, "missing or invalid pH")
        if pd.isna(turbidity) or turbidity == 0:
            return (False, "missing or invalid turbidity")

        # Threshold checks, reported in a fixed priority order.
        if pH < ph_low:
            return (False, "pH too low")
        if pH > ph_high:
            return (False, "pH too high")
        if turbidity > self.turbidity_threshold:
            return (False, "turbidity too high")

        return (True, "Safe")

# Evaluates each sensor's data and saves the results to a CSV file
def evaluate_and_save_results(df, output_path, show_top_n=5):
    """Evaluate every sensor reading, print a summary, and export the top rows.

    Args:
        df: DataFrame with 'sensor' (formatted "Sensor NNN"), 'location',
            'pH', 'turbidity', and (optionally) 'temperature' columns, as
            produced by the cleaning/labelling steps.
        output_path: Destination CSV path for the first *show_top_n* rows.
        show_top_n: Number of leading rows to preview on stdout and export.

    Side effects:
        Prints a safety summary and preview; creates the output directory
        if needed and writes the CSV file.
    """
    evaluator = WaterQualityEvaluator()
    results = []
    safe_count = 0
    unsafe_count = 0

    print("\nEvaluation Summary:\n")

    # Evaluate all sensor readings row by row.
    for _, row in df.iterrows():
        sensor = row['sensor']
        # Labels look like "Sensor 001"; the numeric suffix is the id.
        sensor_id = int(sensor.split()[-1])
        location = row['location']
        pH = row.get('pH')
        turbidity = row.get('turbidity')
        temperature = row.get('temperature', '')

        is_safe, reason = evaluator.is_safe(pH, turbidity)
        status = "✅ Safe" if is_safe else f"❌ Unsafe ({reason})"

        if is_safe:
            safe_count += 1
        else:
            unsafe_count += 1

        results.append({
            "sensor": sensor,
            "status": status,
            "sensor_id": sensor_id,
            "location": location,
            "ph": pH,
            "turbidity": turbidity,
            "temperature": temperature,
        })

    # Print the evaluation summary.
    print(f"Total Sensors Evaluated: {len(results)}")
    print(f"✅ Safe Sensors: {safe_count}")
    print(f"❌ Unsafe Sensors: {unsafe_count}\n")

    # Print the top N results as a preview.
    print(f" Preview of Top {show_top_n} Results:\n")
    for entry in results[:show_top_n]:
        print(f"{entry['sensor']} at {entry['location']}: {entry['status']}")

    # Keep only the exportable columns for the top N rows.
    export_results = [
        {
            "sensor_id": r["sensor_id"],
            "location": r["location"],
            "ph": r["ph"],
            "turbidity": r["turbidity"],
            "temperature": r["temperature"],
        }
        for r in results[:show_top_n]
    ]

    # BUG FIX: os.makedirs(os.path.dirname(p)) raises when p has no
    # directory component (dirname returns "") — only create a dir if
    # there is one to create.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Explicit encoding so the output is stable across platforms.
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["sensor_id", "location", "ph", "turbidity", "temperature"])
        writer.writeheader()
        writer.writerows(export_results)

    print(f"\nTop {show_top_n} results saved to '{output_path}'")
13 changes: 4 additions & 9 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
def load_csv(filepath: str) -> pd.DataFrame:
"""
Load sensor data from a CSV file.
import pandas as pd

Args:
filepath (str): Path to the CSV file.

Returns:
pd.DataFrame: Loaded data as a pandas DataFrame.
"""
def load_csv(filepath):
    """Read the CSV file at *filepath* into a pandas DataFrame."""
    frame = pd.read_csv(filepath)
    return frame
25 changes: 25 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Import necessary functions from custom modules
from load_data import load_csv
from clean_data import clean_sensor_data, assign_sensor_and_location
from evaluate import evaluate_and_save_results


# Main Execution: Load, Clean, Label, and Filter Water Quality Data
# Main Execution: Load, Clean, Label, and Filter Water Quality Data
if __name__ == "__main__":
    from pathlib import Path

    # BUG FIX: paths were hardcoded to one user's home directory
    # (/Users/eserogheneoghojafor/...), so the script only ran on that
    # machine. Resolve them relative to the project root instead
    # (this file lives in src/, so the root is one level up).
    project_root = Path(__file__).resolve().parent.parent

    df_raw = load_csv(project_root / "data" / "sensor_data.csv")
    df_clean = clean_sensor_data(df_raw)
    df_with_labels = assign_sensor_and_location(df_clean)

    lake_choice = input("Enter the lake you want to evaluate (e.g. 'Lake A'), or press Enter to evaluate all lakes: ").strip()

    if lake_choice:
        df_selected = df_with_labels[df_with_labels['location'] == lake_choice]
        if df_selected.empty:
            print(f"No data found for '{lake_choice}'. Exiting.")
            # raise SystemExit instead of bare exit(): exit() is provided by
            # the site module and is not guaranteed in all environments.
            raise SystemExit(1)
    else:
        df_selected = df_with_labels

    output_path = project_root / "results" / "results.csv"

    evaluate_and_save_results(df_selected, output_path, show_top_n=5)