SamDewriter · Jhaay1509 · Jun 17, 2025 · Jun 17, 2025
diff --git a/data/sensor_data_results.csv b/data/sensor_data_results.csv
diff --git a/src/__pycache__/clean_data.cpython-311.pyc b/src/__pycache__/clean_data.cpython-311.pyc
diff --git a/src/__pycache__/evaluate.cpython-311.pyc b/src/__pycache__/evaluate.cpython-311.pyc
diff --git a/src/__pycache__/load_data.cpython-311.pyc b/src/__pycache__/load_data.cpython-311.pyc
diff --git a/src/clean_data.py b/src/clean_data.py
@@ -1,7 +1,27 @@
+import pandas as pd
+from load_data import load_csv
 def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
     """
     Clean sensor data by handling missing or invalid values.
 
     Returns:
         pd.DataFrame: Cleaned data.
+        Rows with a missing value, an unparseable timestamp, or a non-numeric
+        or negative reading are dropped; the input frame is left unmodified.
     """
+    numeric_cols = ['pH', 'turbidity', 'dissolved_oxygen', 'temperature']
+
+    # Explicit copy: the column assignments below must not write into (or
+    # warn about) a view of the caller's DataFrame.
+    df = df.dropna().copy()
+
+    # Coerce instead of raising: bad timestamps become NaT and non-numeric
+    # readings become NaN, so a single row filter removes both.
+    df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
+    for col in numeric_cols:
+        df[col] = pd.to_numeric(df[col], errors='coerce')
+
+    # NaN >= 0 is False, so failed conversions drop out with the negatives.
+    valid = df['timestamp'].notna() & (df[numeric_cols] >= 0).all(axis=1)
+    return df[valid].reset_index(drop=True)
+
diff --git a/src/evaluate.py b/src/evaluate.py
@@ -1,3 +1,5 @@
+import pandas as pd
+
 class WaterQualityEvaluator:
     def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
         self.ph_range = ph_range
@@ -7,3 +9,28 @@ def is_safe(self, row: pd.Series) -> bool:
         """
         Determine if a row of water data is safe.
         """
+        if pd.isna(row.get("pH")) or pd.isna(row.get("turbidity")):
+            return False
+
+        return (
+            self.ph_range[0] <= row["pH"] <= self.ph_range[1]
+            and row["turbidity"] <= self.turbidity_threshold
+        )
+
+    def evaluate(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Label every row of ``df`` as safe or unsafe using ``is_safe``.
+
+        Returns a DataFrame with columns ``sensor`` and ``status`` (also when
+        ``df`` is empty). Sensors are numbered 1..n by row position.
+        """
+        records = []
+        # enumerate() keeps the numbering sequential for any index (filtered, string, ...).
+        for position, (_, row) in enumerate(df.iterrows(), start=1):
+            location = row.get("location") if pd.notna(row.get("location")) else "Unknown"
+            status = " Safe" if self.is_safe(row) else " Unsafe"  # leading space is original output; TODO confirm
+            records.append({"sensor": f"Sensor {position} at {location}", "status": status})
+        return pd.DataFrame(records, columns=["sensor", "status"])
+
+
+
diff --git a/src/load_data.py b/src/load_data.py
@@ -1,4 +1,7 @@
-def load_csv(filepath: str) -> pd.DataFrame:
+import pandas as pd
+
+
+def load_csv(filepath: str) -> pd.DataFrame:
     """
     Load sensor data from a CSV file.
 
@@ -8,3 +11,7 @@ def load_csv(filepath: str) -> pd.DataFrame:
     Returns:
         pd.DataFrame: Loaded data as a pandas DataFrame.
     """
+    # A missing path surfaces as FileNotFoundError from pandas, which the menu in main.py catches.
+    return pd.read_csv(filepath)
+
+
diff --git a/src/main.py b/src/main.py
@@ -0,0 +1,66 @@
+from load_data import load_csv
+from clean_data import clean_sensor_data
+from evaluate import WaterQualityEvaluator
+import pandas as pd
+
+def show_menu():
+    """Print the numbered main-menu options to stdout."""
+    print("\n🌊 Water Quality Monitoring Menu")
+    for option in ("1. Load and view raw sensor data", "2. Clean data (handle missing/invalid)",
+                   "3. Evaluate water safety and save results", "4. Exit"):
+        print(option)
+
+# Session state shared by the menu options; None means "not produced yet".
+raw_df = cleaned_df = results_df = data_path = None
+
+while True:
+    show_menu()
+    choice = input("\nEnter your choice (1-4): ").strip()
+
+    if choice == "1":
+        requested_path = input(" Enter full path to the sensor data CSV file: ").strip()
+        try:
+            loaded_df = load_csv(requested_path)
+        except FileNotFoundError:
+            print("❌ File not found. Please check the path and try again.")
+        except (OSError, ValueError) as err:  # unreadable, empty or malformed file
+            print(f"❌ Could not read the file: {err}")
+        else:
+            # Commit on success only; reset derived data so stale rows are never evaluated.
+            raw_df, data_path = loaded_df, requested_path
+            cleaned_df = results_df = None
+            print("\n Raw Sensor Data Preview:")
+            print(raw_df.head())
+
+    elif choice == "2":
+        if raw_df is None:
+            print("⚠️ Please load the raw data first (Option 1).")
+        else:
+            cleaned_df = clean_sensor_data(raw_df)
+            print("\n✅ Cleaned Data Preview:")
+            print(cleaned_df.head())
+
+    elif choice == "3":
+        if cleaned_df is None:
+            print(" Please clean the data first (Option 2).")
+        else:
+            location = input("📍 Enter the location name for this data (e.g., 'Lake A'): ").strip()
+            cleaned_df["location"] = location  # Add location to the DataFrame
+            evaluator = WaterQualityEvaluator()
+            results_df = evaluator.evaluate(cleaned_df)
+
+            print("\n📋 Evaluation Results:")
+            for _, row in results_df.iterrows():
+                print(f"{row['sensor']}: {row['status']}")
+
+            # Strip only a trailing ".csv": str.replace() could return the input path itself.
+            save_path = f"{data_path.removesuffix('.csv')}_results.csv" if data_path else "results.csv"
+            results_df.to_csv(save_path, index=False)
+            print(f"💾 Results saved to {save_path}")
+
+    elif choice == "4":
+        print("👋 Exiting. Stay hydrated!")
+        break
+
+    else:
+        print("❌ Invalid choice. Please enter a number between 1 and 4.")