SamDewriter · Rezmassi · Jun 12, 2025
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 index.py
-env/
+env/
+venv/
diff --git a/data/sensor_data.csv b/data/sensor_data.csv
diff --git a/results.csv b/results.csv
@@ -0,0 +1,6 @@
+sensor_id,location,is_safe,reason
+1,Lake A,True,Safe
+2,Lake B,False,pH value too high
+3,Lake C,False,missing pH value
+4,Lake D,False,missing turbidity value
+5,Lake E,False,pH value too low
diff --git a/src/__pycache__/clean_data.cpython-311.pyc b/src/__pycache__/clean_data.cpython-311.pyc
diff --git a/src/__pycache__/evaluate.cpython-311.pyc b/src/__pycache__/evaluate.cpython-311.pyc
diff --git a/src/__pycache__/load_data.cpython-311.pyc b/src/__pycache__/load_data.cpython-311.pyc
diff --git a/src/clean_data.py b/src/clean_data.py
@@ -1,7 +1,34 @@
-def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Clean sensor data by handling missing or invalid values.
+# src/clean_data.py
+import pandas as pd
 
-    Returns:
-        pd.DataFrame: Cleaned data.
-    """
+# Sensor columns that are coerced to numeric values during cleaning.
+_NUMERIC_COLUMNS = ('ph', 'turbidity', 'temperature')
+
+
+def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Clean sensor data by handling missing or invalid values.
+
+    The 'ph', 'turbidity' and 'temperature' columns are converted to
+    numbers. Cells that are missing or cannot be parsed as a number
+    become 0.
+
+    Args:
+        df (pd.DataFrame): Raw sensor data; it is not modified.
+
+    Returns:
+        pd.DataFrame: Cleaned copy of the data.
+
+    Raises:
+        KeyError: If one of the three columns is absent from ``df``.
+    """
+    # Work on a copy so the caller's DataFrame is left untouched.
+    df_clean = df.copy()
+
+    # errors='coerce' turns unparseable cells into NaN, so a single
+    # fillna(0) covers both missing and malformed readings.
+    for column in _NUMERIC_COLUMNS:
+        df_clean[column] = pd.to_numeric(
+            df_clean[column], errors='coerce').fillna(0)
+
+    return df_clean
diff --git a/src/evaluate.py b/src/evaluate.py
@@ -1,9 +1,76 @@
+import pandas as pd
+
+
 class WaterQualityEvaluator:
-    def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
-        self.ph_range = ph_range
-        self.turbidity_threshold = turbidity_threshold
+    """
+    Judge water safety from pH and turbidity sensor readings.
+
+    A reading counts as safe when both values are present and non-zero,
+    the pH lies within [ph_min, ph_max] and the turbidity does not exceed
+    turbidity_max.
+    """
+
+    def __init__(self, ph_min=6.5, ph_max=8.5, turbidity_max=1.0):
+        """
+        Store the thresholds used to judge a reading.
+
+        Args:
+            ph_min (float): Lowest pH still considered safe (default: 6.5).
+            ph_max (float): Highest pH still considered safe (default: 8.5).
+            turbidity_max (float): Highest turbidity still considered safe
+                (default: 1.0).
+        """
+        self.ph_min, self.ph_max = ph_min, ph_max
+        self.turbidity_max = turbidity_max
 
-    def is_safe(self, row: pd.Series) -> bool:
+    def evaluate_row(self, row):
+        """
+        Decide whether one sensor reading is safe.
+
+        The checks run in a fixed order and the first one that fails
+        supplies the reason: missing pH, missing turbidity, pH out of range,
+        then turbidity too high.
+
+        Args:
+            row (pd.Series): A row of sensor data with 'ph' and 'turbidity'.
+
+        Returns:
+            tuple: (is_safe (bool), reason (str)); the reason is "Safe" when
+                every check passes.
+        """
+        ph, turbidity = row['ph'], row['turbidity']
+
+        # NOTE: 0 is treated as "no reading" in addition to NaN, so a sensor
+        # that genuinely reports 0 is flagged as missing as well.
+        if pd.isna(ph) or ph == 0:
+            return False, "missing pH value"
+        if pd.isna(turbidity) or turbidity == 0:
+            return False, "missing turbidity value"
+
+        if ph > self.ph_max:
+            return False, "pH value too high"
+        if ph < self.ph_min:
+            return False, "pH value too low"
+
+        if turbidity > self.turbidity_max:
+            return False, "turbidity too high"
+
+        return True, "Safe"
+
+    def evaluate_dataframe(self, df):
         """
-        Determine if a row of water data is safe.
+        Evaluate every row of a DataFrame for water safety.
+
+        Args:
+            df (pd.DataFrame): Sensor data with 'sensor_id', 'location',
+                'ph' and 'turbidity' columns.
+
+        Returns:
+            list: One (sensor_id, location, is_safe, reason) tuple per row,
+                in row order.
         """
+        def summarize(row):
+            is_safe, reason = self.evaluate_row(row)
+            return row['sensor_id'], row['location'], is_safe, reason
+
+        return [summarize(row) for _, row in df.iterrows()]
diff --git a/src/load_data.py b/src/load_data.py
@@ -1,10 +1,24 @@
-def load_csv(filepath: str) -> pd.DataFrame:
-    """
-    Load sensor data from a CSV file.
+# src/load_data.py
+import pandas as pd
 
+
+def load_sensor_data(file_path):
+    """
+    Read a CSV file of sensor readings into a pandas DataFrame.
+
     Args:
-        filepath (str): Path to the CSV file.
-
+        file_path (str): Location of the CSV file to read.
+
     Returns:
-        pd.DataFrame: Loaded data as a pandas DataFrame.
+        pd.DataFrame: The contents of the CSV file.
+
+    Raises:
+        FileNotFoundError: If the file does not exist; an error message
+            is printed before the exception propagates.
     """
+    try:
+        return pd.read_csv(file_path)
+    except FileNotFoundError:
+        # Report the missing path, then let the caller deal with it.
+        print(f"Error: File {file_path} is not found.")
+        raise
diff --git a/src/main.py b/src/main.py
@@ -0,0 +1,60 @@
+# src/main.py
+import argparse
+
+import pandas as pd
+
+from load_data import load_sensor_data
+from clean_data import clean_sensor_data
+from evaluate import WaterQualityEvaluator
+
+
+def main(file_path, location_filter=None, output_path='results.csv'):
+    """
+    Run the water quality monitoring pipeline.
+
+    Loads the sensor CSV, cleans it, optionally keeps only the rows whose
+    location contains ``location_filter``, evaluates every remaining row,
+    prints one status line per sensor and saves the results as CSV.
+
+    Args:
+        file_path (str): Path to the sensor data CSV file.
+        location_filter (str, optional): Case-insensitive text a row's
+            location must contain. It is matched literally, not as a
+            regular expression. If it matches no rows, a message is printed
+            and no results file is written.
+        output_path (str, optional): Path of the results CSV
+            (default: 'results.csv').
+    """
+    df_clean = clean_sensor_data(load_sensor_data(file_path))
+
+    if location_filter:
+        # regex=False: the filter comes straight from the command line, so
+        # characters such as '(' or '.' must be matched literally instead of
+        # being interpreted as (possibly invalid) regular-expression syntax.
+        matches = df_clean['location'].str.contains(
+            location_filter, case=False, na=False, regex=False)
+        df_clean = df_clean[matches]
+        if df_clean.empty:
+            print(f"No data found for location: {location_filter}")
+            return
+
+    results = WaterQualityEvaluator().evaluate_dataframe(df_clean)
+
+    for sensor_id, location, is_safe, reason in results:
+        status = "✅ Safe" if is_safe else f"❌ Unsafe ({reason})"
+        print(f"Sensor {sensor_id} at {location}: {status}")
+
+    results_df = pd.DataFrame(
+        results, columns=['sensor_id', 'location', 'is_safe', 'reason'])
+    results_df.to_csv(output_path, index=False)
+    print(f"Results saved to {output_path}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Water Quality Monitoring Pipeline")
+    parser.add_argument('--file', default='data/sensor_data.csv', help='Path to sensor data CSV')
+    parser.add_argument('--location', help='Filter by location name')
+    parser.add_argument('--output', default='results.csv', help='Path for the results CSV')
+    args = parser.parse_args()
+
+    main(args.file, args.location, args.output)