Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions data/results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
sensor_id,location,is_safe,reason
1,Lake A,True,Safe
1,007 changes: 6 additions & 1,001 deletions data/sensor_data.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pandas
pandas==2.2.3
Binary file added src/__pycache__/clean_data.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/evaluate.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/load_data.cpython-313.pyc
Binary file not shown.
33 changes: 29 additions & 4 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,32 @@
import pandas as pd


def clean_data(df):
    """
    Clean the DataFrame by handling missing values and invalid entries.

    Each measurement column is coerced to numeric (unparseable values become
    NaN), NaNs are replaced with the column mean, and pH/turbidity are clipped
    into their physically valid ranges.

    Args:
        df (pd.DataFrame): Input DataFrame with sensor data.

    Returns:
        pd.DataFrame: Cleaned DataFrame (the caller's DataFrame is untouched).
    """
    # Work on a copy so the original DataFrame is not modified.
    cleaned = df.copy()

    # Coerce each measurement column to numeric and fill gaps with its mean.
    for column in ('ph', 'turbidity', 'temperature'):
        values = pd.to_numeric(cleaned[column], errors='coerce')
        cleaned[column] = values.fillna(values.mean())

    # Clamp to valid ranges: pH lies in 0–14, turbidity cannot be negative.
    cleaned['ph'] = cleaned['ph'].clip(lower=0, upper=14)
    cleaned['turbidity'] = cleaned['turbidity'].clip(lower=0)

    return cleaned
7 changes: 7 additions & 0 deletions src/clean_data.py.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# NOTE(review): stub from a backup (.bak) file — the body was never
# implemented, so the function implicitly returns None despite the annotated
# pd.DataFrame return type. The `pd` used in the annotations has no visible
# import in this file — TODO confirm before reusing this stub.
def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean sensor data by handling missing or invalid values.

    Returns:
        pd.DataFrame: Cleaned data.
    """
105 changes: 99 additions & 6 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,102 @@
import pandas as pd


class SensorReading:
    """Model a single sensor reading and evaluate its safety."""

    def __init__(self, sensor_id, location, ph, turbidity, temperature):
        """
        Initialize a sensor reading.

        Args:
            sensor_id (int): Sensor identifier.
            location (str): Location of the sensor.
            ph (float): pH value.
            turbidity (float): Turbidity value in NTU.
            temperature (float): Temperature in degrees Celsius.
        """
        self.sensor_id = sensor_id
        self.location = location
        self.ph = ph
        self.turbidity = turbidity
        self.temperature = temperature
        # Both are populated by evaluate_safety(); None until then.
        self.status = None
        self.reason = None

    def evaluate_safety(self):
        """
        Evaluate if the reading is safe based on pH and turbidity.

        Safe ranges:
            - pH: 6.5–8.5 (inclusive)
            - Turbidity: 0–1 NTU (inclusive)

        Returns:
            tuple: (bool, str) indicating (is_safe, reason).
        """
        is_safe = True
        reasons = []

        # Check pH: missing, or outside 6.5–8.5.
        if pd.isna(self.ph):
            is_safe = False
            reasons.append("missing pH")
        elif not (6.5 <= self.ph <= 8.5):
            is_safe = False
            reasons.append("pH too high" if self.ph > 8.5 else "pH too low")

        # Check turbidity: missing, or outside 0–1 NTU. A negative value is
        # physically invalid, so report it distinctly instead of the
        # misleading "turbidity too high" the old code produced.
        if pd.isna(self.turbidity):
            is_safe = False
            reasons.append("missing turbidity")
        elif not (0 <= self.turbidity <= 1):
            is_safe = False
            reasons.append(
                "turbidity too high" if self.turbidity > 1 else "negative turbidity"
            )

        self.status = is_safe
        self.reason = ", ".join(reasons) if reasons else "Safe"

        return self.status, self.reason

class WaterQualityEvaluator:
    """Evaluate water quality for a collection of sensor readings."""

    def __init__(self):
        # Readings accumulate here; evaluate_all() processes them in order.
        self.readings = []

    def add_reading(self, sensor_id, location, ph, turbidity, temperature):
        """
        Add a sensor reading to the evaluator.

        Args:
            sensor_id (int): Sensor identifier.
            location (str): Location of the sensor.
            ph (float): pH value.
            turbidity (float): Turbidity value in NTU.
            temperature (float): Temperature in degrees Celsius.
        """
        self.readings.append(
            SensorReading(sensor_id, location, ph, turbidity, temperature)
        )

    def evaluate_all(self):
        """
        Evaluate every stored sensor reading.

        Returns:
            list: Tuples of (sensor_id, location, is_safe, reason).
        """
        return [
            (reading.sensor_id, reading.location, *reading.evaluate_safety())
            for reading in self.readings
        ]

    def count_safety_status(self):
        """
        Count the number of safe and unsafe readings.

        Only meaningful after evaluate_all(): readings not yet evaluated
        have status None and therefore count as unsafe.

        Returns:
            tuple: (safe_count, unsafe_count)
        """
        safe_count = len([r for r in self.readings if r.status])
        return safe_count, len(self.readings) - safe_count
27 changes: 21 additions & 6 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
import pandas as pd


def load_data(file_path):
    """
    Load a CSV file into a pandas DataFrame.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: DataFrame containing the CSV data.

    Raises:
        FileNotFoundError: If the CSV file is not found.
        pd.errors.EmptyDataError: If the CSV file is empty.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        # Report the problem, then let the caller decide how to recover.
        print(f"Error: File '{file_path}' not found.")
        raise
    except pd.errors.EmptyDataError:
        print(f"Error: File '{file_path}' is empty.")
        raise
20 changes: 20 additions & 0 deletions src/load_data.py.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pandas as pd


def load_csv(filepath: str) -> pd.DataFrame:
    """
    Load sensor data from a CSV file.

    Args:
        filepath (str): Path to the CSV file.

    Returns:
        pd.DataFrame: Loaded data as a pandas DataFrame.

    Raises:
        FileNotFoundError: If the CSV file is not found.
        pd.errors.EmptyDataError: If the CSV file is empty.
    """
    # Bug fix: the original body referenced an undefined name `file_path`
    # (the parameter is `filepath`), so every call raised NameError.
    try:
        df = pd.read_csv(filepath)
        return df
    except FileNotFoundError:
        print(f"Error: File '{filepath}' not found.")
        raise
    except pd.errors.EmptyDataError:
        print(f"Error: File '{filepath}' is empty.")
        raise
68 changes: 68 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import sys
import pandas as pd
from load_data import load_data
from clean_data import clean_data
from evaluate import WaterQualityEvaluator

def main(location_filter=None):
    """
    Run the water quality monitoring pipeline.

    Loads the raw sensor CSV, cleans it, evaluates each reading, prints a
    per-sensor report plus a summary, and saves the results to CSV.

    Args:
        location_filter (str, optional): If given, only report sensors whose
            location contains this substring (case-insensitive).
    """
    # Load data; abort cleanly with a message on any load failure.
    try:
        df = load_data("../data/sensor_data.csv")
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return
    except pd.errors.EmptyDataError as e:
        print(f"Error: CSV file is empty - {e}")
        return
    except Exception as e:
        print(f"Unexpected error: {e}")
        return

    # Clean data
    df_clean = clean_data(df)

    # Evaluate safety of every row.
    evaluator = WaterQualityEvaluator()
    for _, row in df_clean.iterrows():
        evaluator.add_reading(
            row['sensor_id'],
            row['location'],
            row['ph'],
            row['turbidity'],
            row['temperature']
        )

    results = evaluator.evaluate_all()

    # Filter by location if provided. str(loc) guards against non-string
    # locations (e.g. NaN from a missing CSV cell — clean_data does not touch
    # the location column), which would otherwise crash on .lower().
    if location_filter:
        needle = location_filter.lower()
        results = [(sid, loc, safe, reason) for sid, loc, safe, reason in results
                   if needle in str(loc).lower()]

    # Print per-sensor results.
    for sensor_id, location, is_safe, reason in results:
        status = "[Safe]" if is_safe else f"[Unsafe] ({reason})"
        print(f"Sensor {sensor_id} at {location}: {status}")

    # Count safe vs unsafe (bonus)
    safe_count, unsafe_count = evaluator.count_safety_status()
    print(f"\nSummary: {safe_count} safe, {unsafe_count} unsafe")

    # Save results to CSV (bonus). The message now matches the actual
    # output path (the old message omitted the leading "../").
    results_df = pd.DataFrame(
        results,
        columns=['sensor_id', 'location', 'is_safe', 'reason']
    )
    results_df.to_csv("../data/results.csv", index=False)
    print("Results saved to ../data/results.csv")


if __name__ == "__main__":
    # Accept an optional location filter from the command line (bonus).
    location = sys.argv[1] if len(sys.argv) > 1 else None
    main(location)
68 changes: 68 additions & 0 deletions src/main.py.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import sys
import pandas as pd
from load_data import load_data
from clean_data import clean_data
from evaluate import WaterQualityEvaluator

# NOTE(review): backup (.bak) copy of src/main.py. Paths here are relative to
# the repo root ("data/...") unlike the live version — TODO confirm which is
# intended before reviving this copy.
def main(location_filter=None):
    """
    Run the water quality monitoring pipeline.

    Args:
        location_filter (str, optional): Filter results by location name.
    """
    # Load data
    try:
        df = load_data("data/sensor_data.csv")
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return
    except pd.errors.EmptyDataError as e:
        print(f"Error: CSV file is empty - {e}")
        return
    except Exception as e:
        print(f"Unexpected error: {e}")
        return

    # Clean data
    df_clean = clean_data(df)

    # Evaluate safety
    evaluator = WaterQualityEvaluator()
    for _, row in df_clean.iterrows():
        evaluator.add_reading(
            row['sensor_id'],
            row['location'],
            row['ph'],
            row['turbidity'],
            row['temperature']
        )

    results = evaluator.evaluate_all()

    # Filter by location if provided
    # NOTE(review): loc.lower() assumes every location is a string — a missing
    # CSV cell would arrive as NaN and raise AttributeError here; verify.
    if location_filter:
        results = [(sid, loc, safe, reason) for sid, loc, safe, reason in results
                   if location_filter.lower() in loc.lower()]

    # Print results
    for sensor_id, location, is_safe, reason in results:
        status = "✅ Safe" if is_safe else f"❌ Unsafe ({reason})"
        print(f"Sensor {sensor_id} at {location}: {status}")

    # Count safe vs unsafe (bonus)
    safe_count, unsafe_count = evaluator.count_safety_status()
    print(f"\nSummary: {safe_count} safe, {unsafe_count} unsafe")

    # Save results to CSV (save)
    results_df = pd.DataFrame(
        results,
        columns=['sensor_id', 'location', 'is_safe', 'reason']
    )
    results_df.to_csv("data/results.csv", index=False)
    print("Results saved to data/results.csv")

if __name__ == "__main__":
    # Accept location name from terminal (bonus)
    location = sys.argv[1] if len(sys.argv) > 1 else None
    main(location)
6 changes: 6 additions & 0 deletions src/test_clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from load_data import load_data
from clean_data import clean_data

# Smoke-test: load, clean, and print the sensor data.
# Use a repo-relative path (same convention as test_evaluate.py) instead of a
# machine-specific absolute path, so the script runs on any checkout.
df = load_data("../data/sensor_data.csv")
df_clean = clean_data(df)
print(df_clean)
21 changes: 21 additions & 0 deletions src/test_evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from load_data import load_data
from clean_data import clean_data
from evaluate import WaterQualityEvaluator

# Smoke-test script: load the raw sensor CSV, clean it, evaluate every
# reading, and print a per-sensor safety report.
df = load_data("../data/sensor_data.csv")
df_clean = clean_data(df)

# Feed each cleaned row into the evaluator.
evaluator = WaterQualityEvaluator()
for _, row in df_clean.iterrows():
    evaluator.add_reading(
        row['sensor_id'],
        row['location'],
        row['ph'],
        row['turbidity'],
        row['temperature']
    )

# Print one status line per sensor.
results = evaluator.evaluate_all()
for sensor_id, location, is_safe, reason in results:
    status = "Safe" if is_safe else f"Unsafe ({reason})"
    print(f"Sensor {sensor_id} at {location}: {status}")
4 changes: 4 additions & 0 deletions src/test_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from load_data import load_data

# Smoke-test: load and print the sensor data.
# Use a repo-relative path (same convention as test_evaluate.py) instead of a
# machine-specific absolute path, so the script runs on any checkout.
df = load_data("../data/sensor_data.csv")
print(df)