Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pandas
Binary file added src/__pycache__/evaluate.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/load_data.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/sensor.cpython-313.pyc
Binary file not shown.
22 changes: 22 additions & 0 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,29 @@
import pandas as pd

def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean sensor data by handling missing or invalid values.

    Steps, in order:
      1. Drop rows containing any missing values.
      2. Drop exact duplicate rows.
      3. Normalize column names to snake_case (strip, lowercase, spaces -> underscores).
      4. If a 'date' column exists, parse it to datetime and discard rows
         whose value could not be parsed.
      5. Reset the index to a contiguous 0..n-1 range.

    Args:
        df (pd.DataFrame): Raw sensor data.

    Returns:
        pd.DataFrame: Cleaned data.
    """
    # Drop incomplete rows first so later steps only see complete records.
    df = df.dropna()

    # Remove exact duplicate readings.
    df = df.drop_duplicates()

    # Standardize column names, e.g. ' Sensor ID ' -> 'sensor_id'.
    df.columns = [col.strip().lower().replace(' ', '_') for col in df.columns]

    # errors='coerce' turns unparseable dates into NaT, which are then
    # dropped so downstream code can rely on valid datetimes.
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
        df = df.dropna(subset=['date'])  # remove rows where date conversion failed

    # Reset the index so it is contiguous after the row drops.
    df = df.reset_index(drop=True)

    print("Data cleaned successfully.")
    return df

32 changes: 24 additions & 8 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
import pandas as pd
# evaluate.py
class WaterQualityEvaluator:
    """
    Evaluate water-quality sensor readings against fixed pH and turbidity limits.

    Attributes:
        ph_min: Lowest acceptable pH value.
        ph_max: Highest acceptable pH value.
        turbidity_max: Highest acceptable turbidity value.
    """

    # NOTE: the diff residue contained an older __init__(ph_range, turbidity_threshold)
    # and an orphaned is_safe() stub; only the current interface is kept, matching
    # the call in main.py: WaterQualityEvaluator(ph_min=..., ph_max=..., turbidity_max=...).
    def __init__(self, ph_min: float, ph_max: float, turbidity_max: float):
        self.ph_min = ph_min
        self.ph_max = ph_max
        self.turbidity_max = turbidity_max

    def evaluate(self, sensor):
        """
        Classify a sensor reading and record the result on the reading itself.

        Sets ``sensor.safety_status`` to "Safe" or to
        "Unsafe (<comma-separated reasons>)" and returns the same object.

        Args:
            sensor: An object with ``ph`` and ``turbidity`` attributes;
                either value may be None or NaN.

        Returns:
            The same sensor object, with ``safety_status`` set.
        """
        reasons = []

        # pd.isna covers both None and float('nan') values coming from pandas rows.
        if sensor.ph is None or pd.isna(sensor.ph):
            reasons.append("missing pH")
        elif not (self.ph_min <= sensor.ph <= self.ph_max):
            reasons.append("pH too high" if sensor.ph > self.ph_max else "pH too low")

        if sensor.turbidity is None or pd.isna(sensor.turbidity):
            reasons.append("missing turbidity")
        elif sensor.turbidity > self.turbidity_max:
            reasons.append("turbidity too high")

        sensor.safety_status = f"Unsafe ({', '.join(reasons)})" if reasons else "Safe"
        return sensor


42 changes: 34 additions & 8 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,36 @@
def load_csv(filepath: str) -> pd.DataFrame:
"""
Load sensor data from a CSV file.
import pandas as pd

def read_csv(filepath: str) -> pd.DataFrame:
    """
    Load sensor data from a CSV file.

    Args:
        filepath (str): Path to the CSV file.

    Returns:
        pd.DataFrame: Loaded data, or None if the file could not be read
        (missing, empty, unparseable, or any other error). Callers must
        check for a None return.
    """
    try:
        data = pd.read_csv(filepath)
        print("File loaded successfully.")
        return data
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return None
    except pd.errors.EmptyDataError:
        print("Error: The file is empty.")
        return None
    except pd.errors.ParserError:
        print("Error: The file is corrupted.")
        return None
    except Exception as e:
        # Catch-all so the pipeline reports the problem instead of crashing.
        print(f"An unexpected error occurred: {e}")
        return None

def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a cleaned version of *df*: complete, de-duplicated rows with
    snake_case column names, valid parsed dates, and a fresh 0..n-1 index.
    """
    # Keep only complete, unique rows.
    cleaned = df.dropna().drop_duplicates()

    # Normalize every column name to snake_case.
    normalized = []
    for name in cleaned.columns:
        normalized.append(name.strip().lower().replace(' ', '_'))
    cleaned.columns = normalized

    # Parse dates; unparseable values become NaT and those rows are removed.
    if 'date' in cleaned.columns:
        cleaned['date'] = pd.to_datetime(cleaned['date'], errors='coerce')
        cleaned = cleaned.dropna(subset=['date'])

    cleaned = cleaned.reset_index(drop=True)
    print("Data cleaned successfully.")
    return cleaned



Args:
filepath (str): Path to the CSV file.

Returns:
pd.DataFrame: Loaded data as a pandas DataFrame.
"""
53 changes: 53 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# main.py
import pandas as pd
from load_data import read_csv, clean_data
from evaluate import WaterQualityEvaluator
from sensor import SensorReading
import csv

def run_pipeline(filepath: str):
    """
    End-to-end pipeline: load the CSV at *filepath*, clean it, evaluate every
    reading, print the readings matching a user-entered location, write ALL
    results to results.csv, and print a safe/unsafe summary.

    Args:
        filepath (str): Path to the sensor-data CSV file.
    """
    # Interactive filter: only readings whose location contains this substring
    # (case-insensitive) are printed; every reading is still evaluated and saved.
    location_input = input("Enter the lake or location name to analyze: ").strip()

    # read_csv returns None on any load failure (see load_data.py).
    df = read_csv(filepath)
    if df is None:
        print("Failed to load data.")
        return

    df = clean_data(df)

    evaluator = WaterQualityEvaluator(ph_min=6.5, ph_max=8.5, turbidity_max=5.0)
    results = []

    for _, row in df.iterrows():
        # Missing columns fall back to placeholders instead of raising.
        sensor = SensorReading(
            sensor_id=row.get('sensor_id', 'Unknown'),
            location=row.get('location', 'Unknown location'),
            ph=row.get('ph'),
            turbidity=row.get('turbidity')
        )

        # evaluate() sets sensor.safety_status and returns the same object.
        evaluated = evaluator.evaluate(sensor)

        # Filter by location name if provided
        if location_input.lower() in evaluated.location.lower():
            print(f"{evaluated.sensor_id} at {evaluated.location}: {evaluated.safety_status}")

        results.append(evaluated)

    # Save ALL evaluated readings (not just the filtered location) to CSV.
    # NOTE(review): assumes SensorReading.to_dict() yields exactly these
    # fieldnames — confirm against sensor.py.
    with open("results.csv", mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=["sensor_id", "location", "ph", "turbidity", "safety_status"])
        writer.writeheader()
        for sensor in results:
            writer.writerow(sensor.to_dict())

    # Summary counts ALL readings, regardless of the location filter above.
    safe = sum(1 for s in results if s.safety_status == "Safe")
    unsafe = len(results) - safe
    print(f"\nSummary: {safe} safe readings, {unsafe} unsafe readings.")

if __name__ == "__main__":
    # NOTE(review): hardcoded absolute Windows path — consider taking the
    # path from sys.argv or an environment variable so the pipeline runs
    # on other machines.
    run_pipeline(r"C:\Users\user\water_quality_monitoring\water_quality_monitoring\data\sensor_data.csv")



Loading