# --- src/clean_data.py ---
import pandas as pd


def clean_sensor_data(
    df: pd.DataFrame,
    columns=("pH", "turbidity", "temperature"),
) -> pd.DataFrame:
    """Return a copy of *df* with the reading columns converted to numbers.

    Values that cannot be parsed as numbers become NaN instead of raising,
    so later stages can detect them with ``pd.isna``.

    Args:
        df: Raw sensor readings. It is not modified.
        columns: Names of the columns to convert. Defaults to the three
            reading columns the pipeline uses.

    Returns:
        A new DataFrame with each column in *columns* converted by
        ``pd.to_numeric(..., errors="coerce")``.

    Raises:
        KeyError: If any of *columns* is absent from *df*.
    """
    # Check up front so the caller gets every missing name in one message
    # rather than a bare KeyError for only the first one.
    missing = [name for name in columns if name not in df.columns]
    if missing:
        raise KeyError(f"Missing required sensor column(s): {missing}")

    cleaned = df.copy()
    for name in columns:
        # errors="coerce" turns unparseable entries into NaN.
        cleaned[name] = pd.to_numeric(cleaned[name], errors="coerce")
    return cleaned
import csv
import os

import pandas as pd


# --- src/clean_data.py (continued) ---
def _lake_label(index: int) -> str:
    """Return a spreadsheet-style letter label: 0 -> 'A', 25 -> 'Z', 26 -> 'AA'."""
    letters = ""
    index += 1
    while index > 0:
        index, remainder = divmod(index - 1, 26)
        letters = chr(65 + remainder) + letters
    return letters


def assign_sensor_and_location(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with generated ``sensor`` and ``location`` columns.

    Rows are labelled by position: ``Sensor 001``, ``Sensor 002``, ... and
    ``Lake A``, ``Lake B``, ... After ``Lake Z`` the labels continue with
    ``Lake AA``, ``Lake AB``, ... instead of running into non-letter
    characters.

    Args:
        df: Sensor readings. It is not modified.

    Returns:
        A new DataFrame with the two label columns added (or overwritten).
    """
    labelled = df.copy()
    row_count = len(labelled)
    labelled['sensor'] = [f"Sensor {i + 1:03d}" for i in range(row_count)]
    labelled['location'] = [f"Lake {_lake_label(i)}" for i in range(row_count)]
    return labelled


# --- src/evaluate.py ---
class WaterQualityEvaluator:
    """Classify a reading as safe or unsafe from pH and turbidity thresholds."""

    def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
        # Inclusive (low, high) bounds for an acceptable pH.
        self.ph_range = ph_range
        # Highest turbidity still considered safe.
        self.turbidity_threshold = turbidity_threshold

    def is_safe(self, pH, turbidity):
        """Return ``(is_safe, reason)`` for one reading.

        Args:
            pH: pH reading; NaN/None or 0 counts as missing.
            turbidity: Turbidity reading; NaN/None, 0 or a negative value
                counts as missing or invalid.

        Returns:
            ``(True, "Safe")`` when both readings are within limits,
            otherwise ``(False, <reason>)``.
        """
        # NOTE(review): a reading of exactly 0 is rejected as invalid,
        # presumably because 0 is used as a placeholder - confirm.
        if pd.isna(pH) or pH == 0:
            return (False, "missing or invalid pH")
        # A negative turbidity would otherwise pass the threshold check
        # below and be reported as safe, so reject it here.
        if pd.isna(turbidity) or turbidity <= 0:
            return (False, "missing or invalid turbidity")
        if pH < self.ph_range[0]:
            return (False, "pH too low")
        if pH > self.ph_range[1]:
            return (False, "pH too high")
        if turbidity > self.turbidity_threshold:
            return (False, "turbidity too high")
        return (True, "Safe")


def evaluate_and_save_results(df, output_path, show_top_n=5):
    """Evaluate every row of *df*, print a summary and export the first rows.

    Args:
        df: DataFrame with ``sensor`` and ``location`` columns; ``pH``,
            ``turbidity`` and ``temperature`` are read when present.
        output_path: Destination CSV path. Its parent directory is created
            when needed; a bare filename writes to the current directory.
        show_top_n: Number of leading results to preview and export.

    Returns:
        None. The results are printed and written to *output_path*.
    """
    evaluator = WaterQualityEvaluator()
    results = []
    safe_count = 0

    print("\nEvaluation Summary:\n")

    # Evaluate all sensor readings.
    for _, row in df.iterrows():
        sensor = row['sensor']
        ph_value = row.get('pH')
        turbidity = row.get('turbidity')

        is_safe, reason = evaluator.is_safe(ph_value, turbidity)
        if is_safe:
            safe_count += 1

        results.append({
            "sensor": sensor,
            "status": "✅ Safe" if is_safe else f"❌ Unsafe ({reason})",
            # The numeric id is the trailing token of e.g. "Sensor 007".
            "sensor_id": int(sensor.split()[-1]),
            "location": row['location'],
            "ph": ph_value,
            "turbidity": turbidity,
            "temperature": row.get('temperature', ''),
        })

    unsafe_count = len(results) - safe_count

    print(f"Total Sensors Evaluated: {len(results)}")
    print(f"✅ Safe Sensors: {safe_count}")
    print(f"❌ Unsafe Sensors: {unsafe_count}\n")

    # Preview the first N results.
    print(f" Preview of Top {show_top_n} Results:\n")
    top_results = results[:show_top_n]
    for entry in top_results:
        print(f"{entry['sensor']} at {entry['location']}: {entry['status']}")

    # Only the previewed rows are exported, and without the status column.
    fieldnames = ["sensor_id", "location", "ph", "turbidity", "temperature"]
    export_results = [
        {name: entry[name] for name in fieldnames} for entry in top_results
    ]

    # os.makedirs("") raises FileNotFoundError, so skip directory creation
    # when output_path has no directory component.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(export_results)

    print(f"\nTop {show_top_n} results saved to '{output_path}'")
# --- src/load_data.py ---
import pandas as pd


def load_csv(filepath) -> pd.DataFrame:
    """Load sensor data from a CSV file.

    Args:
        filepath: Path to the CSV file (anything ``pd.read_csv`` accepts).

    Returns:
        The file contents as a pandas DataFrame.
    """
    return pd.read_csv(filepath)


# --- src/main.py ---
def main() -> None:
    """Load, clean, label and evaluate the water-quality data.

    Asks which lake to evaluate (empty input means all lakes), then writes
    the evaluation of the selected rows to ``results/results.csv``.
    """
    import sys
    from pathlib import Path

    # Project-local imports stay inside main() so importing this module has
    # no side effects and needs no sibling modules on the path.
    from load_data import load_csv
    from clean_data import clean_sensor_data, assign_sensor_and_location
    from evaluate import evaluate_and_save_results

    # Resolve paths from this file's location instead of a hard-coded
    # per-user absolute path. Assumes this script sits in <project>/src
    # with data/ and results/ as siblings of src/ - confirm the layout.
    project_root = Path(__file__).resolve().parent.parent
    input_path = project_root / "data" / "sensor_data.csv"
    output_path = project_root / "results" / "results.csv"

    df_raw = load_csv(str(input_path))
    df_clean = clean_sensor_data(df_raw)
    df_with_labels = assign_sensor_and_location(df_clean)

    lake_choice = input("Enter the lake you want to evaluate (e.g. 'Lake A'), or press Enter to evaluate all lakes: ").strip()

    if lake_choice:
        df_selected = df_with_labels[df_with_labels['location'] == lake_choice]
        if df_selected.empty:
            print(f"No data found for '{lake_choice}'. Exiting.")
            # sys.exit is always available; the bare exit() helper is only
            # installed by the site module for interactive use.
            sys.exit()
    else:
        df_selected = df_with_labels

    evaluate_and_save_results(df_selected, str(output_path), show_top_n=5)


if __name__ == "__main__":
    main()