Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,000 changes: 1,000 additions & 0 deletions data/sensor_data_results.csv

Large diffs are not rendered by default.

Binary file added src/__pycache__/clean_data.cpython-311.pyc
Binary file not shown.
Binary file added src/__pycache__/evaluate.cpython-311.pyc
Binary file not shown.
Binary file added src/__pycache__/load_data.cpython-311.pyc
Binary file not shown.
20 changes: 20 additions & 0 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,27 @@
import pandas as pd
from load_data import load_csv
def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean sensor data by handling missing or invalid values.

    A row is kept only when all of the following hold:

    * no column in the row is missing,
    * ``timestamp`` parses as a datetime,
    * ``pH``, ``turbidity``, ``dissolved_oxygen`` and ``temperature`` are
      numeric (values that cannot be parsed as numbers count as invalid),
    * all four of those readings are non-negative.

    Args:
        df: Raw sensor readings. Must contain the ``timestamp`` column and
            the four numeric columns listed above.

    Returns:
        pd.DataFrame: Cleaned data with a fresh 0-based index. The input
        frame is left unmodified.

    Raises:
        KeyError: If one of the required columns is absent.
    """
    numeric_cols = ['pH', 'turbidity', 'dissolved_oxygen', 'temperature']

    # Take an explicit copy so the column assignments below neither touch the
    # caller's frame nor trigger pandas' SettingWithCopyWarning.
    cleaned = df.dropna().copy()

    # Unparseable timestamps become NaT and are dropped further down.
    cleaned['timestamp'] = pd.to_datetime(cleaned['timestamp'], errors='coerce')

    # Coerce instead of comparing raw values: a stray string such as "error"
    # would otherwise raise TypeError on the ``>= 0`` check.
    for col in numeric_cols:
        cleaned[col] = pd.to_numeric(cleaned[col], errors='coerce')

    cleaned = cleaned.dropna(subset=['timestamp', *numeric_cols])

    # NOTE(review): temperature is required to be non-negative as well, which
    # rejects sub-zero readings if the unit is Celsius - confirm this is intended.
    non_negative = (cleaned[numeric_cols] >= 0).all(axis=1)

    return cleaned[non_negative].reset_index(drop=True)

27 changes: 27 additions & 0 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd

class WaterQualityEvaluator:
def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
self.ph_range = ph_range
Expand All @@ -7,3 +9,28 @@ def is_safe(self, row: pd.Series) -> bool:
"""
Determine if a row of water data is safe.
"""
if pd.isna(row.get("pH")) or pd.isna(row.get("turbidity")):
return False

return (
self.ph_range[0] <= row["pH"] <= self.ph_range[1]
and row["turbidity"] <= self.turbidity_threshold
)

def evaluate(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Evaluate safety of each row and return a DataFrame with results.

    Args:
        df: Sensor readings, one row per reading. An optional ``location``
            column is used for labelling; a missing value is shown as
            "Unknown".

    Returns:
        pd.DataFrame: One row per input row with two columns, ``sensor``
        (a label of the form "Sensor <n> at <location>") and ``status``.
        The columns are present even when ``df`` is empty.
    """
    results = []

    # Number sensors by row position rather than by index label, so the
    # labels stay 1..N for any index (strings, dates, gaps after filtering).
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        location = row.get("location")
        if pd.isna(location):
            location = "Unknown"

        status = " Safe" if self.is_safe(row) else " Unsafe"
        results.append(
            {"sensor": f"Sensor {position} at {location}", "status": status}
        )

    # Pin the columns so an empty input still yields the expected schema.
    return pd.DataFrame(results, columns=["sensor", "status"])



9 changes: 8 additions & 1 deletion src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
def load_csv(filepath: str) -> pd.DataFrame:
import pandas as pd


def load_csv(filepath: str) -> pd.DataFrame:
    """
    Load sensor data from a CSV file.

    Args:
        filepath: Path to the CSV file to read.

    Returns:
        pd.DataFrame: Loaded data as a pandas DataFrame.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist (propagated from
            ``pd.read_csv``).
    """
    return pd.read_csv(filepath)


66 changes: 66 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from load_data import load_csv
from clean_data import clean_sensor_data
from evaluate import WaterQualityEvaluator
import pandas as pd

def show_menu():
    """Print the interactive menu, one option per line, to standard output."""
    menu_lines = (
        "\n🌊 Water Quality Monitoring Menu",
        "1. Load and view raw sensor data",
        "2. Clean data (handle missing/invalid)",
        "3. Evaluate water safety and save results",
        "4. Exit",
    )
    for menu_line in menu_lines:
        print(menu_line)

# Local import: only the results-path handling below needs it.
from pathlib import Path

# Session state shared across menu iterations.
raw_df = None
cleaned_df = None
results_df = None
data_path = None

while True:
    show_menu()
    choice = input("\nEnter your choice (1-4): ").strip()

    if choice == "1":
        requested_path = input(" Enter full path to the sensor data CSV file: ").strip()
        try:
            loaded_df = load_csv(requested_path)
        except FileNotFoundError:
            print("❌ File not found. Please check the path and try again.")
        except (
            OSError,
            UnicodeDecodeError,
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
        ) as exc:
            # Unreadable or malformed files should not end the session.
            print(f"❌ Could not read the file: {exc}")
        else:
            # Commit the new path only after a successful load, so a failed
            # attempt cannot redirect where later results are saved.
            raw_df = loaded_df
            data_path = requested_path
            # Anything derived from the previously loaded file is now stale.
            cleaned_df = None
            results_df = None
            print("\n Raw Sensor Data Preview:")
            print(raw_df.head())

    elif choice == "2":
        if raw_df is None:
            print("⚠️ Please load the raw data first (Option 1).")
        else:
            try:
                cleaned_df = clean_sensor_data(raw_df)
            except KeyError as exc:
                # Raised when the CSV lacks a column the cleaner requires.
                print(f"❌ Required column missing from the data: {exc}")
            else:
                print("\n✅ Cleaned Data Preview:")
                print(cleaned_df.head())

    elif choice == "3":
        if cleaned_df is None:
            print(" Please clean the data first (Option 2).")
        else:
            location = input("📍 Enter the location name for this data (e.g., 'Lake A'): ").strip()
            # Attach the location on a copy; the cleaned data stays reusable.
            labelled_df = cleaned_df.assign(location=location)

            evaluator = WaterQualityEvaluator()
            results_df = evaluator.evaluate(labelled_df)

            print("\n📋 Evaluation Results:")
            for _, row in results_df.iterrows():
                print(f"{row['sensor']}: {row['status']}")

            # Build "<input stem>_results.csv" next to the input file. Going
            # through the path's stem (rather than str.replace on ".csv")
            # guarantees the output name differs from the input, so the
            # source file is never overwritten, and directory names that
            # happen to contain ".csv" are left alone.
            if data_path:
                source_file = Path(data_path)
                save_path = str(source_file.with_name(f"{source_file.stem}_results.csv"))
            else:
                save_path = "results.csv"

            try:
                results_df.to_csv(save_path, index=False)
            except OSError as exc:
                print(f"❌ Could not save results to {save_path}: {exc}")
            else:
                print(f"💾 Results saved to {save_path}")

    elif choice == "4":
        print("👋 Exiting. Stay hydrated!")
        break

    else:
        print("❌ Invalid choice. Please enter a number between 1 and 4.")