Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pandas
Binary file added src/__pycache__/evaluate.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/load_data.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/sensor.cpython-313.pyc
Binary file not shown.
22 changes: 22 additions & 0 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,29 @@
import pandas as pd

def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean sensor data by handling missing or invalid values.

    Steps, in order:
      1. Drop rows containing any missing values.
      2. Drop exact duplicate rows.
      3. Normalize column names to snake_case (strip, lowercase, spaces -> underscores).
      4. If a 'date' column exists, parse it to datetime and discard rows
         whose value could not be parsed.
      5. Reset the index to a contiguous 0..n-1 range.

    Args:
        df (pd.DataFrame): Raw sensor data.

    Returns:
        pd.DataFrame: Cleaned data.
    """
    # Drop incomplete rows first so later steps only see complete records.
    df = df.dropna()

    # Remove exact duplicate readings.
    df = df.drop_duplicates()

    # Standardize column names, e.g. ' Sensor ID ' -> 'sensor_id'.
    df.columns = [col.strip().lower().replace(' ', '_') for col in df.columns]

    # errors='coerce' turns unparseable dates into NaT, which are then
    # dropped so downstream code can rely on valid datetimes.
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
        df = df.dropna(subset=['date'])  # remove rows where date conversion failed

    # Reset the index so it is contiguous after the row drops.
    df = df.reset_index(drop=True)

    print("Data cleaned successfully.")
    return df

32 changes: 24 additions & 8 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
import pandas as pd
# evaluate.py
class WaterQualityEvaluator:
    """
    Evaluate water-quality sensor readings against fixed pH and turbidity limits.

    Attributes:
        ph_min: Lowest acceptable pH value.
        ph_max: Highest acceptable pH value.
        turbidity_max: Highest acceptable turbidity value.
    """

    # NOTE: the diff residue contained an older __init__(ph_range, turbidity_threshold)
    # and an orphaned is_safe() stub; only the current interface is kept, matching
    # the call in main.py: WaterQualityEvaluator(ph_min=..., ph_max=..., turbidity_max=...).
    def __init__(self, ph_min: float, ph_max: float, turbidity_max: float):
        self.ph_min = ph_min
        self.ph_max = ph_max
        self.turbidity_max = turbidity_max

    def evaluate(self, sensor):
        """
        Classify a sensor reading and record the result on the reading itself.

        Sets ``sensor.safety_status`` to "Safe" or to
        "Unsafe (<comma-separated reasons>)" and returns the same object.

        Args:
            sensor: An object with ``ph`` and ``turbidity`` attributes;
                either value may be None or NaN.

        Returns:
            The same sensor object, with ``safety_status`` set.
        """
        reasons = []

        # pd.isna covers both None and float('nan') values coming from pandas rows.
        if sensor.ph is None or pd.isna(sensor.ph):
            reasons.append("missing pH")
        elif not (self.ph_min <= sensor.ph <= self.ph_max):
            reasons.append("pH too high" if sensor.ph > self.ph_max else "pH too low")

        if sensor.turbidity is None or pd.isna(sensor.turbidity):
            reasons.append("missing turbidity")
        elif sensor.turbidity > self.turbidity_max:
            reasons.append("turbidity too high")

        sensor.safety_status = f"Unsafe ({', '.join(reasons)})" if reasons else "Safe"
        return sensor


42 changes: 34 additions & 8 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,36 @@
def load_csv(filepath: str) -> pd.DataFrame:
"""
Load sensor data from a CSV file.
import pandas as pd

def read_csv(filepath: str) -> pd.DataFrame:
    """
    Load sensor data from a CSV file.

    Args:
        filepath (str): Path to the CSV file.

    Returns:
        pd.DataFrame: Loaded data, or None if the file could not be read
        (missing, empty, unparseable, or any other error). Callers must
        check for a None return.
    """
    try:
        data = pd.read_csv(filepath)
        print("File loaded successfully.")
        return data
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return None
    except pd.errors.EmptyDataError:
        print("Error: The file is empty.")
        return None
    except pd.errors.ParserError:
        print("Error: The file is corrupted.")
        return None
    except Exception as e:
        # Catch-all so the pipeline reports the problem instead of crashing.
        print(f"An unexpected error occurred: {e}")
        return None

def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a cleaned version of *df*: complete, de-duplicated rows with
    snake_case column names, valid parsed dates, and a fresh 0..n-1 index.
    """
    # Keep only complete, unique rows.
    cleaned = df.dropna().drop_duplicates()

    # Normalize every column name to snake_case.
    normalized = []
    for name in cleaned.columns:
        normalized.append(name.strip().lower().replace(' ', '_'))
    cleaned.columns = normalized

    # Parse dates; unparseable values become NaT and those rows are removed.
    if 'date' in cleaned.columns:
        cleaned['date'] = pd.to_datetime(cleaned['date'], errors='coerce')
        cleaned = cleaned.dropna(subset=['date'])

    cleaned = cleaned.reset_index(drop=True)
    print("Data cleaned successfully.")
    return cleaned



Args:
filepath (str): Path to the CSV file.

Returns:
pd.DataFrame: Loaded data as a pandas DataFrame.
"""
53 changes: 53 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# main.py
import pandas as pd
from load_data import read_csv, clean_data
from evaluate import WaterQualityEvaluator
from sensor import SensorReading
import csv

def run_pipeline(filepath: str):
    """
    End-to-end pipeline: load the CSV at *filepath*, clean it, evaluate every
    reading, print the readings matching a user-entered location, write ALL
    results to results.csv, and print a safe/unsafe summary.

    Args:
        filepath (str): Path to the sensor-data CSV file.
    """
    # Interactive filter: only readings whose location contains this substring
    # (case-insensitive) are printed; every reading is still evaluated and saved.
    location_input = input("Enter the lake or location name to analyze: ").strip()

    # read_csv returns None on any load failure (see load_data.py).
    df = read_csv(filepath)
    if df is None:
        print("Failed to load data.")
        return

    df = clean_data(df)

    evaluator = WaterQualityEvaluator(ph_min=6.5, ph_max=8.5, turbidity_max=5.0)
    results = []

    for _, row in df.iterrows():
        # Missing columns fall back to placeholders instead of raising.
        sensor = SensorReading(
            sensor_id=row.get('sensor_id', 'Unknown'),
            location=row.get('location', 'Unknown location'),
            ph=row.get('ph'),
            turbidity=row.get('turbidity')
        )

        # evaluate() sets sensor.safety_status and returns the same object.
        evaluated = evaluator.evaluate(sensor)

        # Filter by location name if provided
        if location_input.lower() in evaluated.location.lower():
            print(f"{evaluated.sensor_id} at {evaluated.location}: {evaluated.safety_status}")

        results.append(evaluated)

    # Save ALL evaluated readings (not just the filtered location) to CSV.
    # NOTE(review): assumes SensorReading.to_dict() yields exactly these
    # fieldnames — confirm against sensor.py.
    with open("results.csv", mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=["sensor_id", "location", "ph", "turbidity", "safety_status"])
        writer.writeheader()
        for sensor in results:
            writer.writerow(sensor.to_dict())

    # Summary counts ALL readings, regardless of the location filter above.
    safe = sum(1 for s in results if s.safety_status == "Safe")
    unsafe = len(results) - safe
    print(f"\nSummary: {safe} safe readings, {unsafe} unsafe readings.")

if __name__ == "__main__":
    # NOTE(review): hardcoded absolute Windows path — consider taking the
    # path from sys.argv or an environment variable so the pipeline runs
    # on other machines.
    run_pipeline(r"C:\Users\user\water_quality_monitoring\water_quality_monitoring\data\sensor_data.csv")



Loading