Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
index.py
env/
env/
venv/
1,007 changes: 6 additions & 1,001 deletions data/sensor_data.csv

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sensor_id,location,is_safe,reason
1,Lake A,True,Safe
2,Lake B,False,pH value too high
3,Lake C,False,missing pH value
4,Lake D,False,missing turbidity value
5,Lake E,False,pH value too low
Binary file added src/__pycache__/clean_data.cpython-311.pyc
Binary file not shown.
Binary file added src/__pycache__/evaluate.cpython-311.pyc
Binary file not shown.
Binary file added src/__pycache__/load_data.cpython-311.pyc
Binary file not shown.
21 changes: 15 additions & 6 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
"""
Clean sensor data by handling missing or invalid values.
# src/clean_data.py
import pandas as pd

Returns:
pd.DataFrame: Cleaned data.
"""
def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean sensor data by coercing the measurement columns to numbers.

    Missing or unparseable readings in 'ph', 'turbidity' and 'temperature'
    are replaced with 0. The input DataFrame is left untouched.

    Args:
        df (pd.DataFrame): Raw sensor data with 'ph', 'turbidity' and
            'temperature' columns.

    Returns:
        pd.DataFrame: Cleaned copy of the data.

    Raises:
        KeyError: If one of the measurement columns is absent.
    """
    # Create a copy to avoid modifying the original DF
    df_clean = df.copy()

    # One coerce-then-fill pass handles both NaN and non-numeric text, so a
    # separate fillna before the conversion is not needed.
    for column in ('ph', 'turbidity', 'temperature'):
        df_clean[column] = pd.to_numeric(df_clean[column], errors='coerce').fillna(0)

    return df_clean
60 changes: 55 additions & 5 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,59 @@
import pandas as pd
class WaterQualityEvaluator:
    """
    A class to evaluate water quality based on pH and turbidity thresholds.

    A reading of 0 is treated the same as a missing reading, so data whose
    gaps were filled with 0 upstream is reported as "missing" rather than
    as out of range.
    """

    def __init__(self, ph_min=6.5, ph_max=8.5, turbidity_max=1.0):
        """
        Initialize with safe thresholds for pH & turbidity.

        Args:
            ph_min (float): Minimum safe pH value (default: 6.5).
            ph_max (float): Maximum safe pH value (default: 8.5).
            turbidity_max (float): Maximum safe turbidity value (default: 1.0).
        """
        self.ph_min = ph_min
        self.ph_max = ph_max
        self.turbidity_max = turbidity_max

    @staticmethod
    def _reading(value):
        """Return ``value`` as a float, or None when it counts as missing.

        NaN/None, 0, and anything that cannot be converted to a float
        (e.g. free text in an uncleaned column) all count as missing.
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            return None
        if pd.isna(number) or number == 0:
            return None
        return number

    def evaluate_row(self, row):
        """
        Evaluate a single row of sensor data for water safety.

        Checks run in a fixed order: missing pH, missing turbidity,
        pH out of range, turbidity above the limit. The first failing
        check determines the reason.

        Args:
            row (pd.Series): A row of sensor data with 'ph' and 'turbidity'.
                Any mapping supporting ``row['ph']`` works.

        Returns:
            tuple: (is_safe (bool), reason (str))
        """
        ph = self._reading(row['ph'])
        turbidity = self._reading(row['turbidity'])

        if ph is None:
            return False, "missing pH value"
        if turbidity is None:
            return False, "missing turbidity value"
        if ph > self.ph_max:
            return False, "pH value too high"
        if ph < self.ph_min:
            return False, "pH value too low"
        if turbidity > self.turbidity_max:
            return False, "turbidity too high"

        return True, "Safe"

    def evaluate_dataframe(self, df):
        """
        Evaluate all rows in the DataFrame for water safety.

        Args:
            df (pd.DataFrame): DataFrame with sensor data; must contain
                'sensor_id', 'location', 'ph' and 'turbidity' columns.

        Returns:
            list: List of tuples (sensor_id, location, is_safe, reason).
        """
        return [
            (row['sensor_id'], row['location'], *self.evaluate_row(row))
            for _, row in df.iterrows()
        ]
22 changes: 16 additions & 6 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
def load_csv(filepath: str) -> pd.DataFrame:
"""
Load sensor data from a CSV file.
# src/load_data.py
import pandas as pd

def load_sensor_data(file_path: str) -> pd.DataFrame:
    """
    Load sensor data from a CSV file into a pandas DataFrame.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: Loaded sensor data.

    Raises:
        FileNotFoundError: If the CSV file is not found.
    """
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        # Tell the user which path was missing, then let the caller decide
        # how to handle the failure.
        print(f"Error: File {file_path} is not found.")
        raise
    return df
53 changes: 53 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# src/main.py
import argparse, pandas as pd

from load_data import load_sensor_data
from clean_data import clean_sensor_data
from evaluate import WaterQualityEvaluator

def main(file_path, location_filter=None, output_path='results.csv'):
    """
    Run the water quality monitoring pipeline.

    Loads the CSV, cleans it, optionally narrows it to matching locations,
    evaluates every remaining row, prints one line per sensor and writes
    the results to a CSV file.

    Args:
        file_path (str): Path to the sensor data CSV file.
        location_filter (str, optional): Filter data by location name.
            Matched as a case-insensitive literal substring.
        output_path (str, optional): Where to write the results CSV
            (default: 'results.csv').
    """
    # Load data
    df = load_sensor_data(file_path)

    # Clean data
    df_clean = clean_sensor_data(df)

    # Filter by location if provided
    if location_filter:
        # regex=False: the filter is user-typed text, not a pattern, so
        # characters such as "(" or "." must match literally instead of
        # raising re.error or over-matching.
        matches = df_clean['location'].str.contains(
            location_filter, case=False, na=False, regex=False)
        df_clean = df_clean[matches]
        if df_clean.empty:
            print(f"No data found for location: {location_filter}")
            return

    # Evaluate water quality
    evaluator = WaterQualityEvaluator()
    results = evaluator.evaluate_dataframe(df_clean)

    # Print results
    for sensor_id, location, is_safe, reason in results:
        status = "✅ Safe" if is_safe else f"❌ Unsafe ({reason})"
        print(f"Sensor {sensor_id} at {location}: {status}")

    # Save results to CSV
    results_df = pd.DataFrame(
        results, columns=['sensor_id', 'location', 'is_safe', 'reason'])

    results_df.to_csv(output_path, index=False)
    print(f"Results saved to {output_path}")

if __name__ == "__main__":
    # Command-line entry point: read the input path and the optional
    # location filter, then hand both to the pipeline.
    cli = argparse.ArgumentParser(description="Water Quality Monitoring Pipeline")
    cli.add_argument('--file', default='data/sensor_data.csv', help='Path to sensor data CSV')
    cli.add_argument('--location', help='Filter by location name')
    options = cli.parse_args()

    main(options.file, options.location)