Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions data/results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
sensor_id,location,is_safe,reason
1,Lake A,True,Safe
1,007 changes: 6 additions & 1,001 deletions data/sensor_data.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pandas
pandas==2.2.3
Binary file added src/__pycache__/clean_data.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/evaluate.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/load_data.cpython-313.pyc
Binary file not shown.
33 changes: 29 additions & 4 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,32 @@
import pandas as pd


def clean_data(df):
    """
    Clean the DataFrame by handling missing values and invalid entries.

    Each measurement column is coerced to numeric (unparseable values become
    NaN), NaNs are replaced with the column mean, and pH/turbidity are clipped
    into their physically valid ranges.

    Args:
        df (pd.DataFrame): Input DataFrame with sensor data.

    Returns:
        pd.DataFrame: Cleaned DataFrame (the caller's DataFrame is untouched).
    """
    # Work on a copy so the original DataFrame is not modified.
    cleaned = df.copy()

    # Coerce each measurement column to numeric and fill gaps with its mean.
    for column in ('ph', 'turbidity', 'temperature'):
        values = pd.to_numeric(cleaned[column], errors='coerce')
        cleaned[column] = values.fillna(values.mean())

    # Clamp to valid ranges: pH lies in 0–14, turbidity cannot be negative.
    cleaned['ph'] = cleaned['ph'].clip(lower=0, upper=14)
    cleaned['turbidity'] = cleaned['turbidity'].clip(lower=0)

    return cleaned
7 changes: 7 additions & 0 deletions src/clean_data.py.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# NOTE(review): stub from a backup (.bak) file — the body was never
# implemented, so the function implicitly returns None despite the annotated
# pd.DataFrame return type. The `pd` used in the annotations has no visible
# import in this file — TODO confirm before reusing this stub.
def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean sensor data by handling missing or invalid values.

    Returns:
        pd.DataFrame: Cleaned data.
    """
105 changes: 99 additions & 6 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,102 @@
import pandas as pd


class SensorReading:
    """Model a single sensor reading and evaluate its safety."""

    def __init__(self, sensor_id, location, ph, turbidity, temperature):
        """
        Initialize a sensor reading.

        Args:
            sensor_id (int): Sensor identifier.
            location (str): Location of the sensor.
            ph (float): pH value.
            turbidity (float): Turbidity value in NTU.
            temperature (float): Temperature in degrees Celsius.
        """
        self.sensor_id = sensor_id
        self.location = location
        self.ph = ph
        self.turbidity = turbidity
        self.temperature = temperature
        # Both are populated by evaluate_safety(); None until then.
        self.status = None
        self.reason = None

    def evaluate_safety(self):
        """
        Evaluate if the reading is safe based on pH and turbidity.

        Safe ranges:
            - pH: 6.5–8.5 (inclusive)
            - Turbidity: 0–1 NTU (inclusive)

        Returns:
            tuple: (bool, str) indicating (is_safe, reason).
        """
        is_safe = True
        reasons = []

        # Check pH: missing, or outside 6.5–8.5.
        if pd.isna(self.ph):
            is_safe = False
            reasons.append("missing pH")
        elif not (6.5 <= self.ph <= 8.5):
            is_safe = False
            reasons.append("pH too high" if self.ph > 8.5 else "pH too low")

        # Check turbidity: missing, or outside 0–1 NTU. A negative value is
        # physically invalid, so report it distinctly instead of the
        # misleading "turbidity too high" the old code produced.
        if pd.isna(self.turbidity):
            is_safe = False
            reasons.append("missing turbidity")
        elif not (0 <= self.turbidity <= 1):
            is_safe = False
            reasons.append(
                "turbidity too high" if self.turbidity > 1 else "negative turbidity"
            )

        self.status = is_safe
        self.reason = ", ".join(reasons) if reasons else "Safe"

        return self.status, self.reason

class WaterQualityEvaluator:
    """Evaluate water quality for a collection of sensor readings."""

    def __init__(self):
        # Readings accumulate here; evaluate_all() processes them in order.
        self.readings = []

    def add_reading(self, sensor_id, location, ph, turbidity, temperature):
        """
        Add a sensor reading to the evaluator.

        Args:
            sensor_id (int): Sensor identifier.
            location (str): Location of the sensor.
            ph (float): pH value.
            turbidity (float): Turbidity value in NTU.
            temperature (float): Temperature in degrees Celsius.
        """
        self.readings.append(
            SensorReading(sensor_id, location, ph, turbidity, temperature)
        )

    def evaluate_all(self):
        """
        Evaluate every stored sensor reading.

        Returns:
            list: Tuples of (sensor_id, location, is_safe, reason).
        """
        return [
            (reading.sensor_id, reading.location, *reading.evaluate_safety())
            for reading in self.readings
        ]

    def count_safety_status(self):
        """
        Count the number of safe and unsafe readings.

        Only meaningful after evaluate_all(): readings not yet evaluated
        have status None and therefore count as unsafe.

        Returns:
            tuple: (safe_count, unsafe_count)
        """
        safe_count = len([r for r in self.readings if r.status])
        return safe_count, len(self.readings) - safe_count
27 changes: 21 additions & 6 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
import pandas as pd


def load_data(file_path):
    """
    Load a CSV file into a pandas DataFrame.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: DataFrame containing the CSV data.

    Raises:
        FileNotFoundError: If the CSV file is not found.
        pd.errors.EmptyDataError: If the CSV file is empty.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        # Report the problem, then let the caller decide how to recover.
        print(f"Error: File '{file_path}' not found.")
        raise
    except pd.errors.EmptyDataError:
        print(f"Error: File '{file_path}' is empty.")
        raise
20 changes: 20 additions & 0 deletions src/load_data.py.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pandas as pd


def load_csv(filepath: str) -> pd.DataFrame:
    """
    Load sensor data from a CSV file.

    Args:
        filepath (str): Path to the CSV file.

    Returns:
        pd.DataFrame: Loaded data as a pandas DataFrame.

    Raises:
        FileNotFoundError: If the CSV file is not found.
        pd.errors.EmptyDataError: If the CSV file is empty.
    """
    # Bug fix: the original body referenced an undefined name `file_path`
    # (the parameter is `filepath`), so every call raised NameError.
    try:
        df = pd.read_csv(filepath)
        return df
    except FileNotFoundError:
        print(f"Error: File '{filepath}' not found.")
        raise
    except pd.errors.EmptyDataError:
        print(f"Error: File '{filepath}' is empty.")
        raise
68 changes: 68 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import sys
import pandas as pd
from load_data import load_data
from clean_data import clean_data
from evaluate import WaterQualityEvaluator

def main(location_filter=None):
    """
    Run the water quality monitoring pipeline.

    Loads the raw sensor CSV, cleans it, evaluates each reading, prints a
    per-sensor report plus a summary, and saves the results to CSV.

    Args:
        location_filter (str, optional): If given, only report sensors whose
            location contains this substring (case-insensitive).
    """
    # Load data; abort cleanly with a message on any load failure.
    try:
        df = load_data("../data/sensor_data.csv")
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return
    except pd.errors.EmptyDataError as e:
        print(f"Error: CSV file is empty - {e}")
        return
    except Exception as e:
        print(f"Unexpected error: {e}")
        return

    # Clean data
    df_clean = clean_data(df)

    # Evaluate safety of every row.
    evaluator = WaterQualityEvaluator()
    for _, row in df_clean.iterrows():
        evaluator.add_reading(
            row['sensor_id'],
            row['location'],
            row['ph'],
            row['turbidity'],
            row['temperature']
        )

    results = evaluator.evaluate_all()

    # Filter by location if provided. str(loc) guards against non-string
    # locations (e.g. NaN from a missing CSV cell — clean_data does not touch
    # the location column), which would otherwise crash on .lower().
    if location_filter:
        needle = location_filter.lower()
        results = [(sid, loc, safe, reason) for sid, loc, safe, reason in results
                   if needle in str(loc).lower()]

    # Print per-sensor results.
    for sensor_id, location, is_safe, reason in results:
        status = "[Safe]" if is_safe else f"[Unsafe] ({reason})"
        print(f"Sensor {sensor_id} at {location}: {status}")

    # Count safe vs unsafe (bonus)
    safe_count, unsafe_count = evaluator.count_safety_status()
    print(f"\nSummary: {safe_count} safe, {unsafe_count} unsafe")

    # Save results to CSV (bonus). The message now matches the actual
    # output path (the old message omitted the leading "../").
    results_df = pd.DataFrame(
        results,
        columns=['sensor_id', 'location', 'is_safe', 'reason']
    )
    results_df.to_csv("../data/results.csv", index=False)
    print("Results saved to ../data/results.csv")


if __name__ == "__main__":
    # Accept an optional location filter from the command line (bonus).
    location = sys.argv[1] if len(sys.argv) > 1 else None
    main(location)
68 changes: 68 additions & 0 deletions src/main.py.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import sys
import pandas as pd
from load_data import load_data
from clean_data import clean_data
from evaluate import WaterQualityEvaluator

# NOTE(review): backup (.bak) copy of src/main.py. Paths here are relative to
# the repo root ("data/...") unlike the live version — TODO confirm which is
# intended before reviving this copy.
def main(location_filter=None):
    """
    Run the water quality monitoring pipeline.

    Args:
        location_filter (str, optional): Filter results by location name.
    """
    # Load data
    try:
        df = load_data("data/sensor_data.csv")
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return
    except pd.errors.EmptyDataError as e:
        print(f"Error: CSV file is empty - {e}")
        return
    except Exception as e:
        print(f"Unexpected error: {e}")
        return

    # Clean data
    df_clean = clean_data(df)

    # Evaluate safety
    evaluator = WaterQualityEvaluator()
    for _, row in df_clean.iterrows():
        evaluator.add_reading(
            row['sensor_id'],
            row['location'],
            row['ph'],
            row['turbidity'],
            row['temperature']
        )

    results = evaluator.evaluate_all()

    # Filter by location if provided
    # NOTE(review): loc.lower() assumes every location is a string — a missing
    # CSV cell would arrive as NaN and raise AttributeError here; verify.
    if location_filter:
        results = [(sid, loc, safe, reason) for sid, loc, safe, reason in results
                   if location_filter.lower() in loc.lower()]

    # Print results
    for sensor_id, location, is_safe, reason in results:
        status = "✅ Safe" if is_safe else f"❌ Unsafe ({reason})"
        print(f"Sensor {sensor_id} at {location}: {status}")

    # Count safe vs unsafe (bonus)
    safe_count, unsafe_count = evaluator.count_safety_status()
    print(f"\nSummary: {safe_count} safe, {unsafe_count} unsafe")

    # Save results to CSV (save)
    results_df = pd.DataFrame(
        results,
        columns=['sensor_id', 'location', 'is_safe', 'reason']
    )
    results_df.to_csv("data/results.csv", index=False)
    print("Results saved to data/results.csv")

if __name__ == "__main__":
    # Accept location name from terminal (bonus)
    location = sys.argv[1] if len(sys.argv) > 1 else None
    main(location)
6 changes: 6 additions & 0 deletions src/test_clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from load_data import load_data
from clean_data import clean_data

# Smoke-test: load, clean, and print the sensor data.
# Use a repo-relative path (same convention as test_evaluate.py) instead of a
# machine-specific absolute path, so the script runs on any checkout.
df = load_data("../data/sensor_data.csv")
df_clean = clean_data(df)
print(df_clean)
21 changes: 21 additions & 0 deletions src/test_evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from load_data import load_data
from clean_data import clean_data
from evaluate import WaterQualityEvaluator

# Smoke-test script: load the raw sensor CSV, clean it, evaluate every
# reading, and print a per-sensor safety report.
df = load_data("../data/sensor_data.csv")
df_clean = clean_data(df)

# Feed each cleaned row into the evaluator.
evaluator = WaterQualityEvaluator()
for _, row in df_clean.iterrows():
    evaluator.add_reading(
        row['sensor_id'],
        row['location'],
        row['ph'],
        row['turbidity'],
        row['temperature']
    )

# Print one status line per sensor.
results = evaluator.evaluate_all()
for sensor_id, location, is_safe, reason in results:
    status = "Safe" if is_safe else f"Unsafe ({reason})"
    print(f"Sensor {sensor_id} at {location}: {status}")
4 changes: 4 additions & 0 deletions src/test_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from load_data import load_data

# Smoke-test: load and print the sensor data.
# Use a repo-relative path (same convention as test_evaluate.py) instead of a
# machine-specific absolute path, so the script runs on any checkout.
df = load_data("../data/sensor_data.csv")
print(df)