Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
52 changes: 52 additions & 0 deletions data/results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
sensor_id,timestamp,ph,turbidity,dissolved_oxygen,temperature,is_safe,status_text
SENSOR_012,2023-01-01 15:30:00,6.28,3.04,6.14,25.22,False,"Unsafe (pH too low, turbidity too high)"
SENSOR_012,2023-01-01 18:00:00,7.55,2.04,7.5,17.29,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-01 18:30:00,7.26,3.99,7.36,23.04,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-01 23:00:00,7.31,0.93,5.94,22.57,True,Safe
SENSOR_012,2023-01-02 04:30:00,7.07,2.16,9.76,18.64,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-02 05:30:00,7.63,2.45,12.9,22.73,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-02 11:30:00,6.72,2.77,10.35,17.33,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-02 19:30:00,7.17,2.58,6.96,21.31,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-02 23:30:00,6.73,2.85,7.11,18.13,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-03 04:00:00,7.91,3.56,8.03,18.71,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-03 04:30:00,6.4,3.49,8.31,26.82,False,"Unsafe (pH too low, turbidity too high)"
SENSOR_012,2023-01-04 03:00:00,6.91,5.35,6.9,21.06,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-04 04:00:00,6.84,3.47,7.54,27.52,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-04 09:00:00,6.9,2.71,8.8,22.45,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-04 11:30:00,7.07,2.11,6.38,22.1,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-05 07:00:00,6.85,2.45,9.24,26.85,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-05 16:30:00,6.47,2.88,6.15,25.64,False,"Unsafe (pH too low, turbidity too high)"
SENSOR_012,2023-01-06 05:30:00,7.5,2.74,8.79,19.96,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-06 08:00:00,7.4,3.69,7.22,24.31,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-06 12:30:00,7.46,2.18,6.4,24.21,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-08 08:00:00,7.32,2.55,7.78,19.93,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-09 07:30:00,6.77,2.42,6.71,23.09,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-09 16:00:00,6.42,1.82,9.14,17.86,False,"Unsafe (pH too low, turbidity too high)"
SENSOR_012,2023-01-10 03:30:00,6.97,3.74,9.06,22.74,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-10 05:30:00,7.01,0.41,5.51,24.14,True,Safe
SENSOR_012,2023-01-10 09:30:00,7.09,2.62,10.24,20.55,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-10 12:30:00,7.06,3.61,3.82,19.73,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-10 22:30:00,6.63,2.61,6.7,24.34,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-13 04:30:00,7.05,3.63,6.46,20.66,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-13 18:30:00,7.71,2.55,6.14,22.63,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-13 20:30:00,7.1,3.44,11.03,21.04,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-14 19:00:00,7.45,2.75,8.01,25.29,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-14 21:30:00,7.27,3.88,8.66,19.66,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-15 19:30:00,6.53,2.48,7.47,24.84,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-15 23:30:00,6.92,3.5,7.23,20.18,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-16 03:00:00,7.03,4.1,6.97,17.72,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-16 05:30:00,7.0,2.8,5.68,21.35,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-16 16:00:00,6.63,4.32,8.78,25.5,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-17 11:30:00,6.75,2.93,4.22,24.29,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-17 12:00:00,7.65,2.67,7.63,24.06,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-17 21:30:00,7.04,2.92,6.29,21.71,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-18 15:00:00,6.92,4.35,6.93,21.82,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-19 06:00:00,6.63,1.81,6.07,24.59,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-19 09:00:00,7.14,3.32,8.98,20.48,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-20 09:30:00,6.55,4.07,8.53,22.39,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-20 15:00:00,6.1,4.57,6.96,21.7,False,"Unsafe (pH too low, turbidity too high)"
SENSOR_012,2023-01-20 16:30:00,7.7,2.07,10.47,21.49,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-21 05:00:00,6.8,2.56,6.9,22.3,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-21 08:30:00,6.71,3.33,8.6,15.33,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-21 10:00:00,7.46,3.65,8.99,17.92,False,Unsafe (turbidity too high)
SENSOR_012,2023-01-21 16:00:00,7.03,3.51,6.97,19.53,False,Unsafe (turbidity too high)
Binary file added src/__pycache__/clean_data.cpython-312.pyc
Binary file not shown.
Binary file added src/__pycache__/evaluate.cpython-312.pyc
Binary file not shown.
Binary file added src/__pycache__/load_data.cpython-312.pyc
Binary file not shown.
Binary file added src/__pycache__/sensor.cpython-312.pyc
Binary file not shown.
42 changes: 38 additions & 4 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,41 @@
# clean_data.py

import pandas as pd

def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean the data but keep rows (problems are flagged later by the evaluator):
    - trim location text
    - convert ph/turbidity/dissolved_oxygen/temperature to numeric
    - set physically impossible values to NA so they read as "missing"
    - drop duplicate readings (same sensor_id + timestamp), keeping the last

    Args:
        df: Raw sensor readings.

    Returns:
        pd.DataFrame: A cleaned copy; the caller's frame is not modified.
    """
    df = df.copy()  # never mutate the caller's frame

    if "location" in df.columns:
        df["location"] = df["location"].astype(str).str.strip()

    # Non-numeric entries (e.g. "n/a") become NaN instead of raising.
    numeric_cols = ["ph", "turbidity", "dissolved_oxygen", "temperature"]
    for col in numeric_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    # Plausibility ranges: out-of-range readings are sensor faults, so blank them.
    if "ph" in df.columns:
        df.loc[(df["ph"] < 0) | (df["ph"] > 14), "ph"] = pd.NA
    if "turbidity" in df.columns:
        df.loc[df["turbidity"] < 0, "turbidity"] = pd.NA
    # BUG FIX: dissolved_oxygen was converted to numeric but never range-checked,
    # despite the docstring's promise; negative concentrations are impossible,
    # so treat them like negative turbidity.
    if "dissolved_oxygen" in df.columns:
        df.loc[df["dissolved_oxygen"] < 0, "dissolved_oxygen"] = pd.NA
    if "temperature" in df.columns:
        df.loc[(df["temperature"] < -5) | (df["temperature"] > 60), "temperature"] = pd.NA

    # Drop duplicate readings (keep last — later rows are assumed corrections).
    dup_subset = [c for c in ["sensor_id", "timestamp"] if c in df.columns]
    if dup_subset:
        df = df.drop_duplicates(subset=dup_subset, keep="last")

    return df





36 changes: 34 additions & 2 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,41 @@

# evaluate.py — mark each reading Safe/Unsafe with a reason

import pandas as pd

class WaterQualityEvaluator:
    """Flags each reading Safe/Unsafe with a human-readable reason string."""

    def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
        self.ph_range = ph_range
        self.turbidity_threshold = turbidity_threshold

    def is_safe(self, row: pd.Series) -> tuple[bool, str]:
        """
        Determine if a row of water data is safe.

        A row is safe iff:
        - pH is present and within ph_range (inclusive)
        - turbidity is present and <= turbidity_threshold
        Missing values = unsafe, with their own reason.

        Returns:
            (ok, message): ok is True only when no reasons were collected;
            message is "Safe" or "Unsafe (<comma-joined reasons>)".
        """
        reasons = []

        ph = row.get("ph")
        turb = row.get("turbidity")

        # pd.isna handles NaN, None and pd.NA uniformly.
        ph_missing = pd.isna(ph)
        turb_missing = pd.isna(turb)

        if ph_missing:
            reasons.append("missing pH")
        if turb_missing:
            reasons.append("missing turbidity")

        # BUG FIX: the range checks used to run only when *nothing* was
        # missing (`if not reasons:`), so e.g. a missing pH hid a too-high
        # turbidity. Each field is now range-checked independently whenever
        # it is present.
        if not ph_missing:
            lo, hi = self.ph_range
            if ph < lo:
                reasons.append("pH too low")
            elif ph > hi:
                reasons.append("pH too high")
        if not turb_missing and turb > self.turbidity_threshold:
            reasons.append("turbidity too high")

        ok = not reasons
        message = "Safe" if ok else "Unsafe (" + ", ".join(reasons) + ")"
        return ok, message
18 changes: 12 additions & 6 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import pandas as pd

def load_csv(filepath: str) -> pd.DataFrame:
    """
    Load sensor data from a CSV file into a pandas DataFrame.

    - Normalizes column names to lower_snake_case (e.g. "Sensor ID" -> "sensor_id")
      so downstream code can rely on exact names.
    - Parses 'timestamp' to datetime if present; unparseable values become NaT
      rather than raising (errors="coerce").

    Args:
        filepath: Path to the CSV file.

    Returns:
        pd.DataFrame: Loaded data with normalized columns.
    """
    df = pd.read_csv(filepath)
    df.columns = [c.strip().lower().replace(" ", "_") for c in df.columns]

    if "timestamp" in df.columns:
        df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")

    return df
92 changes: 92 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# main.py — run the water quality pipeline using sensor_id (dataset has no location)

from load_data import load_csv
from clean_data import clean_sensor_data
from evaluate import WaterQualityEvaluator
from sensor import SensorReading

def display_sensor_id(raw):
    """Make IDs like 'SENSOR_005' print as 5. If no digits, return the original."""
    text = str(raw)
    digits = "".join(filter(str.isdigit, text))
    if not digits:
        return text
    return int(digits)

def main():
    """Run the pipeline: load -> clean -> optional sensor filter -> evaluate -> report -> save CSV."""
    # 1) load (path is relative, so this script expects to be run from src/)
    df = load_csv("../data/sensor_data.csv")

    # 2) clean
    df = clean_sensor_data(df)

    # Ensure sensor_id is present and printable
    if "sensor_id" not in df.columns:
        raise ValueError("CSV must contain 'sensor_id' when there is no 'location' column.")
    df["sensor_id"] = df["sensor_id"].astype(str).str.strip()

    # 2b) user input: filter by sensor_id (press Enter for all)
    print("Enter a sensor_id to filter (e.g., SENSOR_005).")
    print("Press Enter to use ALL sensors.")
    user_sensor = input("sensor_id: ").strip()
    if user_sensor:
        df = df[df["sensor_id"] == user_sensor]
        if df.empty:
            # nothing to report for an unknown sensor; bail out early
            print(f"No rows found for sensor_id: {user_sensor}")
            return

    # 3) evaluate each row and build SensorReading objects
    evaluator = WaterQualityEvaluator(ph_range=(6.5, 8.5), turbidity_threshold=1.0)

    is_safe_list = []       # bool per row, becomes df["is_safe"]
    status_text_list = []   # message per row, becomes df["status_text"]
    sample_readings = []    # first two rows wrapped as SensorReading, for the demo printout

    for i, (_, row) in enumerate(df.iterrows(), start=1):
        # class usage
        reading = SensorReading.from_row(row)
        if i <= 2:
            sample_readings.append(reading)

        # evaluator is_safe might return (ok, text) or just ok
        result = evaluator.is_safe(row)
        if isinstance(result, tuple):
            ok, text = result
        else:
            ok = bool(result)
            text = "Safe" if ok else "Unsafe"

        is_safe_list.append(ok)
        status_text_list.append(text)

    # lists are aligned with df's rows because they were built in iteration order
    df["is_safe"] = is_safe_list
    df["status_text"] = status_text_list

    # 4) use the class
    if sample_readings:
        print("\n(example SensorReading objects)")
        for r in sample_readings:
            print("  ", r)

    # 5) report
    print("\n=== Water Quality Report (first 10 rows) ===")
    for _, r in df.head(10).iterrows():
        sid_num = display_sensor_id(r.get("sensor_id", ""))
        ok = bool(r.get("is_safe", False))
        text = r.get("status_text", "Safe" if ok else "Unsafe")
        # NOTE(review): the startswith(" X ") guard looks dead — the evaluator's
        # messages never begin with " X " — confirm before removing.
        pretty = "Safe" if ok else (text if text.startswith(" X ") else f" X {text}")
        print(f"Sensor {sid_num}: {pretty}")

    # 6) row-level counts
    total = len(df)
    safe_count = int(df["is_safe"].sum())
    unsafe_count = total - safe_count
    print(f"\nSummary: {safe_count} safe, {unsafe_count} unsafe (out of {total})")

    # 7) save results
    out_path = "../data/results.csv"
    df.to_csv(out_path, index=False)
    print(f"\nSaved detailed results to {out_path}")

if __name__ == "__main__":
    main()


39 changes: 39 additions & 0 deletions src/sensor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# sensor.py — a tiny class to model one sensor reading

from dataclasses import dataclass, asdict
from typing import Optional

@dataclass
class SensorReading:
    """One water-quality reading from a single sensor."""

    sensor_id: Optional[str]
    location: str
    ph: Optional[float]
    turbidity: Optional[float]
    temperature: Optional[float] = None

    @classmethod
    def from_row(cls, row) -> "SensorReading":
        """
        Build a SensorReading from a pandas row (Series) or any mapping.

        Missing columns and NA/non-numeric values become None, so the object
        is always constructible from partially clean data.
        """
        def val(col):
            return row[col] if col in row else None

        def num(col):
            # BUG FIX: the old `v is None or v != v` check raised
            # "boolean value of NA is ambiguous" for pandas' pd.NA and
            # crashed float() on non-numeric strings. Convert first and
            # treat any failure (or NaN) as None.
            v = val(col)
            if v is None:
                return None
            try:
                f = float(v)
            except (TypeError, ValueError):
                return None
            return None if f != f else f  # NaN != NaN

        # keep sensor IDs as strings; NA-like values become None
        sensor = val("sensor_id")
        try:
            sensor = None if sensor is None or sensor != sensor else str(sensor)
        except TypeError:
            # pd.NA comparisons are not boolean-evaluable
            sensor = None

        return cls(
            sensor_id=sensor,
            location=str(val("location") or ""),
            ph=num("ph"),
            turbidity=num("turbidity"),
            temperature=num("temperature"),
        )

    def to_dict(self) -> dict:
        """Plain dict (handy for JSON/CSV or debugging)."""
        return asdict(self)
Loading