Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ cd Water_Quality_Monitoring
2. **Create a virtual environment and install dependencies**
```bash
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
source venv/bin/activate  # Windows (cmd/PowerShell): venv\Scripts\activate — Git Bash on Windows: source venv/Scripts/activate
pip install -r requirements.txt
```

Expand Down Expand Up @@ -101,4 +101,4 @@ Water_Quality_Monitoring/
2. Create a new branch (`git checkout -b feature-name`)
3. Commit your changes (`git commit -am 'Add something'`)
4. Push to the branch (`git push origin feature-name`)
5. Open a pull request
5. Open a pull request
Binary file added src/__pycache__/clean_data.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/evaluate.cpython-313.pyc
Binary file not shown.
Binary file added src/__pycache__/load_data.cpython-313.pyc
Binary file not shown.
40 changes: 39 additions & 1 deletion src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,45 @@
import pandas as pd

def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean sensor data by handling missing or invalid values.

    Args:
        df (pd.DataFrame): Raw sensor data. Expected to contain
            (case-insensitive) 'ph', 'turbidity', and 'sensor_id' columns.

    Returns:
        pd.DataFrame: A cleaned copy of the data with numeric 'ph' and
        'turbidity' columns, rows with missing readings dropped, and a
        'location' column mapped from 'sensor_id' (NaN for unknown sensors).
    """
    # Work on a copy so the caller's DataFrame is not mutated.
    # (The original renamed the caller's columns in place.)
    df = df.copy()

    # Normalize column names
    df.columns = df.columns.str.lower()

    # Convert pH and turbidity to numeric values, coercing errors to NaN
    df['ph'] = pd.to_numeric(df['ph'], errors='coerce')
    df['turbidity'] = pd.to_numeric(df['turbidity'], errors='coerce')

    # Drop rows with missing pH or turbidity values
    df = df.dropna(subset=['ph', 'turbidity'])

    # Map sensor_id to location: SENSOR_001..SENSOR_020 -> Lake A..Lake T.
    # Generated instead of hand-written to avoid typos in a 20-entry table;
    # unmapped sensor ids become NaN, as with the original dict.
    lake_map = {
        f"SENSOR_{i:03d}": f"Lake {chr(ord('A') + i - 1)}"
        for i in range(1, 21)
    }
    df['location'] = df['sensor_id'].map(lake_map)

    return df
37 changes: 37 additions & 0 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd

class WaterQualityEvaluator:
def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
self.ph_range = ph_range
Expand All @@ -6,4 +8,39 @@ def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
def is_safe(self, row: pd.Series) -> bool:
"""
Determine if a row of water data is safe.

Args:
row (pd.Series): A row of sensor data.

Returns:
bool: True if safe, False otherwise.
"""
if pd.isna(row['ph']) or pd.isna(row['turbidity']):
return False
if not (self.ph_range[0] <= row['ph'] <= self.ph_range[1]):
return False
if row['turbidity'] > self.turbidity_threshold:
return False
return True

def get_reason(self, row: pd.Series) -> str:
"""
Provide reason for safety status of the sensor data row.

Args:
row (pd.Series): A row of sensor data.

Returns:
str: Reason message including emoji.
"""
if pd.isna(row['ph']):
return "❌ Unsafe (missing pH)"
if pd.isna(row['turbidity']):
return "❌ Unsafe (missing turbidity)"
if row['ph'] < self.ph_range[0]:
return "❌ Unsafe (pH too low)"
if row['ph'] > self.ph_range[1]:
return "❌ Unsafe (pH too high)"
if row['turbidity'] > self.turbidity_threshold:
return "❌ Unsafe (turbidity too high)"
return "✅ Safe"
10 changes: 10 additions & 0 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd

def load_csv(filepath: str) -> pd.DataFrame:
    """
    Load sensor data from a CSV file.

    Args:
        filepath (str): Path to the CSV file.

    Returns:
        pd.DataFrame: Loaded data as a pandas DataFrame; an empty
        DataFrame when the file is missing or cannot be parsed.
    """
    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError:
        # Best-effort fallback: report and hand back an empty frame.
        print(f"Error: File not found at {filepath}")
        df = pd.DataFrame()
    except pd.errors.ParserError:
        print(f"Error: Failed to parse CSV file at {filepath}")
        df = pd.DataFrame()
    return df
45 changes: 45 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os
import pandas as pd
from load_data import load_csv
from clean_data import clean_sensor_data
from evaluate import WaterQualityEvaluator


def main():
    """Run the water-quality pipeline: load, clean, evaluate, and report.

    Reads ../data/sensor_data.csv, cleans it, prints a per-sensor status
    line, writes all results to results.csv, and prints a safe/unsafe
    summary count.
    """
    # Step 1: Load data
    data_path = os.path.join("..", "data", "sensor_data.csv")
    df = load_csv(data_path)

    if df.empty:
        print("No data to process.")
        return

    # Step 2: Clean data
    cleaned_df = clean_sensor_data(df)

    # Step 3: Evaluate each cleaned row and collect per-sensor status
    evaluator = WaterQualityEvaluator()
    results = []

    for _, row in cleaned_df.iterrows():
        status = evaluator.get_reason(row)
        print(f"Sensor {row['sensor_id']} at {row['location']}: {status}")
        results.append({
            "sensor_id": row["sensor_id"],
            "location": row["location"],
            "status": status
        })

    # Step 4 (Bonus): Save results to CSV
    results_df = pd.DataFrame(results)
    results_df.to_csv("results.csv", index=False)

    # Step 5 (Bonus): Summary counts.
    # Bug fix: the original re-printed the last per-row status line here
    # instead of the summary it had just computed.
    safe_count = results_df["status"].str.contains("✅").sum()
    unsafe_count = results_df.shape[0] - safe_count
    print(f"Summary: {safe_count} safe, {unsafe_count} unsafe")


if __name__ == "__main__":
    main()
Loading