diff --git a/src/__pycache__/clean_data.cpython-311.pyc b/src/__pycache__/clean_data.cpython-311.pyc new file mode 100644 index 000000000..196f12016 Binary files /dev/null and b/src/__pycache__/clean_data.cpython-311.pyc differ diff --git a/src/__pycache__/evaluate.cpython-311.pyc b/src/__pycache__/evaluate.cpython-311.pyc new file mode 100644 index 000000000..5ac0a08ac Binary files /dev/null and b/src/__pycache__/evaluate.cpython-311.pyc differ diff --git a/src/__pycache__/load_data.cpython-311.pyc b/src/__pycache__/load_data.cpython-311.pyc new file mode 100644 index 000000000..7cea8ed51 Binary files /dev/null and b/src/__pycache__/load_data.cpython-311.pyc differ diff --git a/src/clean_data.py b/src/clean_data.py index da613640a..32a1b3b5c 100644 --- a/src/clean_data.py +++ b/src/clean_data.py @@ -1,7 +1,30 @@ -def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame: - """ - Clean sensor data by handling missing or invalid values. +from typing import List, Dict +def clean_data(data: List[Dict]) -> List[Dict]: + """ + Clean the sensor data by handling missing values and converting types. + + Args: + data: List of dictionaries containing raw sensor data + Returns: - pd.DataFrame: Cleaned data. + List of dictionaries with cleaned data """ + cleaned_data = [] + + for row in data: + cleaned_row = row.copy() + + # Convert numeric fields to float if they exist, else None + for field in ['ph', 'turbidity', 'temperature']: + if field in cleaned_row and cleaned_row[field]: + try: + cleaned_row[field] = float(cleaned_row[field]) + except ValueError: + cleaned_row[field] = None + else: + cleaned_row[field] = None + + cleaned_data.append(cleaned_row) + + return cleaned_data diff --git a/src/evaluate.py b/src/evaluate.py index 006256224..8c8edb09a 100644 --- a/src/evaluate.py +++ b/src/evaluate.py @@ -1,9 +1,72 @@ -class WaterQualityEvaluator: - def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0): - self.ph_range = ph_range - self.turbidity_threshold = turbidity_threshold +from typing import Dict, Optional - def is_safe(self, row: pd.Series) -> bool: +class WaterQualityEvaluator: + """ + Evaluates water quality based on sensor readings. + """ + + # Safe ranges + SAFE_PH_MIN = 6.5 + SAFE_PH_MAX = 7.5 + SAFE_TURBIDITY_MAX = 1.0 + + def evaluate_sensor(self, sensor_data: Dict) -> Dict: + """ + Evaluate a single sensor's water quality. + + Args: + sensor_data: Dictionary containing sensor readings + + Returns: + Dictionary with evaluation results + """ + result = { + 'sensor_id': sensor_data.get('sensor_id'), + 'location': sensor_data.get('location'), + 'is_safe': True, + 'issues': [] + } + + # Check pH + ph = sensor_data.get('ph') + if ph is None: + result['is_safe'] = False + result['issues'].append('missing pH') + elif ph < self.SAFE_PH_MIN: + result['is_safe'] = False + result['issues'].append('pH too low') + elif ph > self.SAFE_PH_MAX: + result['is_safe'] = False + result['issues'].append('pH too high') + + # Check turbidity + turbidity = sensor_data.get('turbidity') + if turbidity is None: + result['is_safe'] = False + result['issues'].append('missing turbidity') + elif turbidity > self.SAFE_TURBIDITY_MAX: + result['is_safe'] = False + result['issues'].append('turbidity too high') + + return result + + def format_result(self, evaluation: Dict) -> str: """ - Determine if a row of water data is safe. + Format the evaluation result as a readable string. + + Args: + evaluation: Dictionary with evaluation results + + Returns: + Formatted string with the evaluation """ + sensor_id = evaluation.get('sensor_id', 'Unknown') + location = evaluation.get('location', 'Unknown location') + status = "✅ Safe" if evaluation['is_safe'] else "❌ Unsafe" + + if evaluation['issues']: + issues = " (" + ", ".join(evaluation['issues']) + ")" + else: + issues = "" + + return f"Sensor {sensor_id} at {location}: {status}{issues}" \ No newline at end of file diff --git a/src/load_data.py b/src/load_data.py index c0126703a..3ea251982 100644 --- a/src/load_data.py +++ b/src/load_data.py @@ -1,10 +1,26 @@ -def load_csv(filepath: str) -> pd.DataFrame: - """ - Load sensor data from a CSV file. +import csv +from typing import List, Dict +def load_csv_file(file_path: str) -> List[Dict]: + """ + Load data from a CSV file into a list of dictionaries. + Args: - filepath (str): Path to the CSV file. - + file_path: Path to the CSV file + Returns: - pd.DataFrame: Loaded data as a pandas DataFrame. + List of dictionaries representing each row of data """ + data = [] + try: + with open(file_path, mode='r') as file: + reader = csv.DictReader(file) + for row in reader: + data.append(row) + return data + except FileNotFoundError: + print(f"Error: File {file_path} not found.") + return [] + except Exception as e: + print(f"Error loading CSV file: {e}") + return [] \ No newline at end of file diff --git a/src/main.py b/src/main.py index e69de29bb..77a1ee54d 100644 --- a/src/main.py +++ b/src/main.py @@ -0,0 +1,31 @@ +from load_data import load_csv_file +from clean_data import clean_data +from evaluate import WaterQualityEvaluator + +def main(): + # File paths + input_file = r"C:\Users\HP\Water_Quality_Monitoring\water_quality_monitoring\data\sensor_data.csv" + + # Load data + raw_data = load_csv_file(input_file) + if not raw_data: + print("No data loaded. Exiting.") + return + + # Clean data + cleaned_data = clean_data(raw_data) + + # Evaluate data + evaluator = WaterQualityEvaluator() + results = [] + + for sensor in cleaned_data: + evaluation = evaluator.evaluate_sensor(sensor) + results.append(evaluator.format_result(evaluation)) + + # Print results + for result in results: + print(result) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/water_quality_monitoring b/water_quality_monitoring new file mode 160000 index 000000000..d8e01fac7 --- /dev/null +++ b/water_quality_monitoring @@ -0,0 +1 @@ +Subproject commit d8e01fac730cad564e68aef22682011e0fb721ed