Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
16 changes: 16 additions & 0 deletions src/clean_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,23 @@
import pandas as pd
def clean_sensor_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean sensor data by handling missing or invalid values.

    The cleaning steps run in this order:

    1. Missing values in the numeric sensor columns are replaced with the
       mean of their own column.
    2. Rows with physically impossible readings are removed
       (pH outside the open interval (0, 14), negative turbidity or
       dissolved oxygen, temperature outside [-20, 60]).
    3. Exact duplicate rows are removed.

    Args:
        df: Raw sensor readings. Must contain the columns 'pH',
            'turbidity', 'dissolved_oxygen' and 'temperature'.

    Returns:
        pd.DataFrame: Cleaned data. The input frame is left untouched.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Work on a copy so the caller's DataFrame is never mutated.
    cleaned_df = df.copy()

    # Handle missing values. The column names must match the ones used by
    # the range filter below (note the mixed-case 'pH').
    numeric_cols = ['pH', 'turbidity', 'dissolved_oxygen', 'temperature']
    column_means = cleaned_df[numeric_cols].mean()
    cleaned_df[numeric_cols] = cleaned_df[numeric_cols].fillna(column_means)

    # Remove impossible values. A column that is entirely NaN has a NaN mean,
    # so its rows stay NaN and are dropped here because comparisons with NaN
    # are False.
    plausible = (
        (cleaned_df['pH'] > 0)
        & (cleaned_df['pH'] < 14)
        & (cleaned_df['turbidity'] >= 0)
        & (cleaned_df['dissolved_oxygen'] >= 0)
        & (cleaned_df['temperature'].between(-20, 60))
    )
    cleaned_df = cleaned_df[plausible]

    # Remove duplicates
    cleaned_df = cleaned_df.drop_duplicates()
    return cleaned_df
4 changes: 4 additions & 0 deletions src/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas as pd
class WaterQualityEvaluator:
def __init__(self, ph_range=(6.5, 8.5), turbidity_threshold=1.0):
self.ph_range = ph_range
Expand All @@ -7,3 +8,6 @@ def is_safe(self, row: pd.Series) -> bool:
"""
Determine if a row of water data is safe.
"""
ph_ok = self.ph_range[0] <= row['pH'] <= self.ph_range[1]
turbidity_ok = row['turbidity'] <= self.turbidity_threshold
return ph_ok and turbidity_ok
25 changes: 19 additions & 6 deletions src/load_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,23 @@
def load_csv(filepath: str) -> pd.DataFrame:
"""
Load sensor data from a CSV file.
import pandas as pd
import csv

def load_csv(filepath: str = 'data/sensor_data.csv') -> pd.DataFrame:
    """
    Load water quality sensor data from a CSV file.

    Loading is best-effort: any failure is reported on stdout and an empty
    DataFrame is returned instead of raising.

    Args:
        filepath: Path to the CSV file (default: 'data/sensor_data.csv').

    Returns:
        pd.DataFrame: Loaded sensor data, or an empty DataFrame if loading
        fails.
    """
    try:
        frame = pd.read_csv(filepath)
        print(f"Successfully loaded data from {filepath}")
    except FileNotFoundError:
        # Reported separately so the message names the missing path.
        print(f"Error: File not found at {filepath}")
    except Exception as exc:
        # Any other failure (parse error, empty file, permissions, ...).
        print(f"Error loading data: {str(exc)}")
    else:
        return frame
    # Every failure path falls through to the same empty result.
    return pd.DataFrame()
Binary file not shown.
128 changes: 128 additions & 0 deletions venv/Lib/site-packages/_distutils_hack/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import sys
import os
import re
import importlib
import warnings


# True when '__pypy__' is among the interpreter's built-in module names.
is_pypy = '__pypy__' in sys.builtin_module_names


# Silence DeprecationWarnings whose message matches the distutils
# deprecation pattern.
warnings.filterwarnings(
    action='ignore',
    message=r'.+ distutils\b.+ deprecated',
    category=DeprecationWarning,
)


def warn_distutils_present():
    """
    Warn when ``distutils`` is already present in ``sys.modules``.

    Does nothing if ``distutils`` has not been imported, or when running on
    PyPy with a Python version below 3.7.
    """
    if 'distutils' not in sys.modules:
        return

    # PyPy for 3.6 unconditionally imports distutils, so bypass the warning
    # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250
    pypy_before_37 = is_pypy and sys.version_info < (3, 7)
    if pypy_before_37:
        return

    message = (
        "Distutils was imported before Setuptools, but importing Setuptools "
        "also replaces the `distutils` module in `sys.modules`. This may lead "
        "to undesirable behaviors or errors. To avoid these issues, avoid "
        "using distutils directly, ensure that setuptools is installed in the "
        "traditional way (e.g. not an editable install), and/or make sure "
        "that setuptools is always imported before distutils."
    )
    warnings.warn(message)


def clear_distutils():
    """
    Remove ``distutils`` and its submodules from ``sys.modules``.

    Emits a warning before removing anything; does nothing at all when
    ``distutils`` itself is not loaded.
    """
    if 'distutils' in sys.modules:
        warnings.warn("Setuptools is replacing distutils.")
        # Snapshot the matching names first: sys.modules must not be
        # mutated while it is being iterated.
        stale = [key for key in sys.modules if re.match(r'distutils\b', key)]
        for key in stale:
            del sys.modules[key]


def enabled():
    """
    Allow selection of distutils by environment variable.

    Returns True only when ``SETUPTOOLS_USE_DISTUTILS`` is set to
    ``'local'``; an unset variable is treated as ``'stdlib'``.
    """
    return os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'stdlib') == 'local'


def ensure_local_distutils():
    """
    Register ``setuptools._distutils`` under the name ``distutils``.

    Any already-loaded ``distutils`` modules are cleared first, then the
    setuptools copy is imported, renamed and stored in ``sys.modules``.
    """
    clear_distutils()
    local_copy = importlib.import_module('setuptools._distutils')
    local_copy.__name__ = 'distutils'
    sys.modules['distutils'] = local_copy

    # sanity check that submodules load as expected
    core_module = importlib.import_module('distutils.core')
    core_path = core_module.__file__
    assert '_distutils' in core_path, core_path


def do_override():
    """
    Ensure that the local copy of distutils is preferred over stdlib.

    Only acts when ``enabled()`` is true: it first warns if distutils is
    already loaded, then installs the local copy.

    See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401
    for more motivation.
    """
    if not enabled():
        return
    warn_distutils_present()
    ensure_local_distutils()


class DistutilsMetaFinder:
    """
    Meta path finder that dispatches on the name of the imported module.

    ``find_spec`` looks for a method called ``spec_for_<fullname>`` and
    returns whatever that method returns; names without such a method are
    left to the other finders on ``sys.meta_path``.
    """

    def find_spec(self, fullname, path, target=None):
        """
        Return a module spec for ``fullname``, or None to decline.

        Only requests with ``path is None`` are considered; everything else
        is declined immediately.
        """
        if path is not None:
            return None

        method_name = 'spec_for_{}'.format(fullname)
        # Unknown names fall back to a no-op that yields None.
        method = getattr(self, method_name, lambda: None)
        return method()

    def spec_for_distutils(self):
        """
        Build a spec whose loader returns ``setuptools._distutils``.
        """
        import importlib.abc
        import importlib.util

        class DistutilsLoader(importlib.abc.Loader):

            def create_module(self, spec):
                # The module object handed to the import system is the
                # setuptools copy itself.
                return importlib.import_module('setuptools._distutils')

            def exec_module(self, module):
                # Nothing to execute: create_module returned a module that
                # was already imported.
                pass

        return importlib.util.spec_from_loader('distutils', DistutilsLoader())

    def spec_for_pip(self):
        """
        Ensure stdlib distutils when running under pip.
        See pypa/pip#8761 for rationale.

        Always returns None, so pip itself is still located by the other
        finders.
        """
        if self.pip_imported_during_build():
            return
        clear_distutils()
        # Shadow the method on this instance so later ``import distutils``
        # requests are declined by this finder.
        self.spec_for_distutils = lambda: None

    @staticmethod
    def pip_imported_during_build():
        """
        Detect if pip is being imported in a build script. Ref #2355.

        Returns True when any frame on the current call stack belongs to a
        module whose ``__file__`` ends with ``setup.py``.
        """
        import traceback
        return any(
            # Frames created by exec()/eval(), the REPL or some frozen
            # modules have no ``__file__`` (or a None one) in their globals;
            # treat those as "not setup.py" instead of raising.
            (frame.f_globals.get('__file__') or '').endswith('setup.py')
            for frame, line in traceback.walk_stack(None)
        )


# Module-level finder instance. add_shim() inserts this exact object into
# sys.meta_path and remove_shim() removes it again.
DISTUTILS_FINDER = DistutilsMetaFinder()


def add_shim():
    """
    Put ``DISTUTILS_FINDER`` at the front of ``sys.meta_path``.
    """
    finders = sys.meta_path
    finders.insert(0, DISTUTILS_FINDER)


def remove_shim():
    """
    Take ``DISTUTILS_FINDER`` off ``sys.meta_path`` if it is there.
    """
    finders = sys.meta_path
    try:
        finders.remove(DISTUTILS_FINDER)
    except ValueError:
        # The finder is not on the meta path; nothing to undo.
        pass
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions venv/Lib/site-packages/_distutils_hack/override.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Import the `_distutils_hack` package by name and immediately call its
# do_override(), all in one expression.
__import__('_distutils_hack').do_override()
24 changes: 24 additions & 0 deletions venv/Lib/site-packages/dateutil/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
import sys

# Expose the package version; fall back to a placeholder string when the
# `_version` module cannot be imported.
try:
    from ._version import version as __version__
except ImportError:
    __version__ = 'unknown'

# Names of the submodules that make up the public API. The module-level
# __getattr__ imports them on first access and __dir__ lists them.
__all__ = ['easter', 'parser', 'relativedelta', 'rrule', 'tz',
           'utils', 'zoneinfo']

def __getattr__(name):
    """
    Import a submodule lazily on first attribute access.

    Called for attribute lookups on this module that are not found the
    normal way (module-level ``__getattr__``, PEP 562).

    Args:
        name: Attribute being looked up on the package.

    Returns:
        The imported submodule when ``name`` is listed in ``__all__``.

    Raises:
        AttributeError: If ``name`` is not one of the names in ``__all__``.
    """
    # Imported here rather than at module level so that importlib is only
    # loaded when a lazy lookup actually happens.
    import importlib

    if name in __all__:
        return importlib.import_module("." + name, __name__)
    raise AttributeError(
        "module {!r} has no attribute {!r}".format(__name__, name)
    )


def __dir__():
    """
    List module globals plus every lazily importable submodule name.
    """
    # __dir__ should include all the lazy-importable modules as well.
    listing = [name for name in globals() if name not in sys.modules]
    listing.extend(__all__)
    return listing
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
43 changes: 43 additions & 0 deletions venv/Lib/site-packages/dateutil/_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""
Common code used in multiple modules.
"""


class weekday(object):
    """
    A weekday index paired with an optional occurrence number ``n``.

    ``weekday`` is used as an index into ("MO", ..., "SU") when building the
    repr. Calling an instance with a new ``n`` returns an instance carrying
    that ``n``.
    """
    __slots__ = ["weekday", "n"]

    def __init__(self, weekday, n=None):
        self.weekday, self.n = weekday, n

    def __call__(self, n):
        # Reuse this instance when the requested n is the one it already has.
        return self if n == self.n else self.__class__(self.weekday, n)

    def __eq__(self, other):
        try:
            differs = self.weekday != other.weekday or self.n != other.n
        except AttributeError:
            # Objects without weekday/n attributes never compare equal.
            return False
        return not differs

    def __hash__(self):
        key = (self.weekday, self.n)
        return hash(key)

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        label = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday]
        # A falsy n (None or 0) is rendered as the bare day name.
        if self.n:
            return "%s(%+d)" % (label, self.n)
        return label

# vim:ts=4:sw=4:et

# vim:ts=4:sw=4:et
4 changes: 4 additions & 0 deletions venv/Lib/site-packages/dateutil/_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# file generated by setuptools_scm
# don't change, don't track in version control
# Each line binds two names to the same value (chained assignment).
# NOTE(review): the tuple holds only (2, 9, 0) while the string also carries
# a 'post0' segment; presumably intentional in the generated output; confirm.
__version__ = version = '2.9.0.post0'
__version_tuple__ = version_tuple = (2, 9, 0)
89 changes: 89 additions & 0 deletions venv/Lib/site-packages/dateutil/easter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
"""
This module offers a generic Easter computing method for any given year, using
Western, Orthodox or Julian algorithms.
"""

import datetime

__all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"]

# Values accepted by the ``method`` argument of easter().
EASTER_JULIAN = 1
EASTER_ORTHODOX = 2
EASTER_WESTERN = 3


def easter(year, method=EASTER_WESTERN):
    """
    Return the date of Easter for ``year`` as a ``datetime.date``.

    This method was ported from the work done by GM Arts,
    on top of the algorithm by Claus Tondering, which was
    based in part on the algorithm of Ouding (1940), as
    quoted in "Explanatory Supplement to the Astronomical
    Almanac", P. Kenneth Seidelmann, editor.

    Three calculation methods are available, selected by ``method``:

    * ``EASTER_JULIAN = 1``: original calculation in the Julian calendar,
      valid in dates after 326 AD
    * ``EASTER_ORTHODOX = 2``: original method, with the date converted to
      the Gregorian calendar, valid in years 1583 to 4099
    * ``EASTER_WESTERN = 3``: revised method, in the Gregorian calendar,
      valid in years 1583 to 4099 as well

    The default method is method 3. Any ``method`` outside 1..3 raises
    ``ValueError``.

    More about the algorithm may be found at:

    `GM Arts: Easter Algorithms <http://www.gmarts.org/index.php?go=415>`_

    and

    `The Calendar FAQ: Easter <https://www.tondering.dk/claus/cal/easter.php>`_

    """

    if not (1 <= method <= 3):
        raise ValueError("invalid method")

    # golden       - Golden year - 1
    # century      - Century
    # epact_term   - (23 - Epact) mod 30
    # pfm_days     - Number of days from March 21 to Paschal Full Moon
    # pfm_weekday  - Weekday for PFM (0=Sunday, etc)
    # offset       - Number of days from March 21 to Sunday on or before PFM
    #                (-6 to 28 methods 1 & 3, to 56 for method 2)
    # julian_shift - Extra days to add for method 2 (converting Julian
    #                date to Gregorian date)

    golden = year % 19
    julian_shift = 0
    if method < 3:
        # Old method
        pfm_days = (19*golden + 15) % 30
        pfm_weekday = (year + year//4 + pfm_days) % 7
        if method == 2:
            # Extra dates to convert Julian to Gregorian date
            julian_shift = 10
            if year > 1600:
                julian_shift = (
                    julian_shift + year//100 - 16 - (year//100 - 16)//4
                )
    else:
        # New method
        century = year//100
        epact_term = (
            century - century//4 - (8*century + 13)//25 + 19*golden + 15
        ) % 30
        pfm_days = epact_term - (epact_term//28)*(
            1 - (epact_term//28)*(29//(epact_term + 1))*((21 - golden)//11)
        )
        pfm_weekday = (
            year + year//4 + pfm_days + 2 - century + century//4
        ) % 7

    # offset can be from -6 to 56 corresponding to dates 22 March to 23 May
    # (later dates apply to method 2, although 23 May never actually occurs)
    offset = pfm_days - pfm_weekday + julian_shift
    day = 1 + (offset + 27 + (offset + 6)//40) % 31
    month = 3 + (offset + 26)//30
    return datetime.date(int(year), int(month), int(day))
Loading