Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion code/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,20 @@ Statistical and utility functions:
- `normalize_data()`: Data normalization (z-score, min-max, robust)
- `detect_outliers()`: Multiple outlier detection methods

### Range Calculation Workflow (FETCH Range Calculation Notebook)
Structured helpers extracted from `FETCH Range Calculation.ipynb`:
- `range_salinity.py`: Salinity gap stitching, smoothing, bottle calibration
- `tidal_correction.py`: Tidal prediction parsing and pressure correction
- `velocity_interpolation.py`: Interpolate recorded velocities to data timelines
- `pressure_moving_average.py`: 15-day moving average and re-basing helpers
- `range_calculation_workflow.py`: Data extraction and harmonic mean utilities
- `range_calculation_main.py`: Notebook-oriented main workflow entry point

Use the included notebook scaffold:
```bash
jupyter notebook code/fetch_range_calculation.ipynb
```

## Example Analysis Workflow

```python
Expand Down Expand Up @@ -207,4 +221,4 @@ This package was extracted from the FETCH StreamLine Final notebook to create a

## License

This code is derived from the FETCH StreamLine project for oceanographic research.
This code is derived from the FETCH StreamLine project for oceanographic research.
31 changes: 29 additions & 2 deletions code/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
to_enu,
apply_tilt_correction,
geodetic_to_enu,
calculate_range_from_coordinates
calculate_range_from_coordinates,
build_baseline_perturbations
)

from .optimization import (
Expand All @@ -63,6 +64,21 @@
normalize_data
)

from .range_salinity import (
SalinityCalibrationConfig,
GapMeanShiftConfig,
prepare_salinity_series,
bottle_residuals
)

from .tidal_correction import (
load_tidal_predictions,
optimize_tidal_correction,
apply_tidal_correction
)

from .velocity_interpolation import interpolate_velocity

from .data_persistence import (
save_dataframe_as_pickle,
load_dataframe_from_pickle,
Expand Down Expand Up @@ -94,6 +110,7 @@
'apply_tilt_correction',
'geodetic_to_enu',
'calculate_range_from_coordinates',
'build_baseline_perturbations',

# Optimization
'fit_and_extrapolate',
Expand All @@ -107,6 +124,16 @@
'calculate_rms_error',
'calculate_statistics',
'normalize_data',

# Range calculation helpers
'SalinityCalibrationConfig',
'GapMeanShiftConfig',
'prepare_salinity_series',
'bottle_residuals',
'load_tidal_predictions',
'optimize_tidal_correction',
'apply_tidal_correction',
'interpolate_velocity',

# Data persistence
'save_dataframe_as_pickle',
Expand All @@ -115,4 +142,4 @@
'save_baseline_data',
'save_instrument_data',
'load_existing_pickles'
]
]
85 changes: 85 additions & 0 deletions code/fetch_range_calculation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# FETCH Range Calculation (Structured)\n",
"\n",
"This notebook calls the structured workflow modules in `code/` while preserving\n",
"the original file naming used in the FETCH Range Calculation notebook.\n"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"\n",
"from code.range_calculation_main import (\n",
" FetchRangeInputs,\n",
" run_fetch_range_workflow,\n",
")\n",
"from code.range_salinity import SalinityCalibrationConfig, GapMeanShiftConfig\n",
"\n",
"filepaths = [\n",
" 'Data_230912111909_East_006870_2502_.csv',\n",
" 'Data_230912112033_West_006874_2503_.csv',\n",
" 'Data_230913091309_North_00687A_2504_.csv',\n",
"]\n",
"inputs = FetchRangeInputs(filepaths=filepaths, identifiers=['2504', '2503', '2502'])\n",
"\n",
"salinity_config = SalinityCalibrationConfig(\n",
" file_path='/data/wsd02/maleen_data/ooi-rs03ccal-mj03f-12-ctdpfb305_2f21_522e_6ceb.csv',\n",
" bottle_times=pd.to_datetime([\n",
" '2022-08-14 05:49:53',\n",
" '2022-08-30 19:37:13',\n",
" '2023-09-17 03:35:09',\n",
" ]),\n",
" bottle_sal=[34.52543602, 34.52720340, 34.52700000],\n",
" start_time=pd.to_datetime('2022-08-14 05:49:53'),\n",
" end_time=pd.to_datetime('2025-09-05 23:59:00'),\n",
" jump_start=pd.to_datetime('2023-09-07 00:00:00'),\n",
" jump_end=pd.to_datetime('2023-09-14 00:00:00'),\n",
" smooth_after=pd.to_datetime('2023-09-14 00:00:00'),\n",
" smooth_len=250,\n",
")\n",
"\n",
"gap_shift_config = GapMeanShiftConfig(\n",
" gap_start=pd.to_datetime('2024-08-31 21:43:00'),\n",
" gap_end=pd.to_datetime('2024-09-01 20:13:00'),\n",
" pre_hours=24,\n",
" post_hours=24,\n",
")\n",
"\n",
"tidal_prediction_paths = [\n",
" '/data/wsd02/maleen_data/pred_F_2022.txt',\n",
" '/data/wsd02/maleen_data/pred_F_2023.txt',\n",
" '/data/wsd02/maleen_data/pred_F_2024.txt',\n",
" '/data/wsd02/maleen_data/pred_F_2025.txt',\n",
"]\n",
"\n",
"outputs = run_fetch_range_workflow(\n",
" inputs,\n",
" salinity_config,\n",
" gap_shift_config,\n",
" tidal_prediction_paths,\n",
")\n",
"outputs.keys()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
51 changes: 50 additions & 1 deletion code/positioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,55 @@ def calculate_baseline_perturbations(
return perturb_df


def build_baseline_perturbations(
    stations: dict,
    pairs: list,
    h_txr: float = H_TXR,
) -> pd.DataFrame:
    """
    Assemble a table of baseline perturbations for several station pairs.

    For each (tx, rx) pair, both stations' tilt records are interpolated onto
    their merged timeline, converted to east/north offsets, and the relative
    displacement is projected onto the tx->rx unit vector to give the one-way
    path-length change.

    Args:
        stations: Mapping of station IDs to dicts with keys:
            'inc' (DataFrame with Record Time, Pitch, Roll),
            'lat', 'lon', and 'heading'.
        pairs: List of (tx, rx) tuples specifying baseline directions.
        h_txr: Transducer-to-tilt-sensor lever arm in meters.

    Returns:
        DataFrame indexed by timestamp with one '<tx>-<rx>_dL' column per pair.
    """
    frames = []

    for tx, rx in pairs:
        tx_info = stations[tx]
        rx_info = stations[rx]

        # Common timeline: sorted union of both stations' sample times.
        merged_times = set(tx_info["inc"]["Record Time"])
        merged_times |= set(rx_info["inc"]["Record Time"])
        timeline = pd.Index(sorted(merged_times))

        tilt_tx = interp_unique(tx_info["inc"], timeline)
        tilt_rx = interp_unique(rx_info["inc"], timeline)

        # Tilt-induced transducer displacement rotated into east/north axes.
        east_tx, north_tx = to_enu(*local_xy(tilt_tx, h_txr), tx_info["heading"])
        east_rx, north_rx = to_enu(*local_xy(tilt_rx, h_txr), rx_info["heading"])

        # Unit vector pointing along the baseline from tx toward rx.
        ue, un = unit_vector(
            tx_info["lat"],
            tx_info["lon"],
            rx_info["lat"],
            rx_info["lon"],
        )

        # Project the relative displacement onto the baseline direction.
        dL_oneway = (east_tx - east_rx) * ue + (north_tx - north_rx) * un

        frames.append(pd.DataFrame({f"{tx}-{rx}_dL": dL_oneway}, index=timeline))

    return pd.concat(frames, axis=1).sort_index()


def geodetic_to_enu(
lat: float,
lon: float,
Expand Down Expand Up @@ -272,4 +321,4 @@ def correct_sound_path(
standard_sound_speed = 1500.0
corrected_range = measured_range * (standard_sound_speed / sound_speed)

return corrected_range
return corrected_range
57 changes: 57 additions & 0 deletions code/pressure_moving_average.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
Pressure moving-average utilities for FETCH Range Calculation analysis.
"""

from __future__ import annotations

import pandas as pd

PSI_TO_KPA = 6.894757
COL_KPA = "Corrected Pressure (kPa)"


def prep_ma15d(df: pd.DataFrame, column: str = COL_KPA) -> pd.DataFrame:
    """Return a copy of *df* with demeaned pressure and its 15-day moving average.

    Adds two columns:
      - 'pressure_demeaned': *column* minus its overall mean (NaNs ignored).
      - 'ma15d': trailing 15-day time-window rolling mean of the demeaned values.

    The frame is sorted chronologically by 'DateTime' before rolling.
    """
    out = df.copy()
    out["DateTime"] = pd.to_datetime(out["DateTime"], errors="coerce")
    out = out.sort_values("DateTime")

    out["pressure_demeaned"] = out[column] - out[column].mean(skipna=True)

    rolled = (
        out.set_index("DateTime")["pressure_demeaned"]
        .rolling("15D", min_periods=1)
        .mean()
    )
    # Positional assignment: `rolled` preserves the sorted row order.
    out["ma15d"] = rolled.values
    return out


def ensure_demeaned(df: pd.DataFrame, column: str = COL_KPA) -> pd.DataFrame:
    """Return a copy of *df* guaranteed to carry 'pressure_demeaned' and 'ma15d'.

    'pressure_demeaned' is computed only when absent (existing values are kept);
    'ma15d' (trailing 15-day rolling mean of the demeaned series) is always
    recomputed after sorting by 'DateTime'.
    """
    out = df.copy()
    out["DateTime"] = pd.to_datetime(out["DateTime"], errors="coerce")
    out = out.sort_values("DateTime")

    if "pressure_demeaned" not in out.columns:
        out["pressure_demeaned"] = out[column] - out[column].mean(skipna=True)

    out["ma15d"] = (
        out.set_index("DateTime")["pressure_demeaned"]
        .rolling("15D", min_periods=1)
        .mean()
        .values
    )
    return out


def to_ma15d(df: pd.DataFrame, tcol: str, vcol: str, unit: str = "kpa") -> pd.Series:
    """Build a centered 15-day moving average of *vcol*, indexed by *tcol*.

    Rows with unparseable timestamps are dropped and the remainder sorted
    chronologically. Values given in psi (``unit="psi"``) are converted to
    kPa before averaging.
    """
    frame = df[[tcol, vcol]].copy()
    frame[tcol] = pd.to_datetime(frame[tcol], errors="coerce")
    frame = frame.dropna(subset=[tcol]).sort_values(tcol)

    values = frame[vcol].astype(float)
    if unit.lower() == "psi":
        values = values * PSI_TO_KPA  # psi -> kPa

    series = pd.Series(values.values, index=frame[tcol])
    return series.rolling("15D", center=True, min_periods=1).mean()


def rebase_to_window(ma: pd.Series, start: pd.Timestamp, end: pd.Timestamp) -> pd.Series:
    """Shift *ma* so its mean over the [start, end] window becomes zero.

    Args:
        ma: Time-indexed series (e.g. a 15-day moving average).
        start: Inclusive start of the reference window.
        end: Inclusive end of the reference window.

    Returns:
        ``ma`` minus the mean of its values inside the window.

    Raises:
        ValueError: If the window contains no usable (non-NaN) values.
            Previously this case silently produced an all-NaN series.
    """
    window = ma.loc[start:end]
    baseline = window.mean()
    if pd.isna(baseline):
        raise ValueError(f"rebase window {start}..{end} contains no valid values")
    return ma - baseline
83 changes: 83 additions & 0 deletions code/range_calculation_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
Main entry point for the FETCH Range Calculation workflow.

This script is intended to be imported and executed from a notebook.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Dict, List, Tuple

import pandas as pd

from .range_calculation_workflow import (
build_harmonic_means,
build_sound_speed_tables,
extract_sensor_data,
load_fetch_data,
)
from .range_salinity import (
GapMeanShiftConfig,
SalinityCalibrationConfig,
bottle_residuals,
prepare_salinity_series,
)
from .tidal_correction import apply_tidal_correction, load_tidal_predictions, optimize_tidal_correction
from .velocity_interpolation import interpolate_velocity


@dataclass(frozen=True)
class FetchRangeInputs:
    """Immutable bundle of raw-data inputs for the FETCH range workflow."""

    # CSV file paths passed to load_fetch_data().
    filepaths: List[str]
    # Station/instrument identifiers (e.g. '2502') passed to
    # extract_sensor_data() and build_sound_speed_tables().
    identifiers: List[str]


def run_fetch_range_workflow(
    inputs: FetchRangeInputs,
    salinity_config: SalinityCalibrationConfig,
    gap_shift_config: GapMeanShiftConfig | None,
    tidal_prediction_paths: List[str],
    pairs: List[Tuple[str, str]] | None = None,
) -> Dict[str, object]:
    """Run the FETCH range-calculation data-preparation workflow.

    Loads the raw FETCH files, builds per-station sound-speed tables and
    per-baseline harmonic means, prepares the calibrated salinity series
    (with its bottle residuals), and loads the tidal predictions.

    Args:
        inputs: File paths and station identifiers to load.
        salinity_config: Salinity calibration settings (bottle samples,
            time window, jump/smoothing parameters).
        gap_shift_config: Optional gap mean-shift correction settings,
            forwarded to prepare_salinity_series.
        tidal_prediction_paths: Paths to tidal prediction text files.
        pairs: Baseline (tx, rx) identifier pairs used for the harmonic
            means. Defaults to the three standard FETCH baselines, matching
            the previous hard-coded behavior.

    Returns:
        Dict with keys 'df_dict', 'data_extracted', 'result_dfs',
        'harmonic_mean_dfs', 'salinity_df', 'salinity_residuals', 'tidal_df'.
    """
    if pairs is None:
        # Default baselines from the original FETCH Range Calculation notebook.
        pairs = [("2502", "2503"), ("2502", "2504"), ("2503", "2504")]

    df_dict = load_fetch_data(inputs.filepaths)
    data_extracted = extract_sensor_data(df_dict, inputs.identifiers)

    result_dfs = build_sound_speed_tables(data_extracted, inputs.identifiers)
    harmonic_mean_dfs = build_harmonic_means(result_dfs, pairs)

    salinity_df = prepare_salinity_series(salinity_config, gap_shift_config)
    salinity_residuals = bottle_residuals(
        salinity_df,
        salinity_config.bottle_times,
        salinity_config.bottle_sal,
    )

    tidal_df = load_tidal_predictions(tidal_prediction_paths)

    return {
        "df_dict": df_dict,
        "data_extracted": data_extracted,
        "result_dfs": result_dfs,
        "harmonic_mean_dfs": harmonic_mean_dfs,
        "salinity_df": salinity_df,
        "salinity_residuals": salinity_residuals,
        "tidal_df": tidal_df,
    }


def apply_tidal_and_velocity(
    combined_df: pd.DataFrame,
    tidal_df: pd.DataFrame,
    result_df: pd.DataFrame,
    amplitude: float | None = None,
    rho: float | None = None,
) -> pd.DataFrame:
    """Apply the tidal pressure correction and attach interpolated velocities.

    If either *amplitude* or *rho* is missing, both are re-estimated from the
    data via optimize_tidal_correction before the correction is applied. The
    interpolated velocity is stored in a new 'interp_v' column.
    """
    need_fit = amplitude is None or rho is None
    if need_fit:
        amplitude, rho = optimize_tidal_correction(combined_df, tidal_df)

    out = apply_tidal_correction(combined_df, tidal_df, amplitude, rho)
    out["interp_v"] = interpolate_velocity(out["Record Time"], result_df)
    return out
Loading