Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion code/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,20 @@ Statistical and utility functions:
- `normalize_data()`: Data normalization (z-score, min-max, robust)
- `detect_outliers()`: Multiple outlier detection methods

### Range Calculation Workflow (FETCH Range Calculation Notebook)
Structured helpers extracted from `FETCH Range Calculation.ipynb`:
- `range_salinity.py`: Salinity gap stitching, smoothing, bottle calibration
- `tidal_correction.py`: Tidal prediction parsing and pressure correction
- `velocity_interpolation.py`: Interpolate recorded velocities to data timelines
- `pressure_moving_average.py`: 15-day moving average and re-basing helpers
- `range_calculation_workflow.py`: Data extraction and harmonic mean utilities
- `range_calculation_main.py`: Notebook-oriented main workflow entry point

Use the included notebook scaffold:
```bash
jupyter notebook code/fetch_range_calculation.ipynb
```

## Example Analysis Workflow

```python
Expand Down Expand Up @@ -207,4 +221,4 @@ This package was extracted from the FETCH StreamLine Final notebook to create a

## License

This code is derived from the FETCH StreamLine project for oceanographic research.
This code is derived from the FETCH StreamLine project for oceanographic research.
31 changes: 29 additions & 2 deletions code/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
to_enu,
apply_tilt_correction,
geodetic_to_enu,
calculate_range_from_coordinates
calculate_range_from_coordinates,
build_baseline_perturbations
)

from .optimization import (
Expand All @@ -63,6 +64,21 @@
normalize_data
)

from .range_salinity import (
SalinityCalibrationConfig,
GapMeanShiftConfig,
prepare_salinity_series,
bottle_residuals
)

from .tidal_correction import (
load_tidal_predictions,
optimize_tidal_correction,
apply_tidal_correction
)

from .velocity_interpolation import interpolate_velocity

from .data_persistence import (
save_dataframe_as_pickle,
load_dataframe_from_pickle,
Expand Down Expand Up @@ -94,6 +110,7 @@
'apply_tilt_correction',
'geodetic_to_enu',
'calculate_range_from_coordinates',
'build_baseline_perturbations',

# Optimization
'fit_and_extrapolate',
Expand All @@ -107,6 +124,16 @@
'calculate_rms_error',
'calculate_statistics',
'normalize_data',

# Range calculation helpers
'SalinityCalibrationConfig',
'GapMeanShiftConfig',
'prepare_salinity_series',
'bottle_residuals',
'load_tidal_predictions',
'optimize_tidal_correction',
'apply_tidal_correction',
'interpolate_velocity',

# Data persistence
'save_dataframe_as_pickle',
Expand All @@ -115,4 +142,4 @@
'save_baseline_data',
'save_instrument_data',
'load_existing_pickles'
]
]
85 changes: 85 additions & 0 deletions code/fetch_range_calculation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# FETCH Range Calculation (Structured)\n",
"\n",
"This notebook calls the structured workflow modules in `code/` while preserving\n",
"the original file naming used in the FETCH Range Calculation notebook.\n"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"\n",
"from code.range_calculation_main import (\n",
" FetchRangeInputs,\n",
" run_fetch_range_workflow,\n",
")\n",
"from code.range_salinity import SalinityCalibrationConfig, GapMeanShiftConfig\n",
"\n",
"filepaths = [\n",
" 'Data_230912111909_East_006870_2502_.csv',\n",
" 'Data_230912112033_West_006874_2503_.csv',\n",
" 'Data_230913091309_North_00687A_2504_.csv',\n",
"]\n",
"inputs = FetchRangeInputs(filepaths=filepaths, identifiers=['2504', '2503', '2502'])\n",
"\n",
"salinity_config = SalinityCalibrationConfig(\n",
" file_path='/data/wsd02/maleen_data/ooi-rs03ccal-mj03f-12-ctdpfb305_2f21_522e_6ceb.csv',\n",
" bottle_times=pd.to_datetime([\n",
" '2022-08-14 05:49:53',\n",
" '2022-08-30 19:37:13',\n",
" '2023-09-17 03:35:09',\n",
" ]),\n",
" bottle_sal=[34.52543602, 34.52720340, 34.52700000],\n",
" start_time=pd.to_datetime('2022-08-14 05:49:53'),\n",
" end_time=pd.to_datetime('2025-09-05 23:59:00'),\n",
" jump_start=pd.to_datetime('2023-09-07 00:00:00'),\n",
" jump_end=pd.to_datetime('2023-09-14 00:00:00'),\n",
" smooth_after=pd.to_datetime('2023-09-14 00:00:00'),\n",
" smooth_len=250,\n",
")\n",
"\n",
"gap_shift_config = GapMeanShiftConfig(\n",
" gap_start=pd.to_datetime('2024-08-31 21:43:00'),\n",
" gap_end=pd.to_datetime('2024-09-01 20:13:00'),\n",
" pre_hours=24,\n",
" post_hours=24,\n",
")\n",
"\n",
"tidal_prediction_paths = [\n",
" '/data/wsd02/maleen_data/pred_F_2022.txt',\n",
" '/data/wsd02/maleen_data/pred_F_2023.txt',\n",
" '/data/wsd02/maleen_data/pred_F_2024.txt',\n",
" '/data/wsd02/maleen_data/pred_F_2025.txt',\n",
"]\n",
"\n",
"outputs = run_fetch_range_workflow(\n",
" inputs,\n",
" salinity_config,\n",
" gap_shift_config,\n",
" tidal_prediction_paths,\n",
")\n",
"outputs.keys()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
51 changes: 50 additions & 1 deletion code/positioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,55 @@ def calculate_baseline_perturbations(
return perturb_df


def build_baseline_perturbations(
    stations: dict,
    pairs: list,
    h_txr: float = H_TXR,
) -> pd.DataFrame:
    """
    Assemble a table of baseline perturbations for several station pairs.

    For each (tx, rx) pair, both stations' tilt records are interpolated onto
    their merged timeline, converted to east/north offsets, and the relative
    displacement is projected onto the tx->rx unit vector to give the one-way
    path-length change.

    Args:
        stations: Mapping of station IDs to dicts with keys:
            'inc' (DataFrame with Record Time, Pitch, Roll),
            'lat', 'lon', and 'heading'.
        pairs: List of (tx, rx) tuples specifying baseline directions.
        h_txr: Transducer-to-tilt-sensor lever arm in meters.

    Returns:
        DataFrame indexed by timestamp with one '<tx>-<rx>_dL' column per pair.
    """
    frames = []

    for tx, rx in pairs:
        tx_info = stations[tx]
        rx_info = stations[rx]

        # Common timeline: sorted union of both stations' sample times.
        merged_times = set(tx_info["inc"]["Record Time"])
        merged_times |= set(rx_info["inc"]["Record Time"])
        timeline = pd.Index(sorted(merged_times))

        tilt_tx = interp_unique(tx_info["inc"], timeline)
        tilt_rx = interp_unique(rx_info["inc"], timeline)

        # Tilt-induced transducer displacement rotated into east/north axes.
        east_tx, north_tx = to_enu(*local_xy(tilt_tx, h_txr), tx_info["heading"])
        east_rx, north_rx = to_enu(*local_xy(tilt_rx, h_txr), rx_info["heading"])

        # Unit vector pointing along the baseline from tx toward rx.
        ue, un = unit_vector(
            tx_info["lat"],
            tx_info["lon"],
            rx_info["lat"],
            rx_info["lon"],
        )

        # Project the relative displacement onto the baseline direction.
        dL_oneway = (east_tx - east_rx) * ue + (north_tx - north_rx) * un

        frames.append(pd.DataFrame({f"{tx}-{rx}_dL": dL_oneway}, index=timeline))

    return pd.concat(frames, axis=1).sort_index()


def geodetic_to_enu(
lat: float,
lon: float,
Expand Down Expand Up @@ -272,4 +321,4 @@ def correct_sound_path(
standard_sound_speed = 1500.0
corrected_range = measured_range * (standard_sound_speed / sound_speed)

return corrected_range
return corrected_range
57 changes: 57 additions & 0 deletions code/pressure_moving_average.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
Pressure moving-average utilities for FETCH Range Calculation analysis.
"""

from __future__ import annotations

import pandas as pd

PSI_TO_KPA = 6.894757
COL_KPA = "Corrected Pressure (kPa)"


def prep_ma15d(df: pd.DataFrame, column: str = COL_KPA) -> pd.DataFrame:
    """Return a copy of *df* with demeaned pressure and its 15-day moving average.

    Adds two columns:
      - 'pressure_demeaned': *column* minus its overall mean (NaNs ignored).
      - 'ma15d': trailing 15-day time-window rolling mean of the demeaned values.

    The frame is sorted chronologically by 'DateTime' before rolling.
    """
    out = df.copy()
    out["DateTime"] = pd.to_datetime(out["DateTime"], errors="coerce")
    out = out.sort_values("DateTime")

    out["pressure_demeaned"] = out[column] - out[column].mean(skipna=True)

    rolled = (
        out.set_index("DateTime")["pressure_demeaned"]
        .rolling("15D", min_periods=1)
        .mean()
    )
    # Positional assignment: `rolled` preserves the sorted row order.
    out["ma15d"] = rolled.values
    return out


def ensure_demeaned(df: pd.DataFrame, column: str = COL_KPA) -> pd.DataFrame:
    """Return a copy of *df* guaranteed to carry 'pressure_demeaned' and 'ma15d'.

    'pressure_demeaned' is computed only when absent (existing values are kept);
    'ma15d' (trailing 15-day rolling mean of the demeaned series) is always
    recomputed after sorting by 'DateTime'.
    """
    out = df.copy()
    out["DateTime"] = pd.to_datetime(out["DateTime"], errors="coerce")
    out = out.sort_values("DateTime")

    if "pressure_demeaned" not in out.columns:
        out["pressure_demeaned"] = out[column] - out[column].mean(skipna=True)

    out["ma15d"] = (
        out.set_index("DateTime")["pressure_demeaned"]
        .rolling("15D", min_periods=1)
        .mean()
        .values
    )
    return out


def to_ma15d(df: pd.DataFrame, tcol: str, vcol: str, unit: str = "kpa") -> pd.Series:
    """Build a centered 15-day moving average of *vcol*, indexed by *tcol*.

    Rows with unparseable timestamps are dropped and the remainder sorted
    chronologically. Values given in psi (``unit="psi"``) are converted to
    kPa before averaging.
    """
    frame = df[[tcol, vcol]].copy()
    frame[tcol] = pd.to_datetime(frame[tcol], errors="coerce")
    frame = frame.dropna(subset=[tcol]).sort_values(tcol)

    values = frame[vcol].astype(float)
    if unit.lower() == "psi":
        values = values * PSI_TO_KPA  # psi -> kPa

    series = pd.Series(values.values, index=frame[tcol])
    return series.rolling("15D", center=True, min_periods=1).mean()


def rebase_to_window(ma: pd.Series, start: pd.Timestamp, end: pd.Timestamp) -> pd.Series:
    """Shift *ma* so its mean over the [start, end] window becomes zero.

    Args:
        ma: Time-indexed series (e.g. a 15-day moving average).
        start: Inclusive start of the reference window.
        end: Inclusive end of the reference window.

    Returns:
        ``ma`` minus the mean of its values inside the window.

    Raises:
        ValueError: If the window contains no usable (non-NaN) values.
            Previously this case silently produced an all-NaN series.
    """
    window = ma.loc[start:end]
    baseline = window.mean()
    if pd.isna(baseline):
        raise ValueError(f"rebase window {start}..{end} contains no valid values")
    return ma - baseline
83 changes: 83 additions & 0 deletions code/range_calculation_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
Main entry point for the FETCH Range Calculation workflow.

This script is intended to be imported and executed from a notebook.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Dict, List, Tuple

import pandas as pd

from .range_calculation_workflow import (
build_harmonic_means,
build_sound_speed_tables,
extract_sensor_data,
load_fetch_data,
)
from .range_salinity import (
GapMeanShiftConfig,
SalinityCalibrationConfig,
bottle_residuals,
prepare_salinity_series,
)
from .tidal_correction import apply_tidal_correction, load_tidal_predictions, optimize_tidal_correction
from .velocity_interpolation import interpolate_velocity


@dataclass(frozen=True)
class FetchRangeInputs:
    """Immutable bundle of raw-data inputs for the FETCH range workflow."""

    # CSV file paths passed to load_fetch_data().
    filepaths: List[str]
    # Station/instrument identifiers (e.g. '2502') passed to
    # extract_sensor_data() and build_sound_speed_tables().
    identifiers: List[str]


def run_fetch_range_workflow(
    inputs: FetchRangeInputs,
    salinity_config: SalinityCalibrationConfig,
    gap_shift_config: GapMeanShiftConfig | None,
    tidal_prediction_paths: List[str],
    pairs: List[Tuple[str, str]] | None = None,
) -> Dict[str, object]:
    """Run the FETCH range-calculation data-preparation workflow.

    Loads the raw FETCH files, builds per-station sound-speed tables and
    per-baseline harmonic means, prepares the calibrated salinity series
    (with its bottle residuals), and loads the tidal predictions.

    Args:
        inputs: File paths and station identifiers to load.
        salinity_config: Salinity calibration settings (bottle samples,
            time window, jump/smoothing parameters).
        gap_shift_config: Optional gap mean-shift correction settings,
            forwarded to prepare_salinity_series.
        tidal_prediction_paths: Paths to tidal prediction text files.
        pairs: Baseline (tx, rx) identifier pairs used for the harmonic
            means. Defaults to the three standard FETCH baselines, matching
            the previous hard-coded behavior.

    Returns:
        Dict with keys 'df_dict', 'data_extracted', 'result_dfs',
        'harmonic_mean_dfs', 'salinity_df', 'salinity_residuals', 'tidal_df'.
    """
    if pairs is None:
        # Default baselines from the original FETCH Range Calculation notebook.
        pairs = [("2502", "2503"), ("2502", "2504"), ("2503", "2504")]

    df_dict = load_fetch_data(inputs.filepaths)
    data_extracted = extract_sensor_data(df_dict, inputs.identifiers)

    result_dfs = build_sound_speed_tables(data_extracted, inputs.identifiers)
    harmonic_mean_dfs = build_harmonic_means(result_dfs, pairs)

    salinity_df = prepare_salinity_series(salinity_config, gap_shift_config)
    salinity_residuals = bottle_residuals(
        salinity_df,
        salinity_config.bottle_times,
        salinity_config.bottle_sal,
    )

    tidal_df = load_tidal_predictions(tidal_prediction_paths)

    return {
        "df_dict": df_dict,
        "data_extracted": data_extracted,
        "result_dfs": result_dfs,
        "harmonic_mean_dfs": harmonic_mean_dfs,
        "salinity_df": salinity_df,
        "salinity_residuals": salinity_residuals,
        "tidal_df": tidal_df,
    }


def apply_tidal_and_velocity(
    combined_df: pd.DataFrame,
    tidal_df: pd.DataFrame,
    result_df: pd.DataFrame,
    amplitude: float | None = None,
    rho: float | None = None,
) -> pd.DataFrame:
    """Apply the tidal pressure correction and attach interpolated velocities.

    If either *amplitude* or *rho* is missing, both are re-estimated from the
    data via optimize_tidal_correction before the correction is applied. The
    interpolated velocity is stored in a new 'interp_v' column.
    """
    need_fit = amplitude is None or rho is None
    if need_fit:
        amplitude, rho = optimize_tidal_correction(combined_df, tidal_df)

    out = apply_tidal_correction(combined_df, tidal_df, amplitude, rho)
    out["interp_v"] = interpolate_velocity(out["Record Time"], result_df)
    return out
Loading