Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions analysis/gold_mlp_prediction_2026-02-25.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"setup": {
"seq_len": 2,
"hidden": 128,
"dropout": 0.2,
"train_frac": 0.7,
"val_frac": 0.1,
"batch_size": 64,
"epochs": 80,
"patience": 20,
"lr": 0.001,
"weight_decay": 0.0001,
"include_indicators": true,
"seed": 42
},
"test_metrics": {
"rmse_price": 0.24380004778602185,
"mae_price": 0.19273792207241058,
"mse_price": 0.05943846330046654,
"r2": -0.3544858694076538,
"direction_accuracy": 0.5454545454545454
},
"baseline_metrics": {
"rmse_price": 6.5826802233882455,
"mae_price": 6.579346187018833,
"mse_price": 43.331678923386725
},
"prediction": {
"predicted_log_close": 7.6717987060546875,
"predicted_close": 2146.939687700816,
"prediction_for_date": "2026-02-25",
"based_on_date": "2026-02-24"
},
"model_path": "analysis/models/gold_mlp_seq2_ep80_20260224.pt"
}
40 changes: 40 additions & 0 deletions analysis/gold_transformer_prediction_2026-02-25.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"setup": {
"seq_len": 3,
"train_frac": 0.7,
"val_frac": 0.1,
"d_model": 60,
"nhead": 5,
"layers": 1,
"dim_ff": 256,
"dropout": 0.2,
"input_dropout": 0.1,
"batch_size": 32,
"epochs": 100,
"patience": 100,
"lr": 0.001,
"weight_decay": 0.0001,
"include_indicators": true,
"seed": 42,
"train_end_date": "2026-02-13",
"prediction_base_date": "2026-02-24"
},
"test_metrics": {
"rmse_price": 0.5291360702329879,
"mae_price": 0.4818355441093445,
"mse_price": 0.2799849808216095,
"r2": -5.383824825286865,
"direction_accuracy": 0.4874141876430206
},
"baseline_metrics": {
"rmse_price": 6.5820665192088015,
"mae_price": 6.578734009235542,
"mse_price": 43.323599663289464
},
"prediction": {
"predicted_log_close": 6.082147121429443,
"predicted_close": 437.96855756983575,
"prediction_for_date": "2026-02-25",
"based_on_date": "2026-02-24"
}
}
7 changes: 6 additions & 1 deletion analysis/ui_mlp_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,12 @@ def predict_with_bundle(bundle: TrainedBundle, datasets: List[Dict[str, Any]]) -
with tempfile.TemporaryDirectory() as tmp_dir:
raw_dir = Path(tmp_dir)
_prepare_raw_files(datasets, raw_dir)
panel = run_prep(raw_dir=raw_dir, out_path=None, include_indicators=bundle.include_indicators)
panel = run_prep(
raw_dir=raw_dir,
out_path=None,
include_indicators=bundle.include_indicators,
drop_last_unlabeled=False,
)

feature_cols = bundle.feature_cols
if "label" not in panel.columns:
Expand Down
23 changes: 15 additions & 8 deletions prep_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def _load_price(path: Path, tz: str, close_hour: int) -> pd.Series:
return pd.Series(grouped.values, index=grouped.index, name="close")


def _build_with_indicators(comex_path: Path, sh_path: Path) -> pd.DataFrame:
def _build_with_indicators(comex_path: Path, sh_path: Path, drop_last_unlabeled: bool = True) -> pd.DataFrame:
if compute_technical_indicators is None:
raise RuntimeError("compute_technical_indicators unavailable; set include_indicators=False")

Expand All @@ -56,11 +56,12 @@ def _build_with_indicators(comex_path: Path, sh_path: Path) -> pd.DataFrame:
df = base.join(comex_ind, how="inner").join(sh_ind, how="inner")
df = df.dropna().sort_index()
df["label"] = df["log_close_sh"].shift(-1)
df = df.dropna()
if drop_last_unlabeled:
df = df.dropna()
return df


def _build_minimal(comex_path: Path, sh_path: Path) -> pd.DataFrame:
def _build_minimal(comex_path: Path, sh_path: Path, drop_last_unlabeled: bool = True) -> pd.DataFrame:
comex = _load_price(comex_path, tz="America/New_York", close_hour=17)
sh = _load_price(sh_path, tz="Asia/Shanghai", close_hour=15)

Expand All @@ -69,11 +70,17 @@ def _build_minimal(comex_path: Path, sh_path: Path) -> pd.DataFrame:
df = pd.concat([comex_log, sh_log], axis=1, join="inner")
df = df.dropna().sort_index()
df["label"] = df["log_close_sh"].shift(-1)
df = df.dropna()
if drop_last_unlabeled:
df = df.dropna()
return df


def run_prep(raw_dir: Path, out_path: Optional[Path] = None, include_indicators: bool = False) -> pd.DataFrame:
def run_prep(
raw_dir: Path,
out_path: Optional[Path] = None,
include_indicators: bool = False,
drop_last_unlabeled: bool = True,
) -> pd.DataFrame:
"""Prepare feature table with optional technical indicators and label = next-day log_close_sh.

Default keeps indicators off to remain robust for very short synthetic datasets (tests).
Expand All @@ -84,12 +91,12 @@ def run_prep(raw_dir: Path, out_path: Optional[Path] = None, include_indicators:
sh_path = raw_dir / "shanghai_gold_9999.csv"

if include_indicators and compute_technical_indicators is not None:
df = _build_with_indicators(comex_path, sh_path)
df = _build_with_indicators(comex_path, sh_path, drop_last_unlabeled=drop_last_unlabeled)
if len(df) < 3:
# Fallback when rolling-window indicators wipe out short samples
df = _build_minimal(comex_path, sh_path)
df = _build_minimal(comex_path, sh_path, drop_last_unlabeled=drop_last_unlabeled)
else:
df = _build_minimal(comex_path, sh_path)
df = _build_minimal(comex_path, sh_path, drop_last_unlabeled=drop_last_unlabeled)

if out_path is not None:
out_path = Path(out_path)
Expand Down
28 changes: 28 additions & 0 deletions tests/test_ui_mlp_service.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pathlib import Path

import pandas as pd
import pytest

from analysis.ui_mlp_service import predict_with_bundle, train_from_uploaded_data
Expand Down Expand Up @@ -59,3 +60,30 @@ def test_train_fails_on_missing_required_columns(tmp_path: Path) -> None:
datasets = [{"name": "broken.csv", "data": [{"date": "2024-01-01", "open": 1.0, "close": 1.0}]}]
with pytest.raises(ValueError, match="missing required columns"):
train_from_uploaded_data(datasets=datasets, params={"epochs": 1}, save_path=tmp_path / "m.pth")


def test_predict_uses_latest_available_date(tmp_path: Path) -> None:
    """Prediction metadata must track the newest row of the uploaded data.

    The last row's date is bumped to 2024-02-24; after a short training run,
    the service should report that date as ``based_on_date`` and the next
    business day as ``prediction_for_date``.
    """
    uploaded_rows = _make_rows(1.1, n=24)
    uploaded_rows[-1]["date"] = "2024-02-24"
    uploads = [{"name": "shanghai.csv", "data": uploaded_rows}]

    # Deliberately tiny settings: the assertions concern date bookkeeping,
    # not model quality, so training only needs to complete quickly.
    training_params = {
        "seq_len": 2,
        "train_frac": 0.7,
        "val_frac": 0.1,
        "epochs": 3,
        "patience": 2,
        "batch_size": 8,
        "lr": 1e-3,
        "weight_decay": 1e-4,
        "dropout": 0.1,
        "hidden": 32,
        "seed": 42,
        "include_indicators": False,
    }

    trained = train_from_uploaded_data(
        datasets=uploads, params=training_params, save_path=tmp_path / "model.pth"
    )
    result = predict_with_bundle(trained, uploads)

    assert result["based_on_date"] == "2024-02-24"
    next_business_day = pd.Timestamp("2024-02-24") + pd.tseries.offsets.BDay(1)
    assert result["prediction_for_date"] == next_business_day.strftime("%Y-%m-%d")