From bf9e98c589f20754bbbb0548c38f5bba81997d21 Mon Sep 17 00:00:00 2001 From: Nic van Dessel <51134175+nvandessel@users.noreply.github.com> Date: Fri, 3 Apr 2026 04:56:12 +0000 Subject: [PATCH 1/2] fix(notebook): auto-detect Kaggle dataset mount path Kaggle mounts datasets at /kaggle/input/datasets/{owner}/{slug}/ not /kaggle/input/{slug}/. Try both paths. Also add kernel-metadata.json for CLI-based notebook push. Co-Authored-By: Claude Opus 4.6 (1M context) --- notebooks/kernel-metadata.json | 13 +++++++++++++ notebooks/train-hippofloop.ipynb | 18 ++---------------- 2 files changed, 15 insertions(+), 16 deletions(-) create mode 100644 notebooks/kernel-metadata.json diff --git a/notebooks/kernel-metadata.json b/notebooks/kernel-metadata.json new file mode 100644 index 0000000..3f07f9c --- /dev/null +++ b/notebooks/kernel-metadata.json @@ -0,0 +1,13 @@ +{ + "id": "nvandessel/hippofloop", + "title": "hippofloop", + "code_file": "train-hippofloop.ipynb", + "language": "python", + "kernel_type": "notebook", + "is_private": true, + "enable_gpu": true, + "enable_internet": true, + "dataset_sources": ["nvandessel/floop-decisions"], + "competition_sources": [], + "kernel_sources": [] +} diff --git a/notebooks/train-hippofloop.ipynb b/notebooks/train-hippofloop.ipynb index d7f70fc..a8b0f51 100644 --- a/notebooks/train-hippofloop.ipynb +++ b/notebooks/train-hippofloop.ipynb @@ -51,28 +51,14 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## 2. Load and explore data\n", - "\n", - "Data is expected as a Kaggle Dataset mounted at `/kaggle/input/floop-decisions/`.\n", - "Upload your `decisions.jsonl` files there." - ] + "source": "## 2. Load and explore data\n\nData is expected as a Kaggle Dataset. The path is auto-detected." }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "DATA_DIR = Path(\"/kaggle/input/floop-decisions\")\n", - "\n", - "# Find all JSONL files in the dataset\n", - "jsonl_files = sorted(DATA_DIR.glob(\"*.jsonl\"))\n", - "print(f\"Found {len(jsonl_files)} JSONL files:\")\n", - "for f in jsonl_files:\n", - " size_mb = f.stat().st_size / (1024 * 1024)\n", - " print(f\" {f.name} ({size_mb:.1f} MB)\")" - ] + "source": "# Auto-detect Kaggle dataset path\n_candidates = [\n Path(\"/kaggle/input/datasets/nvandessel/floop-decisions\"),\n Path(\"/kaggle/input/floop-decisions\"),\n]\nDATA_DIR = next((p for p in _candidates if p.exists()), _candidates[-1])\nprint(f\"Data dir: {DATA_DIR}\")\n\n# Find all JSONL files in the dataset\njsonl_files = sorted(DATA_DIR.glob(\"*.jsonl\"))\nprint(f\"Found {len(jsonl_files)} JSONL files:\")\nfor f in jsonl_files:\n size_mb = f.stat().st_size / (1024 * 1024)\n print(f\" {f.name} ({size_mb:.1f} MB)\")" }, { "cell_type": "code", From e02a5c8171c43ba77e64ac95b2ed073d63135af6 Mon Sep 17 00:00:00 2001 From: Nic van Dessel <51134175+nvandessel@users.noreply.github.com> Date: Fri, 3 Apr 2026 05:00:21 +0000 Subject: [PATCH 2/2] fix(notebook): fail fast when dataset path not found Raise FileNotFoundError with actionable message instead of silently falling back to a non-existent path. Addresses Greptile review. Co-Authored-By: Claude Opus 4.6 (1M context) --- notebooks/train-hippofloop.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/train-hippofloop.ipynb b/notebooks/train-hippofloop.ipynb index a8b0f51..0c56293 100644 --- a/notebooks/train-hippofloop.ipynb +++ b/notebooks/train-hippofloop.ipynb @@ -58,7 +58,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "# Auto-detect Kaggle dataset path\n_candidates = [\n Path(\"/kaggle/input/datasets/nvandessel/floop-decisions\"),\n Path(\"/kaggle/input/floop-decisions\"),\n]\nDATA_DIR = next((p for p in _candidates if p.exists()), _candidates[-1])\nprint(f\"Data dir: {DATA_DIR}\")\n\n# Find all JSONL files in the dataset\njsonl_files = sorted(DATA_DIR.glob(\"*.jsonl\"))\nprint(f\"Found {len(jsonl_files)} JSONL files:\")\nfor f in jsonl_files:\n size_mb = f.stat().st_size / (1024 * 1024)\n print(f\" {f.name} ({size_mb:.1f} MB)\")" + "source": "# Auto-detect Kaggle dataset path\n_candidates = [\n Path(\"/kaggle/input/datasets/nvandessel/floop-decisions\"),\n Path(\"/kaggle/input/floop-decisions\"),\n]\nDATA_DIR = next((p for p in _candidates if p.exists()), None)\nif DATA_DIR is None:\n raise FileNotFoundError(\n f\"Kaggle dataset not found. Checked: {[str(p) for p in _candidates]}. \"\n \"Add the 'floop-decisions' dataset in the notebook sidebar.\"\n )\nprint(f\"Data dir: {DATA_DIR}\")\n\n# Find all JSONL files in the dataset\njsonl_files = sorted(DATA_DIR.glob(\"*.jsonl\"))\nif not jsonl_files:\n raise FileNotFoundError(f\"No .jsonl files found in {DATA_DIR}\")\nprint(f\"Found {len(jsonl_files)} JSONL files:\")\nfor f in jsonl_files:\n size_mb = f.stat().st_size / (1024 * 1024)\n print(f\" {f.name} ({size_mb:.1f} MB)\")" }, { "cell_type": "code",