diff --git a/environments/environment-Linux.yml b/environments/environment-Linux.yml index db722fe28..b0a077bc7 100644 --- a/environments/environment-Linux.yml +++ b/environments/environment-Linux.yml @@ -23,3 +23,4 @@ dependencies: - nwbinspector == 0.6.5 - tables - numcodecs == 0.15.1 # numcodecs 0.16.0 is not compatible with zarr 2.18.5 + - claude-agent-sdk >= 0.1.0 # AI conversion assistant diff --git a/environments/environment-MAC-apple-silicon.yml b/environments/environment-MAC-apple-silicon.yml index 2147aebdb..5dbe560ee 100644 --- a/environments/environment-MAC-apple-silicon.yml +++ b/environments/environment-MAC-apple-silicon.yml @@ -29,3 +29,4 @@ dependencies: - ndx-pose == 0.2.2 - nwbinspector == 0.6.5 - numcodecs == 0.15.1 # numcodecs 0.16.0 is not compatible with zarr 2.18.5 + - claude-agent-sdk >= 0.1.0 # AI conversion assistant diff --git a/environments/environment-MAC-intel.yml b/environments/environment-MAC-intel.yml index 19f301be4..620d95324 100644 --- a/environments/environment-MAC-intel.yml +++ b/environments/environment-MAC-intel.yml @@ -28,3 +28,4 @@ dependencies: # with tables==3.9.1 (latest that can be used by neuroconv 0.6.0). 
# h5py and tables need to be consistent for electron build for unknown reason - ruamel.yaml.clib != 0.2.13 # 0.2.13 throws a build error on intel Mac -- see https://github.com/catalystneuro/roiextractors/issues/489 + - claude-agent-sdk >= 0.1.0 # AI conversion assistant diff --git a/environments/environment-Windows.yml b/environments/environment-Windows.yml index 3662a2534..d1da96d49 100644 --- a/environments/environment-Windows.yml +++ b/environments/environment-Windows.yml @@ -25,3 +25,4 @@ dependencies: - nwbinspector == 0.6.5 - tables - numcodecs == 0.15.1 # numcodecs 0.16.0 is not compatible with zarr 2.18.5 + - claude-agent-sdk >= 0.1.0 # AI conversion assistant diff --git a/nwb-guide.spec b/nwb-guide.spec index cd596347d..514ee0fd2 100644 --- a/nwb-guide.spec +++ b/nwb-guide.spec @@ -10,7 +10,11 @@ import scipy from PyInstaller.utils.hooks import collect_data_files from PyInstaller.utils.hooks import collect_all -datas = [('./src/paths.config.json', '.'), ('./package.json', '.')] +datas = [ + ('./src/paths.config.json', '.'), + ('./package.json', '.'), + ('./src/pyflask/ai/skill', 'ai/skill'), # Bundled NWB conversion skill +] binaries = [] hiddenimports = [ 'email_validator', @@ -24,6 +28,7 @@ datas += collect_data_files('jsonschema_specifications') # Various consequences of lazy imports modules_to_collect = [ + 'claude_agent_sdk', 'dandi', 'keyrings', 'unittest', diff --git a/package-lock.json b/package-lock.json index 6fb13db91..ea13ffd5f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -24,6 +24,7 @@ "jsonschema": "^1.4.1", "lit": "^2.6.1", "lottie-web": "^5.9.5", + "marked": "^17.0.1", "notyf": "^3.9.0", "sweetalert2": "^11.6.13", "tippy.js": "^6.3.7", @@ -16411,6 +16412,18 @@ "react": ">= 0.14.0" } }, + "node_modules/marked": { + "version": "17.0.1", + "resolved": "https://registry.npmjs.org/marked/-/marked-17.0.1.tgz", + "integrity": "sha512-boeBdiS0ghpWcSwoNm/jJBwdpFaMnZWRzjA6SkUMYb40SVaN1x7mmfGKp0jvexGcx+7y2La5zRZsYFZI6Qpypg==", + 
"license": "MIT", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 20" + } + }, "node_modules/matchdep": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/matchdep/-/matchdep-2.0.0.tgz", diff --git a/package.json b/package.json index f4d7e863c..56a642ef5 100644 --- a/package.json +++ b/package.json @@ -158,6 +158,7 @@ "jsonschema": "^1.4.1", "lit": "^2.6.1", "lottie-web": "^5.9.5", + "marked": "^17.0.1", "notyf": "^3.9.0", "sweetalert2": "^11.6.13", "tippy.js": "^6.3.7", diff --git a/src/electron/frontend/core/components/pages/ai-assistant/AIAssistantPage.js b/src/electron/frontend/core/components/pages/ai-assistant/AIAssistantPage.js new file mode 100644 index 000000000..7a926e7d4 --- /dev/null +++ b/src/electron/frontend/core/components/pages/ai-assistant/AIAssistantPage.js @@ -0,0 +1,1437 @@ +import { html, css } from "lit"; +import { Page } from "../Page.js"; +import { baseUrl } from "../../../server/globals"; + +import "./ChatMessage.js"; +import "./ChatInput.js"; +import "./SettingsPanel.js"; + +/** + * AI Assistant page — chat interface for the NWB conversion agent. + * + * Two views: + * 1. Session list (home) — shows previous chats + "New Conversation" button + * 2. 
Chat view — active conversation with message list + input + * + * Communicates with the Flask /ai namespace via: + * - GET /ai/sessions (list saved sessions) + * - POST /ai/sessions (create session) + * - GET /ai/sessions/ (get session state or history) + * - POST /ai/sessions//message (send message) + * - GET /ai/sessions//events (SSE stream) + * - DELETE /ai/sessions/ (delete session) + */ +export class AIAssistantPage extends Page { + static properties = { + ...super.properties, + messages: { type: Array, state: true }, + sessionId: { type: String, state: true }, + dataDirs: { type: Array, state: true }, + isStreaming: { type: Boolean, state: true }, + settingsOpen: { type: Boolean, state: true }, + connected: { type: Boolean, state: true }, + savedSessions: { type: Array, state: true }, + viewMode: { type: String, state: true }, // "list" or "chat" + isReadOnly: { type: Boolean, state: true }, + authMode: { type: String, state: true }, + currentPhase: { type: Number, state: true }, + todos: { type: Array, state: true }, + }; + + header = { + title: "AI Assistant", + subtitle: "Convert your data to NWB format with AI guidance.", + }; + + constructor(...args) { + super(...args); + this.messages = []; + this.sessionId = null; + this.dataDirs = []; + this._dirInput = ""; + this.isStreaming = false; + this.settingsOpen = false; + this.connected = false; + this.savedSessions = []; + this.viewMode = "list"; + this.isReadOnly = false; + this.authMode = null; + this.currentPhase = 0; + this.todos = []; + this._eventSource = null; + this._starting = false; + this._todoIdMap = new Map(); // TodoWrite id -> text + + this.style.height = "100%"; + } + + createRenderRoot() { + return this; + } + + connectedCallback() { + super.connectedCallback(); + this._loadSessions(); + } + + disconnectedCallback() { + super.disconnectedCallback(); + this._closeEventSource(); + } + + async _loadSessions() { + try { + const resp = await fetch(new URL("/ai/sessions", baseUrl)); + if 
(resp.ok) { + const data = await resp.json(); + this.savedSessions = data.sessions || []; + } + } catch { + // ignore — sessions list is optional + } + } + + render() { + if (this.viewMode === "list") { + return this._renderSessionList(); + } + return this._renderChatView(); + } + + // ── Session List View ────────────────────────────────────────────── + + _renderSessionList() { + return html` + + +
+ + +
+

Conversations

+
+ + +
+
+ +
+ ${this.savedSessions.length === 0 + ? html` +
+

NWB Conversion Assistant

+

+ I'll help you convert your neurophysiology data to NWB format and publish it on + DANDI Archive. +

+

Click + New Conversation to get started.

+
+ ` + : this.savedSessions.map( + (s) => html` +
this._viewSession(s.session_id)}> +
${s.message_count > 0 ? "..." : ""}
+
+
${s.title}
+
+ ${this._formatDate(s.updated_at)} · ${s.message_count} messages + · + ${(s.data_dirs || [s.data_dir]).map((d) => this._shortDir(d)).join(", ")} +
+
+
+ +
+
+ ` + )} +
+
+ `; + } + + // ── Chat View ────────────────────────────────────────────────────── + + _renderChatView() { + const PHASES = [ + "Experiment Discovery", + "Data Inspection", + "Metadata Collection", + "Synchronization", + "Code Generation", + "Testing & Validation", + "DANDI Upload", + ]; + + return html` + + +
+ + + + +
+ + + ${this.isReadOnly + ? "" + : !this.connected + ? html` + + { + this._dirInput = e.target.value; + this.requestUpdate(); + }} + @keydown=${(e) => { + if (e.key === "Enter") { + e.preventDefault(); + this._addFolder(); + } + }} + placeholder="/path/to/your/data" + /> + + + + ` + : html`Connected ${this.authMode + ? html`${this.authMode === "proxy" + ? "Free Credits" + : this.authMode === "subscription" + ? "Your Anthropic Account" + : "Your API Key"}` + : ""}`} + ${this.connected ? html`` : ""} + +
+ + ${this.isReadOnly + ? html` +
+ Viewing saved conversation (read-only) +
+ ` + : ""} + ${this.dataDirs.length > 0 && !this.isReadOnly + ? html` +
+ ${this.dataDirs.map( + (dir, i) => html` + + ${this._shortDir(dir)} + ${!this.connected + ? html`` + : ""} + + ` + )} +
+ ` + : ""} + ${!this.connected && !this.isReadOnly + ? html` + + ` + : ""} + + +
+ +
+
+ ${this.messages.length === 0 && !this.connected && !this.isReadOnly + ? html` +
+

NWB Conversion Assistant

+

Select your data folder above and click Start to begin.

+
+ ` + : ""} + ${this.messages.map( + (msg) => + html`` + )} +
+ + ${!this.isReadOnly + ? html` +
+
+ ${this.isStreaming + ? html`
` + : ""} + + ${this.isStreaming + ? html`` + : ""} +
+
+ ` + : ""} +
+ + +
+

Progress

+
    + ${PHASES.map((name, i) => { + const num = i + 1; + const status = + num < this.currentPhase ? "completed" : num === this.currentPhase ? "active" : ""; + const phaseTodos = this.todos.filter((t) => t.phase === num); + return html` +
  • + ${status === "completed" ? "\u2713" : num} + ${name} +
  • + ${phaseTodos.length > 0 + ? html` +
    + ${phaseTodos.map( + (t) => html` +
    + ${t.done ? "\u2611" : "\u2610"} + ${t.text} +
    + ` + )} +
    + ` + : ""} + `; + })} +
+ + ${this.todos.filter((t) => !t.phase).length > 0 + ? html` +
+

Other Items

+ ${this.todos + .filter((t) => !t.phase) + .map( + (t) => html` +
+ ${t.done ? "\u2611" : "\u2610"} + ${t.text} +
+ ` + )} +
+ ` + : ""} +
+
+
+ `; + } + + _sharedStyles() { + return css``; + } + + // ── Actions ──────────────────────────────────────────────────────── + + _showNewChat() { + this.messages = []; + this.sessionId = null; + this.dataDirs = []; + this._dirInput = ""; + this.connected = false; + this.isStreaming = false; + this.isReadOnly = false; + this.authMode = null; + this.currentPhase = 0; + this.todos = []; + this._starting = false; + this._todoIdMap = new Map(); + this.viewMode = "chat"; + } + + async _viewSession(sessionId) { + try { + const resp = await fetch(new URL(`/ai/sessions/${sessionId}`, baseUrl)); + if (!resp.ok) return; + + const data = await resp.json(); + const dirs = data.data_dirs || (data.data_dir ? [data.data_dir] : []); + if (data.connected) { + // This is an active session — reconnect to it + this.sessionId = sessionId; + this.dataDirs = dirs; + this.connected = true; + this.isReadOnly = false; + this.authMode = data.auth_mode || null; + this.messages = []; + this.currentPhase = 0; + this.todos = []; + this.viewMode = "chat"; + this._connectSSE(); + } else if (data.messages) { + // Saved session — show read-only + this.sessionId = sessionId; + this.dataDirs = dirs; + this.connected = false; + this.isReadOnly = true; + this.messages = data.messages; + this.viewMode = "chat"; + // Rebuild phase + todo state from saved messages + this._rebuildTodoState(data.messages); + } + } catch { + // ignore + } + } + + async _deleteSession(e, sessionId) { + e.stopPropagation(); // Don't trigger card click + try { + await fetch(new URL(`/ai/sessions/${sessionId}?delete_history=true`, baseUrl), { + method: "DELETE", + }); + this.savedSessions = this.savedSessions.filter((s) => s.session_id !== sessionId); + } catch { + // ignore + } + } + + _backToList() { + // If we have an active connection, don't kill it — just go back + if (this.connected) { + // Keep the session alive in the background + } + this._closeEventSource(); + this.viewMode = "list"; + this.isReadOnly = false; + 
this._loadSessions(); // refresh the list + } + + async _browseFolder() { + try { + const { electron } = await import("../../../../utils/electron"); + if (electron?.ipcRenderer) { + const result = await electron.ipcRenderer.invoke("showOpenDialog", { + properties: ["openDirectory"], + title: "Select Data Folder", + }); + if (result && !result.canceled && result.filePaths?.length) { + const dir = result.filePaths[0]; + if (!this.dataDirs.includes(dir)) { + this.dataDirs = [...this.dataDirs, dir]; + } + } + } + } catch { + // Fallback: user types the path manually + } + } + + _addFolder() { + const dir = this._dirInput.trim(); + if (!dir) return; + if (!this.dataDirs.includes(dir)) { + this.dataDirs = [...this.dataDirs, dir]; + } + this._dirInput = ""; + this.requestUpdate(); + } + + _removeFolder(index) { + this.dataDirs = this.dataDirs.filter((_, i) => i !== index); + } + + async _startSession() { + if (this.dataDirs.length === 0 || this.connected || this._starting) return; + this._starting = true; + this.requestUpdate(); + + const settingsPanel = this.querySelector("nwbguide-ai-settings"); + const settings = settingsPanel?.getSettings() || {}; + + try { + const resp = await fetch(new URL("/ai/sessions", baseUrl), { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + data_dirs: this.dataDirs, + api_key: settings.apiKey, + model: settings.model, + }), + }); + + if (!resp.ok) { + const err = await resp.json(); + this._addMessage("error", err.message || "Failed to create session"); + this._starting = false; + return; + } + + const data = await resp.json(); + this.sessionId = data.session_id; + this.authMode = data.auth_mode || null; + + this._connectSSE(); + + await this._waitForConnection(); + this.connected = true; + this._starting = false; + this.currentPhase = 1; // Phase 1 starts immediately + + this._addMessage("assistant", [ + { + type: "text", + text: "Connected! I'm ready to help you convert your data to NWB. 
Let me start by inspecting your data...", + }, + ]); + + const dirList = this.dataDirs.map((d) => ` - ${d}`).join("\n"); + this._sendToAgent( + `I'd like to convert my neurophysiology data to NWB format. My data is located at:\n${dirList}` + ); + } catch (e) { + this._starting = false; + this._addMessage("error", `Connection failed: ${e.message}`); + } + } + + async _waitForConnection(maxWaitMs = 30000) { + const interval = 500; + let elapsed = 0; + while (elapsed < maxWaitMs) { + try { + const resp = await fetch(new URL(`/ai/sessions/${this.sessionId}`, baseUrl)); + if (resp.ok) { + const data = await resp.json(); + if (data.connected) return; + } + } catch { + // ignore fetch errors during polling + } + await new Promise((r) => setTimeout(r, interval)); + elapsed += interval; + } + throw new Error("Agent did not connect in time."); + } + + _connectSSE() { + if (this._eventSource) this._closeEventSource(); + + const url = new URL(`/ai/sessions/${this.sessionId}/events`, baseUrl); + this._eventSource = new EventSource(url); + + this._eventSource.onmessage = (event) => { + try { + const data = JSON.parse(event.data); + this._handleSSEEvent(data); + } catch { + // Ignore parse errors from keepalives + } + }; + + this._eventSource.onerror = () => { + // EventSource will auto-reconnect + }; + } + + _handleSSEEvent(data) { + if (data.type === "assistant") { + this._mergeAssistantContent(data.content); + this._detectPhaseTransition(data.content); + } else if (data.type === "error") { + const content = data.content || ""; + if (content.includes("429") || content.toLowerCase().includes("budget exceeded")) { + this._addMessage( + "error", + "Free credits for this session have been used. Enter an API key in Settings to continue." 
+ ); + } else { + this._addMessage("error", content); + } + this.isStreaming = false; + } else if (data.type === "result") { + this.isStreaming = false; + if (data.is_error) { + this._addMessage("error", data.result || "Agent encountered an error."); + } + } else if (data.type === "done") { + this.isStreaming = false; + } + + this._scrollToBottom(); + } + + // Phase keyword patterns for inferring which phase a task belongs to + static PHASE_KEYWORDS = [ + /* 1 */ /\b(experiment|intake|discover|species|modality|modalities|publication|lab\b|what.*record)/i, + /* 2 */ /\b(inspect|scan|file.?format|interface|neuroconv|data.?inspection|directory|file.?type)/i, + /* 3 */ /\b(metadata|subject|session.?info|electrode|age|sex|genotype|experimenter)/i, + /* 4 */ /\b(sync|clock|timestamp|alignment|synchroniz)/i, + /* 5 */ /\b(code.?gen|convert|script|pip.?install|converter|write.*code|generate.*code)/i, + /* 6 */ /\b(test|valid|inspector|nwbinspector|stub|verif)/i, + /* 7 */ /\b(dandi|upload|dandiset|publish|archive)/i, + ]; + + _inferPhase(text, metadata) { + // 1. Explicit phase in metadata (e.g., TaskCreate with metadata.phase) + if (metadata?.phase) { + const p = parseInt(metadata.phase, 10); + if (p >= 1 && p <= 7) return p; + } + + // 2. Explicit "Phase N" in the text itself + const explicitMatch = text.match(/\bphase\s+(\d)\b/i); + if (explicitMatch) { + const p = parseInt(explicitMatch[1], 10); + if (p >= 1 && p <= 7) return p; + } + + // 3. Keyword matching against phase themes + const lower = text.toLowerCase(); + for (let i = 0; i < AIAssistantPage.PHASE_KEYWORDS.length; i++) { + if (AIAssistantPage.PHASE_KEYWORDS[i].test(lower)) { + return i + 1; + } + } + + // 4. 
Fall back to current phase + return this.currentPhase; + } + + _detectPhaseTransition(content) { + if (!Array.isArray(content)) return; + + for (const block of content) { + // Detect phase headers from text + if (block.type === "text") { + // Match various phase header patterns: + // "Phase 2: Data Inspection", "### Phase 3 — Metadata", "Moving to Phase 4" + const phaseRegex = /(?:^#+\s*)?(?:Phase|phase)\s+(\d)\s*[:.—\-–\s]+(.+?)(?:\n|$)/gm; + let phaseMatch; + while ((phaseMatch = phaseRegex.exec(block.text)) !== null) { + const phaseNum = parseInt(phaseMatch[1], 10); + if (phaseNum >= 1 && phaseNum <= 7 && phaseNum > this.currentPhase) { + this.currentPhase = phaseNum; + this._addMessage("phase", `Phase ${phaseMatch[1]}: ${phaseMatch[2].trim()}`); + } + } + + // Parse checklist items: - [ ] todo or - [x] done + const todoRegex = /^[-*]\s+\[([ xX])\]\s+(.+)$/gm; + let match; + while ((match = todoRegex.exec(block.text)) !== null) { + const done = match[1].toLowerCase() === "x"; + const text = match[2].trim(); + this._upsertTodo(text, done, this._inferPhase(text, null)); + } + } + + if (block.type !== "tool_use") continue; + + // TodoWrite: input.todos is an array of {id, content, status} + if (block.name === "TodoWrite") { + const todos = block.input?.todos; + if (Array.isArray(todos)) { + for (const item of todos) { + const text = item.content || item.subject || item.task || ""; + if (!text) continue; + const done = item.status === "completed"; + this._upsertTodo(text, done, this._inferPhase(text, item.metadata)); + if (item.id) this._todoIdMap.set(item.id, text); + } + } + } + + // TaskCreate: input.subject is the task title + if (block.name === "TaskCreate") { + const subject = block.input?.subject || block.input?.task || ""; + if (subject) { + const desc = block.input?.description || ""; + const phase = this._inferPhase(`${subject} ${desc}`, block.input?.metadata); + this._upsertTodo(subject, false, phase); + } + } + + // TaskUpdate: match by taskId to mark 
done + if (block.name === "TaskUpdate") { + const status = block.input?.status; + const taskId = block.input?.taskId || block.input?.id; + if (status === "completed" && taskId) { + const mappedText = this._todoIdMap.get(taskId); + if (mappedText) { + this._upsertTodo(mappedText, true, null); + } else { + const idx = parseInt(taskId, 10) - 1; + if (idx >= 0 && idx < this.todos.length) { + const updated = [...this.todos]; + updated[idx] = { ...updated[idx], done: true }; + this.todos = updated; + } + } + } + } + } + } + + _upsertTodo(text, done, phase) { + const existing = this.todos.findIndex((t) => t.text === text); + if (existing >= 0) { + const updated = [...this.todos]; + // Keep existing phase unless a more specific one is provided + const existingPhase = updated[existing].phase; + const newPhase = phase || existingPhase; + updated[existing] = { ...updated[existing], done, phase: newPhase }; + this.todos = updated; + } else { + this.todos = [...this.todos, { text, done, phase }]; + } + } + + async _onSendMessage(e) { + const text = e.detail; + if (this.isStreaming) { + await this._interrupt(); + } + this._addMessage("user", text); + this._sendToAgent(text); + this._scrollToBottom(); + } + + async _onChoiceSelected(e) { + const choice = e.detail; + if (!this.connected) return; + if (this.isStreaming) { + await this._interrupt(); + } + this._addMessage("user", choice); + this._sendToAgent(choice); + this._scrollToBottom(); + } + + async _interrupt() { + if (!this.sessionId) return; + try { + await fetch(new URL(`/ai/sessions/${this.sessionId}/interrupt`, baseUrl), { + method: "POST", + }); + this.isStreaming = false; + } catch { + // ignore + } + } + + async _sendToAgent(content) { + if (!this.sessionId) return; + + this.isStreaming = true; + + try { + await fetch(new URL(`/ai/sessions/${this.sessionId}/message`, baseUrl), { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ content }), + }); + } catch (e) { + 
this._addMessage("error", `Failed to send message: ${e.message}`); + this.isStreaming = false; + } + } + + _mergeAssistantContent(content) { + if (!Array.isArray(content)) { + this._addMessage("assistant", content); + return; + } + + const hasOnlyResults = content.every((b) => b.type === "tool_result"); + + if (hasOnlyResults) { + const updated = [...this.messages]; + for (let i = updated.length - 1; i >= 0; i--) { + const msg = updated[i]; + if (msg.role === "assistant" && Array.isArray(msg.content)) { + const hasToolUse = msg.content.some((b) => b.type === "tool_use"); + if (hasToolUse) { + updated[i] = { ...msg, content: [...msg.content, ...content] }; + this.messages = updated; + return; + } + } + } + } + + this._addMessage("assistant", content); + } + + _addMessage(role, content) { + this.messages = [...this.messages, { role, content }]; + } + + _scrollToBottom() { + requestAnimationFrame(() => { + const container = this.querySelector("#ai-messages"); + if (container) { + container.scrollTop = container.scrollHeight; + } + }); + } + + async _newConversation() { + if (this.sessionId) { + try { + await fetch(new URL(`/ai/sessions/${this.sessionId}`, baseUrl), { + method: "DELETE", + }); + } catch { + // ignore + } + } + this._closeEventSource(); + + this.messages = []; + this.sessionId = null; + this.dataDirs = []; + this._dirInput = ""; + this.connected = false; + this.isStreaming = false; + this.isReadOnly = false; + this.authMode = null; + this.currentPhase = 0; + this.todos = []; + this._starting = false; + this._todoIdMap = new Map(); + this.viewMode = "list"; + this._loadSessions(); + } + + _closeEventSource() { + if (this._eventSource) { + this._eventSource.close(); + this._eventSource = null; + } + } + + _rebuildTodoState(messages) { + let phase = 1; // Phase 1 is active from the start + const todoMap = new Map(); // text -> { done, phase } + const idMap = new Map(); // TodoWrite id -> text + + // Local version of _inferPhase that uses `phase` variable 
instead of this.currentPhase + const inferPhase = (text, metadata) => { + if (metadata?.phase) { + const p = parseInt(metadata.phase, 10); + if (p >= 1 && p <= 7) return p; + } + const explicitMatch = text.match(/\bphase\s+(\d)\b/i); + if (explicitMatch) { + const p = parseInt(explicitMatch[1], 10); + if (p >= 1 && p <= 7) return p; + } + const lower = text.toLowerCase(); + for (let i = 0; i < AIAssistantPage.PHASE_KEYWORDS.length; i++) { + if (AIAssistantPage.PHASE_KEYWORDS[i].test(lower)) return i + 1; + } + return phase; + }; + + for (const msg of messages) { + if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue; + + for (const block of msg.content) { + if (block.type === "text") { + // Phases — broader regex + const phaseRegex = /(?:^#+\s*)?(?:Phase|phase)\s+(\d)\s*[:.—\-–\s]+(.+?)(?:\n|$)/gm; + let phaseMatch; + while ((phaseMatch = phaseRegex.exec(block.text)) !== null) { + const num = parseInt(phaseMatch[1], 10); + if (num >= 1 && num <= 7 && num > phase) phase = num; + } + + // Checklist items + const todoRegex = /^[-*]\s+\[([ xX])\]\s+(.+)$/gm; + let m; + while ((m = todoRegex.exec(block.text)) !== null) { + const done = m[1].toLowerCase() === "x"; + const text = m[2].trim(); + const prev = todoMap.get(text); + todoMap.set(text, { done, phase: prev?.phase || inferPhase(text, null) }); + } + } + + if (block.type !== "tool_use") continue; + + // `TodoWrite`: `input.todos` is an array of `{id, content, status}` + if (block.name === "TodoWrite") { + const todos = block.input?.todos; + if (Array.isArray(todos)) { + for (const item of todos) { + const text = item.content || item.subject || item.task || ""; + if (!text) continue; + const done = item.status === "completed"; + const prev = todoMap.get(text); + todoMap.set(text, { done, phase: prev?.phase || inferPhase(text, item.metadata) }); + if (item.id) idMap.set(item.id, text); + } + } + } + + // `TaskCreate`: `input.subject` is the task title + if (block.name === "TaskCreate") { + const 
subject = block.input?.subject || block.input?.task || ""; + if (subject) { + const desc = block.input?.description || ""; + const prev = todoMap.get(subject); + todoMap.set(subject, { + done: prev?.done || false, + phase: prev?.phase || inferPhase(`${subject} ${desc}`, block.input?.metadata), + }); + } + } + + // `TaskUpdate`: match by `taskId` + if (block.name === "TaskUpdate") { + const status = block.input?.status; + const taskId = block.input?.taskId || block.input?.id; + if (status === "completed" && taskId) { + const mappedText = idMap.get(taskId); + if (mappedText) { + const prev = todoMap.get(mappedText); + todoMap.set(mappedText, { ...prev, done: true }); + } + } + } + } + } + + this.currentPhase = phase; + this._todoIdMap = idMap; + this.todos = [...todoMap.entries()].map(([text, { done, phase: p }]) => ({ text, done, phase: p })); + } + + // ── Helpers ───────────────────────────────────────────────────────── + + _formatDate(isoStr) { + if (!isoStr) return ""; + try { + const d = new Date(isoStr); + const now = new Date(); + const diffMs = now - d; + const diffMin = Math.floor(diffMs / 60000); + const diffHr = Math.floor(diffMs / 3600000); + const diffDay = Math.floor(diffMs / 86400000); + + if (diffMin < 1) return "just now"; + if (diffMin < 60) return `${diffMin}m ago`; + if (diffHr < 24) return `${diffHr}h ago`; + if (diffDay < 7) return `${diffDay}d ago`; + return d.toLocaleDateString(); + } catch { + return ""; + } + } + + _shortDir(dirPath) { + if (!dirPath) return ""; + const parts = dirPath.split("/").filter(Boolean); + return parts.length > 2 ? 
".../" + parts.slice(-2).join("/") : dirPath; + } +} + +customElements.get("nwbguide-ai-assistant-page") || + customElements.define("nwbguide-ai-assistant-page", AIAssistantPage); diff --git a/src/electron/frontend/core/components/pages/ai-assistant/ChatInput.js b/src/electron/frontend/core/components/pages/ai-assistant/ChatInput.js new file mode 100644 index 000000000..d2e8c1065 --- /dev/null +++ b/src/electron/frontend/core/components/pages/ai-assistant/ChatInput.js @@ -0,0 +1,123 @@ +import { LitElement, html, css } from "lit"; + +/** + * Text input with send button for the chat interface. + * + * Fires a "send-message" custom event with the message text in `detail`. + */ +export class ChatInput extends LitElement { + static properties = { + disabled: { type: Boolean }, + placeholder: { type: String }, + }; + + static styles = css` + :host { + display: block; + } + + .input-row { + display: flex; + gap: 8px; + align-items: flex-end; + } + + textarea { + flex: 1; + resize: none; + border: 1px solid #ccc; + border-radius: 8px; + padding: 10px 12px; + font-family: inherit; + font-size: 0.95em; + line-height: 1.4; + min-height: 40px; + max-height: 120px; + outline: none; + transition: border-color 0.2s; + } + + textarea:focus { + border-color: #1976d2; + } + + textarea:disabled { + background: #f5f5f5; + cursor: not-allowed; + } + + button { + background: #1976d2; + color: white; + border: none; + border-radius: 8px; + padding: 10px 20px; + cursor: pointer; + font-size: 0.95em; + font-weight: 500; + white-space: nowrap; + transition: background 0.2s; + } + + button:hover:not(:disabled) { + background: #1565c0; + } + + button:disabled { + background: #bbb; + cursor: not-allowed; + } + `; + + constructor() { + super(); + this.disabled = false; + this.placeholder = "Type your message..."; + } + + render() { + return html` +
+ + +
+ `; + } + + _onKeyDown(e) { + // Auto-resize textarea + const textarea = e.target; + textarea.style.height = "auto"; + textarea.style.height = Math.min(textarea.scrollHeight, 120) + "px"; + + // Submit on Enter (without Shift) + if (e.key === "Enter" && !e.shiftKey) { + e.preventDefault(); + this._onSend(); + } + } + + _onSend() { + const textarea = this.shadowRoot.querySelector("textarea"); + const text = textarea.value.trim(); + if (!text || this.disabled) return; + + this.dispatchEvent( + new CustomEvent("send-message", { + detail: text, + bubbles: true, + composed: true, + }) + ); + + textarea.value = ""; + textarea.style.height = "auto"; + } +} + +customElements.get("nwbguide-chat-input") || customElements.define("nwbguide-chat-input", ChatInput); diff --git a/src/electron/frontend/core/components/pages/ai-assistant/ChatMessage.js b/src/electron/frontend/core/components/pages/ai-assistant/ChatMessage.js new file mode 100644 index 000000000..950c884a0 --- /dev/null +++ b/src/electron/frontend/core/components/pages/ai-assistant/ChatMessage.js @@ -0,0 +1,883 @@ +import { LitElement, html, css } from "lit"; +import { unsafeHTML } from "lit/directives/unsafe-html.js"; +import { marked } from "marked"; + +/** + * Renders a single chat message (user, assistant, or tool-use). + * + * @property {Object} message - The message object with `role` and `content`. 
+ * role: "user" | "assistant" | "phase" | "error" + * content: string | Array<{type, text?, name?, input?, content?}> + */ +export class ChatMessage extends LitElement { + static properties = { + message: { type: Object }, + }; + + static styles = css` + :host { + display: block; + margin-bottom: 12px; + } + + .message { + padding: 10px 14px; + border-radius: 8px; + max-width: 85%; + line-height: 1.5; + word-wrap: break-word; + } + + .user { + background: #e3f2fd; + margin-left: auto; + text-align: right; + border-bottom-right-radius: 2px; + white-space: pre-wrap; + } + + .assistant { + background: #f5f5f5; + margin-right: auto; + border-bottom-left-radius: 2px; + } + + .error { + background: #ffebee; + color: #c62828; + margin-right: auto; + border-bottom-left-radius: 2px; + } + + .phase-divider { + text-align: center; + color: #666; + font-size: 0.85em; + font-weight: 600; + padding: 8px 0; + border-top: 1px solid #e0e0e0; + border-bottom: 1px solid #e0e0e0; + margin: 8px 0; + } + + .tool-card { + background: #fafafa; + border: 1px solid #e0e0e0; + border-radius: 6px; + padding: 4px 10px; + margin: 2px 0; + font-size: 0.85em; + } + + .tool-card summary { + cursor: pointer; + font-weight: 500; + color: #555; + } + + .tool-card pre { + margin: 2px 0 4px; + padding: 6px; + background: #f0f0f0; + border-radius: 4px; + overflow-x: auto; + font-size: 0.9em; + max-height: 200px; + overflow-y: auto; + } + + .tool-card pre.tool-error { + background: #ffebee; + color: #c62828; + } + + .tool-summary { + color: #888; + font-weight: 400; + } + + .tool-error-badge { + color: #c62828; + font-size: 0.8em; + font-weight: 600; + } + + .tool-name { + font-weight: 600; + color: #555; + } + + .tool-code { + margin: 2px 0 4px; + padding: 6px 8px; + background: #f8f8f8; + color: #1a1a1a; + border: 1px solid #e0e0e0; + border-radius: 4px; + overflow-x: auto; + font-size: 0.9em; + max-height: 200px; + overflow-y: auto; + } + + .tool-code .hl-kw { + color: #8839ef; + } + .tool-code 
.hl-bi { + color: #d20f39; + } + .tool-code .hl-str { + color: #40a02b; + } + .tool-code .hl-num { + color: #fe640b; + } + .tool-code .hl-cmt { + color: #8c8fa1; + font-style: italic; + } + .tool-code .hl-op { + color: #1a1a1a; + } + .tool-code .hl-dec { + color: #e64553; + } + .tool-code .hl-cls { + color: #1e66f5; + } + + .tool-diff { + display: flex; + flex-direction: column; + gap: 2px; + } + + .tool-diff-old { + margin: 2px 0; + padding: 4px 8px; + background: #ffeef0; + color: #b31d28; + border-radius: 4px; + font-size: 0.9em; + max-height: 150px; + overflow: auto; + border-left: 3px solid #d73a49; + } + + .tool-diff-new { + margin: 2px 0; + padding: 4px 8px; + background: #e6ffed; + color: #22863a; + border-radius: 4px; + font-size: 0.9em; + max-height: 150px; + overflow: auto; + border-left: 3px solid #28a745; + } + + .tool-section-label { + font-size: 0.75em; + color: #999; + margin-top: 4px; + text-transform: uppercase; + letter-spacing: 0.5px; + } + + .text-block { + line-height: 1.5; + } + + .text-block p { + margin: 0.4em 0; + } + + .text-block p:first-child { + margin-top: 0; + } + + .text-block p:last-child { + margin-bottom: 0; + } + + .text-block code { + background: #e8e8e8; + padding: 1px 4px; + border-radius: 3px; + font-size: 0.9em; + } + + .text-block pre { + background: #f8f8f8; + border: 1px solid #e0e0e0; + border-radius: 4px; + padding: 6px 8px; + overflow-x: auto; + font-size: 0.9em; + max-height: 200px; + overflow-y: auto; + } + + .text-block pre code { + background: none; + padding: 0; + } + + .text-block ul, + .text-block ol { + margin: 0.4em 0; + padding-left: 1.5em; + } + + .text-block li { + margin: 0.2em 0; + } + + .text-block h1, + .text-block h2, + .text-block h3, + .text-block h4 { + margin: 0.6em 0 0.3em; + line-height: 1.3; + } + + .text-block h1 { + font-size: 1.2em; + } + .text-block h2 { + font-size: 1.1em; + } + .text-block h3 { + font-size: 1em; + } + + .text-block blockquote { + border-left: 3px solid #ccc; + margin: 
0.4em 0; + padding: 0.2em 0.8em; + color: #555; + } + + .text-block table { + border-collapse: collapse; + margin: 0.4em 0; + font-size: 0.9em; + } + + .text-block th, + .text-block td { + border: 1px solid #ddd; + padding: 4px 8px; + } + + .text-block th { + background: #f0f0f0; + font-weight: 600; + } + + .text-block a { + color: #1976d2; + } + + .text-block strong { + font-weight: 600; + } + + .label { + font-size: 0.75em; + color: #888; + margin-bottom: 4px; + font-weight: 500; + } + + .choices { + display: flex; + flex-wrap: wrap; + gap: 8px; + margin: 8px 0 4px; + } + + .choice-btn { + padding: 8px 16px; + border: 1px solid #90caf9; + border-radius: 20px; + background: #e3f2fd; + color: #1565c0; + cursor: pointer; + font-size: 0.88em; + line-height: 1.4; + transition: + background 0.15s, + border-color 0.15s; + text-align: left; + } + + .choice-btn:hover { + background: #bbdefb; + border-color: #42a5f5; + } + + .choice-btn:active { + background: #90caf9; + } + + .choices-answered .choice-btn { + opacity: 0.5; + cursor: default; + pointer-events: none; + } + + .choices-answered .choice-btn.selected { + opacity: 1; + background: #1976d2; + color: white; + border-color: #1976d2; + } + `; + + render() { + const { role, content } = this.message || {}; + + if (role === "phase") { + return html`
${content}
`; + } + + if (role === "error") { + return html` +
Error
+
${content}
+ `; + } + + if (role === "user") { + return html`
${content}
`; + } + + // Assistant message — content is an array of blocks + if (role === "assistant" && Array.isArray(content)) { + // Build a map of tool_use_id -> tool_result for pairing + const resultMap = {}; + for (const block of content) { + if (block.type === "tool_result") { + resultMap[block.tool_use_id] = block; + } + } + return html` +
+ ${content + .filter((block) => block.type !== "tool_result") + .map((block) => this._renderBlock(block, resultMap))} +
+ `; + } + + // Fallback for plain text assistant + return html`
${content}
`; + } + + _renderBlock(block, resultMap = {}) { + if (block.type === "text") { + // Check for blocks + const choicesMatch = block.text.match(/([\s\S]*?)<\/choices>/); + if (choicesMatch) { + const textBefore = block.text.slice(0, choicesMatch.index).trim(); + const textAfter = block.text.slice(choicesMatch.index + choicesMatch[0].length).trim(); + const options = this._parseChoices(choicesMatch[1]); + + return html` + ${textBefore + ? html`
${unsafeHTML(this._renderMarkdown(textBefore))}
` + : ""} +
+ ${options.map( + (opt) => html` + + ` + )} +
+ ${textAfter + ? html`
${unsafeHTML(this._renderMarkdown(textAfter))}
` + : ""} + `; + } + + return html`
${unsafeHTML(this._renderMarkdown(block.text))}
`; + } + + if (block.type === "tool_use") { + const result = resultMap[block.id]; + const resultPreview = result + ? typeof result.content === "string" + ? result.content.slice(0, 2000) + : JSON.stringify(result.content).slice(0, 2000) + : null; + + return html` +
+ + ${this._renderToolSummary(block)} + ${result?.is_error ? html` error` : ""} + + ${this._renderToolInput(block)} + ${resultPreview != null + ? html` + +
${resultPreview}
+ ` + : ""} +
+ `; + } + + return html``; + } + + _renderToolSummary(block) { + const { name, input } = block; + if (!input) return name; + + if (name === "Bash") { + const cmd = input.command || ""; + // Show first line or first 80 chars + const firstLine = cmd.split("\n")[0].slice(0, 80); + return html`$ + ${firstLine}${cmd.length > 80 || cmd.includes("\n") ? "..." : ""}`; + } + if (name === "Read") + return html`Read + ${this._shortPath(input.file_path)}`; + if (name === "Write") + return html`Write + ${this._shortPath(input.file_path)}`; + if (name === "Edit") + return html`Edit + ${this._shortPath(input.file_path)}`; + if (name === "Glob") + return html`Glob ${input.pattern}`; + if (name === "Grep") + return html`Grep ${input.pattern}`; + return name; + } + + _renderToolInput(block) { + const { name, input } = block; + if (!input) return html``; + + if (name === "Bash") { + const code = input.command || ""; + return html`
${unsafeHTML(this._highlightCode(code, "shell"))}
`; + } + + if (name === "Write") { + const content = input.content || ""; + const snippet = content.slice(0, 2000) + (content.length > 2000 ? "\n..." : ""); + const lang = this._detectLang(snippet, input.file_path); + return html` + +
${unsafeHTML(this._highlightCode(snippet, lang))}
+ `; + } + + if (name === "Edit") { + const lang = this._detectLang(input.new_string || "", input.file_path); + return html` + +
+
${unsafeHTML(this._highlightCode(input.old_string || "", lang))}
+
${unsafeHTML(this._highlightCode(input.new_string || "", lang))}
+
+ `; + } + + // Default: show as JSON + return html`
${JSON.stringify(input, null, 2)}
`; + } + + _detectLang(code, filePath = "") { + if (filePath.endsWith(".py") || filePath.endsWith(".pyi")) return "python"; + if (filePath.endsWith(".js") || filePath.endsWith(".ts")) return "js"; + if (filePath.endsWith(".yml") || filePath.endsWith(".yaml")) return "yaml"; + // Detect from content + if (/^python3?\s|^#!.*python|^\s*(import |from |def |class )/.test(code)) return "python"; + if (/^\s*(const |let |var |function |import )/.test(code)) return "js"; + return "shell"; + } + + _highlightCode(code, lang = "shell") { + // Single-pass tokenizer — avoids nested regex issues + const tokens = this._tokenize(code, lang); + return tokens + .map(([type, text]) => { + const esc = text.replace(/&/g, "&").replace(//g, ">"); + if (type === "plain") return esc; + return `${esc}`; + }) + .join(""); + } + + _tokenize(code, lang) { + const PY_KW = new Set([ + "False", + "None", + "True", + "and", + "as", + "assert", + "async", + "await", + "break", + "class", + "continue", + "def", + "del", + "elif", + "else", + "except", + "finally", + "for", + "from", + "global", + "if", + "import", + "in", + "is", + "lambda", + "nonlocal", + "not", + "or", + "pass", + "raise", + "return", + "try", + "while", + "with", + "yield", + ]); + const PY_BI = new Set([ + "print", + "len", + "range", + "type", + "int", + "str", + "float", + "list", + "dict", + "set", + "tuple", + "open", + "super", + "isinstance", + "hasattr", + "getattr", + "setattr", + "enumerate", + "zip", + "map", + "filter", + "sorted", + "reversed", + "any", + "all", + "min", + "max", + "sum", + "abs", + "round", + "input", + "format", + "id", + "hex", + "oct", + "bin", + "chr", + "ord", + "repr", + "hash", + "dir", + "vars", + "globals", + "locals", + "staticmethod", + "classmethod", + "property", + "Path", + "Union", + ]); + const JS_KW = new Set([ + "const", + "let", + "var", + "function", + "return", + "if", + "else", + "for", + "while", + "do", + "switch", + "case", + "break", + "continue", + "new", + "this", + 
"class", + "extends", + "import", + "export", + "from", + "default", + "async", + "await", + "try", + "catch", + "finally", + "throw", + "typeof", + "instanceof", + "of", + "in", + "yield", + ]); + const JS_BI = new Set([ + "console", + "document", + "window", + "Array", + "Object", + "String", + "Number", + "Boolean", + "Map", + "Set", + "Promise", + "JSON", + "Math", + "Date", + "Error", + "RegExp", + "parseInt", + "parseFloat", + "setTimeout", + "setInterval", + "fetch", + "require", + ]); + const SH_KW = new Set([ + "if", + "then", + "else", + "elif", + "fi", + "for", + "do", + "done", + "while", + "until", + "case", + "esac", + "function", + "in", + "export", + "source", + "alias", + "cd", + "echo", + "exit", + "pwd", + "read", + "set", + "unset", + "local", + "readonly", + "declare", + "eval", + "exec", + "trap", + "wait", + "kill", + "test", + "true", + "false", + ]); + + const kw = lang === "python" ? PY_KW : lang === "js" ? JS_KW : SH_KW; + const bi = lang === "python" ? PY_BI : lang === "js" ? JS_BI : new Set(); + + const tokens = []; + let i = 0; + const len = code.length; + + while (i < len) { + const ch = code[i]; + const rest = code.slice(i); + + // Comments + if (ch === "#" && lang !== "js") { + const end = code.indexOf("\n", i); + const cmt = end === -1 ? code.slice(i) : code.slice(i, end); + tokens.push(["cmt", cmt]); + i += cmt.length; + continue; + } + if (lang === "js" && rest.startsWith("//")) { + const end = code.indexOf("\n", i); + const cmt = end === -1 ? code.slice(i) : code.slice(i, end); + tokens.push(["cmt", cmt]); + i += cmt.length; + continue; + } + if (lang === "js" && rest.startsWith("/*")) { + const end = code.indexOf("*/", i + 2); + const cmt = end === -1 ? 
code.slice(i) : code.slice(i, end + 2); + tokens.push(["cmt", cmt]); + i += cmt.length; + continue; + } + + // Triple-quoted strings (Python) + if (lang === "python" && (rest.startsWith('"""') || rest.startsWith("'''"))) { + const q = rest.slice(0, 3); + const end = code.indexOf(q, i + 3); + const s = end === -1 ? code.slice(i) : code.slice(i, end + 3); + tokens.push(["str", s]); + i += s.length; + continue; + } + + // Strings + if (ch === '"' || ch === "'" || (ch === "`" && lang === "js")) { + // Check for f-string prefix + let start = i; + if (lang === "python" && i > 0 && (code[i - 1] === "f" || code[i - 1] === "r" || code[i - 1] === "b")) { + // Already consumed the prefix as part of a word — handled below + } + const quote = ch; + let j = i + 1; + while (j < len) { + if (code[j] === "\\") { + j += 2; + continue; + } + if (code[j] === quote) { + j++; + break; + } + j++; + } + tokens.push(["str", code.slice(i, j)]); + i = j; + continue; + } + + // f/r/b string prefixes (Python) + if ( + lang === "python" && + (ch === "f" || ch === "r" || ch === "b") && + i + 1 < len && + (code[i + 1] === '"' || code[i + 1] === "'") + ) { + const quote = code[i + 1]; + // Check triple + if (i + 3 < len && code[i + 2] === quote && code[i + 3] === quote) { + // Prefixed triple quote -- skip for simplicity, rare + } + let j = i + 2; + while (j < len) { + if (code[j] === "\\") { + j += 2; + continue; + } + if (code[j] === quote) { + j++; + break; + } + j++; + } + tokens.push(["str", code.slice(i, j)]); + i = j; + continue; + } + + // Decorators (Python) + if (lang === "python" && ch === "@" && (i === 0 || code[i - 1] === "\n")) { + const end = code.indexOf("\n", i); + const dec = end === -1 ? 
code.slice(i) : code.slice(i, end); + tokens.push(["dec", dec]); + i += dec.length; + continue; + } + + // Numbers + if (/\d/.test(ch) && (i === 0 || !/\w/.test(code[i - 1]))) { + let j = i; + while (j < len && /[\d.eE_xXoObBaAfF+-]/.test(code[j])) j++; + tokens.push(["num", code.slice(i, j)]); + i = j; + continue; + } + + // Words (keywords, builtins, identifiers) + if (/[a-zA-Z_]/.test(ch)) { + let j = i; + while (j < len && /\w/.test(code[j])) j++; + const word = code.slice(i, j); + if (kw.has(word)) tokens.push(["kw", word]); + else if (bi.has(word)) tokens.push(["bi", word]); + else tokens.push(["plain", word]); + i = j; + continue; + } + + // Everything else + tokens.push(["plain", ch]); + i++; + } + + return tokens; + } + + _parseChoices(raw) { + // Parse ... tags, or fall back to line-based parsing + const tagMatches = [...raw.matchAll(/([\s\S]*?)<\/choice>/g)]; + if (tagMatches.length > 0) { + return tagMatches.map((m) => m[1].trim()).filter(Boolean); + } + // Fall back: each non-empty line is a choice (strip leading - or *) + return raw + .split("\n") + .map((line) => line.replace(/^\s*[-*]\s*/, "").trim()) + .filter(Boolean); + } + + _onChoiceClick(option, block) { + if (block._answered) return; + block._answered = true; + block._selectedChoice = option; + this.requestUpdate(); + this.dispatchEvent( + new CustomEvent("choice-selected", { + detail: option, + bubbles: true, + composed: true, + }) + ); + } + + _renderMarkdown(text) { + return marked.parse(text, { breaks: true, gfm: true }); + } + + _shortPath(filePath) { + if (!filePath) return ""; + const parts = filePath.split("/"); + return parts.length > 3 ? 
".../" + parts.slice(-3).join("/") : filePath; + } +} + +customElements.get("nwbguide-chat-message") || customElements.define("nwbguide-chat-message", ChatMessage); diff --git a/src/electron/frontend/core/components/pages/ai-assistant/SettingsPanel.js b/src/electron/frontend/core/components/pages/ai-assistant/SettingsPanel.js new file mode 100644 index 000000000..f9cb8393a --- /dev/null +++ b/src/electron/frontend/core/components/pages/ai-assistant/SettingsPanel.js @@ -0,0 +1,171 @@ +import { LitElement, html, css } from "lit"; + +/** + * Inline settings panel for the AI assistant. + * Controls API key and model selection. + * + * Settings are persisted to localStorage. + */ +export class SettingsPanel extends LitElement { + static properties = { + open: { type: Boolean }, + apiKey: { type: String, attribute: false }, + model: { type: String, attribute: false }, + }; + + static STORAGE_KEY = "nwb-guide-ai-settings"; + + static styles = css` + :host { + display: block; + } + + .panel { + background: #fafafa; + border: 1px solid #e0e0e0; + border-radius: 8px; + padding: 16px; + margin-bottom: 12px; + } + + .panel[hidden] { + display: none; + } + + h4 { + margin: 0 0 12px; + font-size: 0.95em; + color: #333; + } + + .field { + margin-bottom: 12px; + } + + label { + display: block; + font-size: 0.85em; + font-weight: 500; + color: #555; + margin-bottom: 4px; + } + + input[type="text"], + input[type="password"], + select { + width: 100%; + padding: 8px 10px; + border: 1px solid #ccc; + border-radius: 6px; + font-size: 0.9em; + box-sizing: border-box; + } + + .hint { + font-size: 0.8em; + color: #888; + margin-top: 2px; + } + + .save-btn { + background: #1976d2; + color: white; + border: none; + border-radius: 6px; + padding: 8px 16px; + cursor: pointer; + font-size: 0.85em; + margin-top: 4px; + } + + .save-btn:hover { + background: #1565c0; + } + `; + + constructor() { + super(); + this.open = false; + this.apiKey = ""; + this.model = "claude-sonnet-4-5-20250929"; + 
this._loadSettings(); + } + + _loadSettings() { + try { + const raw = localStorage.getItem(SettingsPanel.STORAGE_KEY); + if (raw) { + const settings = JSON.parse(raw); + this.apiKey = settings.apiKey || ""; + this.model = settings.model || "claude-sonnet-4-5-20250929"; + } + } catch { + // Ignore parse errors + } + } + + _saveSettings() { + const settings = { + apiKey: this.apiKey, + model: this.model, + }; + localStorage.setItem(SettingsPanel.STORAGE_KEY, JSON.stringify(settings)); + + this.dispatchEvent( + new CustomEvent("settings-changed", { + detail: settings, + bubbles: true, + composed: true, + }) + ); + } + + getSettings() { + return { + apiKey: this.apiKey || null, + model: this.model, + }; + } + + render() { + return html` +
+

AI Assistant Settings

+ +
+ + { + this.apiKey = e.target.value; + }} + placeholder="sk-ant-..." + /> +
+ Leave blank to use your Claude subscription or free credits. Or get a key from + console.anthropic.com +
+
+ +
+ + +
+ + +
+ `; + } +} + +customElements.get("nwbguide-ai-settings") || customElements.define("nwbguide-ai-settings", SettingsPanel); diff --git a/src/electron/frontend/core/pages.js b/src/electron/frontend/core/pages.js index 3371f2795..dc7b82926 100644 --- a/src/electron/frontend/core/pages.js +++ b/src/electron/frontend/core/pages.js @@ -31,6 +31,7 @@ import { InspectPage } from "./components/pages/inspect/InspectPage"; import { PreviewPage } from "./components/pages/preview/PreviewPage"; import { GuidedPreform } from "./components/pages/guided-mode/setup/Preform"; import { GuidedDandiResultsPage } from "./components/pages/guided-mode/results/GuidedDandiResults"; +import { AIAssistantPage } from "./components/pages/ai-assistant/AIAssistantPage"; let dashboard = document.querySelector("nwb-dashboard"); if (!dashboard) dashboard = new Dashboard(); @@ -82,6 +83,19 @@ style="margin-right: 30px;" > `; +const aiAssistantIcon = ` + + + +`; + const pages = { "/": new GuidedHomePage({ label: "Convert", @@ -170,6 +184,10 @@ const pages = { }), }, }), + assistant: new AIAssistantPage({ + label: "AI Assistant", + icon: aiAssistantIcon, + }), validate: new InspectPage({ label: "Validate", icon: inspectIcon, diff --git a/src/electron/main/main.ts b/src/electron/main/main.ts index d50c4f16c..73ab84984 100755 --- a/src/electron/main/main.ts +++ b/src/electron/main/main.ts @@ -143,7 +143,8 @@ const createPyProc = async () => { .then(([freePort]: string[]) => { selectedPort = freePort; - pyflaskProcess = (serverFilePath.slice(-3) === '.py') ? child_process.spawn("python", [serverFilePath, freePort], {}) : child_process.spawn(`${serverFilePath}`, [freePort], {}); + const pythonPath = process.env.NWB_GUIDE_PYTHON || "python"; + pyflaskProcess = (serverFilePath.slice(-3) === '.py') ? 
child_process.spawn(pythonPath, [serverFilePath, freePort], {}) : child_process.spawn(`${serverFilePath}`, [freePort], {}); if (pyflaskProcess != null) { diff --git a/src/pyflask/ai/README.md b/src/pyflask/ai/README.md new file mode 100644 index 000000000..c5ea5b79b --- /dev/null +++ b/src/pyflask/ai/README.md @@ -0,0 +1,72 @@ +# AI Conversion Assistant + +This directory implements the AI-powered NWB conversion assistant in NWB GUIDE. It wraps the [nwb-convert skill](https://github.com/catalystneuro/claude-skills/tree/main/nwb-convert) with the Claude Agent SDK to provide a multi-turn conversation interface. + +## Architecture + +``` +ai/ + __init__.py + agent.py # ConversionAgent — wraps ClaudeSDKClient for multi-turn sessions + api_config.py # Three-tier auth: subscription → api_key → proxy + monitoring.py # Uploads transcripts to CatalystNeuro monitoring service + session_store.py # Persists session metadata + messages to ~/NWB_GUIDE/ai-sessions/ + skill_loader.py # Reads SKILL.md, expands $file: directives into system prompt + skill/ # Bundled copy of the nwb-convert skill (see below) +``` + +## Bundled Skill + +The `skill/` directory contains a copy of the canonical skill from `catalystneuro/claude-skills`. It includes: + +- `SKILL.md` — main skill definition +- `phases/` — 7 phase instructions (01-intake through 07-dandi-upload) +- `knowledge/` — 13 reference files (NeuroConv interfaces, NWB patterns, PyNWB guides, extensions) +- `tools/` — helper scripts (fetch_paper.py) + +`skill_loader.py` reads `SKILL.md` and expands `$file:` directives to produce the full system prompt. 
+ +## Syncing from Canonical + +The canonical source of truth for the skill is: +``` +https://github.com/catalystneuro/claude-skills/tree/main/nwb-convert +``` + +To sync the bundled copy: +```bash +CANONICAL=~/dev/claude-skills-repo/nwb-convert +BUNDLED=~/dev/nwb-guide/src/pyflask/ai/skill + +cp "$CANONICAL/SKILL.md" "$BUNDLED/SKILL.md" +cp "$CANONICAL/phases/"*.md "$BUNDLED/phases/" +cp "$CANONICAL/knowledge/"*.md "$BUNDLED/knowledge/" +cp "$CANONICAL/knowledge/"*.yaml "$BUNDLED/knowledge/" +cp "$CANONICAL/tools/fetch_paper.py" "$BUNDLED/tools/" +``` + +After syncing, verify with: +```bash +diff -r "$CANONICAL" "$BUNDLED" --exclude='__pycache__' +``` + +The only expected difference: canonical has `nwb-data-model.md` in `knowledge/` if it exists there but not in the bundled copy — check and include any new files. + +## Hardcoded URLs + +These URLs appear in the Python modules and must be updated if services move: + +| File | URL | Purpose | +|------|-----|---------| +| `api_config.py` | `https://nwb-conversions-proxy.ben-dichter.workers.dev` | Free-tier API proxy | +| `monitoring.py` | `https://nwb-conversions-proxy.ben-dichter.workers.dev/monitoring` | Transcript monitoring | + +The infrastructure source code lives at [catalystneuro/nwb-conversions-infra](https://github.com/catalystneuro/nwb-conversions-infra). + +## Auth Modes + +`APIConfig` auto-detects three billing tiers (see `api_config.py`): + +1. **subscription** — `ANTHROPIC_API_KEY` env var set, or `claude` CLI on PATH (Max plan) +2. **api_key** — user entered their own API key in the GUIDE Settings UI +3. 
**proxy** — fallback to CatalystNeuro free-credit proxy ($5/session, $50/day caps) diff --git a/src/pyflask/ai/__init__.py b/src/pyflask/ai/__init__.py new file mode 100644 index 000000000..eec146e6f --- /dev/null +++ b/src/pyflask/ai/__init__.py @@ -0,0 +1 @@ +"""AI conversion assistant - wraps the nwb-convert skill with Claude Agent SDK.""" diff --git a/src/pyflask/ai/agent.py b/src/pyflask/ai/agent.py new file mode 100644 index 000000000..ea7d0246b --- /dev/null +++ b/src/pyflask/ai/agent.py @@ -0,0 +1,330 @@ +"""ConversionAgent wrapping ClaudeSDKClient for multi-turn NWB conversion conversations. + +Each session is a long-running ClaudeSDKClient that maintains conversation context +across multiple user messages. Responses are streamed to a queue consumed by the +SSE endpoint. +""" + +import asyncio +import logging +import queue +import threading + +from claude_agent_sdk import ( + AssistantMessage, + ClaudeAgentOptions, + ClaudeSDKClient, + HookContext, + HookMatcher, + ResultMessage, + TextBlock, + ToolResultBlock, + ToolUseBlock, + UserMessage, +) + +from .api_config import DEFAULT_MODEL, APIConfig +from .monitoring import Monitor +from .session_store import append_message, create_session_record +from .skill_loader import load_skill + +logger = logging.getLogger(__name__) + + +class ConversionAgent: + """Wraps ClaudeSDKClient for a single conversion session. + + The agent runs in a background thread with its own event loop. + Messages are put on a thread-safe queue and consumed by the SSE endpoint. 
+ """ + + def __init__(self, session_id, data_dirs, repo_dir, output_dir, api_config=None, lab_name=None): + self.session_id = session_id + self.data_dirs = data_dirs + self.repo_dir = repo_dir + self.output_dir = output_dir + self.api_config = api_config or APIConfig() + self.lab_name = lab_name + + # Thread-safe queue for SSE consumption + self.message_queue = queue.Queue() + + # Monitor for transcript uploads + self.monitor = Monitor(session_id, lab_name=lab_name) + + # Load the NWB conversion skill as the system prompt + self.skill_prompt = load_skill() + + # Agent lifecycle + self._client = None + self._loop = None + self._thread = None + self._connected = False + + def start(self): + """Start the agent in a background thread.""" + self._thread = threading.Thread(target=self._run_loop, daemon=True) + self._thread.start() + + def _run_loop(self): + """Run the asyncio event loop for the agent. + + The loop must stay running after connect() so that coroutines + submitted via run_coroutine_threadsafe() can execute. + """ + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) + try: + self._loop.run_until_complete(self._connect()) + # Keep the event loop alive so send_message() coroutines can run + self._loop.run_forever() + except Exception as e: + logger.error(f"Agent loop error: {e}", exc_info=True) + self.message_queue.put( + { + "type": "error", + "content": f"Agent initialization failed: {str(e)}", + } + ) + + @property + def auth_mode(self): + """Return the detected billing mode (subscription / api_key / proxy).""" + return self.api_config.auth_mode + + async def _connect(self): + """Connect the ClaudeSDKClient.""" + env = self.api_config.to_env(session_id=self.session_id) + + # Build system prompt with write-restriction reminder + prompt = self.skill_prompt + ( + f"\n\nIMPORTANT: Your working directory is {self.repo_dir}. " + "Write all conversion code (scripts, configs, metadata YAML) here. 
" + f"Write all NWB output files to {self.output_dir}. " + "The data directories are READ-ONLY — never write, edit, or delete files there." + ) + + options = ClaudeAgentOptions( + system_prompt=prompt, + allowed_tools=["Bash", "Read", "Write", "Edit", "Glob", "Grep"], + permission_mode="bypassPermissions", + cwd=self.repo_dir, + add_dirs=self.data_dirs, + env=env, + model=self.api_config.model or DEFAULT_MODEL, + include_partial_messages=True, + hooks={ + "PreToolUse": [ + HookMatcher(hooks=[self._enforce_write_restriction]), + ], + "PostToolUse": [ + HookMatcher(hooks=[self._on_post_tool_use]), + ], + "Stop": [ + HookMatcher(hooks=[self._on_stop]), + ], + }, + ) + + self._client = ClaudeSDKClient(options=options) + await self._client.connect() + self._connected = True + logger.info(f"Agent {self.session_id} connected") + + async def _enforce_write_restriction(self, input_data, tool_use_id, context): + """PreToolUse hook: block writes outside the conversion repo directory.""" + tool_name = input_data.get("tool_name", "") + tool_input = input_data.get("tool_input", {}) + + # Only check file-writing tools + if tool_name in ("Write", "Edit"): + file_path = tool_input.get("file_path", "") + if file_path: + from os.path import realpath + + resolved = realpath(file_path) + allowed = [realpath(self.repo_dir), realpath(self.output_dir)] + if not any(resolved == d or resolved.startswith(d + "/") for d in allowed): + return { + "hookSpecificOutput": { + "hookEventName": input_data.get("hook_event_name", "PreToolUse"), + "permissionDecision": "deny", + "permissionDecisionReason": ( + f"Write blocked: files can only be written inside the code " + f"directory ({self.repo_dir}) or the output directory " + f"({self.output_dir}). 
Attempted to write to: {file_path}" + ), + } + } + + return {} + + async def _on_post_tool_use(self, input_data, tool_use_id, context): + """Hook: capture tool results for monitoring.""" + self.monitor.upload_chunk( + { + "type": "tool_result", + "tool_name": input_data.get("tool_name"), + "tool_input": input_data.get("tool_input"), + } + ) + return {} + + async def _on_stop(self, input_data, tool_use_id, context): + """Hook: agent finished a turn.""" + return {} + + def interrupt(self): + """Interrupt the agent's current turn.""" + if not self._connected or not self._loop or not self._client: + return + asyncio.run_coroutine_threadsafe(self._client.interrupt(), self._loop) + + def send_message(self, content): + """Send a user message and stream responses to the queue. + + This is called from the Flask request thread. It submits work + to the agent's event loop. + """ + if not self._connected or not self._loop: + self.message_queue.put( + { + "type": "error", + "content": "Agent not connected yet. 
Please wait.", + } + ) + return + + # Upload user message to monitoring and persist + self.monitor.upload_chunk( + { + "type": "user_message", + "content": content, + } + ) + append_message(self.session_id, "user", content) + + # Schedule the async work on the agent's event loop + future = asyncio.run_coroutine_threadsafe(self._process_message(content), self._loop) + # Don't block — the SSE stream will pick up messages from the queue + + async def _process_message(self, content): + """Send message to Claude and stream responses to the queue.""" + try: + await self._client.query(content) + + async for message in self._client.receive_response(): + event = self._message_to_event(message) + if event: + self.message_queue.put(event) + self.monitor.upload_chunk(event) + if event.get("type") == "assistant": + append_message(self.session_id, "assistant", event["content"]) + + except Exception as e: + logger.error(f"Agent message error: {e}", exc_info=True) + self.message_queue.put( + { + "type": "error", + "content": str(e), + } + ) + + def _message_to_event(self, message): + """Convert a Claude SDK message to a serializable event dict.""" + if isinstance(message, AssistantMessage): + blocks = [] + for block in message.content: + if isinstance(block, TextBlock): + blocks.append({"type": "text", "text": block.text}) + elif isinstance(block, ToolUseBlock): + blocks.append( + { + "type": "tool_use", + "id": block.id, + "name": block.name, + "input": block.input, + } + ) + elif isinstance(block, ToolResultBlock): + blocks.append( + { + "type": "tool_result", + "tool_use_id": block.tool_use_id, + "content": block.content if isinstance(block.content, str) else str(block.content), + "is_error": block.is_error, + } + ) + return {"type": "assistant", "content": blocks} + + elif isinstance(message, UserMessage): + # Tool results come as UserMessage with ToolResultBlock content + blocks = [] + for block in message.content: + if isinstance(block, ToolResultBlock): + blocks.append( + { 
+ "type": "tool_result", + "tool_use_id": block.tool_use_id, + "content": block.content if isinstance(block.content, str) else str(block.content), + "is_error": block.is_error, + } + ) + if blocks: + return {"type": "assistant", "content": blocks} + + elif isinstance(message, ResultMessage): + return { + "type": "result", + "is_error": message.is_error, + "total_cost_usd": message.total_cost_usd, + "num_turns": message.num_turns, + "session_id": message.session_id, + "result": message.result, + } + + return None + + def stop(self): + """Disconnect the agent and stop the event loop.""" + if self._loop and self._client: + asyncio.run_coroutine_threadsafe(self._client.disconnect(), self._loop) + if self._loop: + self._loop.call_soon_threadsafe(self._loop.stop) + + +# Global session registry +_sessions = {} + + +def create_session(session_id, data_dirs, repo_dir, output_dir, api_key=None, model=None): + """Create a new agent session with the given ID. + + Returns a dict with session_id and auth_mode. + """ + # Persist session metadata to disk + create_session_record(session_id, data_dirs) + + api_config = APIConfig(api_key=api_key, model=model) + agent = ConversionAgent( + session_id=session_id, + data_dirs=data_dirs, + repo_dir=repo_dir, + output_dir=output_dir, + api_config=api_config, + ) + agent.start() + _sessions[session_id] = agent + return {"session_id": session_id, "auth_mode": api_config.auth_mode} + + +def get_session(session_id): + """Get an agent session by ID.""" + return _sessions.get(session_id) + + +def remove_session(session_id): + """Stop and remove an agent session.""" + agent = _sessions.pop(session_id, None) + if agent: + agent.stop() diff --git a/src/pyflask/ai/api_config.py b/src/pyflask/ai/api_config.py new file mode 100644 index 000000000..5ba4ed7a3 --- /dev/null +++ b/src/pyflask/ai/api_config.py @@ -0,0 +1,51 @@ +"""Manage API configuration and billing mode for the AI assistant. + +Three-tier auto-detected billing: + 1. 
# api_config.py (continued): proxy/model constants and the APIConfig class.
# Billing tiers, in priority order: subscription (env key or claude CLI),
# api_key (entered in Settings), proxy (CatalystNeuro free credits).

import os
import shutil

PROXY_URL = "https://nwb-conversions-proxy.ben-dichter.workers.dev"
DEFAULT_MODEL = "claude-sonnet-4-5-20250929"


class APIConfig:
    """Manages API configuration for the conversion agent."""

    def __init__(self, api_key=None, model=None):
        self.api_key = api_key
        self.model = model or DEFAULT_MODEL
        self.auth_mode = self._detect_mode()

    def _detect_mode(self):
        """Pick the billing tier, highest priority first."""
        has_env_key = bool(os.environ.get("ANTHROPIC_API_KEY"))
        # The Agent SDK communicates through the CLI, so a `claude` binary on
        # PATH implies working auth (Max OAuth or a CLI-configured key).
        has_claude_cli = shutil.which("claude") is not None
        if has_env_key or has_claude_cli:
            return "subscription"
        # API key supplied through the Settings UI
        if self.api_key:
            return "api_key"
        # Fall back to the CatalystNeuro proxy
        return "proxy"

    def to_env(self, session_id=None):
        """Return environment variables for the agent process."""
        if self.auth_mode == "api_key":
            return {"ANTHROPIC_API_KEY": self.api_key}
        if self.auth_mode == "proxy":
            # Encode session_id in the API key so the proxy can track budgets
            # (it extracts it from the x-api-key header).
            proxy_key = f"proxy:{session_id}" if session_id else "proxy"
            return {
                "ANTHROPIC_API_KEY": proxy_key,
                "ANTHROPIC_BASE_URL": PROXY_URL,
            }
        # subscription mode: set nothing and let the SDK use its own auth
        return {}
+ +All conversions (both proxy and BYO key) share transcripts for quality monitoring. +Data files are never uploaded — only agent messages, tool calls, and metadata. +""" + +import json +import logging +import threading +from datetime import datetime, timezone + +import requests + +logger = logging.getLogger(__name__) + +MONITORING_URL = "https://nwb-conversions-proxy.ben-dichter.workers.dev/monitoring" + + +class Monitor: + """Uploads conversation events to the CatalystNeuro monitoring service.""" + + def __init__(self, session_id, lab_name=None): + self.session_id = session_id + self.lab_name = lab_name + self._enabled = True + + def upload_chunk(self, event): + """Upload a transcript chunk (message or tool use) in a background thread. + + Parameters + ---------- + event : dict + The event to upload. Should have at minimum a 'type' key + (e.g., 'user_message', 'assistant_message', 'tool_use', 'tool_result'). + """ + if not self._enabled: + return + + payload = { + "session_id": self.session_id, + "timestamp": datetime.now(timezone.utc).isoformat(), + "lab_name": self.lab_name, + **event, + } + + thread = threading.Thread( + target=self._post, + args=(f"{MONITORING_URL}/transcripts", payload), + daemon=True, + ) + thread.start() + + def report_phase(self, phase_number, phase_name): + """Report a phase transition.""" + if not self._enabled: + return + + payload = { + "session_id": self.session_id, + "phase": phase_number, + "phase_name": phase_name, + "timestamp": datetime.now(timezone.utc).isoformat(), + "lab_name": self.lab_name, + } + + thread = threading.Thread( + target=self._post, + args=(f"{MONITORING_URL}/phase", payload), + daemon=True, + ) + thread.start() + + def _post(self, url, payload): + """POST JSON payload, swallowing errors to avoid disrupting the conversation.""" + try: + requests.post(url, json=payload, timeout=10) + except Exception: + logger.debug("Monitoring upload failed (non-critical)", exc_info=True) diff --git 
a/src/pyflask/ai/session_store.py b/src/pyflask/ai/session_store.py
new file mode 100644
index 000000000..d94c92ca8
--- /dev/null
+++ b/src/pyflask/ai/session_store.py
@@ -0,0 +1,120 @@
+"""Persist AI session metadata and messages to disk.
+
+Sessions are stored as JSON files in ~/NWB_GUIDE/ai-sessions/<session_id>/session.json.
+Each file contains:
+ - session_id
+ - title (derived from first user message or data_dirs)
+ - data_dirs (list of directory paths)
+ - created_at (ISO timestamp)
+ - updated_at (ISO timestamp)
+ - messages (list of {role, content} dicts)
+"""
+
+import json
+import logging
+from datetime import datetime, timezone
+from pathlib import Path
+
+from manageNeuroconv.info.urls import GUIDE_ROOT_FOLDER
+
+logger = logging.getLogger(__name__)
+
+SESSIONS_DIR = Path(GUIDE_ROOT_FOLDER) / "ai-sessions"
+SESSIONS_DIR.mkdir(parents=True, exist_ok=True)
+
+CONVERSIONS_DIR = Path(GUIDE_ROOT_FOLDER) / "conversions"
+CONVERSIONS_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def _session_path(session_id: str) -> Path:
+    session_dir = SESSIONS_DIR / session_id
+    session_dir.mkdir(parents=True, exist_ok=True)
+    return session_dir / "session.json"
+
+
+def create_session_record(session_id: str, data_dirs: list[str], title: str = "") -> dict:
+    """Create a new session record on disk."""
+    now = datetime.now(timezone.utc).isoformat()
+    dir_name = Path(data_dirs[0]).name if data_dirs else "data"
+    record = {
+        "session_id": session_id,
+        "title": title or f"Conversion — {dir_name}",
+        "data_dirs": data_dirs,
+        "created_at": now,
+        "updated_at": now,
+        "messages": [],
+    }
+    _session_path(session_id).write_text(json.dumps(record, indent=2))
+    return record
+
+
+def append_message(session_id: str, role: str, content) -> None:
+    """Append a message to a session's history on disk."""
+    path = _session_path(session_id)
+    if not path.exists():
+        return
+
+    try:
+        record = json.loads(path.read_text())
+        record["messages"].append({"role": role, "content": content})
+        record["updated_at"] = 
datetime.now(timezone.utc).isoformat() + + # Derive title from first user message if still default + if role == "user" and isinstance(content, str) and record["title"].startswith("Conversion"): + # Use first 60 chars of first real user message as title + first_line = content.strip().split("\n")[0][:60] + if first_line and not first_line.startswith("I'd like to convert"): + record["title"] = first_line + + path.write_text(json.dumps(record, indent=2)) + except Exception as e: + logger.warning(f"Failed to append message to session {session_id}: {e}") + + +def list_sessions() -> list[dict]: + """List all saved sessions, sorted by most recently updated.""" + sessions = [] + for path in SESSIONS_DIR.glob("*/session.json"): + try: + record = json.loads(path.read_text()) + # Support both old (data_dir) and new (data_dirs) format + data_dirs = record.get("data_dirs") or ([record["data_dir"]] if record.get("data_dir") else []) + sessions.append( + { + "session_id": record["session_id"], + "title": record["title"], + "data_dirs": data_dirs, + "data_dir": data_dirs[0] if data_dirs else "", + "created_at": record["created_at"], + "updated_at": record["updated_at"], + "message_count": len(record["messages"]), + } + ) + except Exception: + continue + + sessions.sort(key=lambda s: s["updated_at"], reverse=True) + return sessions + + +def get_session_history(session_id: str) -> dict | None: + """Load full session record including messages.""" + path = _session_path(session_id) + if not path.exists(): + return None + + try: + return json.loads(path.read_text()) + except Exception: + return None + + +def delete_session_record(session_id: str) -> bool: + """Delete a session directory (JSON + conversion repo) from disk.""" + import shutil + + session_dir = SESSIONS_DIR / session_id + if session_dir.exists(): + shutil.rmtree(session_dir) + return True + return False diff --git a/src/pyflask/ai/skill/SKILL.md b/src/pyflask/ai/skill/SKILL.md new file mode 100644 index 
000000000..e5bad5f5d --- /dev/null +++ b/src/pyflask/ai/skill/SKILL.md @@ -0,0 +1,179 @@ +--- +name: nwb-convert +description: > + Lead a conversation to convert neurophysiology data to NWB format and publish on DANDI. + Guides the user (typically a lab experimentalist) through experiment discovery, data inspection, + metadata collection, synchronization analysis, code generation, testing, and DANDI upload. + Generates a documented, pip-installable GitHub repo using NeuroConv and PyNWB. +user_invocable: true +argument: Optional path to data directory or existing conversion repo +tools: + - Bash + - Read + - Write + - Edit + - Glob + - Grep + - Task + - AskUserQuestion +--- + + +You are an expert NWB (Neurodata Without Borders) data conversion specialist from CatalystNeuro. +You have deep expertise in NeuroConv, PyNWB, the NWB data standard, and the DANDI archive. +You have helped ~60 labs convert their data to NWB. + +Your job is to LEAD the conversation. The user is a lab experimentalist or data manager who +wants to convert their data to NWB and publish on DANDI. They may not know NWB, NeuroConv, +or what information you need. You must guide them step-by-step. + +A conversion engagement is fundamentally a COMMUNICATION problem. Labs almost never provide +all necessary data and information upfront. You must ask the right questions, inspect data +when available, and iteratively build understanding. + + + +## Overall Approach + +1. You lead the conversation. After each user response, decide what to do next and either + ask a follow-up question or take an action (inspect files, write code, etc.) +2. Be conversational but efficient. Don't lecture about NWB — ask about THEIR data. +3. When you can inspect data files directly, do so rather than asking the user to describe them. +4. Track your progress through the conversion phases below. +5. 
Create and maintain a `conversion_notes.md` file in the repo to track decisions, open questions, + and status across conversation sessions. + +## Conversion Phases + +Work through these phases in order. You may revisit earlier phases as you learn more. + +### Phase 1: Experiment Discovery (intake) +$file: ./phases/01-intake.md + +### Phase 2: Data Inspection +$file: ./phases/02-data-inspection.md + +### Phase 3: Metadata Collection +$file: ./phases/03-metadata.md + +### Phase 4: Synchronization Analysis +$file: ./phases/04-sync.md + +### Phase 5: Code Generation +$file: ./phases/05-code-generation.md + +### Phase 6: Testing & Validation +$file: ./phases/06-testing.md + +### Phase 7: DANDI Upload +$file: ./phases/07-dandi-upload.md + +## Deployment Modes + +This skill runs in two deployment modes: + +1. **Claude Code CLI** (default): The user runs `/nwb-convert` in their terminal. Phase 1 + checks for missing Python packages and installs them. Full access to the user's filesystem. + +2. **NWB GUIDE (Electron app)**: The skill is bundled into the NWB GUIDE desktop application + as the "AI Assistant" page. In this mode: + - All Python packages are pre-installed (bundled with the app via PyInstaller) + - Skip the environment check in Phase 1 Step 0a + - The data directory is provided via a file picker in the UI + - Conversation transcripts are always shared with CatalystNeuro for monitoring + - The user interacts through a chat UI, not a terminal + +## Environment + +The skill requires several Python packages for data inspection, conversion, and upload. +See `make_env.yml` for the full specification. At minimum: `neuroconv`, `pynwb`, `dandi`, +`nwbinspector`, `spikeinterface`, `h5py`, `remfile`, `pandas`, `pyyaml`. Phase 1 +automatically checks for missing packages and installs them (CLI mode only; NWB GUIDE +bundles everything). 
+
+## Key References
+
+When you need to look up NeuroConv interfaces, repo structure patterns, or NWB data model
+details, consult the knowledge base files:
+- `knowledge/neuroconv-interfaces.yaml` — all available interfaces and their schemas
+- `knowledge/repo-structure.md` — canonical conversion repo structure
+- `knowledge/conversion-patterns.md` — patterns from real conversion repos
+- `knowledge/nwb-best-practices.md` — NWB conventions and common mistakes (from NWB Inspector)
+
+### Conversion Registry (`nwb-conversions` GitHub org)
+
+The `nwb-conversions` GitHub org is a living registry of all conversion repos created by
+this skill. Each repo contains a `conversion_manifest.yaml` describing what was built.
+A weekly GitHub Action aggregates all manifests into `nwb-conversions/.github/registry.yaml`.
+
+**How to use the registry:**
+- **Phase 1**: Fetch `registry.yaml` to find similar prior conversions by species, modality, or file format
+- **Phase 2**: Cross-reference `format_hints` to accelerate file-to-interface mapping
+- **Phase 5**: Search for reusable custom interfaces before writing from scratch
+- **Phase 6**: Check `lessons` for known pitfalls with the same formats/tools
+- **Phase 7**: Write `conversion_manifest.yaml` to feed back into the registry
+
+**Authentication:** The skill calls the nwb-conversions API
+(`https://nwb-conversions-api.ben-dichter.workers.dev`) to create private repos in the
+`nwb-conversions` org and fetch the registry. The user does not need a GitHub account —
+the API handles authentication server-side. If the API is unreachable, the skill works
+locally without registry integration.
+
+## Presenting Choices to the User
+
+When you want the user to pick from a set of options, use the `<choices>` format. The chat
+UI renders these as clickable buttons that the user can tap instead of typing.
+
+**Use this whenever:**
+- Asking the user to confirm or select between options
+- Presenting yes/no or multiple-choice questions
+- Offering suggested next steps
+
+**Format:**
+
+```
+Which DANDI instance should we use?
+
+<choices>
+DANDI Sandbox (for testing)
+Official DANDI Archive (for publication)
+</choices>
+```
+
+This renders as clickable pill buttons. When the user clicks one, their selection is sent
+as a message automatically. You can also include a free-text option:
+
+```
+What type of neural recording did you collect?
+
+<choices>
+Extracellular electrophysiology (e.g., Neuropixels, tetrodes)
+Calcium imaging (two-photon or miniscope)
+Intracellular electrophysiology (patch clamp)
+Fiber photometry
+</choices>
+```
+
+The user can always type a custom answer instead of clicking a button. Use choices
+generously — they make the conversation faster and reduce ambiguity.
+
+## Critical Rules
+
+1. NEVER assume you have all the information. Always ask when uncertain.
+2. NEVER write conversion code without first inspecting actual data files.
+3. ALWAYS use NeuroConv interfaces when available rather than writing raw PyNWB.
+4. ALWAYS include `stub_test` support in conversion scripts.
+5. If an NWB extension is needed, FLAG IT — don't try to create one without expert help.
+6. Session start times MUST have timezone information.
+7. Subject species should use binomial nomenclature (e.g., "Mus musculus" not "mouse").
+8. Keep the user informed of what you're doing and why.
+9. 
ALWAYS follow NWB best practices (see `knowledge/nwb-best-practices.md`): + - Time-first data orientation (transpose if needed) + - Use `rate` + `starting_time` for regularly sampled data + - Use `conversion` parameter instead of transforming data values + - No empty strings in descriptions, units, or other text fields + - All timestamps in seconds, ascending, non-negative, no NaN + - Use most specific TimeSeries subtype available + - Electrode `location` is always required (use "unknown" if needed) + - `related_publications` should use DOI format: `"doi:10.xxxx/xxxxx"` + diff --git a/src/pyflask/ai/skill/knowledge/conversion-patterns.md b/src/pyflask/ai/skill/knowledge/conversion-patterns.md new file mode 100644 index 000000000..810925501 --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/conversion-patterns.md @@ -0,0 +1,362 @@ +# Common Conversion Patterns from Real CatalystNeuro Repos + +This document captures patterns observed across ~60 CatalystNeuro conversion repos. + +## Pattern 1: Standard NeuroConv Pipeline (Most Common) + +**Used by**: wen22, cai-lab, turner-lab, constantinople-lab, most modern repos + +```python +class MyNWBConverter(NWBConverter): + data_interface_classes = dict( + Recording=SpikeGLXRecordingInterface, + LFP=SpikeGLXLFPInterface, + Sorting=PhySortingInterface, + Behavior=CustomBehaviorInterface, + ) +``` + +Key characteristics: +- NWBConverter subclass with `data_interface_classes` dict +- Mix of built-in NeuroConv interfaces and custom ones +- `convert_session.py` builds source_data and conversion_options dicts +- Metadata layered: auto-extracted → YAML → programmatic overrides + +## Pattern 2: ConverterPipe with Dynamic Interfaces + +**Used by**: ibl-to-nwb, turner-lab (some conversions) + +```python +from neuroconv import ConverterPipe + +interfaces = [] +interfaces.append(SpikeGLXRecordingInterface(folder_path=path)) +if sorting_exists: + interfaces.append(PhySortingInterface(folder_path=phy_path)) +converter = 
ConverterPipe(data_interfaces=interfaces) +``` + +Used when: +- Interfaces need custom initialization (API clients, non-file sources) +- Session-dependent interface sets (not all sessions have all data) +- Pre-constructed interface instances needed + +## Pattern 3: Raw PyNWB (Legacy / Highly Custom) + +**Used by**: giocomo legacy, mallory21 freely-moving, older repos + +```python +nwbfile = NWBFile(session_description=..., ...) +# Manually create PyNWB objects +position = Position(spatial_series=SpatialSeries(...)) +nwbfile.create_processing_module("behavior").add(position) +with NWBHDF5IO(path, "w") as io: + io.write(nwbfile) +``` + +Used when: +- Data is in highly processed/custom format (e.g., all-in-one .mat file) +- No NeuroConv interface exists and writing one isn't worth it +- Legacy code predating NeuroConv + +## Pattern 4: Hybrid (NWBConverter + Direct PyNWB) + +**Used by**: reimer-arenkiel-lab (DataJoint + TIFF) + +The NWBConverter handles some data streams, then additional data is added +directly to the NWBFile via standalone functions: + +```python +converter = MyConverter(source_data=source_data) +nwbfile = converter.create_nwbfile(metadata=metadata) +# Add more data directly +add_trials_from_database(nwbfile, session_key) +add_behavior_from_database(nwbfile, session_key) +configure_and_write_nwbfile(nwbfile, nwbfile_path) +``` + +## Pattern 5: Ophys with Suite2p + Custom Behavioral Data + +**Used by**: giocomo-lab ophys (Plitt 2021) + +When an ophys experiment has: +- Raw imaging in a proprietary format (Scanbox, ScanImage, Bruker) +- Suite2p segmentation output +- Custom behavioral data (pickle, .mat, CSV) + +```python +class MyNWBConverter(NWBConverter): + data_interface_classes = dict( + Imaging=SbxImagingInterface, # or ScanImageImagingInterface, BrukerTiffMultiPlaneImagingInterface + Segmentation=Suite2pSegmentationInterface, + Behavior=CustomBehaviorInterface, + ) +``` + +Key considerations: +- Suite2p and raw imaging share the same clock 
(frame-aligned) +- If behavioral data is logged per imaging frame, use `rate` + `starting_time` (no timestamps array) +- Compute rate as `rate = 1.0 / df["time"].diff().mean()` from the behavioral DataFrame +- Position data in VR: use `conversion=0.01` if data is in cm, set `unit="m"` +- Separate behavioral signals (position, speed, lick) from stimulus parameters (morph, contrast) +- Add behavioral data as `BehavioralTimeSeries` in `processing["behavior"]` +- Add stimulus data via `nwbfile.add_stimulus()` + +Ophys metadata YAML should include device and imaging plane info: + +```yaml +Ophys: + Device: + - name: Microscope + description: Two-photon resonant scanning microscope + manufacturer: Neurolabware # or Bruker, Thorlabs, etc. + ImagingPlane: + - name: ImagingPlane + description: Imaging plane in hippocampal CA1 + excitation_lambda: 920.0 + indicator: GCaMP6f + location: CA1 + TwoPhotonSeries: + - name: TwoPhotonSeries + description: Two-photon calcium imaging data +``` + +## Common Custom Interface Patterns + +### Reading MATLAB .mat files + +```python +# For MATLAB v7.3+ (HDF5-based) +import h5py +with h5py.File(file_path, "r") as f: + data = f["variable_name"][:] + +# For older MATLAB files +from scipy.io import loadmat +mat = loadmat(file_path) +data = mat["variable_name"] + +# For MATLAB v7.3 with complex nested structures +import hdf5storage +mat = hdf5storage.loadmat(file_path) +``` + +### Reading text/CSV behavior files + +```python +import pandas as pd +# Tab-separated with no header +df = pd.read_csv(file_path, sep="\t", header=None, + names=["timestamp", "position", "extra1", "extra2"]) + +# Or numpy for simple numeric files +import numpy as np +data = np.loadtxt(file_path) +``` + +### Reading pickled DataFrames + +```python +import pickle +with open(file_path, "rb") as f: + data = pickle.load(f) +df = data["VR_Data"] # or whatever key +``` + +**Pickle compatibility**: Pickles saved with older pandas versions may fail to load with +pandas >= 2.0 
because `pandas.core.indexes.numeric` was removed. If you encounter +`ModuleNotFoundError: No module named 'pandas.core.indexes.numeric'`: +1. First try loading normally +2. If it fails, the user may need `pandas < 2.0` or to re-save the pickle with a newer version +3. Flag this to the user as a data compatibility issue — it is NOT a bug in the conversion code + +### Creating Position data + +```python +from pynwb.behavior import Position, SpatialSeries +from neuroconv.tools.nwb_helpers import get_module + +position = Position() +position.create_spatial_series( + name="virtual_position", + data=pos_data, # shape (n_timepoints,) or (n_timepoints, n_dims) + timestamps=timestamps, # or starting_time + rate + unit="meters", + reference_frame="Virtual track, 0=start, 2=end", + conversion=0.01, # if data is in cm, convert to meters +) + +behavior_module = get_module(nwbfile, "behavior", "Processed behavioral data") +behavior_module.add(position) +``` + +### Creating Trial tables + +```python +# Add custom columns first +nwbfile.add_trial_column(name="contrast", description="Visual contrast level") +nwbfile.add_trial_column(name="correct", description="Whether trial was correct") + +# Then add each trial +for _, row in trials_df.iterrows(): + nwbfile.add_trial( + start_time=row["start"], + stop_time=row["stop"], + contrast=row["contrast"], + correct=row["correct"], + ) +``` + +### Creating Events (using ndx-events) + +```python +from ndx_events import Events + +lick_events = Events( + name="lick_times", + description="Times of lick events", + timestamps=lick_timestamps, +) +behavior_module = get_module(nwbfile, "behavior") +behavior_module.add(lick_events) +``` + +### Using H5DataIO for compression + +```python +from hdmf.backends.hdf5.h5_utils import H5DataIO + +compressed_data = H5DataIO(data=large_array, compression="gzip") +ts = TimeSeries(name="my_data", data=compressed_data, ...) 
+``` + +## Synchronization Patterns from Real Repos + +### wen22: NIDQ TTL-based offset + +```python +from spikeinterface.extractors import SpikeGLXRecordingExtractor +import numpy as np + +nidq = SpikeGLXRecordingExtractor(folder_path=spikeglx_path, stream_id="nidq") +signal = nidq.get_traces(channel_ids=["nidq#XA2"]).flatten() +binary = (signal > signal.max() / 2).astype(int) +rising_edges = np.where(np.diff(binary) > 0)[0] +ttl_times = rising_edges / nidq.get_sampling_frequency() + +# Compare with behavioral epoch boundaries to get offset +offset = np.mean(ttl_times[:n] - behavioral_epoch_times[:n]) +# Shift all behavioral timestamps +behavioral_timestamps += offset +``` + +### reimer-arenkiel: Multi-clock interpolation + +```python +from scipy.interpolate import interp1d + +# Map behavior clock → odor clock +interp_func = interp1d( + behavior_scan_times, + odor_scan_times[:len(behavior_scan_times)], + kind="linear", + fill_value="extrapolate", +) +aligned_times = interp_func(behavior_timestamps) +``` + +### ophys: Frame-rate inference from DataFrame + +```python +# When behavioral data is logged per imaging frame +rate = 1.0 / df["time"].diff().mean() +# Use starting_time=0.0 and rate=rate for all behavioral time series +``` + +## Session Discovery Patterns + +### Directory-based (most common) + +```python +def get_session_to_nwb_kwargs_per_session(data_dir_path): + sessions = [] + for session_dir in sorted(data_dir_path.iterdir()): + if session_dir.is_dir() and not session_dir.name.startswith("."): + sessions.append(dict( + data_dir_path=str(session_dir), + session_id=session_dir.name, + )) + return sessions +``` + +### File-pattern based + +```python +import re +for mat_file in data_dir_path.glob("cell_info_session*.mat"): + session_id = re.search(r"session(\d+)", mat_file.name).group(1) + # Find matching SpikeGLX files + spikeglx_path = find_matching_spikeglx(session_id) + sessions.append(dict( + processed_file=str(mat_file), + 
spikeglx_path=str(spikeglx_path), + session_id=session_id, + )) +``` + +### Subject metadata from JSON/YAML + +```python +import json +with open("subject_metadata.json") as f: + all_subjects = json.load(f) +subject_info = all_subjects[subject_id] +metadata["Subject"].update(subject_info) +``` + +## Common File Organizations + +### SpikeGLX standard layout +``` +session_dir/ + session_g0/ + session_g0_imec0/ + session_g0_t0.imec0.ap.bin + session_g0_t0.imec0.ap.meta + session_g0_t0.imec0.lf.bin + session_g0_t0.imec0.lf.meta + session_g0_t0.nidq.bin + session_g0_t0.nidq.meta +``` + +### Phy output layout +``` +phy/ + params.py + spike_times.npy + spike_clusters.npy + cluster_group.tsv (or cluster_info.tsv) + templates.npy + ... +``` + +### Suite2p output layout +``` +suite2p/ + plane0/ + stat.npy + ops.npy + F.npy + Fneu.npy + iscell.npy + spks.npy +``` + +### ScanImage TIFF +``` +session_dir/ + file_00001.tif + file_00002.tif + ... + file_00001.tif.meta (or embedded in TIFF headers) +``` diff --git a/src/pyflask/ai/skill/knowledge/ndx-anatomical-localization.md b/src/pyflask/ai/skill/knowledge/ndx-anatomical-localization.md new file mode 100644 index 000000000..890d1c3e8 --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/ndx-anatomical-localization.md @@ -0,0 +1,227 @@ +# Anatomical Localization — ndx-anatomical-localization Patterns + +Construction patterns using `ndx-anatomical-localization` (v0.1.0+). +Standardized storage of anatomical coordinates for electrodes and imaging planes +against reference atlases (e.g., Allen CCFv3). 
+ +## Installation + +```bash +pip install ndx-anatomical-localization +``` + +Dependencies: `pynwb>=2.8.0`, `hdmf>=3.14.1`, Python >= 3.10 + +## Overview + +The extension defines 5 types: + +| Type | Purpose | +|------|---------| +| `Space` | Custom coordinate system (origin, units, orientation) | +| `AllenCCFv3Space` | Pre-configured Allen Mouse Brain CCFv3 space | +| `AnatomicalCoordinatesTable` | 3D coordinates for point entities (electrodes) | +| `AnatomicalCoordinatesImage` | Pixel-to-coordinate mapping for imaging planes | +| `Localization` | LabMetaData container grouping all localization data | + +## AllenCCFv3Space + +Pre-configured coordinate system for the Allen Mouse Brain Common Coordinate Framework v3: + +```python +from ndx_anatomical_localization import AllenCCFv3Space + +ccf_space = AllenCCFv3Space() +# Fixed properties: +# orientation: "PIR" (positive x=Posterior, y=Inferior, z=Right) +# units: "um" +# origin: "Anterior-Superior-Left corner of the 3D image volume" +# extent: [13200.0, 8000.0, 11400.0] um (AP × DV × ML) +# resolution: 10 um isotropic +``` + +## Custom Space + +For non-Allen atlases or custom coordinate systems: + +```python +from ndx_anatomical_localization import Space + +space = Space( + name="BregmaSpace", + space_name="BregmaSpace", + origin="bregma", + units="um", + orientation="RAS", # positive x=Right, y=Anterior, z=Superior +) +``` + +**Orientation codes** — 3-letter string, one from each pair: +- A/P (Anterior/Posterior) +- L/R (Left/Right) +- S/I (Superior/Inferior) + +Examples: `"RAS"`, `"PIR"`, `"LPI"` + +## Electrode Localization (AnatomicalCoordinatesTable) + +The primary use case — localizing electrodes to atlas coordinates: + +```python +from ndx_anatomical_localization import ( + AnatomicalCoordinatesTable, + AllenCCFv3Space, + Localization, +) + +# 1. Create Localization container +localization = Localization() +nwbfile.add_lab_meta_data([localization]) + +# 2. 
Add coordinate space +ccf_space = AllenCCFv3Space() +localization.add_spaces([ccf_space]) + +# 3. Create coordinates table referencing the electrodes table +coords = AnatomicalCoordinatesTable( + name="AllenCCFv3Coordinates", + target=nwbfile.electrodes, + description="Electrode locations in Allen CCFv3", + method="SHARP-Track 1.0", + space=ccf_space, +) + +# 4. Add one row per electrode +for i in range(len(nwbfile.electrodes)): + coords.add_row( + x=ccf_x[i], # AP coordinate in um + y=ccf_y[i], # DV coordinate in um + z=ccf_z[i], # ML coordinate in um + brain_region="CA1", # optional + localized_entity=i, # index into electrodes table + ) + +localization.add_anatomical_coordinates_tables([coords]) +``` + +### Partial Localization + +Not all electrodes need coordinates — only add rows for localized ones: + +```python +for electrode_id in [0, 2, 5, 8]: # only 4 of 16 electrodes + coords.add_row( + x=ccf_x[electrode_id], + y=ccf_y[electrode_id], + z=ccf_z[electrode_id], + brain_region=regions[electrode_id], + localized_entity=electrode_id, + ) +``` + +## Imaging Plane Registration (AnatomicalCoordinatesImage) + +For registering a 2D imaging field of view to atlas coordinates: + +```python +from ndx_anatomical_localization import AnatomicalCoordinatesImage +import numpy as np + +image_coords = AnatomicalCoordinatesImage( + name="ImagingPlaneLocalization", + imaging_plane=nwbfile.imaging_planes["ImagingPlane"], + method="manual registration", + space=ccf_space, + x=x_grid, # shape: (height, width) + y=y_grid, # shape: (height, width) + z=z_grid, # shape: (height, width) + brain_region=region_labels, # optional, shape: (height, width) +) + +localization.add_anatomical_coordinates_images([image_coords]) +``` + +For static images (e.g., histology) use `image=` instead of `imaging_plane=`: + +```python +from pynwb.image import GrayscaleImage + +histology_img = GrayscaleImage( + name="histology_slice", + data=slice_data, + description="Nissl-stained coronal section", +) + 
+image_coords = AnatomicalCoordinatesImage( + name="HistologyLocalization", + image=histology_img, # use image= instead of imaging_plane= + method="manual registration to CCF", + space=ccf_space, + x=x_coords, y=y_coords, z=z_coords, +) +``` + +**Constraint:** Exactly one of `image` or `imaging_plane` must be provided. + +## Multiple Localizations + +Store multiple localizations (different methods, different spaces) in one file: + +```python +localization = Localization() +nwbfile.add_lab_meta_data([localization]) + +ccf_space = AllenCCFv3Space() +bregma_space = Space(name="Bregma", space_name="Bregma", + origin="bregma", units="um", orientation="RAS") +localization.add_spaces([ccf_space, bregma_space]) + +# Manual annotation in bregma coordinates +manual = AnatomicalCoordinatesTable( + name="ManualLocalization", + target=nwbfile.electrodes, + method="manual annotation", + space=bregma_space, +) + +# Automated registration to CCF +automated = AnatomicalCoordinatesTable( + name="SHARPTrackLocalization", + target=nwbfile.electrodes, + method="SHARP-Track 2.0", + space=ccf_space, +) + +# ... add rows to each ... + +localization.add_anatomical_coordinates_tables([manual, automated]) +``` + +## Reading Back + +```python +from pynwb import NWBHDF5IO + +with NWBHDF5IO("data.nwb", "r", load_namespaces=True) as io: + nwbfile = io.read() + localization = nwbfile.lab_meta_data["localization"] + coords = localization.anatomical_coordinates_tables["AllenCCFv3Coordinates"] + + x = coords["x"].data[:] + y = coords["y"].data[:] + z = coords["z"].data[:] + regions = coords["brain_region"].data[:] + electrode_ids = coords["localized_entity"].data[:] +``` + +## Notes + +- The `Localization` container is added via `nwbfile.add_lab_meta_data([localization])`. +- `AllenCCFv3Space` uses **PIR** orientation: +x=Posterior, +y=Inferior, +z=Right. + Bregma is approximately at (5400, 0, 5700) um in CCFv3 coordinates. 
+- `method` should describe the registration tool/approach (e.g., "SHARP-Track 1.0", + "manual annotation", "Pinpoint", "brainreg"). +- `brain_region` is optional but recommended — use Allen Brain Atlas ontology terms. +- For `AnatomicalCoordinatesImage`, coordinate arrays must match the image dimensions. +- This extension is currently v0.1.0 (beta) but is the recommended way to store + anatomical localization data in NWB files. diff --git a/src/pyflask/ai/skill/knowledge/ndx-fiber-photometry.md b/src/pyflask/ai/skill/knowledge/ndx-fiber-photometry.md new file mode 100644 index 000000000..df6618b55 --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/ndx-fiber-photometry.md @@ -0,0 +1,311 @@ +# Fiber Photometry — ndx-fiber-photometry Patterns + +Construction patterns using the `ndx-fiber-photometry` extension (v0.2.4+). +This is the **required** extension for fiber photometry data — do not store +fiber photometry signals as plain TimeSeries. + +## Installation + +```bash +pip install ndx-fiber-photometry +``` + +Dependencies: `pynwb>=3.1.0`, `hdmf>=4.1.0`, `ndx-ophys-devices>=0.3.1` + +## Overview + +The extension defines a structured hierarchy: + +1. **Devices** — optical fiber, excitation source, photodetector, filters, dichroic mirrors +2. **Biological components** — indicator (e.g., dLight1.1, GCaMP6f), viral vector, injection +3. **FiberPhotometryTable** — DynamicTable linking devices + indicator + brain region per channel +4. **FiberPhotometryResponseSeries** — TimeSeries holding fluorescence data, referencing table rows +5. **CommandedVoltageSeries** — optional voltage commands controlling excitation sources +6. 
**FiberPhotometry** — LabMetaData container wrapping everything + +## Complete Construction Example + +```python +from ndx_fiber_photometry import ( + FiberPhotometry, + FiberPhotometryTable, + FiberPhotometryResponseSeries, + CommandedVoltageSeries, + FiberPhotometryIndicators, +) +from ndx_ophys_devices import ( + ExcitationSource, + OpticalFiber, + Photodetector, + BandOpticalFilter, + DichroicMirror, + Indicator, +) + +# ── Step 1: Create Devices ────────────────────────────────────────────── + +excitation_source = ExcitationSource( + name="LED_465nm", + description="Blue LED for dLight excitation", + manufacturer="Doric Lenses", + illumination_type="LED", + excitation_wavelength_in_nm=465.0, +) +nwbfile.add_device(excitation_source) + +excitation_source_isos = ExcitationSource( + name="LED_405nm", + description="Violet LED for isosbestic control", + manufacturer="Doric Lenses", + illumination_type="LED", + excitation_wavelength_in_nm=405.0, +) +nwbfile.add_device(excitation_source_isos) + +photodetector = Photodetector( + name="Newport2151", + description="Femtowatt photoreceiver", + manufacturer="Newport", + detector_type="photodiode", + detected_wavelength_in_nm=525.0, +) +nwbfile.add_device(photodetector) + +optical_fiber = OpticalFiber( + name="Fiber_DMS", + description="400um 0.48NA fiber optic cannula", + manufacturer="Doric Lenses", + numerical_aperture=0.48, + core_diameter_in_um=400.0, +) +nwbfile.add_device(optical_fiber) + +dichroic_mirror = DichroicMirror( + name="DM_495", + description="495nm dichroic mirror", + manufacturer="Semrock", + cut_on_wavelength_in_nm=495.0, +) +nwbfile.add_device(dichroic_mirror) + +emission_filter = BandOpticalFilter( + name="BP_500_550", + description="500-550nm bandpass emission filter", + manufacturer="Semrock", + center_wavelength_in_nm=525.0, + bandwidth_in_nm=50.0, +) +nwbfile.add_device(emission_filter) + +# ── Step 2: Create Indicator ──────────────────────────────────────────── + +indicator = Indicator( + 
name="dLight1.1", + description="Genetically-encoded dopamine sensor", + label="dLight1.1", + injection_location="DMS", + excitation_wavelength_in_nm=465.0, + emission_wavelength_in_nm=525.0, +) + +indicators = FiberPhotometryIndicators( + name="fiber_photometry_indicators", + indicators=[indicator], +) + +# ── Step 3: Build FiberPhotometryTable ────────────────────────────────── + +fp_table = FiberPhotometryTable( + name="FiberPhotometryTable", + description="Fiber photometry channel configuration", +) + +# Signal channel (465nm excitation → dLight fluorescence) +fp_table.add_row( + location="DMS", + excitation_wavelength_in_nm=465.0, + emission_wavelength_in_nm=525.0, + indicator=indicator, + optical_fiber=optical_fiber, + excitation_source=excitation_source, + photodetector=photodetector, + dichroic_mirror=dichroic_mirror, + emission_filter=emission_filter, +) + +# Isosbestic control channel (405nm excitation → same fiber) +fp_table.add_row( + location="DMS", + excitation_wavelength_in_nm=405.0, + emission_wavelength_in_nm=525.0, + indicator=indicator, + optical_fiber=optical_fiber, + excitation_source=excitation_source_isos, + photodetector=photodetector, + dichroic_mirror=dichroic_mirror, + emission_filter=emission_filter, +) + +# ── Step 4: Create Response Series ────────────────────────────────────── + +# Reference specific rows of the table +signal_region = fp_table.create_fiber_photometry_table_region( + region=[0], + description="Signal channel (465nm dLight)", +) + +isos_region = fp_table.create_fiber_photometry_table_region( + region=[1], + description="Isosbestic control channel (405nm)", +) + +signal_series = FiberPhotometryResponseSeries( + name="dff_dms_signal", + description="dF/F from dLight1.1 in DMS (465nm excitation)", + data=dff_signal, # shape: (n_timepoints,) + rate=20.0, # sampling rate in Hz + unit="F", + fiber_photometry_table_region=signal_region, +) + +isos_series = FiberPhotometryResponseSeries( + name="dff_dms_isosbestic", + 
description="Isosbestic control signal in DMS (405nm excitation)", + data=dff_isos, + rate=20.0, + unit="F", + fiber_photometry_table_region=isos_region, +) + +nwbfile.add_acquisition(signal_series) +nwbfile.add_acquisition(isos_series) + +# ── Step 5: Optional CommandedVoltageSeries ───────────────────────────── + +commanded_voltage = CommandedVoltageSeries( + name="commanded_voltage", + description="Voltage commands to LEDs", + data=voltage_data, + rate=10000.0, + unit="volts", + frequency=211.0, # modulation frequency in Hz +) +nwbfile.add_stimulus(commanded_voltage) + +# ── Step 6: Wrap in FiberPhotometry LabMetaData ───────────────────────── + +fiber_photometry = FiberPhotometry( + name="fiber_photometry", + fiber_photometry_table=fp_table, + fiber_photometry_indicators=indicators, +) +nwbfile.add_lab_meta_data(fiber_photometry) +``` + +## Multi-Fiber Setup + +For experiments with multiple fibers (e.g., DMS + NAc): + +```python +fiber_dms = OpticalFiber(name="Fiber_DMS", ...) +fiber_nac = OpticalFiber(name="Fiber_NAc", ...) +nwbfile.add_device(fiber_dms) +nwbfile.add_device(fiber_nac) + +# Add rows for each fiber × wavelength combination +fp_table.add_row(location="DMS", optical_fiber=fiber_dms, + excitation_wavelength_in_nm=465.0, ...) # row 0 +fp_table.add_row(location="DMS", optical_fiber=fiber_dms, + excitation_wavelength_in_nm=405.0, ...) # row 1 +fp_table.add_row(location="NAc", optical_fiber=fiber_nac, + excitation_wavelength_in_nm=465.0, ...) # row 2 +fp_table.add_row(location="NAc", optical_fiber=fiber_nac, + excitation_wavelength_in_nm=405.0, ...) 
# row 3 + +# Create separate response series for each channel +dms_signal = FiberPhotometryResponseSeries( + name="dff_dms", + fiber_photometry_table_region=fp_table.create_fiber_photometry_table_region( + region=[0], description="DMS signal channel" + ), + data=dms_data, rate=20.0, unit="F", +) +nac_signal = FiberPhotometryResponseSeries( + name="dff_nac", + fiber_photometry_table_region=fp_table.create_fiber_photometry_table_region( + region=[2], description="NAc signal channel" + ), + data=nac_data, rate=20.0, unit="F", +) +``` + +## Common Indicators + +| Indicator | Target | Excitation (nm) | Emission (nm) | +|-----------|--------|-----------------|---------------| +| dLight1.1 | Dopamine | 465 | 525 | +| dLight1.3b | Dopamine | 465 | 525 | +| GRAB-DA | Dopamine | 465 | 525 | +| GCaMP6f | Calcium | 488 | 525 | +| GCaMP7f | Calcium | 488 | 525 | +| rGECO1a | Calcium | 560 | 600 | +| GRAB-ACh | Acetylcholine | 465 | 525 | +| GRAB-5HT | Serotonin | 465 | 525 | +| iGluSnFR | Glutamate | 465 | 525 | + +## Metadata YAML Template + +```yaml +FiberPhotometry: + FiberPhotometryTable: + - location: DMS + excitation_wavelength_in_nm: 465.0 + emission_wavelength_in_nm: 525.0 + coordinates: [0.5, 1.5, 3.0] # AP, ML, DV in mm (optional) + + OpticalFibers: + - name: Fiber_DMS + description: 400um 0.48NA fiber optic cannula + manufacturer: Doric Lenses + numerical_aperture: 0.48 + core_diameter_in_um: 400.0 + + ExcitationSources: + - name: LED_465nm + description: Blue LED + manufacturer: Doric Lenses + illumination_type: LED + excitation_wavelength_in_nm: 465.0 + - name: LED_405nm + description: Violet LED (isosbestic) + manufacturer: Doric Lenses + illumination_type: LED + excitation_wavelength_in_nm: 405.0 + + Photodetectors: + - name: Newport2151 + description: Femtowatt photoreceiver + manufacturer: Newport + detector_type: photodiode + detected_wavelength_in_nm: 525.0 + + Indicators: + - name: dLight1.1 + label: dLight1.1 + description: Genetically-encoded dopamine 
sensor + injection_location: DMS + excitation_wavelength_in_nm: 465.0 + emission_wavelength_in_nm: 525.0 +``` + +## Notes + +- **Always use this extension** for fiber photometry data. Do not store signals as + plain TimeSeries in a processing module. +- The `FiberPhotometryTable` is a DynamicTable — each row represents one channel + (one fiber × one excitation wavelength combination). +- Isosbestic control channels (typically 405nm) should be separate rows in the table + with their own `FiberPhotometryResponseSeries`. +- The `FiberPhotometry` object is added as `lab_meta_data`, not in a processing module. +- `FiberPhotometryResponseSeries` can go in `acquisition` (raw) or `processing` (processed). +- `unit` for fluorescence data is typically `"F"` (arbitrary fluorescence units). diff --git a/src/pyflask/ai/skill/knowledge/ndx-pose.md b/src/pyflask/ai/skill/knowledge/ndx-pose.md new file mode 100644 index 000000000..e6d640845 --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/ndx-pose.md @@ -0,0 +1,202 @@ +# Pose Estimation — ndx-pose Patterns + +Construction patterns using the `ndx-pose` extension (v0.2.2+). +Use this for pose estimation data from DeepLabCut, SLEAP, Lightning Pose, etc. 
+ +## Installation + +```bash +pip install ndx-pose +``` + +## Overview + +The extension defines: +- **Skeleton** — body part nodes and their connections (edges) +- **PoseEstimationSeries** — per-keypoint x,y(,z) positions + confidence over time +- **PoseEstimation** — container grouping all keypoints from one video/algorithm +- **PoseTraining** — optional training data (annotated frames, ground truth) + +## NeuroConv Integration + +NeuroConv has built-in interfaces for the major pose estimation tools: +- `DeepLabCutInterface` — reads DLC `.h5` or `.csv` output +- `SLEAPInterface` — reads SLEAP `.slp` or `.nwb` output +- `LightningPoseInterface` — reads Lightning Pose output + +**Prefer NeuroConv interfaces when available.** Only use raw ndx-pose construction +when data is in a custom format not supported by NeuroConv. + +## Skeleton Definition + +```python +from ndx_pose import Skeleton, Skeletons +import numpy as np + +skeleton = Skeleton( + name="mouse_skeleton", + nodes=["nose", "left_ear", "right_ear", "neck", "body", "tail_base"], + edges=np.array([ + [0, 3], # nose → neck + [1, 3], # left_ear → neck + [2, 3], # right_ear → neck + [3, 4], # neck → body + [4, 5], # body → tail_base + ], dtype="uint8"), + subject=nwbfile.subject, # optional +) + +skeletons = Skeletons(skeletons=[skeleton]) +``` + +- `nodes`: list of body part names (order matters — indices used in edges) +- `edges`: Nx2 uint8 array of 0-indexed node pairs + +## PoseEstimationSeries — Per-Keypoint Data + +```python +from ndx_pose import PoseEstimationSeries + +nose = PoseEstimationSeries( + name="nose", + description="Nose keypoint tracked by DeepLabCut", + data=nose_xy, # shape: (n_frames, 2) for 2D or (n_frames, 3) for 3D + unit="pixels", + reference_frame="(0,0) is top-left corner of video frame", + timestamps=timestamps, # or rate=30.0 + confidence=confidence_scores, # shape: (n_frames,), values 0-1, optional + confidence_definition="Softmax output of DeepLabCut network", +) + +# Share 
timestamps across keypoints to save space +left_ear = PoseEstimationSeries( + name="left_ear", + description="Left ear keypoint", + data=left_ear_xy, + unit="pixels", + reference_frame="(0,0) is top-left corner of video frame", + timestamps=nose, # reference another series' timestamps + confidence=left_ear_confidence, + confidence_definition="Softmax output of DeepLabCut network", +) +``` + +## PoseEstimation — Container + +```python +from ndx_pose import PoseEstimation +from neuroconv.tools.nwb_helpers import get_module + +camera = nwbfile.create_device( + name="BehaviorCamera", + description="Side-view camera for pose tracking", + manufacturer="Basler", +) + +pose_estimation = PoseEstimation( + name="PoseEstimation", + pose_estimation_series=[nose, left_ear, right_ear, neck, body, tail_base], + description="Pose estimation of freely moving mouse", + original_videos=["behavior_video.mp4"], + labeled_videos=["behavior_video_labeled.mp4"], # optional + dimensions=np.array([[640, 480]], dtype="uint16"), # optional: height, width + devices=[camera], # optional + scorer="DLC_resnet50_openfieldOct30shuffle1_1600", # optional + source_software="DeepLabCut", # optional + source_software_version="2.3.8", # optional + skeleton=skeleton, # optional but recommended +) + +behavior = get_module(nwbfile, "behavior", "Processed behavioral data") +behavior.add(skeletons) +behavior.add(pose_estimation) +``` + +## Complete Minimal Example + +```python +import numpy as np +from ndx_pose import ( + Skeleton, Skeletons, + PoseEstimationSeries, PoseEstimation, +) +from neuroconv.tools.nwb_helpers import get_module + +# 1. Define skeleton +skeleton = Skeleton( + name="mouse", + nodes=["nose", "body", "tail"], + edges=np.array([[0, 1], [1, 2]], dtype="uint8"), +) + +# 2. 
Create series for each keypoint +n_frames = 1000 +timestamps = np.linspace(0, 33.3, n_frames) # 30 fps for ~33s + +series_list = [] +for node in skeleton.nodes: + s = PoseEstimationSeries( + name=node, + description=f"Position of {node}", + data=np.random.rand(n_frames, 2) * 512, + unit="pixels", + reference_frame="Top-left corner of 512x512 video", + timestamps=timestamps if not series_list else series_list[0], + confidence=np.random.rand(n_frames), + confidence_definition="DLC likelihood", + ) + series_list.append(s) + +# 3. Create container +pose_est = PoseEstimation( + name="PoseEstimation", + pose_estimation_series=series_list, + description="DeepLabCut pose estimation", + source_software="DeepLabCut", + skeleton=skeleton, +) + +# 4. Add to NWB file +behavior = get_module(nwbfile, "behavior", "Behavioral data") +behavior.add(Skeletons(skeletons=[skeleton])) +behavior.add(pose_est) +``` + +## Multi-Camera / Multi-View + +For multi-camera setups, create separate `PoseEstimation` containers per view: + +```python +pose_side = PoseEstimation( + name="PoseEstimation_side", + pose_estimation_series=side_series, + description="Side camera pose estimation", + devices=[side_camera], + skeleton=skeleton, + source_software="DeepLabCut", +) + +pose_top = PoseEstimation( + name="PoseEstimation_top", + pose_estimation_series=top_series, + description="Top camera pose estimation", + devices=[top_camera], + skeleton=skeleton, + source_software="DeepLabCut", +) + +behavior.add(pose_side) +behavior.add(pose_top) +``` + +## Notes + +- **One subject per NWB file.** For multi-animal tracking, create separate NWB files. +- `confidence` is optional (since v0.2.0) but recommended when available. +- `unit` is typically `"pixels"` for 2D video tracking. Use `"meters"` if coordinates + have been calibrated to real-world units. +- Share timestamps across keypoints by passing a reference to another series. 
+- `source_software` should be one of: `"DeepLabCut"`, `"SLEAP"`, `"Lightning Pose"`, + or the actual software name. +- Training data (`PoseTraining`) is rarely needed in conversion workflows — it's mainly + for sharing annotated datasets used to train models. diff --git a/src/pyflask/ai/skill/knowledge/neuroconv-interfaces.yaml b/src/pyflask/ai/skill/knowledge/neuroconv-interfaces.yaml new file mode 100644 index 000000000..5b197ae98 --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/neuroconv-interfaces.yaml @@ -0,0 +1,2172 @@ +ecephys: + recordings: + - name: SpikeGLXRecordingInterface + module: neuroconv.datainterfaces + format: "SpikeGLX Neuropixels (.ap.bin/.lf.bin + .meta)" + source_data: + folder_path: + type: DirectoryPath + description: "Folder path containing the binary files of the SpikeGLX recording" + stream_id: + type: str + description: "Stream ID of the SpikeGLX recording (e.g. 'imec0.ap', 'imec0.lf', 'imec1.ap')" + verbose: + type: bool + description: "Whether to output verbose text" + optional: true + default: false + es_key: + type: str + description: "The key to access the metadata of the ElectricalSeries" + optional: true + creates: + - ElectricalSeries + - Device (Neuropixels) + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset of data for testing" + write_as: + type: str + description: "How to save traces: 'raw', 'lfp', or 'processed'" + write_electrical_series: + type: bool + description: "If False, only write device/electrode metadata without data" + iterator_type: + type: str + description: "Iterator type for chunked writing ('v2' or None)" + + - name: AlphaOmegaRecordingInterface + module: neuroconv.datainterfaces + format: "AlphaOmega (.mpx)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to the folder of .mpx files" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: 
"ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'raw', 'lfp', or 'processed'" + + - name: AxonRecordingInterface + module: neuroconv.datainterfaces + format: "Axon Binary Format (.abf) - extracellular" + source_data: + file_path: + type: FilePath + description: "Path to an Axon Binary Format (.abf) file" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - ElectricalSeriesRaw + - Device (Axon Instruments) + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'raw', 'lfp', or 'processed'" + + - name: AxonaRecordingInterface + module: neuroconv.datainterfaces + format: "Axona DacqUSB (.bin + .set)" + source_data: + file_path: + type: FilePath + description: "Path to .bin file" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device (Axona) + - ElectrodeGroup (tetrode-based) + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'raw', 'lfp', or 'processed'" + + - name: BiocamRecordingInterface + module: neuroconv.datainterfaces + format: "Biocam (.bwr)" + source_data: + file_path: + type: FilePath + description: "Path to the .bwr file" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: 
BlackrockRecordingInterface + module: neuroconv.datainterfaces + format: "Blackrock (.ns0-.ns6)" + source_data: + file_path: + type: FilePath + description: "Path to Blackrock .ns1/.ns2/.ns3/.ns4/.ns5/.ns6 file" + nsx_override: + type: FilePath + description: "NSx file to load if file_path suffix is empty" + optional: true + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: CellExplorerRecordingInterface + module: neuroconv.datainterfaces + format: "CellExplorer (.dat + .session.mat)" + source_data: + folder_path: + type: DirectoryPath + description: "Folder containing the .session.mat file and .dat binary" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: EDFRecordingInterface + module: neuroconv.datainterfaces + format: "European Data Format (.edf)" + source_data: + file_path: + type: FilePath + description: "Path to the .edf file" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + channels_to_skip: + type: list + description: "Channels to skip (e.g. 
non-neural channels)" + optional: true + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: IntanRecordingInterface + module: neuroconv.datainterfaces + format: "Intan RHD/RHS amplifier channels (.rhd/.rhs)" + source_data: + file_path: + type: FilePath + description: "Path to either a .rhd or a .rhs file" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + ignore_integrity_checks: + type: bool + description: "If True, load data that violates integrity assumptions" + optional: true + default: false + creates: + - ElectricalSeries + - ElectricalSeriesRaw + - Device (Intan) + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: MaxOneRecordingInterface + module: neuroconv.datainterfaces + format: "MaxOne/Maxwell (.raw.h5)" + source_data: + file_path: + type: FilePath + description: "Path to the .raw.h5 file" + hdf5_plugin_path: + type: DirectoryPath + description: "Path to HDF5 plugin library" + optional: true + download_plugin: + type: bool + description: "Whether to download the decompression plugin" + optional: true + default: true + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device (Maxwell) + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + notes: "Linux only" + + - name: MCSRawRecordingInterface + module: neuroconv.datainterfaces + format: "MCSRaw Multi Channel Systems (.raw)" + source_data: + file_path: + type: FilePath + description: "Path to the .raw file" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: 
"ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: MEArecRecordingInterface + module: neuroconv.datainterfaces + format: "MEArec simulated recording (.h5)" + source_data: + file_path: + type: FilePath + description: "Path to the MEArec .h5 file" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device (probe-specific) + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: NeuralynxRecordingInterface + module: neuroconv.datainterfaces + format: "Neuralynx (.ncs/.nse/.ntt/.nev)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to Neuralynx directory" + stream_name: + type: str + description: "The name of the recording stream to load" + optional: true + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device (acquisition system) + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: NeuroScopeRecordingInterface + module: neuroconv.datainterfaces + format: "NeuroScope (.dat + .xml)" + source_data: + file_path: + type: FilePath + description: "Path to .dat file" + gain: + type: float + description: "Conversion factors from int16 to Volts (e.g. 
0.195 for Intan)" + optional: true + xml_file_path: + type: FilePath + description: "Path to .xml file containing device and electrode config" + optional: true + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table (with shank_electrode_number, group_name) + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: OpenEphysRecordingInterface + module: neuroconv.datainterfaces + format: "OpenEphys (legacy .continuous or binary .dat)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to OpenEphys directory" + stream_name: + type: str + description: "The name of the recording stream" + optional: true + block_index: + type: int + description: "The index of the block to extract" + optional: true + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + notes: "Auto-detects legacy vs binary format and delegates to appropriate sub-interface" + + - name: OpenEphysBinaryRecordingInterface + module: neuroconv.datainterfaces + format: "OpenEphys Binary (.dat + .oebin)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to directory containing OpenEphys binary files" + stream_name: + type: str + description: "The name of the recording stream to load" + optional: true + block_index: + type: int + description: "The index of the block to extract" + optional: true + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + 
stub_test: + type: bool + description: "Only convert a small subset" + + - name: OpenEphysLegacyRecordingInterface + module: neuroconv.datainterfaces + format: "OpenEphys Legacy (.continuous)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to directory containing OpenEphys legacy files" + stream_name: + type: str + description: "The name of the recording stream" + optional: true + block_index: + type: int + description: "The index of the block to extract" + optional: true + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: PlexonRecordingInterface + module: neuroconv.datainterfaces + format: "Plexon wideband (.plx)" + source_data: + file_path: + type: FilePath + description: "Path to the .plx file" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + stream_name: + type: str + optional: true + default: "WB-Wideband" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: Plexon2RecordingInterface + module: neuroconv.datainterfaces + format: "Plexon2 (.pl2)" + source_data: + file_path: + type: FilePath + description: "Path to the .pl2 file" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: Spike2RecordingInterface + module: neuroconv.datainterfaces + format: "Spike2/CED (.smrx/.smr)" + source_data: + file_path: + type: 
FilePath + description: "Path to .smr or .smrx file" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: SpikeGadgetsRecordingInterface + module: neuroconv.datainterfaces + format: "SpikeGadgets (.rec)" + source_data: + file_path: + type: FilePath + description: "Path to the .rec file" + stream_id: + type: str + optional: true + default: "trodes" + gains: + type: ArrayType + description: "Conversion factors for each channel (or single value for all)" + optional: true + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: TdtRecordingInterface + module: neuroconv.datainterfaces + format: "Tucker-Davis Technologies (.tbk/.tev/.tsq/.tbx)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to directory with TDT files (TSQ, TBK, TEV, SEV)" + gain: + type: float + description: "Conversion factor from int16 to microvolts" + stream_id: + type: str + description: "Stream to select (deprecated, use stream_name)" + optional: true + default: "0" + stream_name: + type: str + description: "Name of the stream to select" + optional: true + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: WhiteMatterRecordingInterface + module: neuroconv.datainterfaces + format: "WhiteMatter binary 
(.bin)" + source_data: + file_path: + type: FilePath + description: "Path to the binary file" + sampling_frequency: + type: float + description: "The sampling frequency" + num_channels: + type: int + description: "Number of channels in the recording" + channel_ids: + type: list + description: "A list of channel ids" + optional: true + is_filtered: + type: bool + description: "If True, the recording is assumed to be filtered" + optional: true + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeries" + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + sorting: + - name: BlackrockSortingInterface + module: neuroconv.datainterfaces + format: "Blackrock spike data (.nev)" + source_data: + file_path: + type: FilePath + description: "Path to the .nev data file" + sampling_frequency: + type: float + description: "Sampling frequency for the sorting extractor" + optional: true + nsx_to_load: + type: "int | list | str" + description: "IDs of nsX file from which to load data" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - Units table + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'units' or 'processing'" + units_name: + type: str + description: "Name of the units table" + + - name: CellExplorerSortingInterface + module: neuroconv.datainterfaces + format: "CellExplorer (.spikes.cellinfo.mat)" + source_data: + file_path: + type: FilePath + description: "Path to .spikes.cellinfo.mat file" + verbose: + type: bool + optional: true + default: false + creates: + - Units table (with clu_id, group_id, location, cell_type) + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: 
bool + description: "Only convert a small subset" + write_as: + type: str + description: "'units' or 'processing'" + write_ecephys_metadata: + type: bool + description: "Write electrode information from metadata" + + - name: KiloSortSortingInterface + module: neuroconv.datainterfaces + format: "KiloSort output (Phy folder with params.py, .npy files)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to the output Phy folder (containing the params.py)" + keep_good_only: + type: bool + description: "If True, only Kilosort-labeled 'good' units are returned" + optional: true + default: false + verbose: + type: bool + optional: true + default: false + creates: + - Units table (with KSLabel, Amplitude, ContamPct, depth, fr, etc.) + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'units' or 'processing'" + + - name: NeuralynxSortingInterface + module: neuroconv.datainterfaces + format: "Neuralynx sorting (.nse/.ntt/.nev)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to folder containing Neuralynx sorting files" + sampling_frequency: + type: float + description: "Specific sampling frequency if desired" + optional: true + verbose: + type: bool + optional: true + default: false + stream_id: + type: str + description: "Used to calculate t_start" + optional: true + creates: + - Units table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'units' or 'processing'" + + - name: NeuroScopeSortingInterface + module: neuroconv.datainterfaces + format: "NeuroScope (.res/.clu + .xml)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to folder containing .clu and .res files" + keep_mua_units: + type: bool + description: "Whether to return sorted spikes from multi-unit activity" + optional: true + default: true + exclude_shanks: + type: "list[int]" + 
description: "List of shank indices to ignore" + optional: true + xml_file_path: + type: FilePath + description: "Path to .xml file with electrode config" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - Units table + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'units' or 'processing'" + + - name: OpenEphysSortingInterface + module: neuroconv.datainterfaces + format: "OpenEphys sorting (.spikes)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to directory containing OpenEphys .spikes files" + experiment_id: + type: int + optional: true + default: 0 + recording_id: + type: int + optional: true + default: 0 + creates: + - Units table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: PhySortingInterface + module: neuroconv.datainterfaces + format: "Phy output (.npy files)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to the output Phy folder (containing the params.py)" + exclude_cluster_groups: + type: "list[str]" + description: "Cluster groups to exclude (e.g. 'noise', 'mua')" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - Units table (with KSLabel, Amplitude, ContamPct, depth, fr, etc.) 
+ conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'units' or 'processing'" + + - name: PlexonSortingInterface + module: neuroconv.datainterfaces + format: "Plexon sorting (.plx)" + source_data: + file_path: + type: FilePath + description: "Path to the .plx file" + verbose: + type: bool + optional: true + default: false + creates: + - Units table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'units' or 'processing'" + + lfp: + - name: AxonaLFPDataInterface + module: neuroconv.datainterfaces + format: "Axona LFP (.eeg files + .set)" + source_data: + file_path: + type: FilePath + description: "Path to .bin or .set file" + creates: + - ElectricalSeriesLFP (in ecephys processing module) + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'raw', 'lfp', or 'processed' (default: 'lfp')" + notes: "Loads all data into memory (not lazy)" + + - name: CellExplorerLFPInterface + module: neuroconv.datainterfaces + format: "CellExplorer LFP (.lfp + .session.mat)" + source_data: + folder_path: + type: DirectoryPath + description: "Folder containing the .session.mat file and .lfp binary" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeriesLFP" + creates: + - ElectricalSeriesLFP (in ecephys processing module) + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'raw', 'lfp', or 'processed' (default: 'lfp')" + + - name: NeuroScopeLFPInterface + module: neuroconv.datainterfaces + format: "NeuroScope LFP (.lfp/.eeg + .xml)" + source_data: + file_path: + type: FilePath + description: "Path 
to .lfp or .eeg file" + gain: + type: float + description: "Conversion factor int16 to Volts (e.g. 0.195)" + optional: true + xml_file_path: + type: FilePath + description: "Path to .xml file with electrode config" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - ElectricalSeriesLFP (in ecephys processing module) + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + write_as: + type: str + description: "'raw', 'lfp', or 'processed' (default: 'lfp')" + + - name: PlexonLFPInterface + module: neuroconv.datainterfaces + format: "Plexon low-pass filtered (.plx)" + source_data: + file_path: + type: FilePath + description: "Path to the .plx file" + verbose: + type: bool + optional: true + default: false + es_key: + type: str + optional: true + default: "ElectricalSeriesLF" + stream_name: + type: str + optional: true + default: "FPl-Low Pass Filtered" + creates: + - ElectricalSeriesLFP (in ecephys processing module) + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + analog: + - name: SpikeGLXNIDQInterface + module: neuroconv.datainterfaces + format: "SpikeGLX NIDQ board (.nidq.bin + .nidq.meta)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to folder containing the .nidq.bin file" + verbose: + type: bool + optional: true + default: false + metadata_key: + type: str + optional: true + default: "SpikeGLXNIDQ" + analog_channel_groups: + type: "dict[str, dict]" + description: "Dictionary mapping group names to analog channel configurations" + optional: true + digital_channel_groups: + type: "dict[str, dict]" + description: "Dictionary mapping group names to digital channel configurations with labels_map" + optional: true + creates: + - TimeSeries (analog channels) + - LabeledEvents (digital channels, from ndx-events) + - 
Device (NIDQBoard) + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + always_write_timestamps: + type: bool + description: "If True, always writes timestamps instead of sampling rate" + + - name: SpikeGLXSyncChannelInterface + module: neuroconv.datainterfaces + format: "SpikeGLX sync channel from Neuropixel probes" + source_data: + folder_path: + type: DirectoryPath + description: "Path to folder containing the SpikeGLX .imec files" + stream_id: + type: str + description: "The stream ID for the sync channel (e.g. 'imec0.ap-SYNC', 'imec1.lf-SYNC')" + verbose: + type: bool + optional: true + default: false + metadata_key: + type: str + optional: true + default: "SpikeGLXSync" + creates: + - TimeSeries (sync channel) + - Device (NeuropixelsImec) + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: IntanAnalogInterface + module: neuroconv.datainterfaces + format: "Intan non-amplifier analog streams (.rhd/.rhs)" + source_data: + file_path: + type: FilePath + description: "Path to either a .rhd or a .rhs file" + stream_name: + type: str + description: "Stream name: 'RHD2000 auxiliary input channel', 'USB board ADC input channel', 'DC Amplifier channel', etc." 
+ verbose: + type: bool + optional: true + default: false + metadata_key: + type: str + optional: true + default: "TimeSeriesAnalogIntan" + creates: + - TimeSeries (analog data in acquisition) + - Device (Intan) + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: EDFAnalogInterface + module: neuroconv.datainterfaces + format: "EDF auxiliary/analog channels (.edf)" + source_data: + file_path: + type: FilePath + description: "Path to the .edf file" + channels_to_include: + type: "list[str]" + description: "Specific channel IDs to include" + optional: true + verbose: + type: bool + optional: true + default: false + metadata_key: + type: str + optional: true + default: "analog_edf_metadata_key" + creates: + - TimeSeries (analog data in acquisition) + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: OpenEphysBinaryAnalogInterface + module: neuroconv.datainterfaces + format: "OpenEphys Binary ADC/analog channels (.dat + .oebin)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to OpenEphys directory (.dat files)" + stream_name: + type: str + description: "The name of the recording stream to load" + optional: true + block_index: + type: int + description: "The index of the block to extract" + optional: true + verbose: + type: bool + optional: true + default: false + time_series_name: + type: str + optional: true + default: "TimeSeriesOpenEphysAnalog" + creates: + - TimeSeries (ADC/analog data in acquisition) + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + position: + - name: AxonaPositionDataInterface + module: neuroconv.datainterfaces + format: "Axona position tracking (.bin/.set)" + source_data: + file_path: + type: str + description: "Path to .bin or .set file" + creates: + - Position (SpatialSeries in behavior processing module) + conversion_options: {} + + - name: 
AxonaUnitRecordingInterface + module: neuroconv.datainterfaces + format: "Axona unit recording (.bin/.set)" + source_data: + file_path: + type: FilePath + description: "Path to Axona file" + noise_std: + type: float + optional: true + default: 3.5 + creates: + - ElectricalSeries + - Device + - ElectrodeGroup + - electrodes table + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + +ophys: + imaging: + - name: BrukerTiffMultiPlaneImagingInterface + module: neuroconv.datainterfaces + format: "Bruker TIFF multi-plane (.ome.tif + .xml + .env)" + source_data: + folder_path: + type: DirectoryPath + description: "Folder containing Bruker TIF image files and config files" + stream_name: + type: str + description: "The name of the recording stream (e.g. 'Ch2')" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - TwoPhotonSeries (volumetric) + - ImagingPlane + - Device (BrukerFluorescenceMicroscope) + - OpticalChannel + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + stub_frames: + type: int + description: "Number of frames for stub test" + photon_series_type: + type: str + description: "'TwoPhotonSeries' or 'OnePhotonSeries'" + + - name: BrukerTiffSinglePlaneImagingInterface + module: neuroconv.datainterfaces + format: "Bruker TIFF single plane (.ome.tif + .xml + .env)" + source_data: + folder_path: + type: DirectoryPath + description: "Folder containing Bruker TIF image files and config files" + stream_name: + type: str + description: "The name of the recording stream (e.g. 
'Ch2')" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - TwoPhotonSeries + - ImagingPlane + - Device (BrukerFluorescenceMicroscope) + - OpticalChannel + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + photon_series_type: + type: str + description: "'TwoPhotonSeries' or 'OnePhotonSeries'" + + - name: FemtonicsImagingInterface + module: neuroconv.datainterfaces + format: "Femtonics MESc (.mesc)" + source_data: + file_path: + type: FilePath + description: "Path to the .mesc file" + session_name: + type: str + description: "Name of the MSession (e.g. 'MSession_0')" + optional: true + munit_name: + type: str + description: "Name of the MUnit (e.g. 'MUnit_0')" + optional: true + channel_name: + type: str + description: "Name of the channel to extract (e.g. 'UG', 'UR')" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - TwoPhotonSeries + - ImagingPlane (with grid_spacing, geometric transformations) + - Device (Femtonics microscope) + - OpticalChannel (with PMT settings) + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + photon_series_type: + type: str + description: "'TwoPhotonSeries' or 'OnePhotonSeries'" + + - name: Hdf5ImagingInterface + module: neuroconv.datainterfaces + format: "HDF5 imaging (.h5/.hdf5)" + source_data: + file_path: + type: FilePath + description: "Path to .h5 or .hdf5 file" + mov_field: + type: str + optional: true + default: "mov" + sampling_frequency: + type: float + optional: true + start_time: + type: float + optional: true + metadata: + type: dict + optional: true + channel_names: + type: ArrayType + optional: true + verbose: + type: bool + optional: true + default: false + photon_series_type: + type: str + optional: true + default: "TwoPhotonSeries" + creates: + - TwoPhotonSeries or OnePhotonSeries + - ImagingPlane + - Device + - OpticalChannel + conversion_options: + stub_test: 
+ type: bool + description: "Only convert a small subset" + + - name: InscopixImagingInterface + module: neuroconv.datainterfaces + format: "Inscopix (.isxd)" + source_data: + file_path: + type: FilePath + description: "Path to the .isxd Inscopix file" + verbose: + type: bool + optional: true + default: false + creates: + - OnePhotonSeries + - ImagingPlane (with acquisition details) + - Device (Inscopix microscope with serial number) + - OpticalChannel + - Subject metadata + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + notes: "Automatically detects multiplane files and raises error (not yet supported)" + + - name: MicroManagerTiffImagingInterface + module: neuroconv.datainterfaces + format: "Micro-Manager TIFF (.ome.tif + DisplaySettings.json)" + source_data: + folder_path: + type: DirectoryPath + description: "Folder containing OME-TIF image files and DisplaySettings JSON" + verbose: + type: bool + optional: true + default: false + creates: + - TwoPhotonSeries + - ImagingPlane + - Device + - OpticalChannel + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: MiniscopeImagingInterface + module: neuroconv.datainterfaces + format: "Miniscope (.avi + metaData.json + timeStamps.csv)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to Miniscope folder containing .avi files and metaData.json" + optional: true + file_paths: + type: list + description: "List of .avi file paths for non-standard folder structures" + optional: true + configuration_file_path: + type: str + description: "Path to metaData.json (deprecated)" + optional: true + timeStamps_file_path: + type: str + description: "Path to timeStamps.csv file" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - OnePhotonSeries + - ImagingPlane + - Device (Miniscope, via ndx-miniscope) + - OpticalChannel + conversion_options: + stub_test: + type: bool + 
description: "Only convert a small subset" + photon_series_type: + type: str + description: "'OnePhotonSeries' (default) or 'TwoPhotonSeries'" + + - name: SbxImagingInterface + module: neuroconv.datainterfaces + format: "Scanbox (.sbx)" + source_data: + file_path: + type: FilePath + description: "Path to .sbx file" + sampling_frequency: + type: float + optional: true + verbose: + type: bool + optional: true + default: false + photon_series_type: + type: str + optional: true + default: "TwoPhotonSeries" + creates: + - TwoPhotonSeries or OnePhotonSeries + - ImagingPlane + - Device (Scanbox) + - OpticalChannel + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: ScanImageImagingInterface + module: neuroconv.datainterfaces + format: "ScanImage TIFF (.tif/.tiff)" + source_data: + file_path: + type: FilePath + description: "Path to the ScanImage TIFF file (first file in multi-file series)" + optional: true + channel_name: + type: str + description: "Name of the channel to extract (e.g. 
'Channel 1')" + optional: true + slice_sample: + type: int + description: "Specific frame from each slice in volumetric data" + optional: true + plane_index: + type: int + description: "Specific plane to extract from volumetric data" + optional: true + file_paths: + type: "list[FilePath]" + description: "Override automatic file detection with explicit file list" + optional: true + interleave_slice_samples: + type: bool + description: "Whether to interleave all slice samples as separate time points" + optional: true + fallback_sampling_frequency: + type: float + description: "Fallback sampling frequency if not in metadata" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - TwoPhotonSeries + - ImagingPlane + - Device + - OpticalChannel + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + photon_series_type: + type: str + description: "'TwoPhotonSeries' or 'OnePhotonSeries'" + + - name: ScanImageLegacyImagingInterface + module: neuroconv.datainterfaces + format: "ScanImage Legacy TIFF (.tif/.tiff)" + source_data: + file_path: + type: FilePath + description: "Path to ScanImage TIFF file" + channel_name: + type: str + description: "Name of the channel to extract" + optional: true + plane_name: + type: str + description: "Name of the plane to extract" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - TwoPhotonSeries + - ImagingPlane + - Device + - OpticalChannel + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: TiffImagingInterface + module: neuroconv.datainterfaces + format: "Multi-page TIFF (.tif/.tiff)" + source_data: + file_path: + type: FilePath + description: "Path to TIFF file (deprecated, use file_paths)" + optional: true + file_paths: + type: "list[FilePath]" + description: "List of paths to TIFF files" + optional: true + sampling_frequency: + type: float + description: "Sampling frequency 
in Hz" + dimension_order: + type: str + optional: true + default: "ZCT" + description: "Order of dimensions (Z, C, T)" + num_channels: + type: int + optional: true + default: 1 + channel_name: + type: str + optional: true + num_planes: + type: int + optional: true + default: 1 + verbose: + type: bool + optional: true + default: false + photon_series_type: + type: str + optional: true + default: "TwoPhotonSeries" + creates: + - TwoPhotonSeries or OnePhotonSeries + - ImagingPlane + - Device + - OpticalChannel + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: ThorImagingInterface + module: neuroconv.datainterfaces + format: "ThorImageLS TIFF (.tif + Experiment.xml)" + source_data: + file_path: + type: FilePath + description: "Path to first OME TIFF file (e.g. ChanA_001_001_001_001.tif)" + channel_name: + type: str + description: "Name of the channel to extract (must match Experiment.xml)" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - TwoPhotonSeries + - ImagingPlane + - Device (ThorLabs 2P Microscope) + - OpticalChannel + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + segmentation: + - name: CaimanSegmentationInterface + module: neuroconv.datainterfaces + format: "CaImAn output (.hdf5)" + source_data: + file_path: + type: FilePath + description: "Path to .hdf5 file" + verbose: + type: bool + optional: true + default: false + creates: + - ImageSegmentation (PlaneSegmentation with ROI masks) + - Fluorescence (RoiResponseSeries) + - ImagingPlane + - Device + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + include_roi_centroids: + type: bool + description: "Include ROI centroid coordinates" + include_roi_acceptance: + type: bool + description: "Include ROI acceptance status" + mask_type: + type: str + description: "'image', 'pixel', or 'voxel'" + + - name: CnmfeSegmentationInterface 
+ module: neuroconv.datainterfaces + format: "CNMF-E output (.mat)" + source_data: + file_path: + type: FilePath + description: "Path to .mat file" + verbose: + type: bool + optional: true + default: false + creates: + - ImageSegmentation (PlaneSegmentation with ROI masks) + - Fluorescence (RoiResponseSeries) + - ImagingPlane + - Device + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: ExtractSegmentationInterface + module: neuroconv.datainterfaces + format: "EXTRACT output (.mat)" + source_data: + file_path: + type: FilePath + description: "Path to .mat file" + sampling_frequency: + type: float + description: "Sampling frequency" + output_struct_name: + type: str + description: "Name of the output struct in the .mat file" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - ImageSegmentation (PlaneSegmentation with ROI masks) + - Fluorescence (RoiResponseSeries) + - ImagingPlane + - Device + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: InscopixSegmentationInterface + module: neuroconv.datainterfaces + format: "Inscopix segmentation (.isxd)" + source_data: + file_path: + type: FilePath + description: "Path to the .isxd Inscopix file" + verbose: + type: bool + optional: true + default: false + creates: + - ImageSegmentation (PlaneSegmentation with ROI masks) + - Fluorescence (RoiResponseSeries) + - ImagingPlane + - Device (Inscopix) + - Subject metadata + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: MinianSegmentationInterface + module: neuroconv.datainterfaces + format: "Minian output (.zarr)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to .zarr output folder" + sampling_frequency: + type: float + description: "Sampling frequency in Hz" + optional: true + timestamps_path: + type: FilePath + description: "Path to the timeStamps.csv 
file" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - ImageSegmentation (PlaneSegmentation with ROI masks) + - Fluorescence (RoiResponseSeries) + - ImagingPlane + - Device + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + include_background_segmentation: + type: bool + description: "Include background segmentation" + include_roi_centroids: + type: bool + description: "Include ROI centroid coordinates" + mask_type: + type: str + description: "'image', 'pixel', or 'voxel'" + + - name: SimaSegmentationInterface + module: neuroconv.datainterfaces + format: "SIMA output (.sima)" + source_data: + file_path: + type: FilePath + description: "Path to .sima file" + sima_segmentation_label: + type: str + optional: true + default: "auto_ROIs" + creates: + - ImageSegmentation (PlaneSegmentation with ROI masks) + - Fluorescence (RoiResponseSeries) + - ImagingPlane + - Device + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + + - name: Suite2pSegmentationInterface + module: neuroconv.datainterfaces + format: "Suite2p output (.npy files in plane# folders)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to Suite2p folder containing 'plane#' sub-folders" + channel_name: + type: str + description: "The name of the channel to load" + optional: true + plane_name: + type: str + description: "The name of the plane to load (e.g. 
'plane0')" + optional: true + plane_segmentation_name: + type: str + description: "The name of the plane segmentation to be added" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - ImageSegmentation (PlaneSegmentation with ROI masks) + - Fluorescence (RoiResponseSeries) + - ImagingPlane + - Device + conversion_options: + stub_test: + type: bool + description: "Only convert a small subset" + include_roi_centroids: + type: bool + description: "Include ROI centroid coordinates" + include_roi_acceptance: + type: bool + description: "Include iscell classification" + mask_type: + type: str + description: "'image', 'pixel', or 'voxel'" + + fiber_photometry: + - name: TDTFiberPhotometryInterface + module: neuroconv.datainterfaces + format: "TDT fiber photometry (Tbk/Tdx/tev/tin/tsq)" + source_data: + folder_path: + type: DirectoryPath + description: "Path to the folder containing TDT data" + verbose: + type: bool + optional: true + default: false + creates: + - FiberPhotometry (ndx-fiber-photometry) + - OpticFiber, ExcitationSource, Photodetector (ndx-ophys-devices) + conversion_options: {} + +behavior: + pose_estimation: + - name: DeepLabCutInterface + module: neuroconv.datainterfaces + format: "DeepLabCut output (.h5 or .csv)" + source_data: + file_path: + type: FilePath + description: "Path to the DLC output file (.h5 or .csv)" + config_file_path: + type: FilePath + description: "Path to .yml config file" + optional: true + subject_name: + type: str + optional: true + default: "ind1" + pose_estimation_metadata_key: + type: str + optional: true + default: "PoseEstimationDeepLabCut" + verbose: + type: bool + optional: true + default: false + creates: + - PoseEstimation (ndx-pose, in behavior processing module) + - PoseEstimationSeries (per bodypart) + - Skeleton + conversion_options: {} + + - name: SLEAPInterface + module: neuroconv.datainterfaces + format: "SLEAP output (.slp)" + source_data: + file_path: + type: FilePath + 
description: "Path to the .slp file" + video_file_path: + type: FilePath + description: "Path of the video for extracting timestamps" + optional: true + verbose: + type: bool + optional: true + default: false + frames_per_second: + type: float + description: "FPS of the video" + optional: true + creates: + - PoseEstimation (ndx-pose, in behavior processing module) + - PoseEstimationSeries (per bodypart) + - Skeleton + conversion_options: {} + + - name: LightningPoseDataInterface + module: neuroconv.datainterfaces + format: "Lightning Pose predictions (.csv + .mp4)" + source_data: + file_path: + type: FilePath + description: "Path to .csv file with predictions" + original_video_file_path: + type: FilePath + description: "Path to the original video file (.mp4)" + labeled_video_file_path: + type: FilePath + description: "Path to the labeled video file (.mp4)" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - PoseEstimation (ndx-pose, in behavior processing module) + - PoseEstimationSeries (per bodypart) + conversion_options: {} + + tracking: + - name: FicTracDataInterface + module: neuroconv.datainterfaces + format: "FicTrac (.dat)" + source_data: + file_path: + type: FilePath + description: "Path to the FicTrac .dat file" + configuration_file_path: + type: FilePath + description: "Path to the FicTrac configuration file" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - Position (multiple SpatialSeries in behavior processing module) + - SpatialSeries for rotation, heading, speed, movement + conversion_options: + reference_frame: + type: str + description: "Reference frame for spatial series" + + - name: NeuralynxNvtInterface + module: neuroconv.datainterfaces + format: "Neuralynx position tracking (.nvt)" + source_data: + file_path: + type: FilePath + description: "Path to the .nvt file" + verbose: + type: bool + optional: true + default: false + creates: + - Position (SpatialSeries in 
behavior processing module) + - CompassDirection (SpatialSeries for head angle) + conversion_options: {} + + video: + - name: ExternalVideoInterface + module: neuroconv.datainterfaces + format: "Video files (.mp4/.avi/.wmv/.mov/.flv/.mkv) - external reference" + source_data: + file_paths: + type: "list[FilePath]" + description: "List of video file paths in sorted, consecutive order" + verbose: + type: bool + optional: true + default: false + video_name: + type: str + description: "Name of this video in the ImageSeries" + optional: true + creates: + - ImageSeries (with external_file reference) + - Device (camera) + conversion_options: {} + notes: "Videos stored as external references (file paths), not embedded in NWB" + + - name: InternalVideoInterface + module: neuroconv.datainterfaces + format: "Video file (.mp4/.avi/.wmv/.mov/.flv/.mkv) - embedded" + source_data: + file_path: + type: FilePath + description: "Path to the video file" + verbose: + type: bool + optional: true + default: false + video_name: + type: str + description: "Name of this video in the ImageSeries" + optional: true + creates: + - ImageSeries (with data stored internally) + - Device (camera) + conversion_options: {} + notes: "Video data embedded directly in NWB file" + + - name: MiniscopeBehaviorInterface + module: neuroconv.datainterfaces + format: "Miniscope behavior camera (.avi + metaData.json)" + source_data: + folder_path: + type: DirectoryPath + description: "The main Miniscope folder with BehavCam subfolders" + verbose: + type: bool + optional: true + default: false + creates: + - ImageSeries (BehavCamImageSeries with external file) + - Device (Miniscope BehavCam, via ndx-miniscope) + conversion_options: {} + + orientation: + - name: MiniscopeHeadOrientationInterface + module: neuroconv.datainterfaces + format: "Miniscope head orientation (headOrientation.csv from BNO055 IMU)" + source_data: + file_path: + type: FilePath + description: "Path to headOrientation.csv with columns: Time 
Stamp (ms), qw, qx, qy, qz" + metadata_key: + type: str + optional: true + default: "TimeSeriesMiniscopeHeadOrientation" + verbose: + type: bool + optional: true + default: false + creates: + - TimeSeries (quaternion data in behavior processing module) + conversion_options: {} + + audio: + - name: AudioInterface + module: neuroconv.datainterfaces + format: "WAV audio (.wav)" + source_data: + file_paths: + type: "list[FilePath]" + description: "List of .wav file paths in sorted, consecutive order" + verbose: + type: bool + optional: true + default: false + creates: + - AcousticWaveformSeries (ndx-sound, in acquisition) + conversion_options: {} + + operant: + - name: MedPCInterface + module: neuroconv.datainterfaces + format: "MedPC output (.txt)" + source_data: + file_path: + type: FilePath + description: "Path to the MedPC file" + session_conditions: + type: dict + description: "Conditions defining the session (e.g. {'Start Date': '11/09/18'})" + start_variable: + type: str + description: "Name of the variable that starts the session" + metadata_medpc_name_to_info_dict: + type: dict + description: "Mapping of MedPC variable names to info dicts with 'name' and 'is_array'" + aligned_timestamp_names: + type: "list[str]" + description: "Variables with externally aligned timestamps" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - Events (ndx-events, in acquisition) + - BehavioralEpochs (IntervalSeries) + conversion_options: {} + +icephys: + - name: AbfInterface + module: neuroconv.datainterfaces + format: "Axon Binary Format for intracellular electrophysiology (.abf)" + source_data: + file_paths: + type: "list[FilePath]" + description: "Array of paths to ABF files" + icephys_metadata: + type: dict + description: "Metadata for this experiment" + optional: true + icephys_metadata_file_path: + type: FilePath + description: "Path to JSON file containing metadata" + optional: true + creates: + - IntracellularRecordingsTable + - 
CurrentClampStimulusSeries / VoltageClampStimulusSeries + - CurrentClampSeries / VoltageClampSeries + - Device (Axon Instruments) + - IntracellularElectrode + conversion_options: {} + +text: + - name: CsvTimeIntervalsInterface + module: neuroconv.datainterfaces + format: "CSV file (.csv)" + source_data: + file_path: + type: FilePath + description: "Path to the CSV file" + read_kwargs: + type: dict + description: "Additional kwargs passed to pandas.read_csv()" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - TimeIntervals (trials table or custom intervals) + conversion_options: + tag: + type: str + description: "Tag for the time intervals table (e.g. 'trials')" + + - name: ExcelTimeIntervalsInterface + module: neuroconv.datainterfaces + format: "Excel file (.xlsx/.xls/.xlsm)" + source_data: + file_path: + type: FilePath + description: "Path to the Excel file" + read_kwargs: + type: dict + description: "Additional kwargs passed to pandas.read_excel()" + optional: true + verbose: + type: bool + optional: true + default: false + creates: + - TimeIntervals (trials table or custom intervals) + conversion_options: + tag: + type: str + description: "Tag for the time intervals table (e.g. 
'trials')" + +image: + - name: ImageInterface + module: neuroconv.datainterfaces + format: "Image files (.png/.jpg/.jpeg/.tiff/.tif/.webp)" + source_data: + file_paths: + type: "list[str | Path]" + description: "List of paths to image files" + optional: true + folder_path: + type: "str | Path" + description: "Path to folder containing images" + optional: true + images_location: + type: str + description: "'acquisition' or 'stimulus'" + optional: true + default: "acquisition" + metadata_key: + type: str + optional: true + default: "Images" + verbose: + type: bool + optional: true + default: true + creates: + - Images container (GrayscaleImage, RGBImage, or RGBAImage) + conversion_options: {} + notes: "Either file_paths or folder_path must be provided, not both" diff --git a/src/pyflask/ai/skill/knowledge/nwb-best-practices.md b/src/pyflask/ai/skill/knowledge/nwb-best-practices.md new file mode 100644 index 000000000..a50843386 --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/nwb-best-practices.md @@ -0,0 +1,108 @@ +# NWB Best Practices + +Distilled from the [official NWB Inspector best practices](https://github.com/NeurodataWithoutBorders/nwbinspector/tree/dev/docs/best_practices). +These are conventions and common-mistake guards that the NWB Inspector checks for. +The conversion agent should follow these when generating code. + +## General + +- **CamelCase for neurodata_type names** (e.g., `ElectricalSeries`, `SpatialSeries`). +- **snake_case for object names** (groups, datasets, attributes). No spaces — use underscores. +- **No slashes or colons in names** — these are path separators in HDF5. +- **No empty strings** — every `description`, `unit`, and text field must have meaningful content. Empty strings and placeholder text like "no description" will be flagged. +- **Avoid metadata duplication** — don't store the same metadata in multiple places. For example, don't add `unit` or `gain` columns to the electrodes table when those belong on `ElectricalSeries`. 
+ +## NWBFile Metadata + +- **File extension**: always `.nwb`. +- **`identifier`**: must be globally unique. Use `str(uuid.uuid4())`. +- **`session_start_time`**: must include timezone info. All other timestamps are relative to this. +- **`timestamps_reference_time`**: defaults to `session_start_time`. Only set explicitly if different. +- **`session_id`**: should be unique across sessions in a dataset. Use a descriptive string, not just a number. +- **`session_description`**: required. Describe what happened in this session. +- **`experiment_description`**: describe the scientific goal. Can use the paper abstract. +- **`experimenter`**: list of strings in "Last, First" format. +- **`institution`**: name of the institution. +- **`keywords`**: list of relevant keywords for discoverability. +- **`related_publications`**: use DOI format: `"doi:10.xxxx/xxxxx"`. +- **Acquisition vs. processing**: raw data goes in `nwbfile.acquisition`. Processed/derived data goes in `nwbfile.processing["module_name"]`. +- **Processing module names**: use standard names: `"ecephys"`, `"ophys"`, `"behavior"`, `"misc"`. Custom names are allowed but standard names enable tool interoperability. + +## Subject + +- **Subject must exist**: every NWB file should have a `Subject` object. +- **`subject_id`**: required for DANDI. Unique identifier for the animal. +- **`sex`**: one of `"M"`, `"F"`, `"U"` (unknown), `"O"` (other). Single uppercase letter. +- **`species`**: Latin binomial (e.g., `"Mus musculus"`) or NCBI taxonomy URI (e.g., `"http://purl.obolibrary.org/obo/NCBITaxon_10090"`). Never use common names like "mouse". +- **`strain`**: the specific strain (e.g., `"C57BL/6J"`). Separate from species. +- **`age`**: ISO 8601 duration format: `"P90D"` (90 days), `"P12W"` (12 weeks), `"P3M"` (3 months). A reference age can be expressed as a range: `"P90D/P120D"`. +- **`date_of_birth`**: preferred over `age` when available (datetime with timezone). 
+- **`weight`**: format as `"numeric unit"`, e.g., `"0.025 kg"` or `"25 g"`. + +## Time Series + +- **Time-first data orientation**: the first dimension of `data` must be time. If your array is `(channels, timepoints)`, transpose it to `(timepoints, channels)`. +- **SI units**: `unit` should be SI where possible (meters, seconds, volts, amperes). Use `conversion` parameter instead of transforming data. +- **Timestamps must be in seconds**: all timestamps are in seconds relative to `session_start_time`. +- **Timestamps must be ascending**: timestamps array must be sorted in ascending order. +- **No NaN in timestamps**: timestamps must never contain NaN values. +- **Use `rate` + `starting_time` for regular sampling**: if data has a constant sampling rate, set `rate` (Hz) and `starting_time` (seconds) instead of providing a `timestamps` array. This saves space and is more precise. +- **Avoid negative timestamps**: all timestamps should be >= 0. Negative timestamps imply data before `session_start_time`, which is usually an error. +- **Use chunking and compression**: for large datasets, use `H5DataIO` with `compression="gzip"` and appropriate chunk sizes. +- **`resolution`**: set to `-1.0` if unknown. Otherwise, provide the smallest meaningful difference between data values. +- **Rate must be positive and nonzero**: if using `rate`, it must be > 0. +- **Use appropriate TimeSeries subtypes**: don't use bare `TimeSeries` when a more specific type exists (e.g., `ElectricalSeries` for ephys, `SpatialSeries` for position). +- **Breaks in continuity**: if there are gaps in recording, either use separate `TimeSeries` objects or provide explicit `timestamps` (not `rate`) to capture the gaps. + +## Tables (DynamicTable) + +- **No JSON strings in columns**: if a column value is structured data, use a proper column type (VectorData, DynamicTableRegion, etc.), not a JSON-encoded string. +- **No empty tables**: don't create DynamicTable objects with zero rows. 
+- **Boolean columns**: name boolean columns with `is_` prefix (e.g., `is_correct`, `is_rewarded`). +- **Timing columns**: name columns containing times with `_time` suffix (e.g., `start_time`, `stop_time`). Use `_times` for ragged arrays of times. +- **Unique IDs**: the `id` column of any DynamicTable should contain unique values. Don't override with non-unique values — use a custom column instead. +- **Avoid single-row tables**: if a table has only one row, consider if there's a more appropriate container. + +## Extracellular Electrophysiology (ecephys) + +- **Electrode `location` is required**: fill with your best estimate of the brain region. Use `"unknown"` if truly unknown. +- **Use Allen Brain Atlas ontology**: for mice, use Allen Brain Atlas terms (full name or abbreviation). Don't invent terms. +- **Anatomical coordinates (`x`, `y`, `z`)**: for precise brain coordinates. For mice, use Allen Institute Common Coordinate Framework v3 (+x = posterior, +y = inferior, +z = right). +- **Relative coordinates (`rel_x`, `rel_y`, `rel_z`)**: for electrode position on the probe. Used by spike sorters to determine proximity. +- **Don't duplicate metadata in electrodes table**: don't add `unit`, `gain`, `offset` columns — those belong on `ElectricalSeries` (`channel_conversion`, `offset`). +- **Spike times must be ascending**: within each unit, spike times must be in ascending order. +- **Spike times must be positive**: all spike times >= 0. Negative times suggest trial-alignment that should be corrected to session-alignment. +- **Use `obs_intervals`**: if the recording has gaps where a unit was not observable, set `obs_intervals` on the units table. No spikes should exist outside observed intervals. + +## Optical Physiology (ophys) + +- **`image_mask` shape consistency**: the `image_mask` column of `PlaneSegmentation` must have the same shape as `reference_images`. 
+- **ImagingPlane required fields**: always set `excitation_lambda`, `indicator`, and `location` on `ImagingPlane`. +- **TwoPhotonSeries rate**: must be nonzero. Get from Suite2p `ops["fs"]` or calculate from timestamps. +- **Store raw imaging data internally**: use chunking + lossless compression (not external file mode). + +## Behavior + +- **SpatialSeries dimensionality**: must have 1 (x), 2 (x,y), or 3 (x,y,z) columns. Not more. +- **SpatialSeries is only for position**: velocity, acceleration, and other derived signals should use `TimeSeries` or `BehavioralTimeSeries`, not `SpatialSeries`. +- **CompassDirection units**: must be `"degrees"` or `"radians"`. +- **CompassDirection data range**: degrees must be in [-360, 360]; radians in [-2pi, 2pi]. + +## Image Series + +- **External mode for animal videos**: behavioral videos (webcam, etc.) should use `external_file` to reference the video file alongside the NWB file. This allows video-optimized lossy codecs. +- **Internal storage for neural imaging**: TwoPhotonSeries and similar neural data should be stored inside the NWB file with lossless compression. +- **Relative paths for external files**: `external_file` paths should be relative to the NWB file location. +- **`starting_frame`**: only set when using `external_file`. Not applicable for internally stored data. + +## Optogenetics + +- **Every `OptogeneticStimulusSite` must have an `OptogeneticSeries`**: don't create stimulus sites without corresponding stimulus data. + +## Extensions + +- **Use sparingly**: prefer core NWB types and DynamicTable columns before creating extensions. +- **Check for existing extensions** in the NDX Catalog before creating new ones. +- **Use `ndx-template`** to scaffold new extensions. +- **Cache the spec**: always write the extension specification into the NWB file (`cache_spec=True`). +- **Flag for human expert**: the conversion skill should flag when an extension might be needed rather than creating one automatically. 
diff --git a/src/pyflask/ai/skill/knowledge/pynwb-advanced-io.md b/src/pyflask/ai/skill/knowledge/pynwb-advanced-io.md new file mode 100644 index 000000000..286fb515a --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/pynwb-advanced-io.md @@ -0,0 +1,98 @@ +# Advanced I/O — PyNWB Patterns + +Patterns for efficient storage of large datasets. + +## H5DataIO — Compression and Chunking + +```python +from hdmf.backends.hdf5.h5_utils import H5DataIO + +# Basic gzip compression (good default) +compressed = H5DataIO(data=large_array, compression="gzip") + +# Higher compression level (1-9, default 4) +compressed = H5DataIO(data=large_array, compression="gzip", compression_opts=9) + +# LZF — faster compression/decompression, lower ratio +compressed = H5DataIO(data=large_array, compression="lzf") + +# Custom chunk shape (important for access patterns) +compressed = H5DataIO( + data=large_array, # shape: (n_frames, height, width) + compression="gzip", + chunks=(1, height, width), # one frame per chunk for frame-by-frame access +) + +# For time series data — chunk along time axis +compressed = H5DataIO( + data=traces, # shape: (n_timepoints, n_channels) + compression="gzip", + chunks=(10000, n_channels), # 10k timepoints per chunk +) +``` + +## DataChunkIterator — Datasets Too Large for Memory + +When data doesn't fit in RAM, use `DataChunkIterator` to stream data during write: + +```python +from hdmf.data_utils import DataChunkIterator + +def data_generator(): + """Yield one chunk at a time from files on disk.""" + for file_path in sorted(data_files): + chunk = np.load(file_path) # load one chunk at a time + yield chunk + +data_iterator = DataChunkIterator( + data=data_generator(), + maxshape=(None, n_channels), # None = unlimited along first dim + dtype=np.float32, +) + +ts = TimeSeries( + name="large_recording", + data=H5DataIO(data_iterator, compression="gzip"), + rate=30000.0, + unit="volts", +) +nwbfile.add_acquisition(ts) +``` + +## GenericDataChunkIterator — From 
Existing Arrays + +For arrays that are already memory-mapped (e.g., from HDF5 or memmap): + +```python +from hdmf.data_utils import GenericDataChunkIterator + +class MyIterator(GenericDataChunkIterator): + def _get_data(self, selection): + return my_memmap[selection] + + def _get_maxshape(self): + return my_memmap.shape + + def _get_dtype(self): + return my_memmap.dtype + +iterator = MyIterator(buffer_gb=1.0) # process 1 GB at a time +``` + +## When to Use Each Approach + +| Data Size | Approach | +|-----------|----------| +| < 1 GB | `H5DataIO(data=array, compression="gzip")` | +| 1-10 GB | `H5DataIO` with explicit `chunks` tuned for access pattern | +| > 10 GB | `DataChunkIterator` or `GenericDataChunkIterator` to stream | +| Memory-mapped source | `GenericDataChunkIterator` subclass | + +## Notes + +- Always use compression for large datasets. `gzip` is the safest default (universally + supported). `lzf` is faster but HDF5-specific. +- Chunk shape should match the most common access pattern: if you read frames one at a + time, chunk by frame; if you read channels, chunk by channel. +- `maxshape=(None, ...)` allows the dataset to be extended along the first dimension. +- The `buffer_gb` parameter on `GenericDataChunkIterator` controls memory usage. diff --git a/src/pyflask/ai/skill/knowledge/pynwb-behavior.md b/src/pyflask/ai/skill/knowledge/pynwb-behavior.md new file mode 100644 index 000000000..24e7b47e0 --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/pynwb-behavior.md @@ -0,0 +1,137 @@ +# Behavior Containers — PyNWB Patterns + +All behavior container types and when to use each. 
+ +## Container Selection Guide + +| Data Type | Container | Child Type | Example | +|-----------|-----------|-----------|---------| +| Spatial position (x, y, z) | `Position` | `SpatialSeries` | Running on linear track | +| Continuous signals | `BehavioralTimeSeries` | `TimeSeries` | Running speed, lick rate | +| Irregular events | `BehavioralEvents` | `TimeSeries` | Lever presses at variable times | +| Pupil diameter | `PupilTracking` | `TimeSeries` | Eye tracking pupil size | +| Gaze position | `EyeTracking` | `SpatialSeries` | Eye tracking x,y position | +| Head direction | `CompassDirection` | `SpatialSeries` | Angular heading | + +All containers go in `processing["behavior"]`. + +## Position + +```python +from pynwb.behavior import Position, SpatialSeries +from neuroconv.tools.nwb_helpers import get_module + +position = Position() +position.create_spatial_series( + name="animal_position", + data=pos_xy, # shape: (n_timepoints, 2) + timestamps=timestamps, + unit="meters", + reference_frame="Top-left corner of arena", + conversion=0.01, # if data is in cm +) + +behavior = get_module(nwbfile, "behavior", "Processed behavioral data") +behavior.add(position) +``` + +## BehavioralTimeSeries + +For **continuous** behavioral signals sampled at regular intervals: + +```python +from pynwb.behavior import BehavioralTimeSeries +from pynwb import TimeSeries + +bts = BehavioralTimeSeries() +bts.create_timeseries( + name="running_speed", + data=speed, + rate=30.0, + unit="m/s", + description="Treadmill running speed", +) +bts.create_timeseries( + name="lick_rate", + data=lick_rate, + rate=30.0, + unit="licks/s", + description="Lick rate smoothed over 100ms", +) +behavior.add(bts) +``` + +## BehavioralEvents + +For **irregularly timed** behavioral events: + +```python +from pynwb.behavior import BehavioralEvents + +be = BehavioralEvents() +be.create_timeseries( + name="lever_presses", + data=np.ones(n_presses), # amplitude/value at each event + timestamps=press_times, # 
irregular timestamps + unit="n.a.", + description="Times of lever press events", +) +behavior.add(be) +``` + +## PupilTracking + +```python +from pynwb.behavior import PupilTracking + +pt = PupilTracking() +pt.create_timeseries( + name="pupil_diameter", + data=pupil_diam, + rate=60.0, + unit="meters", + conversion=1e-3, # if data is in mm + description="Pupil diameter from DeepLabCut", +) +behavior.add(pt) +``` + +## EyeTracking + +```python +from pynwb.behavior import EyeTracking, SpatialSeries + +et = EyeTracking() +et.create_spatial_series( + name="gaze_position", + data=gaze_xy, # shape: (n_timepoints, 2) + rate=60.0, + unit="meters", + reference_frame="Screen center", + description="Gaze position from eye tracker", +) +behavior.add(et) +``` + +## CompassDirection + +```python +from pynwb.behavior import CompassDirection, SpatialSeries + +cd = CompassDirection() +cd.create_spatial_series( + name="head_direction", + data=heading_angles, # shape: (n_timepoints,) + rate=30.0, + unit="radians", # must be "radians" or "degrees" + reference_frame="0=East, pi/2=North", +) +behavior.add(cd) +``` + +## Notes + +- `SpatialSeries` is only for position data (1-3 columns). For velocity, acceleration, + or other derived signals, use `TimeSeries` inside `BehavioralTimeSeries`. +- `CompassDirection` data must be in `[-2pi, 2pi]` (radians) or `[-360, 360]` (degrees). +- Prefer `rate` + `starting_time` over `timestamps` for regularly sampled data. diff --git a/src/pyflask/ai/skill/knowledge/pynwb-icephys.md b/src/pyflask/ai/skill/knowledge/pynwb-icephys.md new file mode 100644 index 000000000..e4dec7042 --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/pynwb-icephys.md @@ -0,0 +1,125 @@ +# Intracellular Electrophysiology (icephys) — PyNWB Patterns + +Construction patterns for patch clamp / intracellular recording data. 
+ +## Device + Electrode + +```python +from pynwb.icephys import IntracellularElectrode + +device = nwbfile.create_device( + name="Amplifier", + description="MultiClamp 700B", + manufacturer="Molecular Devices", +) + +electrode = nwbfile.create_icephys_electrode( + name="electrode_0", + description="Patch clamp electrode", + device=device, +) +``` + +## Recording Series Types + +**CurrentClampSeries** — response recorded during current injection: +```python +from pynwb.icephys import CurrentClampSeries + +cc_response = CurrentClampSeries( + name="current_clamp_response", + data=voltage_trace, # recorded voltage (numpy array) + electrode=electrode, + rate=20000.0, # sampling rate in Hz + unit="volts", + gain=1.0, + stimulus_description="step_protocol", + sweep_number=np.uint32(0), # optional, for grouping sweeps +) +nwbfile.add_acquisition(cc_response) +``` + +**CurrentClampStimulusSeries** — the injected current waveform: +```python +from pynwb.icephys import CurrentClampStimulusSeries + +cc_stimulus = CurrentClampStimulusSeries( + name="current_clamp_stimulus", + data=current_waveform, # injected current (numpy array) + electrode=electrode, + rate=20000.0, + unit="amperes", + gain=1.0, + sweep_number=np.uint32(0), +) +nwbfile.add_stimulus(cc_stimulus) +``` + +**VoltageClampSeries** — response recorded during voltage clamp: +```python +from pynwb.icephys import VoltageClampSeries + +vc_response = VoltageClampSeries( + name="voltage_clamp_response", + data=current_trace, # recorded current + electrode=electrode, + rate=20000.0, + unit="amperes", + gain=1.0, + stimulus_description="voltage_step", +) +nwbfile.add_acquisition(vc_response) +``` + +**VoltageClampStimulusSeries** — the command voltage: +```python +from pynwb.icephys import VoltageClampStimulusSeries + +vc_stimulus = VoltageClampStimulusSeries( + name="voltage_clamp_stimulus", + data=voltage_command, + electrode=electrode, + rate=20000.0, + unit="volts", + gain=1.0, +) +nwbfile.add_stimulus(vc_stimulus) 
+```
+
+**IZeroClampSeries** — recording with no current injection (I=0 mode):
+```python
+from pynwb.icephys import IZeroClampSeries
+
+izero = IZeroClampSeries(
+    name="izero_response",
+    data=voltage_trace,
+    electrode=electrode,
+    rate=20000.0,
+    unit="volts",
+    stimulus_description="I=0",
+)
+nwbfile.add_acquisition(izero)
+```
+
+## Notes
+
+- **`SweepTable` is deprecated.** Use `sweep_number` on individual series for simple
+  grouping of stimulus/response pairs from the same sweep; for richer structure, prefer
+  the modern icephys metadata tables (`IntracellularRecordingsTable` and, if needed, the
+  higher-level `SimultaneousRecordingsTable` / `SequentialRecordingsTable` /
+  `RepetitionsTable` / `ExperimentalConditionsTable`) over the deprecated
+  `SweepTable` hierarchy.
+- Each electrode represents a physical pipette. Multiple sweeps use the same electrode.
+- Stimulus and response series should be paired: for each stimulus series, there should
+  be a corresponding acquisition series recorded from the same electrode.
+- `gain` is the amplifier gain (float). Set to `1.0` if gain is already applied to data.
+
+## Metadata YAML Template
+
+```yaml
+Icephys:
+  Device:
+    - name: Amplifier
+      description: MultiClamp 700B patch clamp amplifier
+      manufacturer: Molecular Devices
+  IntracellularElectrode:
+    - name: electrode_0
+      description: Borosilicate glass pipette, 3-5 MOhm
+```
diff --git a/src/pyflask/ai/skill/knowledge/pynwb-images.md b/src/pyflask/ai/skill/knowledge/pynwb-images.md
new file mode 100644
index 000000000..13039b396
--- /dev/null
+++ b/src/pyflask/ai/skill/knowledge/pynwb-images.md
@@ -0,0 +1,59 @@
+# Images — PyNWB Patterns
+
+Patterns for static images and video references in NWB files.
+ +## Static Images + +```python +from pynwb.image import GrayscaleImage, RGBImage, RGBAImage, Images + +# Single grayscale image (e.g., mean projection) +mean_img = GrayscaleImage( + name="mean_projection", + data=mean_array, # shape: (height, width), dtype float or uint + description="Mean fluorescence projection", +) + +# RGB image (e.g., histology) +histology = RGBImage( + name="histology", + data=rgb_array, # shape: (height, width, 3) + description="Post-hoc histology image", +) + +# Group related images +images = Images( + name="reference_images", + images=[mean_img, histology], + description="Reference images for this session", +) +nwbfile.add_acquisition(images) +``` + +## ImageSeries — External Video Files + +For behavioral videos, use `external_file` to reference videos alongside the NWB file. +This avoids re-encoding video data and preserves the original codec. + +```python +from pynwb.image import ImageSeries + +video = ImageSeries( + name="behavior_video", + external_file=["./videos/session01_cam1.avi"], # relative path + format="external", + rate=30.0, + starting_frame=[0], + description="Side-view behavioral camera", + unit="n.a.", +) +nwbfile.add_acquisition(video) +``` + +## Notes + +- Use **relative paths** for `external_file` so the NWB file remains portable. +- `starting_frame` is a list with one entry per file in `external_file`. +- For neural imaging data (two-photon, miniscope), store data **inside** the NWB file + using `TwoPhotonSeries`/`OnePhotonSeries`, not as external files. +- `GrayscaleImage` expects 2D arrays; `RGBImage` expects 3D with last dim = 3. 
diff --git a/src/pyflask/ai/skill/knowledge/pynwb-ophys-advanced.md b/src/pyflask/ai/skill/knowledge/pynwb-ophys-advanced.md new file mode 100644 index 000000000..ec381c518 --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/pynwb-ophys-advanced.md @@ -0,0 +1,171 @@ +# Optical Physiology (ophys) — Advanced PyNWB Patterns + +Construction patterns beyond the basics in `nwb-best-practices.md`. + +## ImagingPlane + OpticalChannel + +```python +from pynwb.ophys import OpticalChannel + +device = nwbfile.create_device( + name="Microscope", + description="Two-photon resonant scanning microscope", + manufacturer="Bruker", +) + +optical_channel = OpticalChannel( + name="green", + description="GCaMP emission channel", + emission_lambda=520.0, +) + +imaging_plane = nwbfile.create_imaging_plane( + name="ImagingPlane", + optical_channel=optical_channel, + imaging_rate=30.0, + description="Imaging plane in CA1", + device=device, + excitation_lambda=920.0, + indicator="GCaMP6f", + location="CA1", + grid_spacing=[0.001, 0.001], # meters per pixel (1 um/px) + grid_spacing_unit="meters", +) +``` + +## TwoPhotonSeries vs OnePhotonSeries + +```python +from pynwb.ophys import TwoPhotonSeries, OnePhotonSeries + +# Two-photon (ScanImage, Scanbox, Bruker) +two_photon = TwoPhotonSeries( + name="TwoPhotonSeries", + data=image_data, # shape: (n_frames, height, width) + imaging_plane=imaging_plane, + rate=30.0, + unit="n.a.", +) +nwbfile.add_acquisition(two_photon) + +# One-photon / widefield (Miniscope, Inscopix, widefield) +one_photon = OnePhotonSeries( + name="OnePhotonSeries", + data=image_data, + imaging_plane=imaging_plane, + rate=30.0, + unit="n.a.", +) +nwbfile.add_acquisition(one_photon) +``` + +## PlaneSegmentation — ROI Masks + +Three mask formats are supported. 
Use the one that matches your segmentation output: + +**pixel_mask** — sparse format, best for small ROIs in large FOV: +```python +from pynwb.ophys import PlaneSegmentation, ImageSegmentation +from neuroconv.tools.nwb_helpers import get_module + +img_seg = ImageSegmentation() +ophys_module = get_module(nwbfile, "ophys", "Optical physiology data") +ophys_module.add(img_seg) + +plane_seg = img_seg.create_plane_segmentation( + name="PlaneSegmentation", + description="ROIs from Suite2p", + imaging_plane=imaging_plane, +) + +# Each ROI: list of (x, y, weight) tuples +for roi_mask in roi_masks: + plane_seg.add_roi(pixel_mask=roi_mask) + # roi_mask = [(x1, y1, w1), (x2, y2, w2), ...] +``` + +**image_mask** — dense format, one full-FOV mask per ROI: +```python +plane_seg = img_seg.create_plane_segmentation( + name="PlaneSegmentation", + description="ROIs from CaImAn", + imaging_plane=imaging_plane, +) + +for mask_2d in image_masks: + plane_seg.add_roi(image_mask=mask_2d) + # mask_2d shape: (height, width), same as imaging plane +``` + +## RoiResponseSeries — Fluorescence Traces + +```python +from pynwb.ophys import RoiResponseSeries, DfOverF, Fluorescence + +# Create a region referencing all (or some) ROIs +roi_table_region = plane_seg.create_roi_table_region( + region=list(range(n_rois)), + description="All ROIs", +) + +# Raw fluorescence +fluorescence = Fluorescence() +ophys_module.add(fluorescence) +fluorescence.create_roi_response_series( + name="RoiResponseSeries", + data=F, # shape: (n_frames, n_rois) + rois=roi_table_region, + rate=30.0, + unit="n.a.", +) + +# dF/F +dff = DfOverF() +ophys_module.add(dff) +dff.create_roi_response_series( + name="DfOverF", + data=dff_data, # shape: (n_frames, n_rois) + rois=roi_table_region, + rate=30.0, + unit="n.a.", +) +``` + +## MotionCorrection + +```python +from pynwb.ophys import MotionCorrection, CorrectedImageStack + +corrected = CorrectedImageStack( + corrected=corrected_two_photon, # TwoPhotonSeries (corrected data) + 
original=two_photon, # TwoPhotonSeries (original data) + xy_translation=TimeSeries( + name="xy_translation", + data=shifts, # shape: (n_frames, 2) — x,y shifts + rate=30.0, + unit="pixels", + ), +) + +motion_correction = MotionCorrection(corrected_image_stacks=[corrected]) +ophys_module.add(motion_correction) +``` + +## Multi-Plane Imaging + +For multi-plane imaging, create separate ImagingPlane, TwoPhotonSeries, and +PlaneSegmentation for each plane: + +```python +for plane_idx in range(n_planes): + ip = nwbfile.create_imaging_plane( + name=f"ImagingPlane{plane_idx}", + optical_channel=optical_channel, + imaging_rate=volume_rate, + device=device, + excitation_lambda=920.0, + indicator="GCaMP6f", + location=f"CA1_plane{plane_idx}", + ) + # Create TwoPhotonSeries and PlaneSegmentation per plane... +``` diff --git a/src/pyflask/ai/skill/knowledge/pynwb-optogenetics.md b/src/pyflask/ai/skill/knowledge/pynwb-optogenetics.md new file mode 100644 index 000000000..3f03623dc --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/pynwb-optogenetics.md @@ -0,0 +1,73 @@ +# Optogenetics — PyNWB Patterns + +Construction patterns for optogenetic stimulation data. 
+ +## Device + Stimulus Site + +```python +device = nwbfile.create_device( + name="Laser", + description="473nm DPSS laser for ChR2 activation", + manufacturer="Cobolt", +) + +ogen_site = nwbfile.create_ogen_site( + name="ogen_site", + device=device, + description="Fiber optic cannula targeting left mPFC", + excitation_lambda=473.0, # nm + location="mPFC", # brain region +) +``` + +## Optogenetic Series + +```python +from pynwb.ogen import OptogeneticSeries + +ogen_series = OptogeneticSeries( + name="optogenetic_stimulus", + data=laser_waveform, # power in watts (numpy array, shape: n_timepoints) + site=ogen_site, + rate=10000.0, # sampling rate of the stimulus waveform + unit="watts", + description="5ms pulses at 20Hz, 10mW", +) +nwbfile.add_stimulus(ogen_series) +``` + +For **event-based** stimulation (on/off times rather than continuous waveform): +```python +ogen_series = OptogeneticSeries( + name="optogenetic_stimulus", + data=pulse_amplitudes, # power at each pulse + timestamps=pulse_times, # time of each pulse in seconds + site=ogen_site, + unit="watts", +) +nwbfile.add_stimulus(ogen_series) +``` + +## Notes + +- Every `OptogeneticStimulusSite` must have at least one `OptogeneticSeries`. + Don't create sites without corresponding stimulus data. +- `excitation_lambda` is the wavelength in nm (e.g., 473 for ChR2, 590 for NpHR, + 635 for Chrimson). +- `location` should use standard brain region names (Allen Brain Atlas for mice). +- Store the stimulus waveform, not just on/off times, when available. 
+ +## Metadata YAML Template + +```yaml +Ogen: + Device: + - name: Laser + description: 473nm DPSS laser + manufacturer: Cobolt + OptogeneticStimulusSite: + - name: ogen_site + description: Fiber optic cannula, 200um core, 0.39 NA + excitation_lambda: 473.0 + location: mPFC +``` diff --git a/src/pyflask/ai/skill/knowledge/repo-structure.md b/src/pyflask/ai/skill/knowledge/repo-structure.md new file mode 100644 index 000000000..c2fe737fe --- /dev/null +++ b/src/pyflask/ai/skill/knowledge/repo-structure.md @@ -0,0 +1,1436 @@ +# Canonical CatalystNeuro NWB Conversion Repo Structure + +This document is a practical reference for generating a new `-lab-to-nwb` conversion repository following the CatalystNeuro pattern established by the [cookiecutter-my-lab-to-nwb-template](https://github.com/catalystneuro/cookiecutter-my-lab-to-nwb-template). All code examples are drawn from real production repos (cai-lab-to-nwb, giocomo-lab-to-nwb). + +--- + +## 1. Directory Structure + +A conversion repo has this exact layout: + +``` +-lab-to-nwb/ +├── .github/ +│ └── workflows/ +│ ├── auto-publish.yml # PyPI publish on GitHub release +│ └── test-install.yml # Monthly CI: install + import test +├── .gitignore +├── .pre-commit-config.yaml # black, ruff, codespell, trailing whitespace +├── LICENSE # BSD-3 +├── README.md +├── make_env.yml # Conda environment definition +├── pyproject.toml # Build config, deps, tooling +└── src/ + └── _lab_to_nwb/ # Python package (underscored slug) + ├── __init__.py # Empty or minimal + ├── / # One directory per conversion/experiment + │ ├── __init__.py # Exports the NWBConverter and custom interfaces + │ ├── _nwbconverter.py + │ ├── _convert_session.py + │ ├── _convert_all_sessions.py + │ ├── _metadata.yaml + │ ├── .py + │ ├── .py + │ ├── interfaces/ # Optional: subdirectory if many interfaces + │ │ ├── __init__.py + │ │ ├── .py + │ │ └── .py + │ ├── utils/ # Optional: helper scripts + │ └── conversion_notes.md # Free-form notes about the conversion + 
└── <conversion_2>/             # Additional conversions for the same lab
+            └── ...
+```
+
+### Naming conventions
+
+| Concept | Convention | Example |
+|---------|-----------|---------|
+| Repo name | `<lab>-lab-to-nwb` | `cai-lab-to-nwb` |
+| Package slug | `<lab>_lab_to_nwb` (underscored) | `cai_lab_to_nwb` |
+| Conversion directory | `<conversion_name>` or descriptive name | `zaki_2024`, `wen22` |
+| NWBConverter class | `<ConversionName>NWBConverter` | `Zaki2024NWBConverter` |
+| Interface class | `<ConversionName><Modality>Interface` | `Zaki2024ShockStimuliInterface` |
+| Metadata file | `<conversion_name>_metadata.yaml` | `zaki_2024_metadata.yaml` |
+| Convert session script | `<conversion_name>_convert_session.py` | `zaki_2024_convert_session.py` |
+| Convert all script | `<conversion_name>_convert_all_sessions.py` | `zaki_2024_convert_all_sessions.py` |
+
+### The `__init__.py` files
+
+The conversion-level `__init__.py` exports the key classes so they can be imported cleanly:
+
+```python
+# src/cai_lab_to_nwb/zaki_2024/__init__.py
+# (can be empty, or export key classes)
+```
+
+If you have an `interfaces/` subdirectory, its `__init__.py` re-exports everything:
+
+```python
+# src/cai_lab_to_nwb/zaki_2024/interfaces/__init__.py
+from .eztrack_interface import EzTrackFreezingBehaviorInterface
+from .zaki_2024_edf_interface import Zaki2024EDFInterface, Zaki2024MultiEDFInterface
+from .minian_interface import MinianSegmentationInterface, MinianMotionCorrectionInterface
+from .zaki_2024_sleep_classification_interface import Zaki2024SleepClassificationInterface
+from .miniscope_imaging_interface import MiniscopeImagingInterface
+from .zaki_2024_shock_stimuli_interface import Zaki2024ShockStimuliInterface
+from .zaki_2024_cell_registration_interface import Zaki2024CellRegistrationInterface
+```
+
+---
+
+## 2. pyproject.toml
+
+The build system uses **hatchling** (the modern standard).
Here is the canonical structure with all required fields:
+
+```toml
+[project]
+name = "<lab>-lab-to-nwb"
+version = "0.0.1"
+description = "NWB conversion scripts, functions, and classes for <Lab> lab conversion"
+readme = "README.md"
+authors = [{ name = "CatalystNeuro", email = "ben.dichter@catalystneuro.com" }]
+maintainers = [{ name = "CatalystNeuro", email = "ben.dichter@catalystneuro.com" }]
+license = { file = "LICENSE" }
+requires-python = ">=3.10"
+classifiers = [
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13"
+]
+
+dependencies = [
+    "neuroconv",
+    "nwbinspector",
+]
+
+[project.urls]
+Repository = "https://github.com/catalystneuro/<lab>-lab-to-nwb"
+
+# Per-conversion pinned dependencies (install with: pip install -e .[conversion_name])
+[project.optional-dependencies]
+<conversion_name> = [
+    "neuroconv==0.7.0",  # Pin to exact version used during development
+    # Add conversion-specific extras here, e.g.:
+    # "mne",
+    # "opencv-python-headless",
+    # "ndx-miniscope==0.5.1",
+]
+
+[dependency-groups]
+dev = [
+    "pre-commit",
+    "ruff",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build]
+include = [
+    "*.yaml",
+    "*.yml",
+    "*.json",
+]  # Ensures metadata YAML files are included in sdist and wheel
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/<lab>_lab_to_nwb"]
+
+[tool.hatch.build.targets.sdist]
+packages = ["src/<lab>_lab_to_nwb"]
+
+[tool.ruff]
+
+[tool.ruff.lint]
+select = [
+    "F401",  # Unused import
+    "I",  # All isort rules
+    "UP006",  # non-pep585 annotation
+    "UP007",  # non-pep604 annotation (Union -> |)
+    "UP045",  # non-pep604 annotation (Optional -> | None)
+]
+fixable = ["ALL"]
+
+[tool.ruff.lint.isort]
+relative-imports-order = "closest-to-furthest"
+known-first-party = ["<lab>_lab_to_nwb"]
+
+[tool.codespell]
+skip = '.git*,*.pdf,*.css'
+check-hidden = true
+ignore-words-list = 'assertin'
+```
+
+### Key
points about dependencies
+
+- The top-level `dependencies` list should contain unpinned `neuroconv` and `nwbinspector` for broad compatibility.
+- Per-conversion optional dependencies should **pin exact versions** so that a specific conversion remains reproducible.
+- Conversion-specific extras (e.g., `mne` for EDF files, `opencv-python-headless` for video, NWB extension packages like `ndx-miniscope`) go in the optional dependencies section.
+
+### Real-world example (cai-lab-to-nwb)
+
+The cai-lab-to-nwb repo pins all its core dependencies because it has a single primary conversion:
+
+```toml
+dependencies = [
+    "pynwb==3.0.0",
+    "neuroconv==0.7.4",
+    "nwbinspector==0.6.3",
+    "roiextractors==0.5.13",
+    "ipykernel",
+    "openpyxl",
+    "mne",
+    "opencv-python-headless",
+    "ndx-miniscope==0.5.1",
+]
+```
+
+---
+
+## 3. NWBConverter Class
+
+The `NWBConverter` is the central orchestrator. It declares which `DataInterface` classes handle each data modality and wires them together.
+
+### The pattern
+
+```python
+"""Primary NWBConverter class for this dataset."""
+from neuroconv import NWBConverter
+from neuroconv.datainterfaces import (
+    SpikeGLXRecordingInterface,
+    PhySortingInterface,
+)
+
+from <lab>_lab_to_nwb.<conversion_name>.interfaces import (
+    <ConversionName>BehaviorInterface,
+)
+
+
+class <ConversionName>NWBConverter(NWBConverter):
+    """Primary conversion class for <conversion_name>."""
+
+    data_interface_classes = dict(
+        Recording=SpikeGLXRecordingInterface,
+        Sorting=PhySortingInterface,
+        Behavior=<ConversionName>BehaviorInterface,
+    )
+```
+
+### How to choose interfaces
+
+The `data_interface_classes` dict maps **arbitrary string keys** to interface classes. The keys become the keys you use in `source_data` and `conversion_options` dicts. Choose keys that describe the data modality clearly.
+ +Common built-in interfaces from `neuroconv.datainterfaces`: + +| Modality | Interface | When to use | +|----------|-----------|-------------| +| Neuropixels raw | `SpikeGLXRecordingInterface` | SpikeGLX .bin/.meta files | +| Neuropixels LFP | `SpikeGLXLFPInterface` | SpikeGLX LFP band | +| Spike sorting | `PhySortingInterface` | Phy/Kilosort output | +| Spike sorting | `KiloSortSortingInterface` | KiloSort output directly | +| Calcium imaging | `TiffImagingInterface` | TIFF stacks | +| Calcium segmentation | `Suite2pSegmentationInterface` | Suite2p output | +| Video | `VideoInterface` | Behavioral video files | +| Intracellular | `AbfInterface` | Axon Binary Format | +| EDF signals | Custom needed | EDF format | + +When no built-in interface exists for a data type, write a custom `BaseDataInterface` subclass (see Section 6). + +### Real-world example (cai-lab-to-nwb, zaki_2024) + +This converter has 10 data interfaces, mixing built-in and custom: + +```python +from neuroconv import NWBConverter +from neuroconv.datainterfaces import VideoInterface +from neuroconv.utils.dict import DeepDict +from datetime import timedelta + +from cai_lab_to_nwb.zaki_2024.interfaces import ( + MinianSegmentationInterface, + Zaki2024EDFInterface, + Zaki2024MultiEDFInterface, + EzTrackFreezingBehaviorInterface, + Zaki2024SleepClassificationInterface, + MiniscopeImagingInterface, + MinianMotionCorrectionInterface, + Zaki2024ShockStimuliInterface, + Zaki2024CellRegistrationInterface, +) + + +class Zaki2024NWBConverter(NWBConverter): + """Primary conversion class Cai Lab dataset.""" + + data_interface_classes = dict( + MiniscopeImaging=MiniscopeImagingInterface, + MinianSegmentation=MinianSegmentationInterface, + MinianMotionCorrection=MinianMotionCorrectionInterface, + SleepClassification=Zaki2024SleepClassificationInterface, + EDFSignals=Zaki2024EDFInterface, + MultiEDFSignals=Zaki2024MultiEDFInterface, + FreezingBehavior=EzTrackFreezingBehaviorInterface, + Video=VideoInterface, + 
ShockStimuli=Zaki2024ShockStimuliInterface, + CellRegistration=Zaki2024CellRegistrationInterface, + ) +``` + +### Overriding `get_metadata()` + +Override `get_metadata()` when you need to compute metadata that depends on the source data itself: + +```python +def get_metadata(self) -> DeepDict: + metadata = super().get_metadata() + + # Example: adjust session_start_time based on imaging timestamps + if "MiniscopeImaging" in self.data_interface_objects: + imaging_interface = self.data_interface_objects["MiniscopeImaging"] + imaging_timestamps = imaging_interface.get_original_timestamps() + if imaging_timestamps[0] < 0.0: + time_shift = timedelta(seconds=abs(imaging_timestamps[0])) + session_start_time = imaging_interface.get_metadata()["NWBFile"]["session_start_time"] + metadata["NWBFile"].update(session_start_time=session_start_time - time_shift) + + return metadata +``` + +### Not all interfaces must be present in every session + +The converter class declares the **superset** of all possible interfaces. In `convert_session.py`, you only add entries to `source_data` for interfaces that are relevant to that particular session. The converter will only instantiate interfaces that have entries in `source_data`. + +--- + +## 4. convert_session.py + +This is the script that converts a single session. It follows a strict pattern: + +1. Build `source_data` dict (file paths for each interface) +2. Build `conversion_options` dict (per-interface options like `stub_test`) +3. Instantiate the converter +4. Get auto-extracted metadata, layer on YAML metadata, layer on session-specific metadata +5. 
Call `converter.run_conversion()` + +### The canonical pattern + +```python +"""Primary script to run to convert an entire session of data using the NWBConverter.""" +from pathlib import Path +from typing import Union +from datetime import datetime +from zoneinfo import ZoneInfo + +from neuroconv.utils import load_dict_from_file, dict_deep_update + +from _lab_to_nwb.._nwbconverter import NWBConverter + + +def session_to_nwb( + data_dir_path: Union[str, Path], + output_dir_path: Union[str, Path], + stub_test: bool = False, +): + data_dir_path = Path(data_dir_path) + output_dir_path = Path(output_dir_path) + if stub_test: + output_dir_path = output_dir_path / "nwb_stub" + output_dir_path.mkdir(parents=True, exist_ok=True) + + session_id = "subject_session_identifier" + nwbfile_path = output_dir_path / f"{session_id}.nwb" + + # ---- Step 1: Build source_data and conversion_options ---- + source_data = dict() + conversion_options = dict() + + # Add Recording + source_data.update(dict(Recording=dict( + file_path=str(data_dir_path / "recording.ap.bin"), + ))) + conversion_options.update(dict(Recording=dict(stub_test=stub_test))) + + # Add Sorting + source_data.update(dict(Sorting=dict( + folder_path=str(data_dir_path / "sorting"), + ))) + conversion_options.update(dict(Sorting=dict())) + + # Add Behavior (custom interface) + source_data.update(dict(Behavior=dict( + file_path=str(data_dir_path / "behavior.csv"), + ))) + conversion_options.update(dict(Behavior=dict())) + + # ---- Step 2: Instantiate converter ---- + converter = NWBConverter(source_data=source_data) + + # ---- Step 3: Build metadata (layered) ---- + # Layer 1: Auto-extracted from source files + metadata = converter.get_metadata() + + # Layer 2: Set session_start_time with timezone + session_start_time = datetime(year=2020, month=1, day=1, tzinfo=ZoneInfo("US/Eastern")) + metadata["NWBFile"]["session_start_time"] = session_start_time + + # Layer 3: Merge in the hand-edited YAML metadata + 
editable_metadata_path = Path(__file__).parent / "_metadata.yaml" + editable_metadata = load_dict_from_file(editable_metadata_path) + metadata = dict_deep_update(metadata, editable_metadata) + + # Layer 4: Session-specific overrides + metadata["Subject"]["subject_id"] = "mouse001" + metadata["NWBFile"]["session_id"] = session_id + + # ---- Step 4: Run conversion ---- + converter.run_conversion( + metadata=metadata, + nwbfile_path=nwbfile_path, + conversion_options=conversion_options, + overwrite=True, + ) + + +if __name__ == "__main__": + data_dir_path = Path("/path/to/raw/data/") + output_dir_path = Path("~/conversion_nwb/") + stub_test = False + + session_to_nwb( + data_dir_path=data_dir_path, + output_dir_path=output_dir_path, + stub_test=stub_test, + ) +``` + +### Metadata layering order + +This is critical. Later layers override earlier ones: + +1. **Auto-extracted** (`converter.get_metadata()`): Reads metadata from the source files themselves (e.g., sampling rate from SpikeGLX .meta files, session_start_time from file timestamps). +2. **session_start_time with timezone**: Must always be set explicitly with a timezone. Use `ZoneInfo` (Python 3.9+) or `pytz`. +3. **YAML file** (`dict_deep_update` with loaded YAML): Lab-level metadata that applies to all sessions of this conversion (institution, lab, experimenter, species, publications, etc.). +4. **Session-specific overrides**: `subject_id`, `session_id`, `session_description`, etc. that vary per session. 
+ +### Real-world example (cai-lab-to-nwb, zaki_2024) + +The real convert_session.py shows the pattern with conditional interface inclusion (not all sessions have all data types): + +```python +def session_to_nwb( + output_dir_path: Union[str, Path], + subject_id: str, + session_id: str, + date_str: str, + time_str: str, + session_description: str, + stub_test: bool = False, + overwrite: bool = False, + verbose: bool = False, + imaging_folder_path: Union[str, Path] = None, + minian_folder_path: Union[str, Path] = None, + video_file_path: Union[str, Path] = None, + freezing_output_file_path: Union[str, Path] = None, + edf_file_path: Union[str, Path] = None, + sleep_classification_file_path: Union[str, Path] = None, + shock_stimulus: dict = None, +): + # ... + source_data = dict() + conversion_options = dict() + + # Conditionally add interfaces based on what data is available + if imaging_folder_path: + imaging_folder_path = Path(imaging_folder_path) + source_data.update(dict(MiniscopeImaging=dict(folder_path=imaging_folder_path))) + conversion_options.update(dict(MiniscopeImaging=dict(stub_test=stub_test))) + + if minian_folder_path: + minian_folder_path = Path(minian_folder_path) + source_data.update(dict(MinianSegmentation=dict(folder_path=minian_folder_path))) + conversion_options.update(dict(MinianSegmentation=dict(stub_test=stub_test))) + + if video_file_path: + source_data.update(dict(Video=dict(file_paths=[video_file_path]))) + conversion_options.update(dict(Video=dict(stub_test=stub_test))) + + if shock_stimulus is not None: + source_data.update(ShockStimuli=dict()) + conversion_options.update(ShockStimuli=shock_stimulus) + + converter = Zaki2024NWBConverter(source_data=source_data, verbose=verbose) + metadata = converter.get_metadata() + + # Timezone localization + eastern = pytz.timezone("US/Eastern") + metadata["NWBFile"]["session_start_time"] = eastern.localize( + metadata["NWBFile"]["session_start_time"] + ) + + # YAML metadata layer + 
editable_metadata_path = Path(__file__).parent / "zaki_2024_metadata.yaml" + editable_metadata = load_dict_from_file(editable_metadata_path) + metadata = dict_deep_update(metadata, editable_metadata) + + # Session-specific metadata + metadata["Subject"]["subject_id"] = subject_id + metadata["NWBFile"]["session_description"] = session_description + metadata["NWBFile"]["session_id"] = session_id + + converter.run_conversion( + metadata=metadata, + nwbfile_path=nwbfile_path, + conversion_options=conversion_options, + overwrite=overwrite, + ) +``` + +### The `stub_test` pattern + +The `stub_test` parameter is a convention that: +- Redirects output to a `nwb_stub/` subdirectory +- Gets passed to each interface's `conversion_options` so they only write a small subset of data (e.g., first few seconds of recording) +- Enables fast iteration during development without writing full datasets + +```python +if stub_test: + output_dir_path = output_dir_path / "nwb_stub" +# ... +conversion_options.update(dict(Recording=dict(stub_test=stub_test))) +``` + +### NWB file naming + +Use descriptive, BIDS-like naming: `sub-_ses-.nwb` or simply `.nwb`. + +--- + +## 5. convert_all_sessions.py + +This script handles batch conversion of all sessions in a dataset. It follows a template pattern with three functions: + +### The canonical pattern + +```python +"""Primary script to run to convert all sessions in a dataset using session_to_nwb.""" +from pathlib import Path +from typing import Union +from concurrent.futures import ProcessPoolExecutor, as_completed +from pprint import pformat +import traceback +from tqdm import tqdm + +from .convert_session import session_to_nwb + + +def dataset_to_nwb( + *, + data_dir_path: Union[str, Path], + output_dir_path: Union[str, Path], + max_workers: int = 1, + verbose: bool = True, + stub_test: bool = False, +): + """Convert the entire dataset to NWB. 
+ + Parameters + ---------- + data_dir_path : Union[str, Path] + The path to the directory containing the raw data. + output_dir_path : Union[str, Path] + The path to the directory where the NWB files will be saved. + max_workers : int, optional + The number of workers to use for parallel processing, by default 1 + verbose : bool, optional + Whether to print verbose output, by default True + stub_test : bool, optional + Whether to run in stub test mode, by default False + """ + data_dir_path = Path(data_dir_path) + output_dir_path = Path(output_dir_path) + session_to_nwb_kwargs_per_session = get_session_to_nwb_kwargs_per_session( + data_dir_path=data_dir_path, + ) + + futures = [] + with ProcessPoolExecutor(max_workers=max_workers) as executor: + for session_to_nwb_kwargs in session_to_nwb_kwargs_per_session: + session_to_nwb_kwargs["output_dir_path"] = output_dir_path + session_to_nwb_kwargs["verbose"] = verbose + session_to_nwb_kwargs["stub_test"] = stub_test + exception_file_path = ( + data_dir_path / f"ERROR_{session_to_nwb_kwargs.get('session_id', 'unknown')}.txt" + ) + futures.append( + executor.submit( + safe_session_to_nwb, + session_to_nwb_kwargs=session_to_nwb_kwargs, + exception_file_path=exception_file_path, + ) + ) + for _ in tqdm(as_completed(futures), total=len(futures)): + pass + + +def safe_session_to_nwb( + *, + session_to_nwb_kwargs: dict, + exception_file_path: Union[Path, str], +): + """Convert a session to NWB while handling any errors by writing to exception_file_path.""" + exception_file_path = Path(exception_file_path) + try: + session_to_nwb(**session_to_nwb_kwargs) + except Exception as e: + with open(exception_file_path, mode="w") as f: + f.write(f"session_to_nwb_kwargs: \n {pformat(session_to_nwb_kwargs)}\n\n") + f.write(traceback.format_exc()) + + +def get_session_to_nwb_kwargs_per_session( + *, + data_dir_path: Union[str, Path], +): + """Get the kwargs for session_to_nwb for each session in the dataset. 
+ + Returns + ------- + list[dict[str, Any]] + A list of dictionaries containing the kwargs for session_to_nwb for each session. + """ + # IMPLEMENT THIS: Return a list of dicts, each containing the kwargs for one session. + # Common strategies: + # 1. Iterate over session directories: list(data_dir_path.iterdir()) + # 2. Read from a spreadsheet/CSV with session metadata + # 3. Load from a pre-computed YAML parameters file + raise NotImplementedError + + +if __name__ == "__main__": + data_dir_path = Path("/path/to/raw/data/") + output_dir_path = Path("~/conversion_nwb/") + max_workers = 1 + stub_test = False + + dataset_to_nwb( + data_dir_path=data_dir_path, + output_dir_path=output_dir_path, + max_workers=max_workers, + stub_test=stub_test, + ) +``` + +### Key design decisions + +- **`ProcessPoolExecutor`**: Enables parallel conversion of sessions. Default `max_workers=1` for sequential processing. +- **`safe_session_to_nwb`**: Wraps `session_to_nwb` in a try/except that writes errors to a file instead of crashing the batch. This is critical for large datasets. +- **`get_session_to_nwb_kwargs_per_session`**: This is the function that must be customized per conversion. It returns a list of dicts, where each dict contains exactly the kwargs needed by `session_to_nwb`. 
+ +### Real-world example of `get_session_to_nwb_kwargs_per_session` (cai-lab-to-nwb) + +```python +def get_session_to_nwb_kwargs_per_session(*, data_dir_path): + import pandas as pd + subjects_df = pd.read_excel(data_dir_path / "Ca_EEG_Design.xlsx") + subjects = subjects_df["Mouse"] + session_to_nwb_kwargs_per_session = [] + + for subject_id in subjects: + yaml_file_path = Path(__file__).parent / "utils/conversion_parameters.yaml" + conversion_parameter_dict = load_dict_from_file(yaml_file_path) + if subject_id in conversion_parameter_dict: + for session_id in conversion_parameter_dict[subject_id].keys(): + session_to_nwb_kwargs_per_session.append( + conversion_parameter_dict[subject_id][session_id] + ) + + return session_to_nwb_kwargs_per_session +``` + +### Real-world example of iterating over directories (giocomo-lab-to-nwb wen22) + +The wen22 conversion uses a simpler pattern -- iterating directly over session directories: + +```python +session_path_list = [path for path in data_path.iterdir() if path.name != "VR"] +for session_path in session_path_list: + session_id = session_path.name + # ... build source_data from session_path ... + converter = Wen21NWBConverter(source_data=source_data) + # ... run conversion ... +``` + +--- + +## 6. Custom DataInterface + +When no built-in NeuroConv interface exists for a data type, write a custom one by subclassing `BaseDataInterface`. This is the most common customization point. + +### The pattern + +```python +"""Primary class for converting experiment-specific .""" +from pynwb.file import NWBFile + +from neuroconv.basedatainterface import BaseDataInterface +from neuroconv.utils import DeepDict + + +class Interface(BaseDataInterface): + """ interface for conversion.""" + + keywords = ["behavior"] # Used for discoverability + + def __init__(self, file_path: str, verbose: bool = False): + # Load data LAZILY -- do not read entire files here. + # Store paths and parameters as instance attributes. 
+ # Call super().__init__() to register source_data. + self.file_path = file_path + self.verbose = verbose + super().__init__(file_path=file_path) + + def get_metadata(self) -> DeepDict: + # Extract metadata from source files that can be auto-detected. + # Return a DeepDict (nested dict) matching the NWB metadata schema. + metadata = super().get_metadata() + # Example: metadata["NWBFile"]["session_start_time"] = + return metadata + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, **conversion_options): + # The core method. Read data from source files and add to the NWBFile. + # conversion_options come from the conversion_options dict passed to run_conversion. + raise NotImplementedError() +``` + +### Critical details about `__init__` + +- The `__init__` method's parameters become the keys in the `source_data` dict. +- Call `super().__init__()` and pass all the init parameters as keyword arguments. This stores them in `self.source_data` for later reference. +- Use type hints from `pydantic` for validation: `FilePath`, `DirectoryPath`. + +```python +from pydantic import FilePath + +class MyInterface(BaseDataInterface): + def __init__(self, file_path: FilePath, sampling_frequency: float, verbose: bool = False): + self.file_path = file_path + self.verbose = verbose + self.sampling_frequency = sampling_frequency + super().__init__(file_path=file_path, sampling_frequency=sampling_frequency) +``` + +Then in `source_data`: +```python +source_data["MyModality"] = dict(file_path="/path/to/file.csv", sampling_frequency=30000.0) +``` + +### Critical details about `add_to_nwbfile` + +- The method signature is `add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, **kwargs)`. +- Extra keyword arguments in the method signature correspond to keys in `conversion_options`. +- You can include `stub_test: bool = False` to support the stub test pattern. +- Use processing modules for derived data (see `get_module` in Section 9). 
+ +### Real-world example: Simple interface (Zaki2024ShockStimuliInterface) + +This interface takes no source files -- the data is passed entirely through `conversion_options`: + +```python +from pynwb.file import NWBFile +from pynwb.epoch import TimeIntervals +from neuroconv.basedatainterface import BaseDataInterface +from neuroconv.utils import DeepDict +from typing import Optional + + +class Zaki2024ShockStimuliInterface(BaseDataInterface): + """Adds annotated events of shock times.""" + + keywords = ["behavior", "sleep stages"] + + def __init__(self, verbose: bool = False): + self.verbose = verbose + super().__init__() + + def get_metadata(self) -> DeepDict: + metadata = super().get_metadata() + return metadata + + def add_to_nwbfile( + self, + nwbfile: NWBFile, + shock_amplitude: float, + shock_times: list, + shock_duration: float, + metadata: Optional[dict] = None, + ): + description = ( + "During aversive encoding, after a baseline period of 2 min, " + "mice received three 2 s foot shocks..." 
+ ) + shock_stimuli = TimeIntervals(name="ShockStimuli", description=description) + shock_stimuli.add_column(name="shock_amplitude", description="Shock amplitude in mA") + for start_time in shock_times: + shock_stimuli.add_interval( + start_time=start_time, + stop_time=start_time + shock_duration, + shock_amplitude=shock_amplitude, + ) + nwbfile.add_stimulus(shock_stimuli) +``` + +The corresponding `conversion_options` in the convert_session.py: +```python +conversion_options.update( + ShockStimuli=dict( + shock_times=[120.0, 180.0, 240.0], + shock_amplitude=1.5, + shock_duration=2.0, + ), +) +``` + +### Real-world example: Complex interface with temporal alignment (EzTrackFreezingBehaviorInterface) + +This interface reads data from a CSV file, supports temporal alignment, and writes both a TimeSeries and TimeIntervals: + +```python +import numpy as np +import pandas as pd +from pynwb import TimeSeries +from pynwb.epoch import TimeIntervals +from pynwb.file import NWBFile +from neuroconv.basedatainterface import BaseDataInterface +from neuroconv.utils import DeepDict +from pydantic import FilePath +from typing import Optional, List + + +class EzTrackFreezingBehaviorInterface(BaseDataInterface): + """Adds intervals of freezing behavior and motion series.""" + + keywords = ["behavior", "freezing", "motion"] + + def __init__(self, file_path: FilePath, video_sampling_frequency: float, verbose: bool = False): + self.file_path = file_path + self.verbose = verbose + self.video_sampling_frequency = video_sampling_frequency + # Private attributes for temporal alignment + self._start_times = None + self._stop_times = None + self._starting_time = None + + def get_metadata(self) -> DeepDict: + metadata = super().get_metadata() + return metadata + + def get_interval_times(self): + """Extract start and stop times of freezing events.""" + freezing_behavior_df = pd.read_csv(self.file_path) + freezing_values = freezing_behavior_df["Freezing"].values + changes_in_freezing = 
np.diff(freezing_values) + freezing_start = np.where(changes_in_freezing == 100)[0] + 1 + freezing_stop = np.where(changes_in_freezing == -100)[0] + 1 + + start_frames = freezing_behavior_df["Frame"].values[freezing_start] + stop_frames = freezing_behavior_df["Frame"].values[freezing_stop] + + # Use aligned times if set, otherwise compute from frames + start_times = ( + self._start_times if self._start_times is not None + else start_frames / self.video_sampling_frequency + ) + stop_times = ( + self._stop_times if self._stop_times is not None + else stop_frames / self.video_sampling_frequency + ) + return start_times, stop_times + + def set_aligned_interval_times(self, start_times, stop_times): + self._start_times = start_times + self._stop_times = stop_times + + def set_aligned_starting_time(self, aligned_start_time): + self._starting_time = aligned_start_time + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None, stub_test: bool = False): + freezing_behavior_df = pd.read_csv(self.file_path) + start_times, stop_times = self.get_interval_times() + + motion_data = freezing_behavior_df["Motion"].values + starting_time = self._starting_time if self._starting_time is not None else self.get_starting_time() + + motion_series = TimeSeries( + name="MotionSeries", + description="Motion measured by pixel change between frames.", + data=motion_data[:100] if stub_test else motion_data, + unit="n.a", + starting_time=starting_time, + rate=self.video_sampling_frequency, + ) + + freeze_intervals = TimeIntervals(name="FreezingIntervals", description="...") + for start_time, stop_time in zip(start_times, stop_times): + freeze_intervals.add_interval( + start_time=start_time, + stop_time=stop_time, + timeseries=[motion_series], + ) + + if "behavior" not in nwbfile.processing: + behavior_module = nwbfile.create_processing_module( + name="behavior", description="Contains behavior data" + ) + else: + behavior_module = nwbfile.processing["behavior"] + + 
behavior_module.add(motion_series) + behavior_module.add(freeze_intervals) +``` + +### Real-world example: Complex interface with sync channel (Wen21EventsInterface) + +This interface demonstrates reading NI-DAQ sync channels to compute behavioral timestamp offsets: + +```python +from nwb_conversion_tools.basedatainterface import BaseDataInterface +from nwb_conversion_tools.utils.types import FolderPathType +from nwb_conversion_tools.tools.nwb_helpers import get_module +from hdmf.backends.hdf5.h5_utils import H5DataIO +from pynwb.behavior import Position, SpatialSeries +from pynwb import NWBFile, TimeSeries + + +class Wen21EventsInterface(BaseDataInterface): + def __init__(self, session_path: FolderPathType): + super().__init__(session_path=session_path) + + def run_conversion(self, nwbfile: NWBFile, metadata: dict): + behavior_module = get_module(nwbfile, "behavior") + session_path = Path(self.source_data["session_path"]) + + # ... read position files, compute temporal offset from NIDQ sync channel ... + + # Add position data with compression + spatial_series_object = SpatialSeries( + name="position", + description="position within the virtual reality wheel", + data=H5DataIO(position_data, compression="gzip"), + reference_frame="unknown", + unit="m", + conversion=0.01, + timestamps=position_timestamps, + ) + + pos_obj = Position(name="position within the virtual reality wheel") + pos_obj.add_spatial_series(spatial_series_object) + behavior_module.add_data_interface(pos_obj) +``` + +Note: The older `nwb_conversion_tools` API used `run_conversion()` instead of `add_to_nwbfile()`. Modern NeuroConv uses `add_to_nwbfile()`. + +--- + +## 7. metadata.yaml + +The metadata YAML file contains hand-edited metadata that applies to all sessions of a conversion. It is loaded in `convert_session.py` and merged on top of auto-extracted metadata. 
+ +### Structure and required fields + +```yaml +NWBFile: + keywords: + - hippocampus + - learning + - memory + related_publications: + - https://doi.org/10.1038/s41586-024-08168-4 + session_description: > + A rich text description of the experiment. Can also just be the abstract + of the publication. This is REQUIRED by NWB. + experiment_description: > + Optional longer description of the experimental protocol. + institution: Icahn School of Medicine at Mount Sinai + lab: Cai + experimenter: + - Last, First Middle + - Last, First Middle + surgery: > + Optional: description of surgical procedures. + virus: > + Optional: description of viral constructs used. +Subject: + species: Mus musculus # REQUIRED. Use Latin binomial name. + description: > + A rich text description of the subject. + age: P12W/P18W # ISO 8601 duration. "P90D" = 90 days old. + sex: M # One of M, F, U, or O + strain: C57BL/6J # Optional + genotype: wild-type # Optional + date_of_birth: 2014-06-22 00:00:00-04:00 # Optional, with timezone +``` + +### How metadata merging works + +The `dict_deep_update` function performs a recursive merge. For nested dicts, keys are merged. For lists, the entire list is replaced (not appended). For scalar values, the later value wins. + +```python +from neuroconv.utils import load_dict_from_file, dict_deep_update + +# Auto-extracted metadata (from file headers, etc.) +metadata = converter.get_metadata() +# Example: metadata["NWBFile"]["session_start_time"] is already set from file timestamps + +# YAML metadata overlays on top +editable_metadata = load_dict_from_file(Path(__file__).parent / "metadata.yaml") +metadata = dict_deep_update(metadata, editable_metadata) +# Now metadata["NWBFile"]["lab"], ["institution"], etc. 
are set from the YAML +# But session_start_time from auto-extraction is preserved (YAML doesn't override it) + +# Session-specific overrides +metadata["Subject"]["subject_id"] = "mouse001" # Per-session value +``` + +### Extended metadata for specific modalities + +For optical physiology, the metadata YAML can also define imaging planes, optical channels, etc.: + +```yaml +Ophys: + OnePhotonSeries: + - name: OnePhotonSeries + description: Imaging data from Miniscope. + imaging_plane: ImagingPlane + unit: n.a. + ImagingPlane: + - name: ImagingPlane + description: Imaging plane for Miniscope imaging data. + excitation_lambda: 496.0 + location: CA1 + device: Microscope + optical_channel: + - name: GreenChannel + description: Green channel of the microscope. + emission_lambda: 513.0 + indicator: GCaMP6f +``` + +### Per-subject metadata + +For datasets with multiple subjects, you can use a separate YAML file for subject-specific metadata: + +```yaml +# subject_metadata.yml (from giocomo wen22) +N2: + subject_id: N2 + age: P90D + strain: C57Bl/6 + genotype: wildtype + date_of_birth: 2019-10-22 + weight: 0.016 + sex: U +``` + +Then load and merge per subject: +```python +subject_metadata_from_yaml = load_dict_from_file(Path("./subject_metadata.yml")) +subject_metadata = subject_metadata_from_yaml[subject_id] +metadata["Subject"] = dict_deep_update(metadata["Subject"], subject_metadata) +``` + +--- + +## 8. Temporal Alignment + +When multiple data streams have different clocks or start times, you must align them. This is done by overriding `temporally_align_data_interfaces()` in the NWBConverter. + +### The pattern + +```python +class MyNWBConverter(NWBConverter): + data_interface_classes = dict(...) 
+ + def temporally_align_data_interfaces(self, metadata=None, conversion_options=None): + """Align all data streams to a common time reference.""" + + # Access interfaces by their keys + if "Recording" in self.data_interface_objects: + recording_interface = self.data_interface_objects["Recording"] + # Get original timestamps + original_timestamps = recording_interface.get_original_timestamps() + # Apply a shift + recording_interface.set_aligned_timestamps(original_timestamps + time_shift) + # Or set just the starting time + recording_interface.set_aligned_starting_time(new_start_time) +``` + +### Real-world example (cai-lab-to-nwb, zaki_2024) + +This is the most comprehensive temporal alignment example available. It handles the case where imaging timestamps start before zero (negative timestamps): + +```python +def temporally_align_data_interfaces(self, metadata=None, conversion_options=None): + if "MiniscopeImaging" in self.data_interface_objects: + imaging_interface = self.data_interface_objects["MiniscopeImaging"] + imaging_timestamps = imaging_interface.get_original_timestamps() + + if imaging_timestamps[0] < 0.0: + time_shift = abs(imaging_timestamps[0]) + + # Shift imaging timestamps + imaging_interface.set_aligned_timestamps(imaging_timestamps + time_shift) + + # Shift segmentation timestamps + if "MinianSegmentation" in self.data_interface_objects: + seg_interface = self.data_interface_objects["MinianSegmentation"] + seg_timestamps = seg_interface.get_original_timestamps() + seg_interface.set_aligned_timestamps(seg_timestamps + time_shift) + + # Shift sleep classification intervals + if "SleepClassification" in self.data_interface_objects: + sleep_interface = self.data_interface_objects["SleepClassification"] + start_times, stop_times, states = sleep_interface.get_sleep_states_times() + start_times += time_shift + stop_times += time_shift + sleep_interface.set_aligned_interval_times( + start_times=start_times, stop_times=stop_times + ) + + # Shift EDF 
starting time + if "EDFSignals" in self.data_interface_objects: + edf_interface = self.data_interface_objects["EDFSignals"] + edf_interface.set_aligned_starting_time(time_shift) + + # Shift freezing behavior + if "FreezingBehavior" in self.data_interface_objects: + fb_interface = self.data_interface_objects["FreezingBehavior"] + start_times, stop_times = fb_interface.get_interval_times() + fb_interface.set_aligned_interval_times( + start_times=start_times + time_shift, + stop_times=stop_times + time_shift, + ) + starting_time = fb_interface.get_starting_time() + fb_interface.set_aligned_starting_time(starting_time + time_shift) + + # Shift video timestamps + if "Video" in self.data_interface_objects: + video_interface = self.data_interface_objects["Video"] + video_timestamps = video_interface.get_original_timestamps() + video_interface.set_aligned_timestamps(video_timestamps + time_shift) +``` + +### Real-world example: Sync channel alignment (giocomo wen22) + +The wen22 conversion computes an offset between behavioral timestamps and neural recording timestamps using an NI-DAQ sync channel: + +```python +def calculate_behavioral_offset_with_nidq_channel(self, df_epochs): + """Calculate offset between behavioral timestamps and NIDQ sync pulses.""" + session_path = Path(self.source_data["session_path"]) + nidq_file_path = session_path / f"{session_path.stem.replace('g0', 'g0_t0')}.nidq.bin" + + if nidq_file_path.is_file(): + nidq_extractor = SpikeGLXRecordingExtractor(session_path, stream_id="nidq") + epoch_change_trace = nidq_extractor.get_traces(channel_ids=["nidq#XA2"]).ravel() + times = nidq_extractor.get_times() + + # Binarize the sync signal + epoch_change_trace_bin = np.zeros(epoch_change_trace.shape, dtype=int) + epoch_change_trace_bin[epoch_change_trace > (np.max(epoch_change_trace) // 2)] = 1 + epoch_start_idxs = np.where(np.diff(epoch_change_trace_bin) > 0)[0] + + df_epochs["epoch_start_by_niqd"] = times[epoch_start_idxs][:df_epochs.shape[0]] + offset = 
(df_epochs["start_time"] - df_epochs["epoch_start_by_niqd"]).mean() + return offset + return 0 +``` + +Then all behavioral timestamps are shifted by this offset: +```python +df_position_data["timestamps"] -= offset_for_behavioral_time_stamps +``` + +### Alignment API summary + +| Method | When to use | +|--------|-------------| +| `interface.get_original_timestamps()` | Get timestamps before any alignment | +| `interface.set_aligned_timestamps(timestamps)` | Replace all timestamps | +| `interface.set_aligned_starting_time(t)` | Shift starting time for regularly sampled data | +| `interface.set_aligned_interval_times(start_times, stop_times)` | Custom method for interval-based interfaces | + +--- + +## 9. Common Utilities + +### `load_dict_from_file` + +Loads YAML or JSON files into a Python dict: + +```python +from neuroconv.utils import load_dict_from_file + +metadata = load_dict_from_file(Path("metadata.yaml")) +``` + +### `dict_deep_update` + +Recursively merges two dicts. The second dict's values override the first's: + +```python +from neuroconv.utils import dict_deep_update + +base = {"NWBFile": {"lab": "old", "institution": "MIT"}} +override = {"NWBFile": {"lab": "new"}} +result = dict_deep_update(base, override) +# result = {"NWBFile": {"lab": "new", "institution": "MIT"}} +``` + +### `H5DataIO` + +Wraps numpy arrays for HDF5 compression. 
Use this for large data arrays: + +```python +from hdmf.backends.hdf5.h5_utils import H5DataIO + +spatial_series = SpatialSeries( + name="position", + data=H5DataIO(position_data, compression="gzip"), + timestamps=timestamps, + reference_frame="unknown", + unit="m", +) +``` + +### `get_module` + +Gets or creates a processing module in an NWB file: + +```python +from neuroconv.tools.nwb_helpers import get_module + +# Gets existing "behavior" module or creates it +behavior_module = get_module(nwbfile, "behavior") + +# Then add data interfaces to it +behavior_module.add(my_time_series) +``` + +Or create manually: +```python +if "behavior" not in nwbfile.processing: + behavior_module = nwbfile.create_processing_module( + name="behavior", description="Contains behavior data" + ) +else: + behavior_module = nwbfile.processing["behavior"] +``` + +### `DeepDict` + +The metadata type used throughout NeuroConv. Behaves like a nested defaultdict: + +```python +from neuroconv.utils import DeepDict + +metadata = DeepDict() +metadata["NWBFile"]["lab"] = "My Lab" # Auto-creates nested structure +``` + +--- + +## 10. Testing Patterns + +### stub_test + +The primary testing mechanism during development. Every `session_to_nwb` function should accept `stub_test: bool`: + +```python +def session_to_nwb(..., stub_test: bool = False): + if stub_test: + output_dir_path = output_dir_path / "nwb_stub" + # ... + conversion_options.update(dict(Recording=dict(stub_test=stub_test))) +``` + +Run it: +```python +session_to_nwb(data_dir_path=data_dir_path, output_dir_path=output_dir_path, stub_test=True) +``` + +This produces a small NWB file (usually a few MB) that can be quickly inspected. 
+ +### nwbinspector + +After conversion, validate with nwbinspector: + +```bash +# Command line +nwbinspector /path/to/output.nwb + +# Or in Python +from nwbinspector import inspect_nwbfile +results = list(inspect_nwbfile(nwbfile_path="/path/to/output.nwb")) +for result in results: + print(result) +``` + +Common issues nwbinspector catches: +- Missing required fields (session_description, session_start_time, identifier) +- Timezone-naive datetimes (session_start_time must have timezone) +- Subject fields not matching controlled vocabularies +- Data without units +- Empty containers + +### CI test (test-install.yml) + +The GitHub Actions workflow tests that the package can be installed and imported: + +```yaml +name: Installation +on: + workflow_dispatch: + schedule: + - cron: "0 0 1 * *" # Monthly + +jobs: + run: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ["3.12"] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - run: pip install -e . + - run: python -c "import _lab_to_nwb" +``` + +### Manual validation workflow + +1. Run `session_to_nwb()` with `stub_test=True` +2. Open the stub NWB file with `pynwb` or NWB Widgets to visually inspect +3. Run `nwbinspector` on the stub file +4. Fix any issues +5. Run `session_to_nwb()` with `stub_test=False` on one real session +6. Run `nwbinspector` on the full file +7. Run `dataset_to_nwb()` for batch conversion + +--- + +## Appendix A: Supporting Files + +### make_env.yml + +```yaml +name: _lab_to_nwb_env +channels: +- conda-forge +- defaults +dependencies: +- python>=3.11 +- pip +- pip: + - --editable . 
+``` + +### .pre-commit-config.yaml + +```yaml +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + +- repo: https://github.com/psf/black + rev: 25.1.0 + hooks: + - id: black + exclude: ^docs/ + +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.11.2 + hooks: + - id: ruff + args: [ --fix ] + +- repo: https://github.com/codespell-project/codespell + rev: v2.4.1 + hooks: + - id: codespell + additional_dependencies: + - tomli +``` + +### auto-publish.yml + +```yaml +name: Upload Package to PyPI +on: + release: + types: [published] +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - run: | + python -m pip install --upgrade pip build + python -m build + - uses: pypa/gh-action-pypi-publish@v1.4.2 + with: + verbose: true + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} +``` + +--- + +## Appendix B: Checklist for Generating a New Repo + +1. Create the directory structure as shown in Section 1 +2. Generate `pyproject.toml` with hatchling build system and correct package name +3. Create `make_env.yml`, `.pre-commit-config.yaml`, `.gitignore` +4. Copy the GitHub Actions workflows (`test-install.yml`, `auto-publish.yml`) +5. Write the `metadata.yaml` with all known lab/experiment metadata +6. Identify which built-in NeuroConv interfaces match each data modality +7. Write custom `BaseDataInterface` subclasses for data types without built-in interfaces +8. Write the `NWBConverter` class with all interfaces in `data_interface_classes` +9. If temporal alignment is needed, override `temporally_align_data_interfaces()` +10. Write `convert_session.py` following the source_data / conversion_options / metadata layering pattern +11. Write `convert_all_sessions.py` with the ProcessPoolExecutor pattern +12. Test with `stub_test=True` +13. 
Validate with `nwbinspector` +14. Write the README with installation and usage instructions + +--- + +## Appendix C: NWB Containers Quick Reference + +When writing custom interfaces, you need to know which PyNWB types to use: + +| Data type | PyNWB class | Where to add it | +|-----------|-------------|-----------------| +| Raw electrophysiology | `ElectricalSeries` | `nwbfile.add_acquisition()` | +| LFP | `LFP` containing `ElectricalSeries` | `ecephys` processing module | +| Spike times | `Units` | `nwbfile.units` | +| Position | `Position` containing `SpatialSeries` | `behavior` processing module | +| Behavioral time series | `TimeSeries` | `behavior` processing module | +| Behavioral events | `TimeIntervals` | `behavior` processing module or `nwbfile.add_stimulus()` | +| Trials | built-in | `nwbfile.add_trial()` with `nwbfile.add_trial_column()` | +| Epochs | built-in | `nwbfile.add_epoch()` with `nwbfile.add_epoch_column()` | +| Calcium imaging | `OnePhotonSeries` or `TwoPhotonSeries` | `nwbfile.add_acquisition()` | +| ROI segmentation | `PlaneSegmentation` in `ImageSegmentation` | `ophys` processing module | +| Fluorescence traces | `RoiResponseSeries` in `Fluorescence` or `DfOverF` | `ophys` processing module | +| Stimulus events | `TimeIntervals` | `nwbfile.add_stimulus()` | +| Sleep states | `TimeIntervals` | custom processing module (e.g., `sleep`) | diff --git a/src/pyflask/ai/skill/phases/01-intake.md b/src/pyflask/ai/skill/phases/01-intake.md new file mode 100644 index 000000000..c1c67960a --- /dev/null +++ b/src/pyflask/ai/skill/phases/01-intake.md @@ -0,0 +1,318 @@ +## Phase 1: Experiment Discovery + +**Goal**: Build a complete picture of the lab's experiments, data modalities, and file organization. + +**Entry**: User invokes `/nwb-convert`, possibly with a path to their data. 
+ +**Exit criteria**: You have a clear `experiment_spec` (written to `conversion_notes.md`) covering: +- What experiments were performed +- All data streams (raw and processed) for each experiment +- File formats for each stream +- How data is organized on disk (directory structure) +- Number of subjects and sessions +- Any special considerations (multiple probes, multiple FOVs, etc.) + +### Step 0a: Check Environment + +**Skip this step if running inside NWB GUIDE** (all packages are pre-installed). + +Before anything else, verify the required Python packages are installed. The skill +needs `neuroconv`, `pynwb`, `dandi`, and several inspection libraries. + +```bash +python3 -c " +missing = [] +for pkg, module in [ + ('neuroconv', 'neuroconv'), + ('pynwb', 'pynwb'), + ('dandi', 'dandi'), + ('nwbinspector', 'nwbinspector'), + ('spikeinterface', 'spikeinterface'), + ('h5py', 'h5py'), + ('remfile', 'remfile'), + ('pandas', 'pandas'), + ('pyyaml', 'yaml'), +]: + try: + __import__(module) + except ImportError: + missing.append(pkg) +if missing: + print('MISSING: ' + ' '.join(missing)) +else: + print('OK') +" +``` + +If packages are missing, install them: +```bash +pip install neuroconv pynwb dandi nwbinspector spikeinterface h5py remfile pandas pyyaml +``` + +The full environment specification is in `skills/nwb-convert/make_env.yml`. If the user +prefers conda, they can create the environment with: +```bash +conda env create -f /make_env.yml +conda activate nwb-convert +``` + +### Step 0b: Create Conversion Repo and Consult Registry + +Before the first user-facing question, set up the conversion repo and check for prior work. + +**Create the repo.** The skill calls the nwb-conversions API to create a private repo +in the `nwb-conversions` GitHub org. The user does NOT need a GitHub account — the API +handles authentication server-side. 
+ +```bash +# API base URL (Cloudflare Worker) +NWB_API="https://nwb-conversions-api.ben-dichter.workers.dev" + +# Derive lab name from user context (ask if unclear) +LAB_NAME="" +REPO_NAME="${LAB_NAME}-to-nwb" + +# Create repo via API +RESPONSE=$(curl -sf -X POST "${NWB_API}/repos" \ + -H "Content-Type: application/json" \ + -d "{\"lab_name\": \"${LAB_NAME}\"}") + +if [ $? -eq 0 ]; then + PUSH_URL=$(echo "$RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin)['push_url'])") + mkdir "${REPO_NAME}" && cd "${REPO_NAME}" + git init + git remote add origin "${PUSH_URL}" + git config user.name "nwb-conversions-bot" + git config user.email "nwb-conversions-bot@users.noreply.github.com" +else + # API unreachable — work locally only + mkdir "${REPO_NAME}" && cd "${REPO_NAME}" + git init +fi +``` + +If the API is unreachable, inform the user: +> I'll create a local conversion repo to organize the code. The conversion registry +> is not available right now, but this won't affect the conversion itself. + +All subsequent file creation should happen INSIDE this directory. When a remote is +configured, the skill pushes after every phase. + +**Seed the repo** with a `.gitignore` and initial commit: +```bash +cat > .gitignore << 'EOF' +# Python +__pycache__/ +*.py[cod] +*.egg-info/ +dist/ +build/ +*.egg + +# NWB output (don't commit data files) +*.nwb +nwb_output/ +nwb_stub/ + +# Environment +.env +*.log + +# OS +.DS_Store +Thumbs.db + +# IDE +.vscode/ +.idea/ +EOF + +git add .gitignore +git commit -m "Initial commit: add .gitignore" +if git remote get-url origin &>/dev/null; then git push; fi +``` + +**Fetch the conversion registry** to find similar prior conversions: +```bash +curl -sf "${NWB_API}/registry" > /tmp/registry.yaml || true +``` + +If the API is unreachable or the registry is empty, skip registry consultation and +proceed directly to the opening questions. + +**Search the registry** for relevant prior work. 
Look for matches on: +- Same species +- Same modalities (ecephys, ophys, behavior, icephys) +- Same file formats or interfaces +- Same recording systems (SpikeGLX, OpenEphys, Suite2p, etc.) + +```python +import yaml +from pathlib import Path + +registry_path = Path("/tmp/registry.yaml") +if registry_path.exists() and registry_path.stat().st_size > 0: + with open(registry_path) as f: + registry = yaml.safe_load(f) + + # Find conversions with matching modalities + target_modalities = {"ecephys", "behavior"} # from user description + for conv in registry.get("conversions", []): + overlap = target_modalities & set(conv.get("modalities", [])) + if overlap: + print(f"Similar: {conv['id']} ({conv['repo']})") + print(f" Modalities: {conv['modalities']}") + print(f" Interfaces: {conv['interfaces']}") + if conv.get("lessons"): + print(f" Lessons: {conv['lessons']}") +``` + +If you find relevant prior conversions, mention them to the user: +> I found N similar conversions in our registry that used the same recording system / +> modalities. I'll use those as references as we build yours. + +If the registry is empty or has no matches, proceed normally — this is expected for early conversions. + +### Opening Questions + +Start with broad, open-ended questions. Don't ask all at once — ask 2-3, then follow up. + +**First message should be something like:** +> I'd like to help you convert your data to NWB and publish it on DANDI. Let's start by +> understanding your experiment. +> +> 1. Can you briefly describe your experiment? What were you studying? +> 2. What types of neural recordings did you collect? (e.g., extracellular electrophysiology, +> calcium imaging, intracellular recordings, etc.) +> 3. Did you also record behavioral data? (e.g., position tracking, video, licking, running speed) + +**If the user provided a data path**, inspect the directory structure FIRST: +``` +ls -la +find -maxdepth 3 -type f | head -50 +``` +Then ask targeted questions based on what you see. 
+ +### Follow-up Questions (ask as needed) + +**About recordings:** +- What recording system did you use? (e.g., SpikeGLX, OpenEphys, Intan, Blackrock, Neuralynx, Axona) +- How many probes/electrodes per session? +- Did you do spike sorting? What software? (Kilosort, Phy, CellExplorer, MountainSort) +- Is there LFP data separate from the raw recording? + +**About imaging:** +- What microscope/acquisition software? (ScanImage, Scanbox, Bruker, Inscopix, Miniscope) +- One-photon or two-photon? +- Did you run segmentation? What software? (Suite2p, CaImAn, CNMFE, EXTRACT) +- Single plane or multi-plane? + +**About behavior:** +- Is there pose estimation? (DeepLabCut, SLEAP, LightningPose) +- Video recordings? How many cameras? +- Trial structure? What defines a trial? +- Stimulus presentation? What software? (PsychoPy, Bpod, Arduino) +- Task events? (licks, rewards, tone presentations, etc.) + +**About organization:** +- How are files organized? One folder per session? Per subject? +- Is there a naming convention? +- Are there processed/analyzed files in addition to raw data? +- Approximately how many sessions total? + +**About existing resources (always ask these):** +- Is there a manuscript, preprint, or published paper describing this data? + (If yes, get the DOI or URL — this helps with experiment_description and related_publications) +- Is this data already publicly available in any non-NWB format? (e.g., on Figshare, Zenodo, + institutional repository, or another archive) +- Do you have existing analysis code for this data? (e.g., MATLAB scripts, Python notebooks) + These often reveal data structure, variable names, and processing steps that inform the conversion. +- Do you have any code that reads or converts this data to another format? 
+ (Existing readers save significant reverse-engineering effort) + +### Fetching Publication Details + +When the user provides a DOI, PMID, PMC ID, or publication URL, use the paper fetcher tool +to retrieve the full text (or abstract). This is extremely valuable for understanding the +experiment, data modalities, recording parameters, and subject details. + +```bash +python3 tools/fetch_paper.py "" --extract methods +``` + +The tool accepts DOIs (e.g., `10.1038/s41586-019-1234-5`), PMIDs (e.g., `31234567`), +PMC IDs (e.g., `PMC6789012`), or URLs from doi.org, PubMed, or PMC. + +**What to extract from the paper:** +1. **Methods section** (`--extract methods`): Recording systems, file formats, number of + subjects/sessions, experimental protocols, data acquisition parameters +2. **Abstract** (`--extract abstract`): High-level experiment description for `experiment_description` +3. **Full text** (no `--extract` flag): When you need comprehensive details + +**How to use the information:** +- Pre-fill the experiment description from the abstract +- Identify data modalities and recording systems from methods +- Extract subject counts, species, and session details +- Find stimulus/behavioral task descriptions +- Get the DOI for `related_publications` (format: `"doi:10.xxxx/xxxxx"`) +- Look for mentions of data availability statements that may link to existing public data + +After fetching, confirm key details with the user — papers may describe a larger study +than what the user is converting, or parameters may have changed. + +**About subjects (collect early to plan per-subject metadata):** +- How many subjects are in this dataset? +- Do you have a spreadsheet or file with subject information? +- For each subject, we'll need: subject_id, date of birth (or age at each session), + species (Latin binomial, e.g., "Mus musculus"), sex, genotype, and ideally weight. +- Are there different experimental groups (e.g., different genotypes, treatment vs. control)? 
+ +### What to Record + +After this phase, update `conversion_notes.md` with: + +```markdown +# Conversion Notes + +## Experiment Overview +[Brief description of the experiment] + +## Data Streams +| Stream | Format | Recording System | File Pattern | NeuroConv Interface? | +|--------|--------|-----------------|--------------|---------------------| +| Raw ephys | SpikeGLX .bin | Neuropixel | *_g0_t0.imec0.ap.bin | SpikeGLXRecordingInterface | +| LFP | SpikeGLX .bin | Neuropixel | *_g0_t0.imec0.lf.bin | SpikeGLXLFPInterface | +| Spike sorting | Phy | Kilosort+Phy | phy/ folder | PhySortingInterface | +| Behavior | .txt files | Custom | *position.txt, *licks.txt | Custom needed | + +## Directory Structure +[Description or tree output] + +## Sessions +- Number of subjects: X +- Number of sessions: ~Y +- Session naming convention: ... + +## Existing Resources +- Publication: [DOI or "not yet published"] +- Existing public data: [URL or "none"] +- Analysis code: [URL or path or "none"] +- Existing data readers: [description or "none"] + +## Subjects +| subject_id | species | sex | date_of_birth | genotype | weight | group | +|------------|---------|-----|---------------|----------|--------|-------| +| ... | Mus musculus | M | 2019-10-22 | C57BL/6J | 25 g | control | + +## Open Questions +- [ ] ... +``` + +### Push Phase 1 Results + +After writing `conversion_notes.md`, commit and push: +```bash +git add conversion_notes.md +git commit -m "Phase 1: experiment discovery — data streams and directory structure" +if git remote get-url origin &>/dev/null; then git push; fi +``` diff --git a/src/pyflask/ai/skill/phases/02-data-inspection.md b/src/pyflask/ai/skill/phases/02-data-inspection.md new file mode 100644 index 000000000..9406c4469 --- /dev/null +++ b/src/pyflask/ai/skill/phases/02-data-inspection.md @@ -0,0 +1,157 @@ +## Phase 2: Data Inspection + +**Goal**: Inspect actual data files to confirm formats, understand structure, and map to NeuroConv interfaces. 
+ +**Entry**: You have a general understanding of the experiment from Phase 1. + +**Exit criteria**: For each data stream, you know: +- The exact file format and can read it programmatically +- Which NeuroConv interface handles it (or that custom code is needed) +- The source_data arguments needed (file paths, stream IDs, etc.) +- Any quirks or issues (corrupt files, missing headers, unusual organization) + +### Cross-Reference with Conversion Registry + +Before inspecting files, check the registry's `format_hints` to accelerate interface identification. +If the registry was fetched in Phase 1, use it to pre-match file patterns: + +```python +import yaml +from fnmatch import fnmatch +from pathlib import Path + +registry_path = Path("/tmp/registry.yaml") +if not registry_path.exists() or registry_path.stat().st_size == 0: + print("Registry not available — skipping format hint matching") + registry = {"format_hints": []} +else: + with open(registry_path) as f: + registry = yaml.safe_load(f) + +# Collect actual filenames from the data directory +data_path = Path("") +filenames = [f.name for f in data_path.rglob("*") if f.is_file()] + +# Match filenames against registry format_hints using glob matching +matched_interfaces = {} # interface_name → list of (pattern, seen_in) +for hint in registry.get("format_hints", []): + for pattern in hint["patterns"]: + for filename in filenames: + if fnmatch(filename, pattern): + iface = hint["interface"] + if iface not in matched_interfaces: + matched_interfaces[iface] = [] + matched_interfaces[iface].append({ + "pattern": pattern, + "matched_file": filename, + "seen_in": hint["seen_in"], + }) + break # One match per pattern is enough + +for iface, matches in matched_interfaces.items(): + repos = set() + for m in matches: + repos.update(m["seen_in"]) + print(f"Registry match: {iface} (seen in {sorted(repos)})") + for m in matches: + print(f" {m['pattern']} matched {m['matched_file']}") +``` + +When a filename matches a `format_hint` 
pattern, you can proceed with higher confidence in the +interface selection. If the same pattern has been used successfully in prior conversions, +mention this to the user and skip exploratory probing for that stream. + +### Approach + +1. **Ask for a sample session** — a single, complete session with all data streams: + > Can you point me to one complete example session? I'd like to inspect the files + > to understand the exact format and structure. + +2. **Inspect files directly** using Python. For each data stream: + + **For electrophysiology (SpikeGLX, OpenEphys, etc.):** + ```python + # Try loading with spikeinterface + import spikeinterface.extractors as se + recording = se.read_spikeglx(folder_path, stream_id="imec0.ap") + print(f"Channels: {recording.get_num_channels()}") + print(f"Sampling rate: {recording.get_sampling_frequency()}") + print(f"Duration: {recording.get_total_duration()}") + ``` + + **For spike sorting (Phy, Kilosort, etc.):** + ```python + sorting = se.read_phy(folder_path) + print(f"Units: {sorting.get_num_units()}") + print(f"Unit IDs: {sorting.get_unit_ids()}") + ``` + + **For calcium imaging (ScanImage, Scanbox, Suite2p, etc.):** + ```python + import roiextractors as re + imaging = re.read_scanbox(file_path) + print(f"FOV size: {imaging.get_image_size()}") + print(f"Num frames: {imaging.get_num_frames()}") + print(f"Sampling rate: {imaging.get_sampling_frequency()}") + ``` + + **For behavior files (.mat, .csv, .txt, .pkl, etc.):** + ```python + # For MATLAB files + import h5py # or scipy.io.loadmat for v5 .mat files + with h5py.File(path) as f: + print(list(f.keys())) + # Recursively explore structure + + # For CSV/text + import pandas as pd + df = pd.read_csv(path, sep='\t', nrows=5) + print(df.columns.tolist()) + print(df.head()) + ``` + +3. 
**Test NeuroConv interfaces** — for each data stream that has a matching interface, try instantiating it: + ```python + from neuroconv.datainterfaces import SpikeGLXRecordingInterface + interface = SpikeGLXRecordingInterface(folder_path=path, stream_id="imec0.ap") + metadata = interface.get_metadata() + print(metadata) + ``` + +4. **Identify custom interface needs** — for data streams with no NeuroConv interface: + - Document the file format, structure, and what data/metadata it contains + - Note what NWB types the data should map to (TimeSeries, SpatialSeries, TimeIntervals, etc.) + - Flag these for Phase 5 code generation + +### Common Gotchas + +- **MATLAB v7.3 files** use HDF5 format (use `h5py`), older versions use scipy.io.loadmat +- **Pickle files** may require specific package versions to deserialize +- **Text files** — check delimiter (tab vs comma vs space), header presence, encoding +- **SpikeGLX** — the meta file is essential; make sure .bin and .meta are co-located +- **Suite2p** — look for the `suite2p/plane0/` directory structure +- **Multiple probes** — SpikeGLX uses imec0, imec1, etc.; each needs its own interface instance + +### Update conversion_notes.md + +Add an "Interface Mapping" section: + +```markdown +## Interface Mapping +| Stream | Interface | source_data | Status | +|--------|-----------|-------------|--------| +| Raw AP | SpikeGLXRecordingInterface | folder_path, stream_id="imec0.ap" | Verified | +| LFP | SpikeGLXLFPInterface | folder_path, stream_id="imec0.lf" | Verified | +| Sorting | PhySortingInterface | folder_path | Verified | +| VR position | CUSTOM: VRBehaviorInterface | file_path | Needs implementation | +| Lick events | CUSTOM: EventsInterface | folder_path | Needs implementation | +``` + +### Push Phase 2 Results + +After updating `conversion_notes.md` with the interface mapping, commit and push: +```bash +git add conversion_notes.md +git commit -m "Phase 2: data inspection — interface mapping and format details" +if git 
remote get-url origin &>/dev/null; then git push; fi +``` diff --git a/src/pyflask/ai/skill/phases/03-metadata.md b/src/pyflask/ai/skill/phases/03-metadata.md new file mode 100644 index 000000000..64bb532d2 --- /dev/null +++ b/src/pyflask/ai/skill/phases/03-metadata.md @@ -0,0 +1,191 @@ +## Phase 3: Metadata Collection + +**Goal**: Gather all metadata required for a complete, valid NWB file. + +**Entry**: You know all data streams and their interfaces from Phase 2. + +**Exit criteria**: You have complete metadata for: +- NWBFile-level fields (session_description, experiment_description, institution, lab, etc.) +- Subject fields (species, sex, age, genotype, subject_id) +- Device and electrode/imaging plane descriptions +- Session-specific fields (session_start_time with timezone, session_id) +- Trial/epoch structure if applicable + +### Required NWB Metadata + +**NWBFile (ask the user for these):** +- `session_description` — What happened in this session? (Required by NWB) +- `experiment_description` — Overall experiment description (can be paper abstract) +- `institution` — University/institute name +- `lab` — PI's lab name +- `experimenter` — List of experimenters as ["Last, First"] +- `keywords` — Relevant keywords for discoverability +- `related_publications` — DOI format: `"doi:10.xxxx/xxxxx"` (not URLs) + +**Subject (ask the user for these):** +- `species` — Latin binomial (e.g., "Mus musculus", "Rattus norvegicus", "Homo sapiens") or NCBI taxonomy URI +- `sex` — One of: "M", "F", "U" (unknown), "O" (other). Single uppercase letter only. +- `age` — ISO 8601 duration: "P90D" (90 days), "P12W" (12 weeks), "P3M" (3 months). 
Can be a range: "P90D/P120D" +- `subject_id` — Unique identifier (required for DANDI) +- `genotype` — If transgenic +- `strain` — e.g., "C57BL/6J" (separate from species) +- `date_of_birth` — Preferred over `age` when available (datetime with timezone) +- `weight` — Format as "numeric unit": "0.025 kg" or "25 g" (not just a number) +- `description` — Any additional notes + +### Modality-Specific Metadata + +**For ophys (calcium imaging) experiments, also ask:** +- What brain region were you imaging? (e.g., "CA1", "V1", "mPFC") +- What calcium indicator did you use? (e.g., "GCaMP6f", "GCaMP7f", "jRGECO1a") +- What was the excitation wavelength? (e.g., 920 nm for GCaMP, 1040 nm for jRGECO) +- What objective did you use? (e.g., "Nikon 16x/0.8w") +- Single-plane or multi-plane imaging? + +These map to NWB metadata: +```yaml +Ophys: + Device: + - name: Microscope + description: Two-photon microscope + manufacturer: Scanbox # or Bruker, Thorlabs, etc. + ImagingPlane: + - name: ImagingPlane + description: Imaging plane in hippocampal CA1 + excitation_lambda: 920.0 + indicator: GCaMP6f + location: CA1 +``` + +**For ecephys (extracellular electrophysiology), also ask:** +- What brain region(s) were you recording from? (Use Allen Brain Atlas terminology for mice, e.g., "CA1", "VISp", "MOs") +- What probe model? (e.g., Neuropixels 1.0, Neuropixels 2.0, Cambridge NeuroTech H2) +- How many probes per session? +- Do you have histology-confirmed electrode locations? (If so, these should override intended targets) + +These are usually auto-extracted from SpikeGLX/OpenEphys metadata, but confirm with the user. +Note: every electrode MUST have a `location` value — use "unknown" if the region is truly unknown. + +**Session-specific (often extracted from data):** +- `session_start_time` — MUST include timezone (e.g., America/New_York) +- `session_id` — Unique session identifier + +### How to Ask + +Don't dump a giant form. 
Instead, ask in context: + +> Now I need to collect some metadata for the NWB files. Let me start with the basics: +> +> 1. What institution and lab is this from? +> 2. Who are the experimenters? (First and last names) +> 3. What species are the subjects? Are they a specific strain or transgenic line? + +Then follow up: +> For the NWB files, I need a session description (what happened in a typical session) +> and an experiment description (the overall goal — this could be the abstract from +> your paper if you have one). Can you provide these? + +### Metadata That Can Be Auto-Extracted + +Many fields come from the data files themselves. Check what the interfaces provide: +```python +converter = MyNWBConverter(source_data=source_data) +metadata = converter.get_metadata() +print(json.dumps(metadata, indent=2, default=str)) +``` + +Typically auto-extracted: +- `session_start_time` from SpikeGLX, OpenEphys, ScanImage headers +- `Device` info (probe model, serial number) from SpikeGLX meta files +- `ElectrodeGroup` and electrode positions from probe geometry +- Sampling rates, channel counts + +### Where Metadata Goes + +Metadata is stored in a `metadata.yaml` file alongside the conversion code: + +```yaml +NWBFile: + experiment_description: > + We recorded neural activity in the medial entorhinal cortex + while mice navigated a virtual reality track. + institution: Stanford University + lab: Giocomo Lab + experimenter: + - Wen, John + - Giocomo, Lisa + keywords: + - virtual reality + - entorhinal cortex + - navigation + related_publications: + - doi:10.xxxx/xxxxx # DOI format, not a URL — see the NWBFile field list above + +Subject: + species: Mus musculus + strain: C57BL/6J + sex: M +``` + +Session-specific metadata (subject_id, session_start_time) is set programmatically +in `convert_session.py` since it varies per session. 
+ +### Push Phase 3 Results + +After collecting metadata, commit and push the metadata files (skipping any that were not created, since `git add` aborts on a missing pathspec): +```bash +for f in conversion_notes.md metadata.yaml subject_metadata.yaml; do [ -f "$f" ] && git add "$f"; done +git commit -m "Phase 3: metadata collection — NWBFile, Subject, and device metadata" +if git remote get-url origin &>/dev/null; then git push; fi +``` + +### Per-Subject Metadata + +You MUST collect subject-level metadata for each subject. This is required for DANDI upload. + +For each subject, collect: +- `subject_id` — **Required**. Unique identifier. +- `species` — **Required**. Latin binomial (e.g., "Mus musculus", "Rattus norvegicus"). +- `sex` — **Recommended**. One of "M", "F", "U", "O". +- `date_of_birth` — **Recommended**. Or `age` per session as ISO 8601 duration (e.g., "P90D"). +- `genotype` — **Recommended** if transgenic. +- `weight` — **Recommended**. At time of experiment or implant. +- `strain` — **Recommended** (e.g., "C57BL/6J"). + +If there are multiple subjects, create a `subject_metadata.yaml` (or `.json`) keyed by +subject_id: + +```yaml +N2: + species: Mus musculus + strain: C57BL/6J + sex: M + date_of_birth: 2019-10-22 + weight: "0.025 kg" +R5: + species: Mus musculus + genotype: CaMKII-cre hemizygous + sex: F + date_of_birth: 2019-06-15 + weight: "0.022 kg" +``` + +Ask the user if they have a spreadsheet or JSON file with this information. If they have +analysis code, it often contains subject metadata as a lookup table or config file. + +### Timezone Handling + +Session start times MUST have timezone information. Ask the user: +> What timezone was the data collected in? 
+ +Common US timezones: +- `America/New_York` (Eastern) +- `America/Chicago` (Central) +- `America/Denver` (Mountain) +- `America/Los_Angeles` (Pacific) + +Use `zoneinfo.ZoneInfo` in the conversion code: +```python +from zoneinfo import ZoneInfo +tz = ZoneInfo("America/Los_Angeles") +metadata["NWBFile"]["session_start_time"] = session_start_time.replace(tzinfo=tz) +``` diff --git a/src/pyflask/ai/skill/phases/04-sync.md b/src/pyflask/ai/skill/phases/04-sync.md new file mode 100644 index 000000000..587b9585e --- /dev/null +++ b/src/pyflask/ai/skill/phases/04-sync.md @@ -0,0 +1,112 @@ +## Phase 4: Synchronization Analysis + +**Goal**: Understand how different data streams are temporally aligned and implement sync logic. + +**Entry**: You know all data streams and interfaces from Phase 2. + +**Exit criteria**: For every pair of data streams, you know: +- Whether they share a clock (same timestamps) +- If not, how to align them (TTL pulses, shared events, known offsets) +- The specific implementation plan for temporal alignment + +### Why This Matters + +NWB requires all data in a file to share a common time base. Different recording systems +often run on independent clocks that drift relative to each other. Without proper sync, +behavioral events won't align with neural data. 
+ +### Common Synchronization Patterns + +**Pattern 1: Shared clock (simplest)** +- All data comes from the same system (e.g., SpikeGLX records both neural and NIDQ) +- Or all data was processed together with aligned timestamps +- Action: No sync needed — timestamps are already aligned + +**Pattern 2: TTL pulse alignment** +- One system sends TTL pulses that are recorded by another +- E.g., behavior computer sends trial start TTLs recorded on SpikeGLX NIDQ channel +- Action: Extract TTL times from both streams, use as alignment anchors + +```python +# In NWBConverter.temporally_align_data_interfaces(): +from spikeinterface.extractors import SpikeGLXRecordingExtractor +nidq_recording = SpikeGLXRecordingExtractor(folder_path=path, stream_id="nidq") +nidq_data = nidq_recording.get_traces(channel_ids=["nidq#XA2"]) +# Find rising edges +rising_edges = np.where(np.diff((nidq_data > threshold).astype(int)) > 0)[0] +ttl_times_neural = rising_edges / nidq_recording.get_sampling_frequency() + +# Compare with behavioral event times to compute offset +offset = np.mean(ttl_times_neural[:n] - behavioral_event_times[:n]) +``` + +**Pattern 3: Starting time offset** +- Streams start at different times but run at the same rate +- Action: Compute the offset and use `set_aligned_starting_time()` + +```python +interface.set_aligned_starting_time(offset_seconds) +``` + +**Pattern 4: Interpolation between clocks** +- Streams run on different clocks that may drift +- Periodic sync pulses recorded by both systems +- Action: Use `align_by_interpolation()` with matched timepoints + +```python +interface.align_by_interpolation( + unaligned_timestamps=sync_times_in_this_clock, + aligned_timestamps=sync_times_in_reference_clock +) +``` + +**Pattern 5: Frame-based alignment (imaging)** +- Behavioral data logged per imaging frame +- Action: Use imaging frame times as the time base + +**Pattern 6: Multi-clock interpolation (complex)** +- Multiple independent clocks need cross-alignment (e.g., odor 
clock, behavior clock, imaging clock) +- Action: Chain interpolations through a reference clock + +### Questions to Ask + +> I need to understand how your data streams are synchronized: +> +> 1. Do all your recording systems share a common clock, or are they independent? +> 2. Do you use any synchronization signals (TTL pulses, sync LEDs, shared triggers)? +> 3. If so, which system generates the sync signal and which systems record it? +> 4. Is there a master clock that everything is referenced to? + +Follow up based on answers: +- If TTL: Which channel? What does the pulse pattern mean? (rising edge = trial start?) +- If shared clock: How? (same DAQ, hardware sync, network time?) +- If no sync: Is approximate alignment acceptable? Do files have wall-clock timestamps? + +### What to Record + +Update `conversion_notes.md`: + +```markdown +## Synchronization +- Reference clock: SpikeGLX neural recording +- Behavior → Neural: TTL pulses on NIDQ channel XA2, rising edge = epoch start +- Imaging → Neural: Frame trigger on NIDQ channel XA0 +- Method: Compute mean offset from first N TTL events + +### Sync Implementation Plan +Override `temporally_align_data_interfaces()` in the NWBConverter: +1. Read NIDQ channel XA2 from SpikeGLX +2. Find rising edges → neural epoch times +3. Compare with behavioral file epoch boundaries +4. Compute mean offset +5. 
Shift all behavioral timestamps by offset +``` + +### Push Phase 4 Results + +After documenting the sync plan, commit and push: +```bash +git add conversion_notes.md +git commit -m "Phase 4: synchronization analysis — sync plan documented" +if git remote get-url origin &>/dev/null; then git push; fi +``` diff --git a/src/pyflask/ai/skill/phases/05-code-generation.md b/src/pyflask/ai/skill/phases/05-code-generation.md new file mode 100644 index 000000000..856254732 --- /dev/null +++ b/src/pyflask/ai/skill/phases/05-code-generation.md @@ -0,0 +1,532 @@ +## Phase 5: Code Generation + +**Goal**: Generate a complete, pip-installable conversion repo following CatalystNeuro conventions. + +**Entry**: You have complete experiment spec, interface mapping, metadata, and sync plan. + +**Exit criteria**: A working repo with: +- Correct directory structure (cookiecutter pattern) +- `pyproject.toml` with proper dependencies +- NWBConverter class with all interfaces +- `convert_session.py` with full pipeline +- Custom DataInterface classes where needed +- `metadata.yaml` with all collected metadata +- `convert_all_sessions.py` for batch conversion + +### Step 1: Scaffold the Repository + +Create the standard directory structure INSIDE the repo that was cloned in Phase 1 +(`nwb-conversions/-to-nwb/`). All files below are relative to the repo root: + +``` +./ ← repo root (already cloned from Phase 1) +├── .gitignore ← already created in Phase 1 +├── pyproject.toml +├── README.md +├── make_env.yml +└── src/ + └── _to_nwb/ + ├── __init__.py + └── / + ├── __init__.py + ├── nwbconverter.py + ├── convert_session.py + ├── convert_all_sessions.py + ├── metadata.yaml + └── .py (if needed) +``` + +### Step 2: Write pyproject.toml + +```toml +[project] +name = "-lab-to-nwb" +version = "0.0.1" +description = "NWB conversion scripts for the Lab." 
+readme = "README.md" +requires-python = ">=3.11" +license = { text = "MIT" } +authors = [{ name = "CatalystNeuro", email = "ben.dichter@catalystneuro.com" }] +dependencies = ["neuroconv", "nwbinspector"] + +[project.optional-dependencies] + = [ + "neuroconv[]==", + # Add any additional deps needed for custom interfaces +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build] +include = ["*.yaml", "*.yml", "*.json"] + +[tool.hatch.build.targets.wheel] +packages = ["src/_lab_to_nwb"] +``` + +**Extras for NeuroConv** depend on which interfaces are used: +- SpikeGLX: `neuroconv[spikeglx]` +- OpenEphys: `neuroconv[openephys]` +- Phy: `neuroconv[phy]` +- Suite2p: `neuroconv[suite2p]` +- DeepLabCut: `neuroconv[deeplabcut]` +- Check NeuroConv's pyproject.toml for all available extras + +### Step 3: Write the NWBConverter Class + +```python +from neuroconv import NWBConverter +from neuroconv.datainterfaces import ( + # Import NeuroConv interfaces based on interface mapping +) +# Import custom interfaces +from . import CustomInterface + + +class NWBConverter(NWBConverter): + """Primary conversion class.""" + + data_interface_classes = dict( + # Map logical names to interface classes + # Names should be descriptive: Recording, LFP, Sorting, Behavior, etc. + ) + + def temporally_align_data_interfaces(self): + """Override if sync logic is needed.""" + # Implement sync plan from Phase 4 + pass +``` + +### Step 3b: Check Registry for Reusable Custom Interfaces + +Before writing a custom interface from scratch, check the conversion registry for +similar custom interfaces from prior conversions. A prior interface that handles the +same data format or creates the same NWB types can serve as a starting template. 
+ +```python +import yaml + +with open("/tmp/registry.yaml") as f: + registry = yaml.safe_load(f) + +# Search for conversions with custom interfaces that match what we need +needed_nwb_types = ["Position", "BehavioralEvents"] # what our custom data maps to +for conv in registry.get("conversions", []): + if not conv.get("has_custom_interfaces"): + continue + # The full manifest has custom_interfaces detail — fetch it from the repo + print(f"Check {conv['repo']} for custom interfaces") +``` + +If a match is found, fetch the actual interface code from the prior repo via the API: +```bash +NWB_API="https://nwb-conversions-api.ben-dichter.workers.dev" +curl -sf "${NWB_API}/repos//files/" +``` + +Use the fetched code as a starting template, adapting it to the current lab's file format +and column names. Give credit in a comment: `# Adapted from nwb-conversions/`. + +If no match is found, write the custom interface from scratch (Step 4 below). + +### Step 4: Write Custom DataInterface Classes + +For each data stream that needs custom code: + +```python +from neuroconv.basedatainterface import BaseDataInterface +from neuroconv.utils import DeepDict +from pynwb.file import NWBFile + + +class Interface(BaseDataInterface): + """Interface for reading .""" + + keywords = [""] + + def __init__(self, file_path: str): + """ + Parameters + ---------- + file_path : str + Path to the file. 
+ """ + super().__init__(file_path=file_path) + + def get_metadata(self) -> DeepDict: + metadata = super().get_metadata() + # Extract any metadata from the file + return metadata + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, **kwargs): + # Read data from self.source_data["file_path"] + # Create appropriate PyNWB objects + # Add to nwbfile + pass +``` + +#### Custom Interface Guidelines + +**Metadata responsibility**: A custom interface's `get_metadata()` should only return +metadata that can be extracted FROM THE DATA FILE ITSELF (e.g., session date from filename, +frame rate from timestamps). Lab-level metadata (institution, experimenter) and subject +metadata (species, genotype) should be handled in `convert_session.py` via metadata YAML +and subject metadata files. Do not duplicate metadata loading between the interface and +the conversion script. + +**Use `conversion` parameter, not data transformation**: When data is in non-SI units +(e.g., centimeters), do NOT multiply the data by a conversion factor. Instead, use the +`conversion` parameter on TimeSeries: +```python +# CORRECT: store raw data, use conversion factor +TimeSeries(name="position", data=pos_cm, unit="m", conversion=0.01) + +# WRONG: transform data in-place +TimeSeries(name="position", data=pos_cm * 0.01, unit="m") +``` +This preserves original data values in the file and is more NWB-idiomatic. + +**Set `resolution` when unknown**: If you don't know the resolution (smallest meaningful +difference) of a data stream, explicitly set `resolution=-1.0`. Don't leave it unset. + +**Pickle files cannot be lazily loaded.** Unlike HDF5 or binary files, pickle requires +reading the entire file into memory. This is an acceptable exception to the "load data +lazily in `__init__`" guideline. If the pickle is very large, consider loading only in +`add_to_nwbfile()` instead of `__init__()`. 
+ +**Choosing the right NWB types for custom data:** + +Always use the most specific NWB type available — don't use bare `TimeSeries` when a +subtype exists. See `knowledge/nwb-best-practices.md` for the full set of conventions. + +| Data Type | NWB Container | Where to Add | +|-----------|---------------|--------------| +| Continuous neural signal | `ElectricalSeries` | `nwbfile.add_acquisition()` | +| Position (x, y) | `Position` > `SpatialSeries` | `processing["behavior"]` | +| Running speed | `TimeSeries` | `processing["behavior"]` | +| Lick times | `TimeSeries` (binary) or ndx-events `Events` | `processing["behavior"]` | +| Trial info | `TimeIntervals` | `nwbfile.add_trial()` | +| Epochs | `TimeIntervals` | `nwbfile.add_epoch()` | +| Pupil tracking | `PupilTracking` > `TimeSeries` | `processing["behavior"]` | +| Eye position | `EyeTracking` > `SpatialSeries` | `processing["behavior"]` | +| Stimulus times | `TimeIntervals` | `nwbfile.add_stimulus()` | +| Fluorescence traces | `RoiResponseSeries` | `processing["ophys"]` | +| ROI masks | `PlaneSegmentation` | `processing["ophys"]` | +| Reward events | `TimeSeries` or `LabeledEvents` | `processing["behavior"]` | +| Animal video | `ImageSeries` (external_file) | `nwbfile.add_acquisition()` | +| Compass direction | `CompassDirection` > `SpatialSeries` | `processing["behavior"]` | +| Optogenetic stimulus | `OptogeneticSeries` | `nwbfile.add_stimulus()` | + +**For detailed PyNWB construction patterns by domain, see:** +- `knowledge/pynwb-icephys.md` — intracellular electrophysiology +- `knowledge/pynwb-optogenetics.md` — optogenetic stimulation +- `knowledge/pynwb-ophys-advanced.md` — advanced optical physiology (ROIs, segmentation, motion correction) +- `knowledge/pynwb-behavior.md` — behavior container types (PupilTracking, EyeTracking, etc.) 
+- `knowledge/pynwb-images.md` — image data and external video files +- `knowledge/pynwb-advanced-io.md` — compression, chunking, iterative write for large data +- `knowledge/ndx-fiber-photometry.md` — ndx-fiber-photometry extension (REQUIRED for fiber photometry) +- `knowledge/ndx-pose.md` — ndx-pose extension for pose estimation (DeepLabCut, SLEAP, Lightning Pose) +- `knowledge/ndx-anatomical-localization.md` — ndx-anatomical-localization for electrode/imaging plane atlas registration + +**Single-photon vs. two-photon imaging:** +Miniscope data (UCLA Miniscope, Inscopix nVista/nVoke) is **single-photon** (one-photon) +imaging and MUST use `OnePhotonSeries`, not `TwoPhotonSeries`. Two-photon imaging +(ScanImage, Scanbox, Bruker, Prairie) uses `TwoPhotonSeries`. Getting this wrong is a +common mistake. Check: +- Miniscope → `OnePhotonSeries` (via `MiniscopeImagingInterface`) +- Inscopix → `OnePhotonSeries` (via `InscopixImagingInterface`) +- ScanImage, Scanbox, Bruker → `TwoPhotonSeries` +- If unsure, ask the user whether their microscope uses one-photon or two-photon excitation. + +**Key constraints on SpatialSeries:** +- Only for position data (x, y, z). Velocity and acceleration should use `TimeSeries`. +- Must have 1, 2, or 3 data columns (not more). +- When inside `CompassDirection`, units must be `"degrees"` or `"radians"`. +- When using degrees, data values should be in [-360, 360]; radians in [-2pi, 2pi]. + +#### Behavioral vs. 
Stimulus Data + +When a dataset has both behavioral and stimulus columns (common in VR experiments), +separate them: + +**Behavioral data** → `processing["behavior"]` via `BehavioralTimeSeries`, `Position`, etc.: +- Position / spatial location +- Running speed / velocity +- Lick events / lick rate +- Eye position / pupil diameter +- Pose estimation keypoints + +**Stimulus data** → `nwbfile.add_stimulus()`: +- Visual stimulus parameters (contrast, orientation, spatial frequency) +- Environment parameters (morph value, jitter) +- Optogenetic stimulus waveforms +- Auditory stimulus parameters + +**Reward** can go in either, but prefer `processing["behavior"]` if it represents the +animal's experience (reward delivery events), or `nwbfile.add_stimulus()` if it represents +an experimenter-controlled parameter. + +**Use `get_module()` to get or create processing modules:** +```python +from neuroconv.tools.nwb_helpers import get_module +behavior_module = get_module(nwbfile, "behavior", "Processed behavioral data") +behavior_module.add(my_container) +``` + +**Use `H5DataIO` for compression:** +```python +from hdmf.backends.hdf5.h5_utils import H5DataIO +data_compressed = H5DataIO(data=my_array, compression="gzip") +``` + +#### Time Series Best Practices (from NWB Inspector) + +Follow these in every custom interface and `add_to_nwbfile()` method: + +1. **Time-first orientation**: data shape must be `(n_timepoints, ...)`. If source data is + `(channels, timepoints)`, transpose before adding: `data = data.T` +2. **Timestamps in seconds**: all timestamps are in seconds relative to `session_start_time`. +3. **Ascending, non-negative, no NaN**: timestamps must be sorted ascending, >= 0, no NaN. +4. **Use `rate` for regular sampling**: if the signal has a constant sampling rate, use + `rate=` and `starting_time=` instead of a `timestamps` array. +5. 
**SI units via `conversion`**: set `unit` to the SI unit (e.g., `"m"`, `"V"`) and use + `conversion` to express the factor from stored data to SI. +6. **Every text field must be meaningful**: no empty strings for `description`, `unit`, etc. +7. **Breaks in recording**: if there are gaps, use explicit `timestamps` (not `rate`) or + create separate TimeSeries objects per continuous segment. + +#### Table Best Practices + +When creating DynamicTable objects (trials, epochs, electrodes, custom tables): + +- **Boolean columns**: name with `is_` prefix (e.g., `is_correct`, `is_rewarded`) +- **Timing columns**: name with `_time` suffix (e.g., `start_time`, `reward_time`) +- **No JSON strings**: don't encode structured data as JSON in string columns +- **No empty tables**: don't create tables with zero rows +- **Unique IDs**: keep the default auto-incrementing `id` column + +#### Ecephys Best Practices + +When working with electrodes and spike sorting data: + +- **Electrode `location` is required**: always fill it. Use Allen Brain Atlas terms for mice. + Use `"unknown"` only if the region is truly unknown. +- **Don't duplicate metadata in electrodes table**: don't add `unit`, `gain`, or `offset` + columns. Those belong on `ElectricalSeries` (as `channel_conversion` and `offset`). +- **Spike times must be ascending and positive**: verify sorted order, no negative values. +- **Use `obs_intervals`** on the units table if the recording has gaps. + +#### Video Best Practices + +- **Animal behavior videos** (webcam, running wheel cam): store as external files using + `ImageSeries(external_file=[relative_path], ...)`. Use relative paths. +- **Neural imaging data** (two-photon, miniscope): store internally with lossless compression. +- **Don't set `starting_frame`** unless using `external_file`. 
+ +### Step 5: Write convert_session.py + +Follow the standard pattern: + +```python +from pathlib import Path +from typing import Union +from zoneinfo import ZoneInfo + +from neuroconv.utils import load_dict_from_file, dict_deep_update + +from . import NWBConverter + + +def session_to_nwb( + data_dir_path: Union[str, Path], + output_dir_path: Union[str, Path], + stub_test: bool = False, +): + data_dir_path = Path(data_dir_path) + output_dir_path = Path(output_dir_path) + if stub_test: + output_dir_path = output_dir_path / "nwb_stub" + output_dir_path.mkdir(parents=True, exist_ok=True) + + # Determine session_id and subject_id from path/filenames + session_id = "..." + subject_id = "..." + nwbfile_path = output_dir_path / f"{session_id}.nwb" + + # Build source_data + source_data = dict() + conversion_options = dict() + + # Add each interface with its file paths + source_data["Recording"] = dict(folder_path=str(data_dir_path / "...")) + conversion_options["Recording"] = dict(stub_test=stub_test) + + # Conditionally add interfaces if files exist + behavior_path = data_dir_path / "behavior.txt" + if behavior_path.is_file(): + source_data["Behavior"] = dict(file_path=str(behavior_path)) + conversion_options["Behavior"] = dict() + + # Create converter + converter = NWBConverter(source_data=source_data) + + # Get and merge metadata + metadata = converter.get_metadata() + + metadata_path = Path(__file__).parent / "metadata.yaml" + editable_metadata = load_dict_from_file(metadata_path) + metadata = dict_deep_update(metadata, editable_metadata) + + # Set session-specific metadata + tz = ZoneInfo("") + if metadata["NWBFile"]["session_start_time"]: + metadata["NWBFile"]["session_start_time"] = ( + metadata["NWBFile"]["session_start_time"].replace(tzinfo=tz) + ) + metadata["NWBFile"]["session_id"] = session_id + + # Subject metadata — subject_id is required for DANDI + metadata["Subject"]["subject_id"] = subject_id + # Load per-subject metadata from file if available + # See 
knowledge/nwb-best-practices.md for required formats: + # species: Latin binomial (e.g., "Mus musculus") + # sex: one of "M", "F", "U", "O" + # age: ISO 8601 duration (e.g., "P90D") + # weight: "numeric unit" (e.g., "0.025 kg") + + # Run conversion + converter.run_conversion( + nwbfile_path=nwbfile_path, + metadata=metadata, + conversion_options=conversion_options, + overwrite=True, + ) + + +if __name__ == "__main__": + # Example usage + data_dir_path = Path("/path/to/data") + output_dir_path = Path("/path/to/output") + session_to_nwb( + data_dir_path=data_dir_path, + output_dir_path=output_dir_path, + stub_test=True, # Set to False for full conversion + ) +``` + +### Step 6: Write convert_all_sessions.py + +```python +from pathlib import Path +from concurrent.futures import ProcessPoolExecutor +import traceback + +from .convert_session import session_to_nwb + + +def get_session_to_nwb_kwargs_per_session(data_dir_path): + """Discover all sessions and return kwargs for each.""" + # Implement session discovery logic + # Return list of dicts, each with kwargs for session_to_nwb + raise NotImplementedError("Implement session discovery") + + +def safe_session_to_nwb(**kwargs): + """Wrapper that catches and logs exceptions.""" + exception_file_path = kwargs.pop("exception_file_path", None) + try: + session_to_nwb(**kwargs) + except Exception: + if exception_file_path: + with open(exception_file_path, "w") as f: + f.write(traceback.format_exc()) + else: + raise + + +def dataset_to_nwb( + data_dir_path, + output_dir_path, + max_workers=1, + stub_test=False, +): + data_dir_path = Path(data_dir_path) + output_dir_path = Path(output_dir_path) + exception_dir = output_dir_path / "exceptions" + exception_dir.mkdir(parents=True, exist_ok=True) + + kwargs_list = get_session_to_nwb_kwargs_per_session(data_dir_path) + + with ProcessPoolExecutor(max_workers=max_workers) as executor: + for kwargs in kwargs_list: + kwargs["output_dir_path"] = output_dir_path + kwargs["stub_test"] = 
stub_test + session_id = kwargs.get("session_id", "unknown") + kwargs["exception_file_path"] = str(exception_dir / f"{session_id}.txt") + executor.submit(safe_session_to_nwb, **kwargs) +``` + +### Step 7: Write metadata.yaml + +Use the metadata collected in Phase 3. See Phase 3 for format. + +### Step 8: Write README.md + +```markdown +# -lab-to-nwb + +NWB conversion scripts for the [ Lab](lab_url) data, +using [NeuroConv](https://github.com/catalystneuro/neuroconv). + +## Installation + +```bash +pip install -lab-to-nwb +``` + +## Usage + +### Single session +```python +from ..convert_session import session_to_nwb + +session_to_nwb( + data_dir_path="/path/to/session", + output_dir_path="/path/to/output", + stub_test=False, +) +``` + +### All sessions +```python +from ..convert_all_sessions import dataset_to_nwb + +dataset_to_nwb( + data_dir_path="/path/to/data", + output_dir_path="/path/to/output", + max_workers=4, +) +``` +``` + +### Step 9: Commit and Push to nwb-conversions + +After all code is generated and the repo is scaffolded, commit everything and push to the +`nwb-conversions` GitHub org. The remote was set up in Phase 1 via `gh repo create --clone`. + +```bash +git add -A +git commit -m "Add conversion code for + +Generated by nwb-convert skill. Includes: +- NWBConverter with interfaces +- custom DataInterface classes +- convert_session.py and convert_all_sessions.py +- metadata.yaml with lab and experiment metadata" +if git remote get-url origin &>/dev/null; then git push; fi +``` + +This makes the conversion code immediately available in the org for reference by future +conversions. The manifest will be added in Phase 7 after DANDI upload is complete. 
diff --git a/src/pyflask/ai/skill/phases/06-testing.md b/src/pyflask/ai/skill/phases/06-testing.md new file mode 100644 index 000000000..ad3ca51f9 --- /dev/null +++ b/src/pyflask/ai/skill/phases/06-testing.md @@ -0,0 +1,231 @@ +## Phase 6: Testing & Validation + +**Goal**: Verify the conversion produces valid, complete NWB files. + +**Entry**: You have generated all conversion code from Phase 5. + +**Exit criteria**: The conversion runs successfully on at least one session, the output +passes nwbinspector validation, and the data can be read back correctly. + +### Step 1: Install the Package + +```bash +cd +pip install -e ".[]" +``` + +### Step 2: Run a Stub Test + +First, run with `stub_test=True` to convert a small subset of data quickly: + +```python +from ..convert_session import session_to_nwb + +session_to_nwb( + data_dir_path="/path/to/sample/session", + output_dir_path="/path/to/output", + stub_test=True, +) +``` + +If this fails, debug the error: +- Import errors → missing dependencies in pyproject.toml +- File not found → incorrect source_data paths +- Type errors → incorrect data shapes or types in custom interfaces +- Schema validation errors → metadata doesn't match expected schema + +### Step 3: Inspect the NWB File + +Read back the file and verify contents: + +```python +from pynwb import NWBHDF5IO + +with NWBHDF5IO("/path/to/output/session.nwb", "r") as io: + nwbfile = io.read() + + # Check basic metadata + print(f"Session: {nwbfile.session_description}") + print(f"Start time: {nwbfile.session_start_time}") + print(f"Subject: {nwbfile.subject}") + + # Check acquisition data + print(f"Acquisition: {list(nwbfile.acquisition.keys())}") + + # Check processing modules + for name, module in nwbfile.processing.items(): + print(f"Processing/{name}: {list(module.data_interfaces.keys())}") + + # Check units + if nwbfile.units: + print(f"Units: {len(nwbfile.units)} units") + + # Check trials + if nwbfile.trials: + print(f"Trials: {len(nwbfile.trials)} trials") 
+ print(f"Trial columns: {nwbfile.trials.colnames}") + + # Check electrodes + if nwbfile.electrodes: + print(f"Electrodes: {len(nwbfile.electrodes)} electrodes") + + # Spot-check data values + for name, ts in nwbfile.acquisition.items(): + if hasattr(ts, 'data'): + print(f" {name}: shape={ts.data.shape}, dtype={ts.data.dtype}") +``` + +### Step 4: Run NWB Inspector + +**You MUST run nwbinspector on every converted file.** Do not skip this step or leave it for the user. + +Run it via bash and capture the full output: + +```bash +nwbinspector /path/to/output/session.nwb +``` + +Then analyze every message in the output. NWB Inspector reports issues at 4 severity levels: + +| Level | Meaning | Action Required | +|-------|---------|-----------------| +| `CRITICAL_IMPORTANCE` | Will break downstream tools or DANDI upload | **Must fix before proceeding** | +| `BEST_PRACTICE_VIOLATION` | Violates NWB best practices | **Fix all of these** | +| `BEST_PRACTICE_SUGGESTION` | Could be improved | Fix if straightforward, otherwise note for the user | +| `PYNWB_VALIDATION` | PyNWB schema violations | **Must fix before proceeding** | + +**For each issue reported, you must:** +1. Identify the root cause in the conversion code +2. Fix the code (metadata, interface, or convert_session.py) +3. Re-run the conversion (stub_test=True) +4. 
Re-run nwbinspector to confirm the fix + +**Common issues and their fixes:** + +| Inspector Message | Fix | +|-------------------|-----| +| `check_session_start_time_old_date` | Session start time is wrong or default — extract real date from source files | +| `check_session_start_time_future_date` | Timezone conversion error — verify ZoneInfo usage | +| `check_missing_text_for_session_description` | Add `session_description` to metadata.yaml or set it in convert_session.py | +| `check_subject_species_latin_binomial` | Use "Mus musculus" not "mouse", "Rattus norvegicus" not "rat" | +| `check_subject_species_form` | Species should be binomial (e.g., "Mus musculus") | +| `check_subject_age` | Format as ISO 8601 duration: "P90D" not "90 days" | +| `check_subject_sex` | Must be one of: "M", "F", "U", "O" | +| `check_data_orientation` | Time should be the first dimension. Transpose data if needed | +| `check_timestamps_match_first_dimension` | Length of timestamps must equal first dim of data | +| `check_regular_timestamps` | If data has constant rate, use `rate` + `starting_time` instead of `timestamps` | +| `check_timestamp_of_the_first_sample_is_not_negative` | Timestamps should start >= 0. 
Adjust offset | +| `check_missing_unit` | TimeSeries must have `unit` specified | +| `check_resolution` | Set resolution=-1.0 if unknown, otherwise provide actual resolution | +| `check_electrodes_table_global_ids_are_not_unique` | Electrode IDs must be unique across all probes | +| `check_empty_string_for_*` | Replace empty strings with actual descriptions | +| `check_imaging_plane_excitation_lambda` | Set `excitation_lambda` on ImagingPlane in metadata | +| `check_imaging_plane_indicator` | Set `indicator` on ImagingPlane (e.g., "GCaMP6f") | +| `check_imaging_plane_location` | Set `location` on ImagingPlane (e.g., "CA1") | +| `check_rate_is_not_zero` | TwoPhotonSeries must have nonzero `rate` — check Suite2p ops["fs"] | +| `check_plane_segmentation_image_mask_shape` | ROI masks must match imaging plane dimensions | +| `check_spatial_series_dims` | SpatialSeries must have 1, 2, or 3 data columns only | +| `check_compass_direction_unit` | CompassDirection SpatialSeries must use "degrees" or "radians" | +| `check_image_series_data_size` | Animal behavior videos should use external_file, not internal storage | +| `check_image_series_external_file_relative` | External file paths must be relative, not absolute | +| `check_no_empty_string_for_*` | All text fields (description, unit) must be non-empty | +| `check_timestamps_without_nans` | Timestamps must not contain NaN values | +| `check_timestamps_ascending` | Timestamps must be sorted in ascending order | +| `check_negative_spike_times` | All spike times must be >= 0 (session-aligned, not trial-aligned) | +| `check_ascending_spike_times` | Spike times within each unit must be in ascending order | +| `check_subject_exists` | NWBFile must have a Subject object | +| `check_subject_id_exists` | Subject must have subject_id set (required for DANDI) | +| `check_electrode_location` | Electrode location column must be filled (use "unknown" if needed) | + +**Also run `dandi validate` if the user plans to upload to DANDI:** + 
+```bash +dandi validate /path/to/output/ +``` + +This catches DANDI-specific requirements beyond nwbinspector: +- `subject_id` must be set +- `session_id` must be set +- File naming conventions for DANDI organize + +**Keep iterating until nwbinspector produces zero CRITICAL and zero BEST_PRACTICE_VIOLATION messages.** +Show the user the final clean nwbinspector output as confirmation. + +### Step 5: Run Full Conversion (one session) + +Once stub_test passes and nwbinspector is clean, run with `stub_test=False` on a single session: + +```python +session_to_nwb( + data_dir_path="/path/to/sample/session", + output_dir_path="/path/to/output", + stub_test=False, +) +``` + +Then run nwbinspector again on the full output — some issues only appear with real data +(e.g., data orientation problems, timestamp gaps, large uncompressed datasets). + +### Step 6: Validate Data Integrity + +For critical data streams, compare source and NWB values: + +```python +import numpy as np + +# Example: verify spike times +with NWBHDF5IO("output.nwb", "r") as io: + nwbfile = io.read() + nwb_spike_times = nwbfile.units["spike_times"][0] + +# Compare with source +import spikeinterface.extractors as se +sorting = se.read_phy(phy_path) +source_spike_times = sorting.get_unit_spike_train(unit_id=0, return_times=True) + +assert np.allclose(nwb_spike_times, source_spike_times, atol=1e-6) +``` + +### Step 7: Iterate + +If any issues are found: +1. Fix the issue in the conversion code +2. Re-run the stub test +3. Re-run nwbinspector — confirm zero CRITICAL/BEST_PRACTICE_VIOLATION +4. Re-run full conversion +5. Re-validate +6. 
Repeat until clean + +### Common Debugging Patterns + +**Interface won't instantiate:** +- Check that file paths in source_data are correct +- Check that the file format is what you think it is +- Try instantiating the interface in isolation + +**Data shapes are wrong:** +- Print the data shape at each step of custom interface +- Check if axes need to be transposed +- Check if time is first dimension (NWB convention) + +**Timestamps don't make sense:** +- Check if timestamps are in seconds (NWB convention) +- Check timezone handling +- Print first/last timestamps and compare with expected session duration + +**Metadata schema validation fails:** +- Print the metadata dict and compare with schema +- Check for required fields that are None or empty +- Check types (datetime vs string, list vs single value) + +### Push Phase 6 Results + +After all tests pass and nwbinspector is clean, commit any bug fixes and push: +```bash +git add -A +git commit -m "Phase 6: testing and validation — all checks passing + +nwbinspector: 0 CRITICAL, 0 BEST_PRACTICE_VIOLATION +dandi validate: passed" +if git remote get-url origin &>/dev/null; then git push; fi +``` diff --git a/src/pyflask/ai/skill/phases/07-dandi-upload.md b/src/pyflask/ai/skill/phases/07-dandi-upload.md new file mode 100644 index 000000000..b8a312371 --- /dev/null +++ b/src/pyflask/ai/skill/phases/07-dandi-upload.md @@ -0,0 +1,913 @@ +## Phase 7: DANDI Upload + +**Goal**: Upload validated NWB files to the DANDI Archive for public sharing. + +**Entry**: All NWB files are converted, validated with nwbinspector, and ready for sharing. + +**Exit criteria**: Data is uploaded to DANDI, organized correctly, and accessible via the Dandiset URL. + +### Step 0: Choose DANDI Instance + +**Always ask this first.** Before any upload steps, ask the user which DANDI instance to use: + +> We're ready to upload your NWB files to DANDI! First, which DANDI instance would you +> like to use? +> +> 1. 
**DANDI Sandbox** (sandbox.dandiarchive.org) — for testing. Data can be deleted.
+>    Use this if you want to verify everything works before publishing for real.
+> 2. **DANDI Archive** (dandiarchive.org) — the official public archive. Use this when
+>    you're ready to publish your data permanently.
+>
+> Which would you prefer?
+
+Set the instance URL based on their choice:
+- **Sandbox**: `DANDI_INSTANCE_URL=https://sandbox.dandiarchive.org`
+  and `DANDI_API_URL=https://api.sandbox.dandiarchive.org/api`
+- **Archive**: use the defaults (no env vars needed)
+
+For sandbox uploads, add `-i dandi-sandbox` to all `dandi` CLI commands.
+
+### Prerequisites
+
+Before uploading, the user needs:
+1. A DANDI account (on the chosen instance — sandbox and archive have separate accounts)
+2. A DANDI API key (from user profile on the chosen instance)
+3. A Dandiset created on the chosen instance (or you help them create one)
+4. The `dandi` CLI installed (`pip install -U dandi`)
+
+### Step 1: Create a Dandiset
+
+Guide the user through creating a Dandiset on the DANDI Archive:
+
+> Before we upload, we need to create a Dandiset on DANDI Archive. Have you already
+> created one? If not, here's how:
+>
+> 1. Go to https://dandiarchive.org and log in (or create an account)
+> 2. Click "New Dandiset" in the top right
+> 3. Fill in the metadata:
+>    - **Name**: A descriptive title for your dataset
+>    - **Description**: Abstract or summary of the dataset
+>    - **License**: Usually CC-BY-4.0 for open data
+>    - **Contributors**: Add all contributors with their ORCID IDs
+> 4. Note the 6-digit Dandiset ID (e.g., "000123")
+
+If the data should be embargoed (not publicly visible yet):
+> If your data needs to be embargoed (e.g., pending publication), select the
+> embargo option when creating the Dandiset. Embargoed data is only visible
+> to Dandiset owners until you release it.
+ +### Step 2: Set Up API Key + +```bash +# Get your API key from https://dandiarchive.org (click your initials → API Key) +export DANDI_API_KEY= +``` + +> You'll need your DANDI API key. Go to https://dandiarchive.org, click your +> initials in the top right, and copy your API key. Then set it as an environment +> variable: +> ```bash +> export DANDI_API_KEY=your_key_here +> ``` + +### Step 3: Validate Before Upload + +Run `dandi validate` on the NWB files before uploading: + +```bash +dandi validate /path/to/nwb/output/ +``` + +This checks for DANDI-specific requirements beyond what nwbinspector catches: +- File naming conventions +- Required metadata fields (subject_id, session_id) +- NWB file structure compliance + +Fix any validation errors before proceeding. + +### Step 4: Upload Using NeuroConv Helper (Recommended) + +NeuroConv provides `automatic_dandi_upload()` which handles download, organize, and upload: + +```python +from neuroconv.tools.data_transfers import automatic_dandi_upload + +automatic_dandi_upload( + dandiset_id="000123", # 6-digit Dandiset ID + nwb_folder_path="./nwb_output", # Folder with all NWB files + sandbox=False, # True for testing on sandbox server + number_of_jobs=1, # Parallel upload jobs + number_of_threads=4, # Threads per upload +) +``` + +This function: +1. Downloads the Dandiset metadata (creates the local Dandiset structure) +2. Runs `dandi organize` to rename files to DANDI conventions (sub-/sub-_ses-.nwb) +3. Uploads all organized NWB files + +### Step 5: Upload Using DANDI CLI (Alternative) + +If the NeuroConv helper doesn't work, use the DANDI CLI directly: + +```bash +# 1. Download the Dandiset structure +dandi download https://dandiarchive.org/dandiset/000123/draft +cd 000123 + +# 2. Organize NWB files into DANDI structure (renames files) +dandi organize /path/to/nwb/output/ -f dry # Preview first +dandi organize /path/to/nwb/output/ # Execute + +# 3. Validate +dandi validate . + +# 4. 
Upload +dandi upload +``` + +### Step 5b: Upload Using DANDI Python API (Alternative) + +If the CLI approaches have issues (e.g., sandbox identifier format), use the Python API directly: + +```python +from pathlib import Path +from dandi.dandiapi import DandiAPIClient + +client = DandiAPIClient.from_environ() # or DandiAPIClient(api_url="https://api.sandbox.dandiarchive.org/api") +client.dandi_authenticate() +dandiset = client.get_dandiset("000123", "draft") + +# Upload each organized NWB file +# NOTE: iter_upload_raw_asset() is on the RemoteDandiset object, NOT on DandiAPIClient +nwb_dir = Path("./000123") +for nwb_path in sorted(nwb_dir.rglob("*.nwb")): + asset_path = str(nwb_path.relative_to(nwb_dir)) + print(f"Uploading {asset_path}...") + for status in dandiset.iter_upload_raw_asset(nwb_path, asset_metadata={"path": asset_path}): + if isinstance(status, dict) and status.get("status") == "done": + print(f" Done: {status['asset'].path}") +``` + +**DANDI sandbox URL**: Always use `https://api.sandbox.dandiarchive.org/api` for the +sandbox. The older `api-staging.dandiarchive.org` URL redirects and strips auth headers, +causing 401 errors on write operations. + +### Step 6: Verify on DANDI + +After upload completes: +> Your data is now on DANDI! You can view it at: +> https://dandiarchive.org/dandiset/000123/draft +> +> Please verify: +> 1. All sessions appear in the file listing +> 2. The metadata looks correct +> 3. You can stream and preview the NWB files in Neurosift +> +> When you're ready to publish (make it permanently citable with a DOI), +> click "Publish" on the Dandiset page. This creates an immutable version. + +### Step 7: Edit Dandiset Metadata + +After uploading, programmatically populate the Dandiset metadata using the DANDI API. +If there is an associated manuscript, use OpenAlex to auto-populate contributors, funders, +and affiliations. + +> Now let's complete your Dandiset metadata so it's ready for publication. 
+> Is there an associated publication or preprint? If so, please share the DOI +> (e.g., `10.1038/s41586-023-06031-6`). + +#### 7a. Fetch Structured Data from OpenAlex + +If the user provides a DOI, query OpenAlex to get authors, ORCIDs, affiliations, ROR IDs, +and funding info: + +```python +import requests + +doi = "10.1038/s41467-023-43250-x" # user-provided +response = requests.get(f"https://api.openalex.org/works/doi:{doi}") +work = response.json() + +# Title +title = work["title"] + +# Authors with ORCIDs, affiliations, and ROR IDs +for authorship in work["authorships"]: + author = authorship["author"] + name = author["display_name"] # e.g., "Steffen Schneider" + orcid = author.get("orcid") # e.g., "https://orcid.org/0000-0003-2327-6459" + is_corresponding = authorship["is_corresponding"] + for inst in authorship.get("institutions", []): + inst_name = inst["display_name"] # e.g., "Columbia University" + inst_ror = inst.get("ror") # e.g., "https://ror.org/00hj8s172" + +# Funders with ROR IDs and award numbers +# NOTE: OpenAlex grants are often empty — check the paper's acknowledgments section +# and ask the user to confirm funding information +for grant in work.get("grants", []): + funder_name = grant["funder_display_name"] # e.g., "National Institute of Mental Health" + funder_ror = grant.get("funder", {}).get("ror") # e.g., "https://ror.org/04xeg9z08" + award_id = grant.get("funder_award_id") # e.g., "R21MH117788" +``` + +**OpenAlex data quality warnings:** +- Some authors have **null ORCIDs** — only add `identifier` to the DANDI contributor + when an ORCID actually exists. Do not set it to `null` or empty string. +- The `grants` array is **often empty** even for well-funded papers — always cross-reference + the paper's acknowledgments section and ask the user. +- OpenAlex may list **extra institutional affiliations** (historical or secondary) that + don't match the paper. Include all but flag them for the user to review. 
+
+Present the extracted data to the user for confirmation:
+
+> I found the following from OpenAlex for your paper "{title}":
+>
+> **Authors:**
+> 1. Last, First (ORCID: 0000-...) — Institution (ROR: ...)
+> 2. ...
+>
+> **Funding:**
+> 1. Agency Name — Award: XYZ123 (ROR: ...)
+>
+> Does this look correct? Should I add or remove anyone? Who should be the contact person?
+
+#### 7b. Validate Identifiers
+
+Before applying any metadata, validate all ORCID and ROR identifiers against their
+respective APIs to prevent bad data from being committed:
+
+```python
+def validate_orcid(orcid: str) -> bool:
+    """Validate ORCID exists. orcid should be bare ID like '0000-0001-2345-6789'."""
+    resp = requests.head(
+        f"https://pub.orcid.org/v3.0/{orcid}",
+        headers={"Accept": "application/json"},
+    )
+    return resp.status_code == 200
+
+def validate_ror(ror_url: str) -> bool:
+    """Validate ROR ID exists. ror_url like 'https://ror.org/01cwqze88'.
+
+    NOTE: ROR API v2 changed the response schema — org name is in
+    org["names"][0]["value"], not org["name"]. Some OpenAlex ROR IDs
+    may be stale (return 404) due to organization mergers.
+    """
+    ror_id = ror_url.replace("https://ror.org/", "")
+    resp = requests.get(f"https://api.ror.org/v2/organizations/{ror_id}")
+    return resp.status_code == 200
+```
+
+Run validation on all extracted identifiers and warn the user about any that fail:
+
+```python
+for authorship in work["authorships"]:
+    # OpenAlex emits "orcid": null for some authors; .get("orcid", "") would
+    # return None in that case, so guard with `or ""` before .replace()
+    orcid = (authorship["author"].get("orcid") or "").replace("https://orcid.org/", "")
+    if orcid and not validate_orcid(orcid):
+        print(f"WARNING: ORCID {orcid} for {authorship['author']['display_name']} not found")
+
+    for inst in authorship.get("institutions", []):
+        ror = inst.get("ror")
+        if ror and not validate_ror(ror):
+            print(f"WARNING: ROR {ror} for {inst['display_name']} not found")
+```
+
+#### 7c. 
Look Up Ontology Terms for the `about` Field + +Use the EBI Ontology Lookup Service (OLS4) to find proper ontology identifiers for brain +regions, disorders, and cell types. Never guess or fabricate ontology identifiers. + +```python +def lookup_ontology_term(term: str, ontology: str = "uberon") -> list[dict]: + """Search EBI OLS4 for an ontology term. + + ontology: 'uberon' (anatomy), 'doid' (disease), 'cl' (cell type) + """ + resp = requests.get( + "https://www.ebi.ac.uk/ols4/api/search", + params={"q": term, "ontology": ontology, "rows": "5", "queryFields": "label,synonym"}, + ) + results = resp.json().get("response", {}).get("docs", []) + return [{"label": r["label"], "iri": r["iri"], "obo_id": r.get("obo_id")} for r in results] + +# Example: look up "hippocampus" +terms = lookup_ontology_term("hippocampus", "uberon") +# → [{"label": "hippocampal formation", "iri": "http://purl.obolibrary.org/obo/UBERON_0002421", +# "obo_id": "UBERON:0002421"}, ...] +``` + +**OLS4 search pitfalls — always use exact label matching:** + +OLS4 often returns sub-regions or synonyms instead of the term you want: +- Searching "primary motor cortex" may return "primary motor cortex layer 6" as the top result +- Searching "secondary motor cortex" may return "premotor cortex" (a synonym with the same UBERON ID) +- Searching "dorsomedial striatum" returns unrelated terms — search for "dorsal striatum" instead + +**Always iterate through results and match by exact label** (case-insensitive) before +falling back to the first result: + +```python +def lookup_ontology_term_exact(term, ontology="uberon"): + """Search OLS4 with exact label matching.""" + results = lookup_ontology_term(term, ontology) + # Prefer exact label match + for r in results: + if r["label"].lower() == term.lower(): + return r + # Fall back to first result if no exact match + return results[0] if results else None +``` + +**Maintain a fallback table** for commonly used terms where OLS4 search is unreliable: + +```python 
+UBERON_FALLBACKS = { + "primary visual cortex": {"label": "primary visual cortex", "obo_id": "UBERON:0002436", + "iri": "http://purl.obolibrary.org/obo/UBERON_0002436"}, + "secondary visual cortex": {"label": "secondary visual cortex", "obo_id": "UBERON:0022232", + "iri": "http://purl.obolibrary.org/obo/UBERON_0022232"}, + "primary motor cortex": {"label": "primary motor cortex", "obo_id": "UBERON:0001384", + "iri": "http://purl.obolibrary.org/obo/UBERON_0001384"}, + "secondary motor cortex": {"label": "secondary motor cortex", "obo_id": "UBERON:0016634", + "iri": "http://purl.obolibrary.org/obo/UBERON_0016634"}, + "primary somatosensory cortex": {"label": "primary somatosensory cortex", "obo_id": "UBERON:0008933", + "iri": "http://purl.obolibrary.org/obo/UBERON_0008933"}, + "dorsal striatum": {"label": "dorsal striatum", "obo_id": "UBERON:0005382", + "iri": "http://purl.obolibrary.org/obo/UBERON_0005382"}, + "nucleus accumbens": {"label": "nucleus accumbens", "obo_id": "UBERON:0001882", + "iri": "http://purl.obolibrary.org/obo/UBERON_0001882"}, +} +``` + +Present results to the user and add confirmed terms to `about`: +```python +metadata["about"] = [ + { + "schemaKey": "Anatomy", + "name": "hippocampal formation", + "identifier": "UBERON:0002421", + }, +] +``` + +Supported ontology → `schemaKey` mapping: +| Ontology | `schemaKey` | Use for | +|----------|-------------|---------| +| UBERON | `Anatomy` | Brain regions, anatomical structures | +| DOID | `Disorder` | Diseases, disorders | +| CL | `Anatomy` | Cell types | +| HP | `Disorder` | Human phenotypes | + +#### 7d. Build the Metadata and Set via DANDI API + +Use the `dandi` Python client to programmatically update the Dandiset metadata. + +**IMPORTANT**: Never call `set_raw_metadata()` directly — it accepts invalid metadata silently. 
+Always use this `validate_and_save` wrapper that validates against the DANDI JSON schema first: + +```python +import requests, jsonschema +from dandi.dandiapi import DandiAPIClient + +_schema_cache = {} + +def validate_and_save(dandiset, metadata): + """Validate metadata against the canonical DANDI JSON schema, then save. + + Raises ValueError if metadata is invalid. Uses the official schema from + https://github.com/dandi/schema (not dandischema.models.model_json_schema(), + which has Pydantic v2 generation bugs with anyOf/type conflicts). + """ + version = metadata.get("schemaVersion", "0.7.0") + if version not in _schema_cache: + url = f"https://raw.githubusercontent.com/dandi/schema/refs/heads/master/releases/{version}/dandiset.json" + _schema_cache[version] = requests.get(url).json() + schema = _schema_cache[version] + + validator = jsonschema.Draft202012Validator(schema) + errors = sorted(validator.iter_errors(metadata), key=lambda e: list(e.absolute_path)) + if errors: + print(f"Schema validation FAILED ({len(errors)} errors):") + for err in errors: + path = ".".join(str(p) for p in err.absolute_path) + print(f" {path}: {err.message}") + raise ValueError("Fix validation errors before saving") + + dandiset.set_raw_metadata(metadata) + print("Metadata validated and saved!") + +client = DandiAPIClient.from_environ() # uses DANDI_API_KEY env var +dandiset = client.get_dandiset("000123", "draft") +metadata = dandiset.get_raw_metadata() +``` + +**Schema validation approach**: Always start from `dandiset.get_raw_metadata()` which +includes server-generated fields (`id`, `citation`, `assetsSummary`, `manifestLocation`). +Mutate only the fields you control (name, description, contributors, etc.), then validate +the **complete** metadata dict. Do NOT strip server-generated fields before validation — +they are required by the schema. 
+
+**Set title and description:**
+```python
+metadata["name"] = title  # from OpenAlex or user
+metadata["description"] = description  # paper abstract or user-provided
+metadata["keywords"] = ["hippocampus", "electrophysiology", "place cells"]  # user-provided
+```
+
+**Set contributors (persons):**
+Convert OpenAlex author names from "First Last" to "Last, First" format. Mark the
+corresponding author as ContactPerson. Mark all authors with `includeInCitation: True`.
+
+```python
+contributors = []
+for authorship in work["authorships"]:
+    author = authorship["author"]
+    display_name = author["display_name"]
+    # Convert "First Last" → "Last, First"
+    parts = display_name.rsplit(" ", 1)
+    dandi_name = f"{parts[-1]}, {parts[0]}" if len(parts) == 2 else display_name
+
+    # OpenAlex emits "orcid": null for some authors; guard with `or ""` so
+    # .replace() is never called on None
+    orcid = (author.get("orcid") or "").replace("https://orcid.org/", "")
+    roles = ["dcite:Author"]
+    if authorship["is_corresponding"]:
+        roles.append("dcite:ContactPerson")
+
+    person = {
+        "schemaKey": "Person",
+        "name": dandi_name,
+        "roleName": roles,
+        "includeInCitation": True,
+    }
+    if orcid:
+        person["identifier"] = orcid
+    # Add email for contact person (ask user)
+    if authorship["is_corresponding"]:
+        person["email"] = contact_email  # must ask user for this
+
+    # Add affiliation — IMPORTANT: schemaKey must be "Affiliation", not "Organization"
+    # "Organization" is for top-level contributors (funders); "Affiliation" is for person affiliations
+    affiliations = []
+    for inst in authorship.get("institutions", []):
+        aff = {
+            "schemaKey": "Affiliation",
+            "name": inst["display_name"],
+        }
+        if inst.get("ror"):
+            aff["identifier"] = inst["ror"]
+        affiliations.append(aff)
+    if affiliations:
+        person["affiliation"] = affiliations
+
+    contributors.append(person)
+```
+
+**Add data curators (the people who performed the conversion):**
+
+Data curators are NOT authors — they get `dcite:DataCurator` role only, and
+`includeInCitation: False` unless they made intellectual contributions to the dataset.
+ +```python +# Add each person who worked on the NWB conversion +contributors.append({ + "schemaKey": "Person", + "name": "Last, First", # person who ran the conversion + "identifier": "0000-0001-2345-6789", # their ORCID + "roleName": ["dcite:DataCurator"], + "includeInCitation": False, + "email": "curator@example.com", + "affiliation": [{"schemaKey": "Affiliation", "name": "CatalystNeuro"}], +}) +``` + +**Add funders as Organization contributors:** +```python +for grant in work.get("grants", []): + funder = { + "schemaKey": "Organization", + "name": grant["funder_display_name"], + "roleName": ["dcite:Funder"], + "includeInCitation": False, + } + if grant.get("funder", {}).get("ror"): + funder["identifier"] = grant["funder"]["ror"] + if grant.get("funder_award_id"): + funder["awardNumber"] = grant["funder_award_id"] + contributors.append(funder) +``` + +**Set contributors on metadata:** +```python +metadata["contributor"] = contributors +``` + +**Add related resources:** +```python +related = [] + +# Associated publication +related.append({ + "schemaKey": "Resource", + "identifier": f"doi:{doi}", + "url": f"https://doi.org/{doi}", + "name": title, + "relation": "dcite:IsDescribedBy", + "resourceType": "dcite:JournalArticle", # or dcite:Preprint +}) + +# Conversion code repo (if on GitHub) +related.append({ + "schemaKey": "Resource", + "url": "https://github.com/catalystneuro/lab-to-nwb", + "name": "NWB conversion code", + "relation": "dcite:IsSupplementedBy", + "resourceType": "dcite:Software", +}) + +metadata["relatedResource"] = related +``` + +**Add ontology terms to `about` (from 7c results):** +```python +metadata["about"] = [ + {"schemaKey": "Anatomy", "name": "hippocampal formation", "identifier": "UBERON:0002421"}, + # add more terms as appropriate for the experiment +] +``` + +**Add ethics approval (ask user):** +```python +metadata["ethicsApproval"] = [{ + "schemaKey": "EthicsApproval", + "identifier": "IACUC Protocol #12345", # ask user + 
"contactPoint": { + "schemaKey": "ContactPoint", + "name": "Columbia University IACUC", # ask user + }, +}] +``` + +**Set license and access:** +```python +metadata["license"] = ["spdx:CC-BY-4.0"] +metadata["access"] = [{ + "schemaKey": "AccessRequirements", + "status": "dandi:OpenAccess", +}] +``` + +**Validate and save (uses the wrapper defined above — never call `set_raw_metadata` directly):** +```python +validate_and_save(dandiset, metadata) +``` + +#### 7e. Metadata Quality Checklist + +Before saving, verify the metadata covers all quality criteria: + +- [ ] Is the title descriptive and publication-quality? +- [ ] Does the description mention data modalities and recording methods? +- [ ] Does the description include a brief methodology summary? +- [ ] Are associated publications linked with DOIs and correct relation (`dcite:IsDescribedBy`)? +- [ ] Are all paper authors listed as contributors with ORCIDs? +- [ ] Do contributors have institutional affiliations with ROR identifiers? +- [ ] Are funders listed with award numbers and ROR identifiers? +- [ ] Are relevant brain regions / anatomical structures in the `about` field (UBERON)? +- [ ] Is the license specified (`spdx:CC-BY-4.0`)? +- [ ] Is the IACUC/IRB protocol number included in `ethicsApproval`? +- [ ] Are keywords provided for discoverability? +- [ ] Is at least one contributor marked as `dcite:ContactPerson` with an email? + +#### 7f. Additional Metadata to Ask the User + +After auto-populating from OpenAlex, ask the user for anything that can't be extracted: + +> I've populated the metadata from your paper. A few more things: +> +> 1. **Contact person email**: What email should be listed for the contact person? +> 2. **Ethics approval**: What is your IACUC/IRB protocol number and institution? +> 3. **Keywords**: What keywords should I add for discoverability? +> 4. **Brain regions**: What brain regions were recorded? I'll look up the UBERON terms. +> 5. 
**Any additional contributors** not on the paper (e.g., data curators, technicians)? + +#### Publishing + +> When all metadata is complete and you're ready to make your dataset permanently citable: +> 1. Review the metadata at your Dandiset URL +> 2. Click "Publish" on the Dandiset page +> 3. This creates an immutable version with a DOI +> 4. The DOI can be used in publications to reference this exact version of the data +> +> Note: You can continue uploading files and publish new versions later. Each version +> gets its own DOI. + +### Step 8: Set Asset-Level Metadata (Brain Region per Subject) + +After uploading and setting dandiset-level metadata, set per-asset metadata — particularly +brain region when it varies across subjects or sessions. DANDI assets support an `about` +field (same schema as dandiset-level) that can hold `Anatomy` terms per file. + +#### 8a. Build a Subject → Brain Region Mapping + +Ask the user which brain regions each subject was recorded from. Often this is already +known from Phase 3 metadata collection or from the NWB files themselves: + +> Different subjects may have implants in different brain regions. Can you tell me +> which brain region(s) each subject was recorded from? 
For example: +> - Subject A001: CA1 +> - Subject A002: V1, LM +> - Subject A003: mPFC + +Or extract it programmatically from the NWB files if `electrodes.location` or +`ImagingPlane.location` is set: + +```python +from pynwb import NWBHDF5IO +from pathlib import Path + +subject_regions = {} +for nwb_path in sorted(Path("./000123").rglob("*.nwb")): + with NWBHDF5IO(str(nwb_path), "r") as io: + nwbfile = io.read() + subject_id = nwbfile.subject.subject_id if nwbfile.subject else None + regions = set() + + # From electrodes table + if nwbfile.electrodes and "location" in nwbfile.electrodes.colnames: + for loc in nwbfile.electrodes["location"].data[:]: + if loc and loc != "unknown": + regions.add(loc) + + # From imaging planes + if "ophys" in nwbfile.processing: + for container in nwbfile.processing["ophys"].data_interfaces.values(): + if hasattr(container, "imaging_plane"): + loc = container.imaging_plane.location + if loc and loc != "unknown": + regions.add(loc) + + if subject_id and regions: + subject_regions[subject_id] = list(regions) + +print(subject_regions) +# e.g., {"C005": ["nucleus accumbens"], "C015": ["nucleus accumbens", "ventral tegmental area"]} +``` + +#### 8b. Look Up UBERON Terms + +Use the same `lookup_ontology_term` function from Step 7c to resolve brain region names +to UBERON identifiers. **Use full OBO URIs** (not compact CURIEs like `UBERON:0002421`) +because the DANDI asset schema requires `"format": "uri"` on identifiers. + +Present results to the user for confirmation: + +```python +region_to_uberon = {} +for regions in subject_regions.values(): + for region in regions: + if region not in region_to_uberon: + terms = lookup_ontology_term(region, "uberon") + if terms: + best = terms[0] + region_to_uberon[region] = { + "schemaKey": "Anatomy", + "name": best["label"], + "identifier": best["iri"], # Full OBO URI, e.g., "http://purl.obolibrary.org/obo/UBERON_0012171" + } +``` + +#### 8c. 
Apply Brain Region to Each Asset + +Use the DANDI REST API directly to update each asset's `about` field. The workflow +is: list assets → GET metadata → update `about` → PUT back with `blob_id`. + +**Note**: Each PUT creates a new asset version with a new `asset_id`. + +```python +import requests + +DANDI_API = "https://api.dandiarchive.org/api" # or sandbox +HEADERS = {"Authorization": f"token {api_key}", "Content-Type": "application/json"} +DANDISET_ID = "000123" + +# List all assets +resp = requests.get(f"{DANDI_API}/dandisets/{DANDISET_ID}/versions/draft/assets/", headers=HEADERS) +assets = resp.json()["results"] + +for asset_info in assets: + asset_id = asset_info["asset_id"] + blob_id = asset_info["blob"] + path = asset_info["path"] + + # Extract subject_id from path (e.g., "sub-C005/sub-C005_ses-xxx.nwb") + subject_id = path.split("/")[0].replace("sub-", "") if path.startswith("sub-") else None + if not subject_id or subject_id not in subject_regions: + continue + + # Build anatomy entries for this subject + about = [region_to_uberon[r] for r in subject_regions[subject_id] if r in region_to_uberon] + if not about: + continue + + # GET current asset metadata + meta_resp = requests.get(f"{DANDI_API}/assets/{asset_id}/", headers=HEADERS) + metadata = meta_resp.json() + metadata["about"] = about + + # PUT updated metadata + put_resp = requests.put( + f"{DANDI_API}/dandisets/{DANDISET_ID}/versions/draft/assets/{asset_id}/", + headers=HEADERS, + json={"metadata": metadata, "blob_id": blob_id}, + ) + if put_resp.status_code == 200: + print(f" {path}: {[a['name'] for a in about]}") + else: + print(f" {path}: FAILED {put_resp.status_code} - {put_resp.text[:200]}") +``` + +If the dandiset has many assets, paginate through them: +```python +url = f"{DANDI_API}/dandisets/{DANDISET_ID}/versions/draft/assets/" +while url: + resp = requests.get(url, headers=HEADERS) + data = resp.json() + for asset_info in data["results"]: + # ... 
same update logic as above + pass + url = data.get("next") +``` + +#### 8d. Verify Asset Metadata + +Spot-check a few assets to confirm the metadata was saved: + +```python +resp = requests.get(f"{DANDI_API}/dandisets/{DANDISET_ID}/versions/draft/assets/", headers=HEADERS) +for asset_info in resp.json()["results"][:5]: + meta = requests.get(f"{DANDI_API}/assets/{asset_info['asset_id']}/", headers=HEADERS).json() + about = meta.get("about", []) + print(f" {asset_info['path']}: {[a['name'] for a in about] if about else '(none)'}") +``` + +### Testing with Sandbox + +For testing uploads before going to production: + +```python +# Use the sandbox server +automatic_dandi_upload( + dandiset_id="000123", + nwb_folder_path="./nwb_output", + sandbox=True, # Upload to sandbox.dandiarchive.org +) +``` + +Or with the CLI: +```bash +# Get your sandbox API key from https://sandbox.dandiarchive.org/ +export DANDI_API_KEY=your_sandbox_key + +# Upload to sandbox +dandi upload -i dandi-sandbox +``` + +For programmatic metadata editing on the sandbox, use: +```python +from dandi.dandiapi import DandiAPIClient + +client = DandiAPIClient(api_url="https://api.sandbox.dandiarchive.org/api") +client.dandi_authenticate() +dandiset = client.get_dandiset("000123", "draft") +# ... same metadata operations as production +``` + +The sandbox server is at https://sandbox.dandiarchive.org/ (API: https://api.sandbox.dandiarchive.org/) — +create a separate account and Dandiset there for testing. + +### Step 9: Write Conversion Manifest + +After the upload is complete and metadata is set, write a `conversion_manifest.yaml` to the +conversion repo. This manifest captures structured metadata about what was built, enabling +the weekly registry scan to aggregate it for future conversions. 
+ +Build the manifest from the conversion artifacts you've created throughout the engagement: + +```yaml +# conversion_manifest.yaml (in repo root) +schema_version: 1 +lab: "" +conversions: + - name: "" + status: completed + species: "" + modalities: [ecephys, behavior] # from Phase 1 + neuroconv_interfaces: + - name: SpikeGLXRecordingInterface + file_patterns: ["*.ap.bin", "*.ap.meta"] + - name: SpikeGLXLFPInterface + file_patterns: ["*.lf.bin", "*.lf.meta"] + - name: PhySortingInterface + file_patterns: ["spike_times.npy", "cluster_group.tsv"] + custom_interfaces: + - name: "" + file: "src///interfaces/.py" + handles: "" + creates: [Position, BehavioralEvents] # NWB types created + file_patterns: ["events.csv", "trials.csv"] + extensions: [] # any ndx-* extensions used + sync_approach: "" + dandi_id: "<6-digit dandiset ID>" + pattern: "" + lessons: + - "" + date_completed: "" +``` + +**How to populate each field:** +- `name`: The conversion subdirectory name (e.g., `experiment_2026`) +- `modalities`: Collect from the Data Streams table in `conversion_notes.md` +- `neuroconv_interfaces`: From the Interface Mapping table in `conversion_notes.md`. + Each entry has `name` (the interface class) and `file_patterns` (globs that this + interface handles, from Phase 2 inspection). +- `custom_interfaces`: From any custom DataInterface classes you wrote in Phase 5. + Include `file_patterns` for the files each custom interface reads. 
+- `extensions`: Any `ndx-*` packages used (e.g., `ndx-fiber-photometry`, `ndx-pose`) +- `sync_approach`: From Phase 4 sync plan +- `dandi_id`: The Dandiset ID from this phase +- `lessons`: Anything surprising, non-obvious, or worth knowing for future similar conversions +- `date_completed`: Today's date + +**Commit and push the manifest** (remote was configured in Phase 1 via the API): +```bash +git add conversion_manifest.yaml +git commit -m "Add conversion manifest for registry + +Dandiset: +Modalities: +Interfaces: NeuroConv + custom" +if git remote get-url origin &>/dev/null; then git push; fi +``` + +If the repo is in the `nwb-conversions` org (the normal case when the API is reachable), +the weekly registry scan will find it automatically — no further action needed. + +If working locally (API was unreachable), inform the user: +> The conversion manifest has been saved locally. To include this conversion in the +> registry for future reference, contact CatalystNeuro for assistance. + +### Step 10: Save Conversation History + +Save the Claude Code conversation that produced this conversion into the repo. This +captures every decision, data inspection, question, and code generation step for +full reproducibility. + +```bash +# Find the active Claude Code conversation JSONL (most recently modified) +CONVERSATION=$(ls -t ~/.claude/projects/*/*.jsonl 2>/dev/null | head -1) +if [ -n "$CONVERSATION" ]; then + mkdir -p .claude + cp "$CONVERSATION" .claude/conversation.jsonl + git add .claude/conversation.jsonl + git commit -m "Save Claude Code conversation history" + if git remote get-url origin &>/dev/null; then git push; fi + echo "Saved conversation: $(du -h .claude/conversation.jsonl | cut -f1)" +else + echo "No conversation JSONL found — skipping" +fi +``` + +The conversation file is a JSONL containing the full exchange between the user and Claude +Code, including tool calls, file reads, and data inspection outputs. 
It can be replayed +to understand exactly how the conversion was built. + +### Common Issues + +- **"Unable to find environment variable DANDI_API_KEY"**: Set the API key with `export DANDI_API_KEY=...` +- **Validation errors**: Run `nwbinspector` and `dandi validate` to identify issues +- **Files too large**: DANDI supports files up to 5TB. Contact DANDI team for datasets >10TB +- **Path too long**: DANDI has a 512-character path limit. Shorten session/subject IDs if needed +- **Organize step fails**: Ensure NWB files have `subject.subject_id` and `session_id` set +- **Upload hangs**: Try with `number_of_jobs=1` and `number_of_threads=1` for debugging. + Check logs at `~/Library/Logs/dandi-cli` (macOS) or `~/.cache/dandi-cli/log` (Linux) + +### Add Upload to convert_all_sessions.py + +Optionally add upload as the final step of batch conversion: + +```python +def dataset_to_nwb( + data_dir_path, + output_dir_path, + dandiset_id=None, + max_workers=1, + stub_test=False, +): + # ... run all conversions ... + + if dandiset_id and not stub_test: + from neuroconv.tools.data_transfers import automatic_dandi_upload + automatic_dandi_upload( + dandiset_id=dandiset_id, + nwb_folder_path=output_dir_path, + ) +``` diff --git a/src/pyflask/ai/skill/tools/fetch_paper.py b/src/pyflask/ai/skill/tools/fetch_paper.py new file mode 100644 index 000000000..7f4888867 --- /dev/null +++ b/src/pyflask/ai/skill/tools/fetch_paper.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Fetch full text of a scientific paper and extract specific information. + +Usage: + python fetch_paper.py [--extract
] [--query ] + +Identifier can be: + - DOI (e.g., 10.1038/s41586-019-1234-5) + - PMID (e.g., 31234567) + - PMC ID (e.g., PMC6789012) + - URL from doi.org, pubmed, pmc, or europepmc + +Examples: + python fetch_paper.py 10.1126/science.aav7893 + python fetch_paper.py 10.1126/science.aav7893 --extract methods + python fetch_paper.py PMC6525101 --extract methods + python fetch_paper.py 31000656 --extract abstract +""" + +import argparse +import json +import re +import sys +from urllib.error import HTTPError, URLError +from urllib.parse import quote +from urllib.request import Request, urlopen + + +def parse_identifier(raw: str) -> dict: + """Parse a DOI, PMID, PMC ID, or URL into a normalized identifier.""" + raw = raw.strip() + + # URL patterns + doi_url = re.match(r"https?://(?:dx\.)?doi\.org/(.+)", raw) + if doi_url: + return {"type": "doi", "id": doi_url.group(1)} + + pubmed_url = re.match(r"https?://(?:www\.)?ncbi\.nlm\.nih\.gov/pubmed/(\d+)", raw) + if pubmed_url: + return {"type": "pmid", "id": pubmed_url.group(1)} + + pmc_url = re.match(r"https?://(?:www\.)?ncbi\.nlm\.nih\.gov/pmc/articles/(PMC\d+)", raw) + if not pmc_url: + pmc_url = re.match(r"https?://pmc\.ncbi\.nlm\.nih\.gov/articles/(PMC\d+)", raw) + if pmc_url: + return {"type": "pmc", "id": pmc_url.group(1)} + + europepmc_url = re.match(r"https?://europepmc\.org/article/(\w+)/(\d+)", raw) + if europepmc_url: + return {"type": europepmc_url.group(1).lower(), "id": europepmc_url.group(2)} + + # Raw identifiers + if raw.upper().startswith("PMC"): + return {"type": "pmc", "id": raw.upper()} + if raw.isdigit() and len(raw) >= 7: + return {"type": "pmid", "id": raw} + if "/" in raw: + return {"type": "doi", "id": raw} + + return {"type": "unknown", "id": raw} + + +def fetch_url(url: str, accept: str = "application/json") -> str: + """Fetch a URL and return the response text.""" + req = Request(url, headers={"Accept": accept, "User-Agent": "NWB-GUIDE/1.0"}) + with urlopen(req, timeout=30) as resp: + return 
resp.read().decode("utf-8") + + +def resolve_ids(identifier: dict) -> dict: + """Resolve any identifier to DOI, PMID, and PMC ID using NCBI converter.""" + id_val = identifier["id"] + + if identifier["type"] == "pmc": + id_val = identifier["id"].replace("PMC", "") + query_id = f"PMC{id_val}" + else: + query_id = id_val + + url = f"https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?tool=nwbguide&format=json&ids={quote(query_id)}" + try: + data = json.loads(fetch_url(url)) + records = data.get("records", []) + if records and records[0].get("status") != "error": + r = records[0] + return { + "doi": r.get("doi"), + "pmid": str(r["pmid"]) if "pmid" in r else None, + "pmcid": r.get("pmcid"), + } + except Exception: + pass + + # Return what we have + result = {"doi": None, "pmid": None, "pmcid": None} + result[identifier["type"]] = identifier["id"] + return result + + +def fetch_bioc_fulltext(pmcid: str) -> dict | None: + """Fetch full text via NCBI BioC API (best for open access papers). + + Returns parsed sections dict or None. 
+ """ + numeric = pmcid.replace("PMC", "") + url = f"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/PMC{numeric}/unicode" + try: + data = json.loads(fetch_url(url)) + except Exception: + return None + + sections = {} + documents = data if isinstance(data, list) else [data] + + for doc in documents: + for passage in doc.get("documents", [{}])[0].get("passages", []): + infons = passage.get("infons", {}) + sec_type = infons.get("section_type", "").lower() + text = passage.get("text", "") + + if not text.strip(): + continue + + # Normalize section names + if sec_type in ("title",): + key = "title" + elif sec_type in ("abstract",): + key = "abstract" + elif sec_type in ("intro", "introduction"): + key = "introduction" + elif sec_type in ("methods", "materials", "materials and methods", "experimental"): + key = "methods" + elif sec_type in ("results", "results and discussion"): + key = "results" + elif sec_type in ("discuss", "discussion"): + key = "discussion" + elif sec_type in ("suppl", "supplementary", "supplementary material"): + key = "supplementary" + elif sec_type in ("ack", "acknowledgements", "acknowledgments", "funding"): + key = "acknowledgements" + elif sec_type in ("ref", "references"): + continue # skip references + elif "data" in sec_type and "avail" in sec_type: + key = "data_availability" + elif sec_type in ("fig", "fig_title_caption", "table", "table_title_caption"): + key = "figures_tables" + elif sec_type: + key = sec_type.replace(" ", "_")[:40] + else: + key = "body" + + if key in sections: + sections[key] += "\n" + text + else: + sections[key] = text + + return sections if sections else None + + +def fetch_pubmed_abstract(pmid: str) -> dict | None: + """Fetch abstract from PubMed E-utilities as fallback.""" + import xml.etree.ElementTree as ET + + url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id={pmid}&rettype=xml" + try: + xml_text = fetch_url(url, accept="text/xml") + root = 
ET.fromstring(xml_text) + + sections = {} + + # Title + title_el = root.find(".//ArticleTitle") + if title_el is not None and title_el.text: + sections["title"] = title_el.text + + # Abstract + abstract_parts = [] + for abs_el in root.findall(".//AbstractText"): + label = abs_el.get("Label", "") + text = "".join(abs_el.itertext()) + if label: + abstract_parts.append(f"{label}: {text}") + else: + abstract_parts.append(text) + if abstract_parts: + sections["abstract"] = "\n".join(abstract_parts) + + # Keywords + kw = [el.text for el in root.findall(".//Keyword") if el.text] + if kw: + sections["keywords"] = ", ".join(kw) + + # Journal + journal_el = root.find(".//Journal/Title") + if journal_el is not None and journal_el.text: + sections["journal"] = journal_el.text + + return sections if sections else None + except Exception: + return None + + +def fetch_europepmc_abstract(identifier: dict) -> dict | None: + """Search Europe PMC and return article metadata + abstract.""" + id_type = identifier["type"] + id_val = identifier["id"] + + if id_type == "doi": + query = f'DOI:"{id_val}"' + elif id_type == "pmid": + query = f"EXT_ID:{id_val} AND SRC:MED" + elif id_type == "pmc": + query = f"PMCID:{id_val}" + else: + query = id_val + + url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/search?query={quote(query)}&format=json&resultType=core&pageSize=1" + try: + data = json.loads(fetch_url(url)) + results = data.get("resultList", {}).get("result", []) + if not results: + return None + + r = results[0] + sections = {} + if r.get("title"): + sections["title"] = r["title"] + if r.get("abstractText"): + sections["abstract"] = r["abstractText"] + if r.get("journalTitle"): + sections["journal"] = r["journalTitle"] + if r.get("keywordList", {}).get("keyword"): + sections["keywords"] = ", ".join(r["keywordList"]["keyword"]) + + return sections if sections else None + except Exception: + return None + + +def fetch_paper(raw_identifier: str) -> dict: + """Fetch a paper and return 
structured sections. + + Strategy: + 1. Resolve identifier to DOI/PMID/PMCID + 2. Try BioC full text (best for open access PMC papers) + 3. Fall back to PubMed abstract + 4. Fall back to Europe PMC abstract + """ + identifier = parse_identifier(raw_identifier) + ids = resolve_ids(identifier) + + result = { + "identifier": identifier, + "resolved_ids": ids, + "source": None, + "sections": {}, + "has_full_text": False, + "error": None, + } + + # Try BioC full text if we have a PMC ID + if ids.get("pmcid"): + sections = fetch_bioc_fulltext(ids["pmcid"]) + if sections: + result["source"] = "pmc_bioc" + result["sections"] = sections + result["has_full_text"] = True + return result + + # Try PubMed abstract + if ids.get("pmid"): + sections = fetch_pubmed_abstract(ids["pmid"]) + if sections: + result["source"] = "pubmed" + result["sections"] = sections + return result + + # Try Europe PMC + sections = fetch_europepmc_abstract(identifier) + if sections: + result["source"] = "europepmc" + result["sections"] = sections + return result + + result["error"] = f"Could not fetch paper for: {raw_identifier}" + return result + + +def main(): + parser = argparse.ArgumentParser( + description="Fetch scientific paper full text or abstract", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument("identifier", help="DOI, PMID, PMC ID, or URL") + parser.add_argument("--extract", help="Section to extract (e.g., methods, results, abstract, all)") + parser.add_argument("--query", help="Specific question — printed as reminder after the text") + parser.add_argument("--json", action="store_true", help="Output as JSON") + args = parser.parse_args() + + paper = fetch_paper(args.identifier) + + if paper["error"] and not paper["sections"]: + print(f"ERROR: {paper['error']}", file=sys.stderr) + sys.exit(1) + + sections = paper["sections"] + + if args.json: + out = {k: v[:8000] for k, v in sections.items()} + out["_source"] = paper["source"] + 
out["_has_full_text"] = paper["has_full_text"] + out["_resolved_ids"] = paper["resolved_ids"] + if paper["error"]: + out["_warning"] = paper["error"] + print(json.dumps(out, indent=2)) + return + + # Header + print(f"Source: {paper['source']}") + print(f"Full text: {'yes' if paper['has_full_text'] else 'no (abstract only)'}") + ids = paper["resolved_ids"] + id_strs = [f"{k}={v}" for k, v in ids.items() if v] + if id_strs: + print(f"IDs: {', '.join(id_strs)}") + print() + + if args.extract and args.extract.lower() != "all": + key = args.extract.lower().strip() + if key in sections: + print(f"=== {key.upper()} ===") + print(sections[key][:10000]) + if len(sections[key]) > 10000: + print(f"\n... [truncated, {len(sections[key])} chars total]") + else: + print(f"Section '{key}' not found.") + print(f"Available sections: {', '.join(sections.keys())}") + if "abstract" in sections: + print(f"\n=== ABSTRACT (fallback) ===") + print(sections["abstract"]) + else: + for key, text in sections.items(): + print(f"=== {key.upper()} ===") + limit = 10000 if args.extract == "all" else 3000 + print(text[:limit]) + if len(text) > limit: + print(f"... [truncated, {len(text)} chars total]") + print() + + if args.query: + print(f"\n{'='*60}") + print(f"QUERY: {args.query}") + print(f"{'='*60}") + print("(Review the text above to answer this question)") + + +if __name__ == "__main__": + main() diff --git a/src/pyflask/ai/skill_loader.py b/src/pyflask/ai/skill_loader.py new file mode 100644 index 000000000..02cbc1078 --- /dev/null +++ b/src/pyflask/ai/skill_loader.py @@ -0,0 +1,48 @@ +"""Load and expand the nwb-convert skill into a system prompt. + +Reads SKILL.md and expands `$file:` directives that include phase-specific +instructions and knowledge files. +""" + +import re +from pathlib import Path + + +def load_skill(skill_dir=None): + """Load SKILL.md and expand $file: includes, return full system prompt. 
+ + Parameters + ---------- + skill_dir : str or Path, optional + Path to the skill directory containing SKILL.md. + Defaults to the bundled skill/ directory next to this file. + + Returns + ------- + str + The fully expanded system prompt text. + """ + if skill_dir is None: + skill_dir = Path(__file__).parent / "skill" + + skill_dir = Path(skill_dir) + skill_md = (skill_dir / "SKILL.md").read_text() + + # Strip YAML frontmatter (between --- markers) + if skill_md.startswith("---"): + parts = skill_md.split("---", 2) + if len(parts) >= 3: + skill_md = parts[2] + + # Expand $file: directives — these reference relative paths from the skill dir + def expand(match): + rel_path = match.group(1).strip() + file_path = skill_dir / rel_path + if file_path.exists(): + return file_path.read_text() + else: + return f"[WARNING: File not found: {rel_path}]" + + expanded = re.sub(r"^\$file:\s*(.+)$", expand, skill_md, flags=re.MULTILINE) + + return expanded.strip() diff --git a/src/pyflask/app.py b/src/pyflask/app.py index 00de7c4da..d1ac50320 100644 --- a/src/pyflask/app.py +++ b/src/pyflask/app.py @@ -27,6 +27,7 @@ resource_path, ) from namespaces import ( # neurosift_namespace, + ai_namespace, dandi_namespace, data_namespace, neuroconv_namespace, @@ -64,6 +65,7 @@ api.add_namespace(data_namespace) api.add_namespace(system_namespace) api.add_namespace(dandi_namespace) +api.add_namespace(ai_namespace) # api.add_namespace(neurosift_namespace) # TODO: enable later api.init_app(flask_app) diff --git a/src/pyflask/namespaces/__init__.py b/src/pyflask/namespaces/__init__.py index 0f1edb274..7ad227d45 100644 --- a/src/pyflask/namespaces/__init__.py +++ b/src/pyflask/namespaces/__init__.py @@ -1,3 +1,4 @@ +from .ai_assistant import ai_namespace from .dandi import dandi_namespace from .data import data_namespace from .neuroconv import neuroconv_namespace diff --git a/src/pyflask/namespaces/ai_assistant.py b/src/pyflask/namespaces/ai_assistant.py new file mode 100644 index 
000000000..d0eb1832a --- /dev/null +++ b/src/pyflask/namespaces/ai_assistant.py @@ -0,0 +1,239 @@ +"""Flask-RESTX namespace for the AI conversion assistant. + +Provides endpoints to create agent sessions, send messages, and stream +responses via Server-Sent Events (SSE). +""" + +import json +import os +import time +from pathlib import Path + +from ai.agent import create_session, get_session, remove_session +from ai.session_store import ( + CONVERSIONS_DIR, + SESSIONS_DIR, + delete_session_record, + get_session_history, +) +from ai.session_store import list_sessions as list_saved_sessions +from flask import Response, request +from flask_restx import Namespace, Resource + +ai_namespace = Namespace("ai", description="AI conversion assistant") + + +@ai_namespace.route("/sessions") +class Sessions(Resource): + @ai_namespace.doc( + responses={200: "Success"}, + description="List all saved AI sessions.", + ) + def get(self): + """List all saved sessions (most recent first).""" + return {"sessions": list_saved_sessions()} + + @ai_namespace.doc( + responses={200: "Success", 400: "Bad Request", 500: "Internal server error"}, + description="Create a new AI agent session for NWB conversion.", + ) + def post(self): + """Create a new agent session. + + Payload: + data_dirs (list[str]): Paths to data directories to convert. + data_dir (str): Legacy single path (used if data_dirs not provided). + api_key (str, optional): Anthropic API key. + model (str, optional): Model to use. + lab_name (str, optional): Lab name for monitoring. 
+ """ + payload = ai_namespace.payload or {} + + # Support both data_dirs (list) and legacy data_dir (string) + data_dirs = payload.get("data_dirs") or [] + if not data_dirs: + single = payload.get("data_dir") + if single: + data_dirs = [single] + + if not data_dirs: + return {"message": "At least one data directory is required"}, 400 + + for d in data_dirs: + if not os.path.isdir(d): + return {"message": f"data_dir does not exist: {d}"}, 400 + + # Derive a label from the first data directory name + label = Path(data_dirs[0]).name.replace(" ", "-").lower() + repo_name = f"{label}-to-nwb" + + # Use datetime as session ID (filesystem-safe, sortable) + from datetime import datetime, timezone + + session_id = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") + + # Code repo lives at [NWB_GUIDE_DIR]/ai-sessions//