diff --git a/apps/browser-demos/pages/sqlite-test/main.ts b/apps/browser-demos/pages/sqlite-test/main.ts index 960fdd2f1..61bb84bbd 100644 --- a/apps/browser-demos/pages/sqlite-test/main.ts +++ b/apps/browser-demos/pages/sqlite-test/main.ts @@ -44,6 +44,18 @@ let kernelBytes: ArrayBuffer | null = null; let vfsImageBytes: Uint8Array | null = null; let testfixtureBytes: ArrayBuffer | null = null; +const DEFAULT_SQLITE_MAX_MEMORY_PAGES = 4096; + +function sqliteMaxMemoryPages(): number { + const raw = import.meta.env.VITE_SQLITE_BROWSER_MAX_MEMORY_PAGES; + if (raw == null || raw === "") return DEFAULT_SQLITE_MAX_MEMORY_PAGES; + const pages = Number(raw); + if (!Number.isInteger(pages) || pages <= 0) { + throw new Error(`Invalid VITE_SQLITE_BROWSER_MAX_MEMORY_PAGES: ${raw}`); + } + return pages; +} + function readVfsFile(fs: MemoryFileSystem, path: string): Uint8Array { const st = fs.stat(path); const fd = fs.open(path, 0, 0); @@ -137,6 +149,7 @@ async function init() { const fixture = readVfsFile(fs, "/usr/bin/testfixture"); testfixtureBytes = new ArrayBuffer(fixture.byteLength); new Uint8Array(testfixtureBytes).set(fixture); + const maxMemoryPages = sqliteMaxMemoryPages(); async function runSqlite(argv: string[], label: string, timeoutMs = 180_000, options: SqliteRunOptions = {}): Promise { const start = performance.now(); @@ -175,6 +188,11 @@ async function init() { const kernel = new BrowserKernel({ memfs: fs, maxWorkers: 4, + // The official SQLite testrunner starts hundreds of short-lived + // testfixture workers in one browser page. Chromium reserves each + // shared Wasm memory up to its maximum, and the 1 GiB host default can + // exhaust renderer address space before the full suite completes. 
+ maxMemoryPages, enableSyscallLog: import.meta.env.VITE_SQLITE_BROWSER_SYSCALL_LOG === "1", syscallLogPtrWidth: sqliteSyscallLogPtrWidth(), onStdout: (data) => { appendStdout(new TextDecoder().decode(data)); }, diff --git a/crates/kernel/src/process.rs b/crates/kernel/src/process.rs index d185e1063..101cbaa55 100644 --- a/crates/kernel/src/process.rs +++ b/crates/kernel/src/process.rs @@ -28,6 +28,36 @@ pub trait HostIO { fn host_read(&mut self, handle: i64, buf: &mut [u8]) -> Result; fn host_write(&mut self, handle: i64, buf: &[u8]) -> Result; fn host_seek(&mut self, handle: i64, offset: i64, whence: u32) -> Result; + fn host_pread(&mut self, handle: i64, buf: &mut [u8], offset: i64) -> Result { + if offset < 0 { + return Err(Errno::EINVAL); + } + let saved_offset = self.host_seek(handle, 0, 1)?; + let result = self + .host_seek(handle, offset, 0) + .and_then(|_| self.host_read(handle, buf)); + let restore_result = self.host_seek(handle, saved_offset, 0); + match (result, restore_result) { + (Ok(n), Ok(_)) => Ok(n), + (Err(e), _) => Err(e), + (Ok(_), Err(e)) => Err(e), + } + } + fn host_pwrite(&mut self, handle: i64, buf: &[u8], offset: i64) -> Result { + if offset < 0 { + return Err(Errno::EINVAL); + } + let saved_offset = self.host_seek(handle, 0, 1)?; + let result = self + .host_seek(handle, offset, 0) + .and_then(|_| self.host_write(handle, buf)); + let restore_result = self.host_seek(handle, saved_offset, 0); + match (result, restore_result) { + (Ok(n), Ok(_)) => Ok(n), + (Err(e), _) => Err(e), + (Ok(_), Err(e)) => Err(e), + } + } fn host_fstat(&mut self, handle: i64) -> Result; fn host_stat(&mut self, path: &[u8]) -> Result; fn host_lstat(&mut self, path: &[u8]) -> Result; diff --git a/crates/kernel/src/syscalls.rs b/crates/kernel/src/syscalls.rs index c5c7ae1d5..50a082b69 100644 --- a/crates/kernel/src/syscalls.rs +++ b/crates/kernel/src/syscalls.rs @@ -1981,7 +1981,6 @@ pub fn sys_pread( } let host_handle = ofd.host_handle; - let saved_offset = 
ofd.offset; if host_handle == SYNTHETIC_FILE_HANDLE { let data = synthetic_file_content(&ofd.path).ok_or(Errno::EBADF)?; @@ -1994,13 +1993,7 @@ pub fn sys_pread( return Ok(n); } - // Seek to the requested offset, read, then restore. - // Single-threaded, so save/seek/read/restore is safe. - host.host_seek(host_handle, offset, SEEK_SET)?; - let n = host.host_read(host_handle, buf)?; - host.host_seek(host_handle, saved_offset, SEEK_SET)?; - - Ok(n) + host.host_pread(host_handle, buf, offset) } /// Write to a file descriptor at a given offset without modifying the file position. @@ -2038,13 +2031,8 @@ pub fn sys_pwrite( } let host_handle = ofd.host_handle; - let saved_offset = ofd.offset; - - host.host_seek(host_handle, offset, SEEK_SET)?; - let n = host.host_write(host_handle, buf)?; - host.host_seek(host_handle, saved_offset, SEEK_SET)?; - Ok(n) + host.host_pwrite(host_handle, buf, offset) } /// preadv -- scatter-gather read at offset. @@ -9775,6 +9763,11 @@ mod tests { handle_paths: std::collections::HashMap>, missing_paths: std::collections::HashSet>, statfs_by_path: std::collections::HashMap, WasmStatfs>, + read_calls: usize, + write_calls: usize, + seek_calls: usize, + pread_calls: usize, + pwrite_calls: usize, } impl MockHostIO { @@ -9793,6 +9786,11 @@ mod tests { handle_paths: std::collections::HashMap::new(), missing_paths: std::collections::HashSet::new(), statfs_by_path: std::collections::HashMap::new(), + read_calls: 0, + write_calls: 0, + seek_calls: 0, + pread_calls: 0, + pwrite_calls: 0, } } @@ -9856,6 +9854,7 @@ mod tests { } fn host_read(&mut self, _handle: i64, buf: &mut [u8]) -> Result { + self.read_calls += 1; let data = b"hello"; let n = buf.len().min(data.len()); buf[..n].copy_from_slice(&data[..n]); @@ -9863,13 +9862,37 @@ mod tests { } fn host_write(&mut self, _handle: i64, buf: &[u8]) -> Result { + self.write_calls += 1; Ok(buf.len()) } fn host_seek(&mut self, _handle: i64, _offset: i64, _whence: u32) -> Result { + self.seek_calls += 1; Ok(0) 
} + fn host_pread( + &mut self, + _handle: i64, + buf: &mut [u8], + offset: i64, + ) -> Result { + self.pread_calls += 1; + let data = b"hello"; + let start = offset as usize; + if start >= data.len() { + return Ok(0); + } + let n = buf.len().min(data.len() - start); + buf[..n].copy_from_slice(&data[start..start + n]); + Ok(n) + } + + fn host_pwrite(&mut self, _handle: i64, buf: &[u8], _offset: i64) -> Result { + self.pwrite_calls += 1; + Ok(buf.len()) + } + fn host_fstat(&mut self, handle: i64) -> Result { let (uid, gid) = self.handle_owners.get(&handle).copied().unwrap_or((0, 0)); let mode = self @@ -12821,6 +12844,40 @@ mod tests { ); } + #[test] + fn test_pread_uses_positioned_host_io_without_seeking() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + let fd = sys_open(&mut proc, &mut host, b"/tmp/f", O_RDWR | O_CREAT, 0o644).unwrap(); + let ofd_idx = proc.fd_table.get(fd).unwrap().ofd_ref.0; + proc.ofd_table.get_mut(ofd_idx).unwrap().offset = 77; + + let mut buf = [0u8; 3]; + assert_eq!(sys_pread(&mut proc, &mut host, fd, &mut buf, 1), Ok(3)); + + assert_eq!(&buf, b"ell"); + assert_eq!(host.pread_calls, 1); + assert_eq!(host.read_calls, 0); + assert_eq!(host.seek_calls, 0); + assert_eq!(proc.ofd_table.get(ofd_idx).unwrap().offset, 77); + } + + #[test] + fn test_pwrite_uses_positioned_host_io_without_seeking() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + let fd = sys_open(&mut proc, &mut host, b"/tmp/f", O_RDWR | O_CREAT, 0o644).unwrap(); + let ofd_idx = proc.fd_table.get(fd).unwrap().ofd_ref.0; + proc.ofd_table.get_mut(ofd_idx).unwrap().offset = 88; + + assert_eq!(sys_pwrite(&mut proc, &mut host, fd, b"xyz", 2), Ok(3)); + + assert_eq!(host.pwrite_calls, 1); + assert_eq!(host.write_calls, 0); + assert_eq!(host.seek_calls, 0); + assert_eq!(proc.ofd_table.get(ofd_idx).unwrap().offset, 88); + } + #[test] fn test_pread_espipe_on_socket() { let mut proc = Process::new(1); diff --git a/crates/kernel/src/wasm_api.rs 
b/crates/kernel/src/wasm_api.rs index 21093d5eb..01ec542a8 100644 --- a/crates/kernel/src/wasm_api.rs +++ b/crates/kernel/src/wasm_api.rs @@ -36,6 +36,20 @@ unsafe extern "C" { fn host_read(handle: i64, buf_ptr: *mut u8, buf_len: u32) -> i32; fn host_write(handle: i64, buf_ptr: *const u8, buf_len: u32) -> i32; fn host_seek(handle: i64, offset_lo: u32, offset_hi: i32, whence: u32) -> i64; + fn host_pread( + handle: i64, + buf_ptr: *mut u8, + buf_len: u32, + offset_lo: u32, + offset_hi: i32, + ) -> i32; + fn host_pwrite( + handle: i64, + buf_ptr: *const u8, + buf_len: u32, + offset_lo: u32, + offset_hi: i32, + ) -> i32; fn host_fstat(handle: i64, stat_ptr: *mut u8) -> i32; fn host_stat(path_ptr: *const u8, path_len: u32, stat_ptr: *mut u8) -> i32; fn host_lstat(path_ptr: *const u8, path_len: u32, stat_ptr: *mut u8) -> i32; @@ -241,6 +255,43 @@ impl HostIO for WasmHostIO { } } + fn host_pread(&mut self, handle: i64, buf: &mut [u8], offset: i64) -> Result { + let offset_lo = offset as u32; + let offset_hi = (offset >> 32) as i32; + let result = unsafe { + host_pread( + handle, + buf.as_mut_ptr(), + buf.len() as u32, + offset_lo, + offset_hi, + ) + }; + if result < 0 { + match Errno::from_u32((-result) as u32) { + Some(e) => Err(e), + None => Err(Errno::EIO), + } + } else { + Ok(result as usize) + } + } + + fn host_pwrite(&mut self, handle: i64, buf: &[u8], offset: i64) -> Result { + let offset_lo = offset as u32; + let offset_hi = (offset >> 32) as i32; + let result = + unsafe { host_pwrite(handle, buf.as_ptr(), buf.len() as u32, offset_lo, offset_hi) }; + if result < 0 { + match Errno::from_u32((-result) as u32) { + Some(e) => Err(e), + None => Err(Errno::EIO), + } + } else { + Ok(result as usize) + } + } + fn host_fstat(&mut self, handle: i64) -> Result { let mut stat = WasmStat { st_dev: 0, diff --git a/docs/porting-guide.md b/docs/porting-guide.md index 79fe06e91..6e393021f 100644 --- a/docs/porting-guide.md +++ b/docs/porting-guide.md @@ -746,7 +746,11 @@ Use 
`--explain` to ask SQLite's testrunner to print the planned jobs without starting a full permutation run. Browser runs launch the SQLite-only demo page through Vite with `KANDELO_BROWSER_DEMO_INPUTS=sqlite-test` and disable HMR with `KANDELO_BROWSER_TEST_NO_HMR=1` so long test runs do not churn on -artifact writes. +artifact writes. Browser runs also cap each Wasm process at 4096 64KiB pages +(256MiB) by default to keep Chromium from reserving the host default 1GiB for +each short-lived SQLite testfixture worker. Override with +`SQLITE_BROWSER_MAX_MEMORY_PAGES=` when investigating memory-sensitive +SQLite cases. ## Troubleshooting diff --git a/host/src/browser-kernel-worker-entry.ts b/host/src/browser-kernel-worker-entry.ts index 6ba172d35..6bc5b4a84 100644 --- a/host/src/browser-kernel-worker-entry.ts +++ b/host/src/browser-kernel-worker-entry.ts @@ -62,6 +62,7 @@ if (typeof globalThis.setImmediate === "undefined") { import { CentralizedKernelWorker } from "./kernel-worker"; import type { + CloneStartGate, ForkFromThreadContext, ResolvedSpawnProgram, SpawnProgramResolution, @@ -133,6 +134,7 @@ interface ProcessInfo { } const processes = new Map(); const processTeardowns = new Map>(); +const exitGroupPids = new Set(); // Includes standalone thread-worker teardown promises that may outlive the // process map entry they came from. const workerTeardowns = new Set>(); @@ -535,18 +537,15 @@ async function handleInit(msg: Extract) { onClone: (pid, tid, fnPtr, argPtr, stackPtr, tlsPtr, ctidPtr, memory) => handleClone(pid, tid, fnPtr, argPtr, stackPtr, tlsPtr, ctidPtr, memory), onThreadExit: (pid, _tid, channelOffset) => handleThreadExit(pid, channelOffset), + onExitGroup: (pid) => exitGroupPids.add(pid), onExit: (pid, exitStatus) => handleExit(pid, exitStatus), }, ); - // In a dedicated worker, use Atomics.waitAsync directly — no V8 microtask - // chain freeze bug (that's main-thread-only). 
- kernelWorker.usePolling = false; - // Process a small batch of syscalls via microtask before yielding to the - // event loop via setImmediate. Batch size 8 is a good balance: it gives - // ~8x throughput vs batch-1 while still yielding frequently enough for - // pump timers, message handlers, and rendering to interleave. - (kernelWorker as any).relistenBatchSize = 8; + // Browser worker channels can be registered while cloned pthread workers + // and the shared Wasm memory are still settling. Polling the channel status + // avoids stranding a PENDING syscall behind a stale/missed waitAsync waiter. + kernelWorker.usePolling = true; // Inject stdout/stderr/listen callbacks const kw = kernelWorker as any; @@ -1091,7 +1090,7 @@ async function handleClone( tlsPtr: number, ctidPtr: number, memory: WebAssembly.Memory, -): Promise { +): Promise { const processInfo = processes.get(pid); if (!processInfo) throw new Error(`Unknown pid ${pid} for clone`); threadedProcessPids.add(pid); @@ -1156,6 +1155,7 @@ async function handleClone( threadWorkers.get(pid)!.push(threadEntry); let reclaimed = false; + let terminatingThread = false; const reclaimThread = () => { if (reclaimed) return; reclaimed = true; @@ -1168,6 +1168,7 @@ async function handleClone( } }; const terminateThreadEntry = (): Promise => { + terminatingThread = true; if (!threadEntry.termination) { threadEntry.termination = terminateTrackedWorker( threadWorker, @@ -1178,30 +1179,91 @@ async function handleClone( }; threadExits.register(pid, alloc.channelOffset, terminateThreadEntry); + let startupSettled = false; + let sawThreadExit = false; + let failed = false; + let settleStartup: (gate: CloneStartGate) => void = () => {}; + let rejectStartup: (err: Error) => void = () => {}; + const startup = new Promise((resolve, reject) => { + settleStartup = resolve; + rejectStartup = reject; + }); + + const isThreadTerminationExpected = () => + exitGroupPids.has(pid) || + processTeardowns.has(pid) || + !processes.has(pid) 
|| + intentionallyTerminated.has(threadWorker as object); + const failThread = (reason: string) => { + if (failed) return; + if (isThreadTerminationExpected()) { + void terminateThreadEntry(); + return; + } + failed = true; const text = `[kernel-worker] pid=${pid} tid=${tid}: ${reason}\n`; post({ type: "stderr", pid, data: new TextEncoder().encode(text) }); + if (ctidPtr !== 0) { + const i32 = new Int32Array(memory.buffer); + Atomics.store(i32, ctidPtr >>> 2, 0); + Atomics.notify(i32, ctidPtr >>> 2, 1); + } kernelWorker.notifyThreadExit(pid, tid); kernelWorker.removeChannel(pid, alloc.channelOffset); + if (!startupSettled) { + startupSettled = true; + rejectStartup(new Error(reason)); + } void terminateThreadEntry(); }; threadWorker.on("message", (msg: unknown) => { const m = msg as WorkerToHostMessage; - if (m.type === "thread_exit") { + if (m.type === "ready" && !startupSettled) { + startupSettled = true; + settleStartup({ + tid, + start: () => { + if (!failed && !sawThreadExit && !terminatingThread) { + threadWorker.postMessage({ type: "start_thread" }); + } + }, + }); + } else if (m.type === "thread_exit") { + sawThreadExit = true; void terminateThreadEntry(); + if (!startupSettled) { + startupSettled = true; + rejectStartup(new Error("thread exited before startup completed")); + } } else if ((m as { type?: string }).type === "error") { // worker-main posted {type:"error"} — instantiation failure, top-level // throw, etc. Without this the parent's pthread_join blocks forever. + if (isThreadTerminationExpected()) { + void terminateThreadEntry(); + return; + } failThread((m as { message?: string }).message ?? "thread error"); } }); threadWorker.on("error", (err: Error) => { + if (isThreadTerminationExpected()) { + void terminateThreadEntry(); + return; + } console.error(`[kernel-worker] thread worker error pid=${pid} tid=${tid}:`, err.message); failThread(`worker error: ${err.message ?? 
err}`); }); + threadWorker.on("exit", (code: number) => { + setTimeout(() => { + if (!sawThreadExit && !failed && !terminatingThread && !isThreadTerminationExpected()) { + failThread(`worker exited before thread_exit (code=${code})`); + } + }, 0); + }); - return tid; + return startup; } function handleThreadExit(pid: number, channelOffset: number): boolean { @@ -1269,6 +1331,7 @@ async function finishProcessExit( try { await teardown; } finally { + exitGroupPids.delete(pid); processTeardowns.delete(pid); } } diff --git a/host/src/kernel-worker.ts b/host/src/kernel-worker.ts index a8dfcbcb7..843877a0e 100644 --- a/host/src/kernel-worker.ts +++ b/host/src/kernel-worker.ts @@ -97,6 +97,7 @@ const FORK_BUF_SIZE = FORK_SAVE_BUFFER_SIZE; /** Errno values */ const EAGAIN = 11; +const EINVAL = 22; const ETIMEDOUT = 110; const EINTR_ERRNO = 4; @@ -500,6 +501,11 @@ export interface ForkFromThreadContext { slotLen: number; } +export interface CloneStartGate { + tid: number; + start: () => void; +} + export interface ResolvedSpawnProgram { programBytes: ArrayBuffer; argv: string[]; @@ -594,7 +600,7 @@ export interface CentralizedKernelCallbacks { * Called when a process calls clone (thread creation). The callback should * spawn a thread Worker sharing the parent's Memory. Returns the TID. */ - onClone?: (pid: number, tid: number, fnPtr: number, argPtr: number, stackPtr: number, tlsPtr: number, ctidPtr: number, memory: WebAssembly.Memory) => Promise; + onClone?: (pid: number, tid: number, fnPtr: number, argPtr: number, stackPtr: number, tlsPtr: number, ctidPtr: number, memory: WebAssembly.Memory) => Promise; /** * Called after a pthread channel reaches SYS_EXIT and the kernel worker has @@ -610,9 +616,9 @@ export interface CentralizedKernelCallbacks { onExit?: (pid: number, exitStatus: number) => void; /** - * Called when a process calls exit_group (terminate all threads). - * The callback should forcefully terminate all thread workers for the process. 
- * Called BEFORE the process exit is processed. + * Called when a process calls exit_group (terminate all threads), after the + * kernel has recorded the process exit but before the exiting channel is + * woken. Hosts use this to mark thread Worker exits as intentional. */ onExitGroup?: (pid: number) => void; } @@ -1649,6 +1655,7 @@ export class CentralizedKernelWorker { if (tid !== undefined) { this.channelTids.set(`${pid}:${channelOffset}`, tid); + this.syscallRing.delete(channelOffset); } if (threadFnPtr !== undefined && threadArgPtr !== undefined) { this.threadForkContexts.set(`${pid}:${channelOffset}`, { @@ -2064,6 +2071,16 @@ export class CentralizedKernelWorker { return; } + if ( + (syscallNr === SYS_READ || syscallNr === SYS_PREAD || + syscallNr === SYS_WRITE || syscallNr === SYS_PWRITE) && + origArgs[2] < 0 + ) { + if (logging) console.error(logEntry + " = -1 (EINVAL)"); + this.completeChannel(channel, syscallNr, origArgs, undefined, -1, EINVAL); + return; + } + // --- sendmsg/recvmsg: decompose msghdr from process memory --- if (syscallNr === SYS_SENDMSG) { this.handleSendmsg(channel, origArgs); @@ -2340,26 +2357,6 @@ export class CentralizedKernelWorker { this.ensureProcessMemoryCovers(channel.pid, channel.memory, syscallNr, retVal, origArgs); } - // --- DEBUG: detect memory operations in legacy high control pages --- - const highControlFloor = this.highControlFloorForProcess(channel.pid); - if (syscallNr === SYS_MMAP && retVal > 0 && (retVal >>> 0) !== 0xffffffff) { - const mmapAddr = retVal >>> 0; - const mmapLen = origArgs[1] >>> 0; - if (highControlFloor !== null && mmapAddr + mmapLen > highControlFloor) { - console.error(`[MMAP ALERT] pid=${channel.pid} mmap returned 0x${mmapAddr.toString(16)} len=${mmapLen} — OVERLAPS THREAD REGION! 
args=[${origArgs.map(a => '0x' + (a >>> 0).toString(16)).join(',')}]`); - } - } - if (syscallNr === SYS_MREMAP && retVal > 0 && (retVal >>> 0) !== 0xffffffff) { - const mremapAddr = retVal >>> 0; - const mremapLen = origArgs[2] >>> 0; - if (highControlFloor !== null && mremapAddr + mremapLen > highControlFloor) { - console.error(`[MREMAP ALERT] pid=${channel.pid} mremap returned 0x${mremapAddr.toString(16)} len=${mremapLen} — OVERLAPS THREAD REGION!`); - } - } - if (highControlFloor !== null && syscallNr === SYS_BRK && retVal > highControlFloor) { - console.error(`[BRK ALERT] pid=${channel.pid} brk returned 0x${(retVal >>> 0).toString(16)} — IN THREAD REGION!`); - } - // --- File-backed mmap: populate mapped region with file data --- if (syscallNr === SYS_MMAP && retVal > 0 && (retVal >>> 0) !== 0xffffffff) { const mmapFd = origArgs[4]; @@ -6108,10 +6105,10 @@ export class CentralizedKernelWorker { if (ctidPtr !== 0) { this.threadCtidPtrs.set(`${channel.pid}:${tid}`, ctidPtr); } - this.callbacks.onClone( channel.pid, tid, fnPtr, argPtr, stackPtr, tlsPtr, ctidPtr, channel.memory, - ).then((assignedTid) => { + ).then((cloneResult) => { + const assignedTid = typeof cloneResult === "number" ? 
cloneResult : cloneResult.tid; if (!this.processes.has(channel.pid)) { if (ctidPtr !== 0) { this.threadCtidPtrs.delete(`${channel.pid}:${tid}`); @@ -6123,6 +6120,9 @@ export class CentralizedKernelWorker { this.threadCtidPtrs.set(`${channel.pid}:${assignedTid}`, ctidPtr); } this.completeChannel(channel, SYS_CLONE, origArgs, undefined, assignedTid, 0); + if (typeof cloneResult !== "number") { + cloneResult.start(); + } }).catch((err) => { if (ctidPtr !== 0) { this.threadCtidPtrs.delete(`${channel.pid}:${tid}`); @@ -6167,9 +6167,8 @@ export class CentralizedKernelWorker { const ctidPtr = this.threadCtidPtrs.get(ctidKey); if (ctidPtr && ctidPtr !== 0) { this.threadCtidPtrs.delete(ctidKey); - const procView = new DataView(channel.memory.buffer); - procView.setInt32(ctidPtr, 0, true); const i32View = new Int32Array(channel.memory.buffer); + Atomics.store(i32View, ctidPtr >>> 2, 0); Atomics.notify(i32View, ctidPtr >>> 2, 1); } } @@ -6211,6 +6210,9 @@ export class CentralizedKernelWorker { // Main thread exit or exit_group: record exit status for waitpid, // queue SIGCHLD to parent, then notify the host callback. const exitingPid = channel.pid; + if (syscallNr === SYS_EXIT_GROUP) { + this.callbacks.onExitGroup?.(exitingPid); + } // Idempotency: this guard is shared with handleProcessTerminated so a // SYS_KILL that races a clean SYS_EXIT from the same process doesn't // produce two SIGCHLDs / two parent wake-ups. Cleared by @@ -7230,20 +7232,6 @@ export class CentralizedKernelWorker { return n; } - private highControlFloorForProcess(pid: number): number | null { - const registration = this.processes.get(pid); - if (!registration) return null; - if (registration.explicitMaxAddr) return null; - let floor: number | null = null; - for (const ch of registration.channels) { - const tlsPageAddr = ch.channelOffset - 2 * WASM_PAGE_SIZE; - if (tlsPageAddr >= PROCESS_MMAP_BASE) { - floor = floor === null ? 
tlsPageAddr : Math.min(floor, tlsPageAddr); - } - } - return floor; - } - /** * Set the program's initial brk. Compact process layouts pass the first * guest-managed byte after the host control slab; legacy callers may pass diff --git a/host/src/kernel.ts b/host/src/kernel.ts index 86413a374..829ba510e 100644 --- a/host/src/kernel.ts +++ b/host/src/kernel.ts @@ -8,6 +8,8 @@ * env.host_read(handle: i64, buf_ptr, buf_len) -> i32 * env.host_write(handle: i64, buf_ptr, buf_len) -> i32 * env.host_seek(handle: i64, offset_lo, offset_hi, whence) -> i64 + * env.host_pread(handle: i64, buf_ptr, buf_len, offset_lo, offset_hi) -> i32 + * env.host_pwrite(handle: i64, buf_ptr, buf_len, offset_lo, offset_hi) -> i32 * env.host_fstat(handle: i64, stat_ptr) -> i32 * env.host_statfs(path_ptr, path_len, statfs_ptr) -> i32 * @@ -372,6 +374,24 @@ export class WasmPosixKernel { host_seek: (handle: bigint, offsetLo: number, offsetHi: number, whence: number): bigint => { return this.hostSeek(handle, offsetLo, offsetHi, whence); }, + host_pread: ( + handle: bigint, + bufPtr: bigint, + bufLen: number, + offsetLo: number, + offsetHi: number, + ): number => { + return this.hostPread(handle, Number(bufPtr), bufLen, offsetLo, offsetHi); + }, + host_pwrite: ( + handle: bigint, + bufPtr: bigint, + bufLen: number, + offsetLo: number, + offsetHi: number, + ): number => { + return this.hostPwrite(handle, Number(bufPtr), bufLen, offsetLo, offsetHi); + }, host_fstat: (handle: bigint, statPtr: bigint): number => { return this.hostFstat(handle, Number(statPtr)); }, @@ -813,9 +833,7 @@ export class WasmPosixKernel { whence: number, ): bigint { const h = Number(handle); - // Reconstruct 64-bit signed offset from two 32-bit parts. - // JS bitwise operators are 32-bit, so we use multiplication for the high word. 
- const offset = offsetHi * 0x100000000 + (offsetLo >>> 0); + const offset = this.decodeI64(offsetLo, offsetHi); try { return BigInt(this.io.seek(h, offset, whence)); @@ -824,6 +842,55 @@ export class WasmPosixKernel { } } + private decodeI64(lo: number, hi: number): number { + // JS bitwise operators are 32-bit, so use multiplication for the high word. + return hi * 0x100000000 + (lo >>> 0); + } + + /** + * host_pread(handle: i64, buf_ptr, buf_len, offset_lo, offset_hi) -> i32 + */ + private hostPread( + handle: bigint, + bufPtr: number, + bufLen: number, + offsetLo: number, + offsetHi: number, + ): number { + const h = Number(handle); + const offset = this.decodeI64(offsetLo, offsetHi); + + try { + const mem = this.getMemoryBuffer(); + const buf = mem.subarray(bufPtr, bufPtr + bufLen); + return this.io.read(h, buf, offset, bufLen); + } catch (e) { + return negErrno(e); + } + } + + /** + * host_pwrite(handle: i64, buf_ptr, buf_len, offset_lo, offset_hi) -> i32 + */ + private hostPwrite( + handle: bigint, + bufPtr: number, + bufLen: number, + offsetLo: number, + offsetHi: number, + ): number { + const h = Number(handle); + const offset = this.decodeI64(offsetLo, offsetHi); + const mem = this.getMemoryBuffer(); + const data = mem.slice(bufPtr, bufPtr + bufLen); + + try { + return this.io.write(h, data, offset, bufLen); + } catch (e) { + return negErrno(e); + } + } + /** * host_fstat(handle: i64, stat_ptr) -> i32 * diff --git a/host/src/node-kernel-worker-entry.ts b/host/src/node-kernel-worker-entry.ts index 431e833e6..3ee041a52 100644 --- a/host/src/node-kernel-worker-entry.ts +++ b/host/src/node-kernel-worker-entry.ts @@ -20,6 +20,7 @@ import { join } from "node:path"; import { fileURLToPath } from "node:url"; import { CentralizedKernelWorker } from "./kernel-worker"; import type { + CloneStartGate, ForkFromThreadContext, ResolvedSpawnProgram, SpawnProgramResolution, @@ -104,6 +105,7 @@ interface ProcessInfo { } const processes = new Map(); const processTeardowns = 
new Map>(); +const exitGroupPids = new Set(); const reportedExits = new Set(); // Workers terminated by the kernel-worker entry itself (handleExit / @@ -583,6 +585,7 @@ async function handleInit(msg: InitMessage) { onSpawn: handlePosixSpawn, onClone: handleClone, onThreadExit: (pid, _tid, channelOffset) => handleThreadExit(pid, channelOffset), + onExitGroup: (pid) => exitGroupPids.add(pid), onExit: handleExit, }, ); @@ -1002,7 +1005,7 @@ async function handleClone( tlsPtr: number, ctidPtr: number, memory: WebAssembly.Memory, -): Promise { +): Promise { const processInfo = processes.get(pid); if (!processInfo) throw new Error(`Unknown pid ${pid} for clone`); @@ -1061,6 +1064,7 @@ async function handleClone( threadWorkers.get(pid)!.push(threadEntry); let reclaimed = false; + let terminatingThread = false; const reclaimThread = () => { if (reclaimed) return; reclaimed = true; @@ -1073,6 +1077,7 @@ async function handleClone( } }; const terminateThreadEntry = (): Promise => { + terminatingThread = true; if (!threadEntry.termination) { threadEntry.termination = terminateTrackedWorker(threadWorker).finally(reclaimThread); } @@ -1080,24 +1085,81 @@ async function handleClone( }; threadExits.register(pid, alloc.channelOffset, terminateThreadEntry); + let startupSettled = false; + let sawThreadExit = false; + let failed = false; + let settleStartup: (gate: CloneStartGate) => void = () => {}; + let rejectStartup: (err: Error) => void = () => {}; + const startup = new Promise((resolve, reject) => { + settleStartup = resolve; + rejectStartup = reject; + }); + + const isThreadTerminationExpected = () => + exitGroupPids.has(pid) || + processTeardowns.has(pid) || + !processes.has(pid) || + intentionallyTerminated.has(threadWorker as object); + const failThread = (reason: string) => { + if (failed) return; + if (isThreadTerminationExpected()) { + void terminateThreadEntry(); + return; + } + failed = true; const text = `[kernel-worker] pid=${pid} tid=${tid}: ${reason}\n`; post({ 
type: "stderr", pid, data: new TextEncoder().encode(text) }); + if (ctidPtr !== 0) { + const i32 = new Int32Array(memory.buffer); + Atomics.store(i32, ctidPtr >>> 2, 0); + Atomics.notify(i32, ctidPtr >>> 2, 1); + } kernelWorker.notifyThreadExit(pid, tid); kernelWorker.removeChannel(pid, alloc.channelOffset); + if (!startupSettled) { + startupSettled = true; + rejectStartup(new Error(reason)); + } void terminateThreadEntry(); }; threadWorker.on("message", (msg: unknown) => { const m = msg as WorkerToHostMessage; - if (m.type === "thread_exit") { + if (m.type === "ready" && !startupSettled) { + startupSettled = true; + settleStartup({ + tid, + start: () => { + if (!failed && !sawThreadExit && !terminatingThread) { + threadWorker.postMessage({ type: "start_thread" }); + } + }, + }); + } else if (m.type === "thread_exit") { + sawThreadExit = true; void terminateThreadEntry(); + if (!startupSettled) { + startupSettled = true; + rejectStartup(new Error("thread exited before startup completed")); + } } else if (m.type === "error") { + if (isThreadTerminationExpected()) { + void terminateThreadEntry(); + return; + } failThread(m.message); } }); threadWorker.on("error", (err: Error) => failThread(`worker error: ${err.message ?? 
err}`)); + threadWorker.on("exit", (code: number) => { + setTimeout(() => { + if (!sawThreadExit && !failed && !terminatingThread && !isThreadTerminationExpected()) { + failThread(`worker exited before thread_exit (code=${code})`); + } + }, 0); + }); - return tid; + return startup; } function handleThreadExit(pid: number, channelOffset: number): boolean { @@ -1141,6 +1203,7 @@ async function finishProcessExit(pid: number, exitStatus: number): Promise try { await teardown; } finally { + exitGroupPids.delete(pid); processTeardowns.delete(pid); } } diff --git a/host/src/vfs/memory-fs.ts b/host/src/vfs/memory-fs.ts index b1ffb21f1..16056f980 100644 --- a/host/src/vfs/memory-fs.ts +++ b/host/src/vfs/memory-fs.ts @@ -750,12 +750,7 @@ export class MemoryFileSystem implements FileSystemBackend { length: number, ): number { if (offset !== null) { - // pread semantics: read at offset without changing file position - const savedPos = this.fs.lseek(handle, 0, 1); // SEEK_CUR - this.fs.lseek(handle, offset, 0); // SEEK_SET - const n = this.fs.read(handle, buffer.subarray(0, length)); - this.fs.lseek(handle, savedPos, 0); // restore position - return n; + return this.fs.pread(handle, buffer.subarray(0, length), offset); } return this.fs.read(handle, buffer.subarray(0, length)); } @@ -767,12 +762,7 @@ export class MemoryFileSystem implements FileSystemBackend { length: number, ): number { if (offset !== null) { - // pwrite semantics: write at offset without changing file position - const savedPos = this.fs.lseek(handle, 0, 1); // SEEK_CUR - this.fs.lseek(handle, offset, 0); // SEEK_SET - const n = this.fs.write(handle, buffer.subarray(0, length)); - this.fs.lseek(handle, savedPos, 0); // restore position - return n; + return this.fs.pwrite(handle, buffer.subarray(0, length), offset); } return this.fs.write(handle, buffer.subarray(0, length)); } diff --git a/host/src/vfs/sharedfs-vendor.ts b/host/src/vfs/sharedfs-vendor.ts index db96aca7a..48c0ca9de 100644 --- 
a/host/src/vfs/sharedfs-vendor.ts +++ b/host/src/vfs/sharedfs-vendor.ts @@ -1194,6 +1194,55 @@ export class SharedFS { } } + private fdReferencesInode(ino: number): boolean { + for (let fd = 0; fd < MAX_FDS; fd++) { + const base = FD_TABLE_OFFSET + fd * FD_ENTRY_SIZE; + if ( + Atomics.load(this.i32, base >> 2) && + this.r32(base + FD_INO) === ino + ) { + return true; + } + } + return false; + } + + private releaseUnlinkedInodeIfUnused(ino: number): void { + const off = this.inodeOffset(ino); + this.inodeWriteLock(ino); + let shouldFree = false; + try { + if ( + this.r32(off + INO_LINK_COUNT) === 0 && + !this.fdReferencesInode(ino) + ) { + this.inodeTruncate(ino, 0); + shouldFree = true; + } + } finally { + this.inodeWriteUnlock(ino); + } + if (shouldFree) this.inodeFree(ino); + } + + private dropInodeLink(ino: number): void { + const off = this.inodeOffset(ino); + this.inodeWriteLock(ino); + let shouldFree = false; + try { + const linkCount = this.r32(off + INO_LINK_COUNT); + const newLinkCount = Math.max(0, linkCount - 1); + this.w32(off + INO_LINK_COUNT, newLinkCount); + if (newLinkCount === 0 && !this.fdReferencesInode(ino)) { + this.inodeTruncate(ino, 0); + shouldFree = true; + } + } finally { + this.inodeWriteUnlock(ino); + } + if (shouldFree) this.inodeFree(ino); + } + // ── Build stat result from inode ───────────────────────────────── private buildStat(ino: number): StatResult { @@ -1292,6 +1341,7 @@ export class SharedFS { const entry = this.fdGet(fd); if (!entry) throw new SFSError(EBADF); this.fdFree(fd); + this.releaseUnlinkedInodeIfUnused(entry.ino); } read(fd: number, buffer: Uint8Array): number { @@ -1315,6 +1365,19 @@ export class SharedFS { } } + pread(fd: number, buffer: Uint8Array, offset: number): number { + const entry = this.fdGet(fd); + if (!entry) throw new SFSError(EBADF); + if (offset < 0) throw new SFSError(EINVAL); + + this.inodeReadLock(entry.ino); + try { + return this.inodeReadData(entry.ino, offset, buffer, buffer.length); + } 
finally { + this.inodeReadUnlock(entry.ino); + } + } + write(fd: number, data: Uint8Array): number { const entry = this.fdGet(fd); if (!entry) throw new SFSError(EBADF); @@ -1345,6 +1408,33 @@ export class SharedFS { } } + pwrite(fd: number, data: Uint8Array, offset: number): number { + const entry = this.fdGet(fd); + if (!entry) throw new SFSError(EBADF); + if (offset < 0) throw new SFSError(EINVAL); + + const accMode = entry.flags & O_ACCMODE; + if (accMode === O_RDONLY) throw new SFSError(EBADF); + + this.inodeWriteLock(entry.ino); + try { + let writeOffset = offset; + if (entry.flags & O_APPEND) { + const inoOff = this.inodeOffset(entry.ino); + writeOffset = this.r64(inoOff + INO_SIZE); + } + + return this.inodeWriteData( + entry.ino, + writeOffset, + data, + data.length, + ); + } finally { + this.inodeWriteUnlock(entry.ino); + } + } + lseek(fd: number, offset: number, whence: number): number { const entry = this.fdGet(fd); if (!entry) throw new SFSError(EBADF); @@ -1433,17 +1523,7 @@ export class SharedFS { const rc = this.dirRemoveEntry(parentIno, nameBytes); if (rc < 0) throw new SFSError(rc); - this.inodeWriteLock(childIno); - const linkCount = this.r32(childOff + INO_LINK_COUNT); - if (linkCount <= 1) { - this.inodeTruncate(childIno, 0); - this.w32(childOff + INO_LINK_COUNT, 0); - this.inodeWriteUnlock(childIno); - this.inodeFree(childIno); - } else { - this.w32(childOff + INO_LINK_COUNT, linkCount - 1); - this.inodeWriteUnlock(childIno); - } + this.dropInodeLink(childIno); } finally { this.inodeWriteUnlock(parentIno); } @@ -1469,16 +1549,13 @@ export class SharedFS { // Remove any existing entry at destination const existingIno = this.dirLookup(newParent, newNameBytes); + if (existingIno === srcIno) return; if (existingIno >= 0) { const existOff = this.inodeOffset(existingIno); const existMode = this.r32(existOff + INO_MODE); if ((existMode & S_IFMT) === S_IFDIR) throw new SFSError(EISDIR); this.dirRemoveEntry(newParent, newNameBytes); - 
this.inodeWriteLock(existingIno); - this.inodeTruncate(existingIno, 0); - this.w32(existOff + INO_LINK_COUNT, 0); - this.inodeWriteUnlock(existingIno); - this.inodeFree(existingIno); + this.dropInodeLink(existingIno); } // Add entry in new directory diff --git a/host/src/worker-adapter-browser.ts b/host/src/worker-adapter-browser.ts index 778e6a6e2..008e53b64 100644 --- a/host/src/worker-adapter-browser.ts +++ b/host/src/worker-adapter-browser.ts @@ -28,6 +28,7 @@ class BrowserWorkerHandle implements WorkerHandle { private worker: Worker; // eslint-disable-next-line @typescript-eslint/no-explicit-any private handlers = new Map void>>(); + private pendingMessages = new Map(); private terminated = false; private terminationPromise: Promise | null = null; private shutdownAckResolver: (() => void) | null = null; @@ -44,16 +45,16 @@ class BrowserWorkerHandle implements WorkerHandle { this.shutdownAckResolver = null; return; } - for (const h of this.handlers.get("message") ?? []) h(e.data); + this.dispatchOrBuffer("message", e.data); }; worker.onerror = (e: ErrorEvent) => { - for (const h of this.handlers.get("error") ?? []) h(new Error(e.message)); + this.dispatchOrBuffer("error", new Error(e.message)); // Worker errors are unrecoverable — synthesize an exit event if (!this.terminated) { this.terminated = true; this.shutdownAckResolver?.(); this.shutdownAckResolver = null; - for (const h of this.handlers.get("exit") ?? 
[]) h(1); + this.dispatchOrBuffer("exit", 1); } }; } @@ -73,6 +74,14 @@ class BrowserWorkerHandle implements WorkerHandle { this.handlers.set(event, set); } set.add(handler); + + const pending = this.pendingMessages.get(event); + if (pending && pending.length > 0) { + this.pendingMessages.delete(event); + for (const message of pending) { + handler(message); + } + } } // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -80,6 +89,21 @@ class BrowserWorkerHandle implements WorkerHandle { this.handlers.get(event)?.delete(handler); } + private dispatchOrBuffer(event: string, message: unknown): void { + const handlers = this.handlers.get(event); + if (!handlers || handlers.size === 0) { + const pending = this.pendingMessages.get(event); + if (pending) { + pending.push(message); + } else { + this.pendingMessages.set(event, [message]); + } + return; + } + + for (const h of handlers) h(message); + } + async terminate(): Promise { if (this.terminationPromise) return this.terminationPromise; this.terminationPromise = this.terminateOnce(); @@ -111,7 +135,7 @@ class BrowserWorkerHandle implements WorkerHandle { this.worker.terminate(); if (!this.terminated) { this.terminated = true; - for (const h of this.handlers.get("exit") ?? 
[]) h(0); + this.dispatchOrBuffer("exit", 0); } return 0; } diff --git a/host/src/worker-main.ts b/host/src/worker-main.ts index 8a9ad298f..89d1a2ce3 100644 --- a/host/src/worker-main.ts +++ b/host/src/worker-main.ts @@ -8,6 +8,7 @@ import type { CentralizedWorkerInitMessage, CentralizedThreadInitMessage, + HostToWorkerMessage, WorkerToHostMessage, } from "./worker-protocol"; import { DynamicLinker, type LoadedSharedLibrary } from "./dylink"; @@ -60,6 +61,79 @@ type KernelImports = Record & { kernel_fork: (...args: unknown[]) => number; }; +function waitForThreadStart(port: MessagePort, pid: number, tid: number): Promise<void> { + return new Promise((resolve, reject) => { + let settled = false; + port.on("message", (raw: unknown) => { + if (settled) return; + const msg = raw as HostToWorkerMessage; + if (msg.type === "start_thread") { + settled = true; + resolve(); + } else if (msg.type === "terminate") { + settled = true; + reject(new Error(`Thread worker terminated before start pid=${pid} tid=${tid}`)); + } + }); + }); +} + +function delayThreadStartTick(): Promise<void> { + return new Promise((resolve) => setTimeout(resolve, 0)); +} + +async function waitForPthreadListInsertion( + memory: WebAssembly.Memory, + tlsPtr: number, + ptrWidth: 4 | 8, + expectedTid: number, + ctidPtr: number, +): Promise<void> { + const ptrSize = ptrWidth === 8 ? 8 : 4; + const prevOffset = 2 * ptrSize; + const nextOffset = 3 * ptrSize; + const tidOffset = 6 * ptrSize; + const readPtr = (view: DataView, addr: number): number => + ptrWidth === 8 ? 
Number(view.getBigUint64(addr, true)) : view.getUint32(addr, true); + const inBounds = (view: DataView, addr: number, len = ptrSize): boolean => + Number.isSafeInteger(addr) && addr >= 0 && addr + len <= view.byteLength; + + const deadline = performance.now() + 5000; + while (performance.now() < deadline) { + const view = new DataView(memory.buffer); + const i32View = new Int32Array(memory.buffer); + if ( + inBounds(view, tlsPtr + tidOffset, 4) && + inBounds(view, tlsPtr + prevOffset) && + inBounds(view, tlsPtr + nextOffset) && + (ctidPtr === 0 || inBounds(view, ctidPtr, 4)) + ) { + const tid = Atomics.load(i32View, (tlsPtr + tidOffset) >>> 2); + const clearTidReady = + ctidPtr === 0 || + ctidPtr === tlsPtr + tidOffset || + Atomics.load(i32View, ctidPtr >>> 2) === 0; + const prev = readPtr(view, tlsPtr + prevOffset); + const next = readPtr(view, tlsPtr + nextOffset); + if ( + tid === expectedTid && + clearTidReady && + prev !== 0 && + next !== 0 && + prev !== tlsPtr && + next !== tlsPtr && + inBounds(view, prev + nextOffset) && + inBounds(view, next + prevOffset) && + readPtr(view, prev + nextOffset) === tlsPtr && + readPtr(view, next + prevOffset) === tlsPtr + ) { + return; + } + } + await delayThreadStartTick(); + } +} + function buildKernelImports( memory: WebAssembly.Memory, channelOffset: number, @@ -1800,7 +1874,11 @@ export async function centralizedThreadWorkerMain( } const threadArg = ptrWidth === 8 ? 
BigInt(argPtr) : argPtr; - let result = 0; + + port.postMessage({ type: "ready", pid } satisfies WorkerToHostMessage); + await waitForThreadStart(port, pid, tid); + await waitForPthreadListInsertion(memory, tlsPtr, ptrWidth, tid, ctidPtr); + if (hasForkInstrumentation) { const getState = instance.exports.wpk_fork_state as () => number; const unwindEnd = instance.exports.wpk_fork_unwind_end as () => void; @@ -1814,17 +1892,8 @@ export async function centralizedThreadWorkerMain( } try { - const raw = threadFn(threadArg); - result = Number(raw); + threadFn(threadArg); } catch (e) { - if (e instanceof Error && e.message.includes("unreachable")) { - result = 0; - break; - } - if (e instanceof Error && e.message.includes("null function or function signature mismatch")) { - result = 0; - break; - } throw e; } @@ -1839,51 +1908,17 @@ export async function centralizedThreadWorkerMain( needsRewind = true; continue; } - break; + throw new Error(`pthread start function returned unexpectedly pid=${pid} tid=${tid}`); } } else { - try { - const raw = threadFn(threadArg); - result = Number(raw); - } catch (e) { - if (e instanceof Error && e.message.includes("unreachable")) { - // Thread exited via kernel_exit → unreachable trap - result = 0; - } else if (e instanceof Error && e.message.includes("null function or function signature mismatch")) { - // call_indirect type mismatch — treat as thread crash but don't abort - result = 0; - } else { - throw e; - } - } - } - - // Send SYS_EXIT through the channel. The kernel worker performs - // CLONE_CHILD_CLEARTID after it observes SYS_EXIT; doing it here would - // let pthread_join reclaim the stack while this Worker is still running. - { - const view = new DataView(memory.buffer); - const base = channelOffset; - view.setInt32(base + CH_SYSCALL, ABI_SYSCALLS.Exit, true); - view.setInt32(base + CH_ARGS, result ?? 
0, true); - const i32 = new Int32Array(memory.buffer); - Atomics.store(i32, (base + CH_STATUS) / 4, CHANNEL_STATUS_PENDING); - Atomics.notify(i32, (base + CH_STATUS) / 4, 1); - // Wait for kernel to process the exit - while (Atomics.wait(i32, (base + CH_STATUS) / 4, CHANNEL_STATUS_PENDING) === "ok") { /* */ } - Atomics.store(i32, (base + CH_STATUS) / 4, CHANNEL_STATUS_IDLE); + threadFn(threadArg); + throw new Error(`pthread start function returned unexpectedly pid=${pid} tid=${tid}`); } - - port.postMessage({ - type: "thread_exit", - pid, - tid, - } satisfies WorkerToHostMessage); } catch (err) { port.postMessage({ - type: "thread_exit", + type: "error", pid, - tid, + message: `Thread worker failed pid=${pid} tid=${tid}: ${err instanceof Error ? `${err.message}\n${err.stack}` : String(err)}`, } satisfies WorkerToHostMessage); } } diff --git a/host/src/worker-protocol.ts b/host/src/worker-protocol.ts index b7c4b127c..206e164ef 100644 --- a/host/src/worker-protocol.ts +++ b/host/src/worker-protocol.ts @@ -3,6 +3,7 @@ export type HostToWorkerMessage = | CentralizedWorkerInitMessage | CentralizedThreadInitMessage + | StartThreadMessage | WorkerTerminateMessage | DeliverSignalMessage | ExecReplyMessage; @@ -90,6 +91,10 @@ export interface WorkerTerminateMessage { type: "terminate"; } +export interface StartThreadMessage { + type: "start_thread"; +} + // --- Worker → Host messages --- export type WorkerToHostMessage = diff --git a/host/test/browser-worker-adapter.test.ts b/host/test/browser-worker-adapter.test.ts index f8e327f60..b0298a8ed 100644 --- a/host/test/browser-worker-adapter.test.ts +++ b/host/test/browser-worker-adapter.test.ts @@ -154,6 +154,17 @@ describe("BrowserWorkerAdapter", () => { // No handlers registered -- should not throw expect(() => lastMockWorker!.simulateMessage("orphan")).not.toThrow(); }); + + it("should buffer worker messages until a handler is registered", () => { + const adapter = new BrowserWorkerAdapter("worker.js"); + const handle = 
adapter.createWorker({}); + const received: unknown[] = []; + + lastMockWorker!.simulateMessage({ type: "error", pid: 1, message: "early" }); + handle.on("message", (msg) => received.push(msg)); + + expect(received).toEqual([{ type: "error", pid: 1, message: "early" }]); + }); }); // ---- BrowserWorkerHandle error routing ---------------------------------- @@ -180,6 +191,18 @@ describe("BrowserWorkerAdapter", () => { lastMockWorker!.simulateError("unhandled"), ).not.toThrow(); }); + + it("should buffer worker errors until a handler is registered", () => { + const adapter = new BrowserWorkerAdapter("worker.js"); + const handle = adapter.createWorker({}); + const errors: Error[] = []; + + lastMockWorker!.simulateError("early failure"); + handle.on("error", (err) => errors.push(err)); + + expect(errors).toHaveLength(1); + expect(errors[0].message).toBe("early failure"); + }); }); // ---- BrowserWorkerHandle postMessage ------------------------------------ diff --git a/host/test/multi-worker.test.ts b/host/test/multi-worker.test.ts index 1183a957f..3a60baf8b 100644 --- a/host/test/multi-worker.test.ts +++ b/host/test/multi-worker.test.ts @@ -256,6 +256,7 @@ describe("CentralizedKernelWorker Process Management", () => { activeChannels: [], channelTids: new Map(), threadForkContexts: new Map(), + syscallRing: new Map(), usePolling: true, kernel: { toKernelPtr(value: number | bigint): number { diff --git a/host/test/vfs.test.ts b/host/test/vfs.test.ts index 12288acde..563c44e11 100644 --- a/host/test/vfs.test.ts +++ b/host/test/vfs.test.ts @@ -386,6 +386,67 @@ describe("MemoryFileSystem", () => { mfs.close(fd); }); + it("does not restore a stale descriptor offset after positioned I/O", () => { + const sab = new SharedArrayBuffer(4 * 1024 * 1024); + const mfs = MemoryFileSystem.create(sab); + const fd = mfs.open("/positional.txt", O_CREAT | O_RDWR | O_TRUNC, 0o644); + const data = new TextEncoder().encode("0123456789"); + mfs.write(fd, data, null, data.length); + 
mfs.seek(fd, 2, 0); + + const sharedFs = (mfs as any).fs; + const originalRead = sharedFs.read.bind(sharedFs); + const originalPread = sharedFs.pread.bind(sharedFs); + const originalWrite = sharedFs.write.bind(sharedFs); + const originalPwrite = sharedFs.pwrite.bind(sharedFs); + + sharedFs.read = (readFd: number, buffer: Uint8Array) => { + const n = originalRead(readFd, buffer); + if (readFd === fd) sharedFs.lseek(fd, 7, 0); + return n; + }; + sharedFs.pread = (readFd: number, buffer: Uint8Array, offset: number) => { + const n = originalPread(readFd, buffer, offset); + if (readFd === fd) sharedFs.lseek(fd, 7, 0); + return n; + }; + + const readBuf = new Uint8Array(2); + expect(mfs.read(fd, readBuf, 4, readBuf.length)).toBe(2); + expect(new TextDecoder().decode(readBuf)).toBe("45"); + expect(mfs.seek(fd, 0, 1)).toBe(7); + + sharedFs.read = originalRead; + sharedFs.pread = originalPread; + mfs.seek(fd, 3, 0); + + sharedFs.write = (writeFd: number, buffer: Uint8Array) => { + const n = originalWrite(writeFd, buffer); + if (writeFd === fd) sharedFs.lseek(fd, 8, 0); + return n; + }; + sharedFs.pwrite = ( + writeFd: number, + buffer: Uint8Array, + offset: number, + ) => { + const n = originalPwrite(writeFd, buffer, offset); + if (writeFd === fd) sharedFs.lseek(fd, 8, 0); + return n; + }; + + const patch = new TextEncoder().encode("xy"); + expect(mfs.write(fd, patch, 5, patch.length)).toBe(2); + expect(mfs.seek(fd, 0, 1)).toBe(8); + + sharedFs.write = originalWrite; + sharedFs.pwrite = originalPwrite; + const finalBuf = new Uint8Array(data.length); + expect(mfs.read(fd, finalBuf, 0, finalBuf.length)).toBe(data.length); + expect(new TextDecoder().decode(finalBuf)).toBe("01234xy789"); + mfs.close(fd); + }); + it("opens more than the old 64-descriptor SharedFS table limit", () => { expect(MAX_FDS).toBe( Math.floor((BLOCK_SIZE - FD_TABLE_OFFSET) / FD_ENTRY_SIZE), @@ -488,6 +549,81 @@ describe("MemoryFileSystem", () => { expect(() => mfs.stat("/todelete.txt")).toThrow(); }); + 
it("keeps unlinked open files alive until the last close", () => { + const sab = new SharedArrayBuffer(4 * 1024 * 1024); + const mfs = MemoryFileSystem.create(sab); + const enc = new TextEncoder(); + const dec = new TextDecoder(); + + const fd = mfs.open("/temp.db", O_CREAT | O_RDWR | O_TRUNC, 0o600); + const original = enc.encode("sqlite-temp-content"); + mfs.write(fd, original, null, original.length); + + mfs.unlink("/temp.db"); + expect(() => mfs.stat("/temp.db")).toThrow(); + + const replacementFd = mfs.open( + "/replacement.db", + O_CREAT | O_RDWR | O_TRUNC, + 0o600, + ); + const replacement = enc.encode("replacement"); + mfs.write(replacementFd, replacement, null, replacement.length); + + mfs.seek(fd, 0, 0); + const readBack = new Uint8Array(original.length); + expect(mfs.read(fd, readBack, null, readBack.length)).toBe(original.length); + expect(dec.decode(readBack)).toBe("sqlite-temp-content"); + + mfs.seek(fd, original.length, 0); + const suffix = enc.encode("-after-unlink"); + mfs.write(fd, suffix, null, suffix.length); + expect(mfs.fstat(fd).size).toBe(original.length + suffix.length); + + mfs.seek(replacementFd, 0, 0); + const replacementRead = new Uint8Array(replacement.length); + expect( + mfs.read(replacementFd, replacementRead, null, replacementRead.length), + ).toBe(replacement.length); + expect(dec.decode(replacementRead)).toBe("replacement"); + + mfs.close(replacementFd); + mfs.close(fd); + expect(() => mfs.stat("/temp.db")).toThrow(); + }); + + it("keeps renamed-over open files alive until the last close", () => { + const sab = new SharedArrayBuffer(4 * 1024 * 1024); + const mfs = MemoryFileSystem.create(sab); + const enc = new TextEncoder(); + const dec = new TextDecoder(); + + const destFd = mfs.open("/dest.db", O_CREAT | O_RDWR | O_TRUNC, 0o600); + const dest = enc.encode("open-destination"); + mfs.write(destFd, dest, null, dest.length); + + const srcFd = mfs.open("/src.db", O_CREAT | O_RDWR | O_TRUNC, 0o600); + const src = 
enc.encode("replacement-source"); + mfs.write(srcFd, src, null, src.length); + mfs.close(srcFd); + + mfs.rename("/src.db", "/dest.db"); + + mfs.seek(destFd, 0, 0); + const destRead = new Uint8Array(dest.length); + expect(mfs.read(destFd, destRead, null, destRead.length)).toBe(dest.length); + expect(dec.decode(destRead)).toBe("open-destination"); + + const newDestFd = mfs.open("/dest.db", O_RDONLY, 0); + const srcRead = new Uint8Array(src.length); + expect(mfs.read(newDestFd, srcRead, null, srcRead.length)).toBe(src.length); + expect(dec.decode(srcRead)).toBe("replacement-source"); + + mfs.close(newDestFd); + mfs.close(destFd); + expect(() => mfs.stat("/src.db")).toThrow(); + }); + it("ftruncate changes file size", () => { const sab = new SharedArrayBuffer(4 * 1024 * 1024); const mfs = MemoryFileSystem.create(sab); diff --git a/libc/glue/channel_syscall.c b/libc/glue/channel_syscall.c index 882cd8211..fed4a6127 100644 --- a/libc/glue/channel_syscall.c +++ b/libc/glue/channel_syscall.c @@ -139,6 +139,13 @@ uintptr_t __get_channel_base_addr(void) { /* SYS_EXIT needs special handling */ #define SYS_EXIT 34 +/* + * SYS_exit is non-returning. After the channel notification is posted, park on + * a process-global word that is never notified so a terminated pthread cannot + * wake later through a recycled per-thread channel slot. + */ +static _Atomic int32_t exit_parking_lot; + /* SYS_FORK/VFORK — kernel_fork import is the fork-continuation boundary. * wasm-fork-instrument rewrites the call graph around kernel.kernel_fork, enabling * the host to save/restore the call stack across fork — so the child @@ -346,6 +353,13 @@ static long __do_syscall(long n, long long a1, long long a2, long long a3, (int32_t *)(uintptr_t)(addr + CH_STATUS), 1); } + if (n == SYS_EXIT) { + for (;;) { + __builtin_wasm_memory_atomic_wait32( + (int32_t *)&exit_parking_lot, 0, -1); + } + } + /* Block until the kernel sets status to COMPLETE or ERROR. 
* CRITICAL: Re-read __channel_base from the wasm global on every * iteration. The compiler at -O0 would spill the address to the diff --git a/scripts/browser-sqlite-official-runner.ts b/scripts/browser-sqlite-official-runner.ts index 2715c11f9..5b4786f71 100755 --- a/scripts/browser-sqlite-official-runner.ts +++ b/scripts/browser-sqlite-official-runner.ts @@ -12,6 +12,7 @@ const VITE_HOST = "127.0.0.1"; const VITE_BASE_PORT = Number(process.env.SQLITE_TEST_VITE_PORT ?? 5200); const SQLITE_TEST_UID = Number(process.env.SQLITE_TEST_UID ?? 1000); const SQLITE_TEST_GID = Number(process.env.SQLITE_TEST_GID ?? 1000); +const SQLITE_BROWSER_MAX_MEMORY_PAGES = process.env.SQLITE_BROWSER_MAX_MEMORY_PAGES ?? "4096"; interface BrowserArtifact { path: string; @@ -69,6 +70,7 @@ async function startViteServer(port: number): Promise { ...process.env, KANDELO_BROWSER_DEMO_INPUTS: "sqlite-test", KANDELO_BROWSER_TEST_NO_HMR: "1", + VITE_SQLITE_BROWSER_MAX_MEMORY_PAGES: SQLITE_BROWSER_MAX_MEMORY_PAGES, }, }, ); diff --git a/scripts/run-browser-sqlite-official-tests.sh b/scripts/run-browser-sqlite-official-tests.sh index 1103e76d7..52f7f8acb 100755 --- a/scripts/run-browser-sqlite-official-tests.sh +++ b/scripts/run-browser-sqlite-official-tests.sh @@ -21,6 +21,11 @@ Options: --results-dir DIR Copy testrunner.db/logs and summary files to DIR --explain Ask testrunner.tcl to print planned work --help Show this help + +Environment: + SQLITE_BROWSER_MAX_MEMORY_PAGES + Browser Wasm process memory cap in 64KiB pages + (default: 4096, 256MiB) EOF } diff --git a/scripts/run-libc-tests.sh b/scripts/run-libc-tests.sh index 95b47ede1..c43a204c9 100755 --- a/scripts/run-libc-tests.sh +++ b/scripts/run-libc-tests.sh @@ -39,6 +39,7 @@ REGRESSION_EXPECTED_FAIL=( ) REGRESSION_FLAKY=( pthread_cond-smasher # CI timing-sensitive pthread_cond stress test; can PASS or fail on slow runners + raise-race # fork-from-signal pthread stress test; skipped on CI by default, flaky when explicitly selected ) # ── 
Helper: check if a test is in an expected-failure list ── @@ -293,7 +294,11 @@ run_test() { # stdin redirected to /dev/null: run-example.ts reads process.stdin # when not a TTY, which would drain any pipe the caller supplies. set +e - output=$(cd "$REPO_ROOT" && timeout "$TEST_TIMEOUT" node --experimental-wasm-exnref --import tsx/esm examples/run-example.ts "${wasm}" </dev/null 2>&1) + # Make the outer timeout authoritative. run-example.ts has its own + # default 30000ms timeout; if that races and wins, the same hung test is + # misclassified as FAIL instead of TIME. + local runner_timeout_ms=$(((TEST_TIMEOUT + 5) * 1000)) + output=$(cd "$REPO_ROOT" && TIMEOUT="$runner_timeout_ms" timeout "$TEST_TIMEOUT" node --experimental-wasm-exnref --import tsx/esm examples/run-example.ts "${wasm}" </dev/null 2>&1) rc=$? set -e diff --git a/test-runs/gastown-sqlite-browser-full-pr5-snapshot/browser-fault-recheck.md b/test-runs/gastown-sqlite-browser-full-pr5-snapshot/browser-fault-recheck.md new file mode 100644 index 000000000..40f2f4b59 --- /dev/null +++ b/test-runs/gastown-sqlite-browser-full-pr5-snapshot/browser-fault-recheck.md @@ -0,0 +1,97 @@ +# Browser SQLite Fault/Crash Recheck + +Date: 2026-06-13 +Branch: `polecat/coma/kad-wtb.11@mqbytir6` + +This recheck used freshly rebuilt local artifacts after rebasing onto +`origin/main`: + +```bash +scripts/dev-shell.sh bash -c 'bash build.sh' +scripts/dev-shell.sh bash -c \ + 'bash packages/registry/tcl/build-tcl.sh && + bash packages/registry/zlib/build-zlib.sh && + bash packages/registry/sqlite/build-sqlite.sh && + bash packages/registry/sqlite/build-testfixture.sh && + bash images/vfs/scripts/build-sqlite-test-vfs-image.sh' +``` + +Important dev-shell note: use `bash -c`, not `bash -lc`. A login shell puts +Homebrew Cargo ahead of the Nix nightly toolchain and fails on the repo's +`-Z build-std`/`-Zunstable-options` settings. 
+ +## Isolated Browser Results + +```bash +scripts/dev-shell.sh bash -c \ + 'bash scripts/run-sqlite-official-tests.sh --host browser \ + --permutation full --jobs 1 --timeout-ms 900000 \ + --results-dir test-runs/kad-wtb.11-browser-sysfault sysfault.test' +``` + +Result: pass. `sysfault.test` completed 1 job, 1365 cases, 0 errors. The +snapshot's `sysfault-1.2.1-vfsfault-transient.27` and +`sysfault-1.2.2-vfsfault-transient.3` failures did not reproduce with rebuilt +artifacts. + +```bash +scripts/dev-shell.sh bash -c \ + 'bash scripts/run-sqlite-official-tests.sh --host browser \ + --permutation full --jobs 1 --timeout-ms 900000 \ + --results-dir test-runs/kad-wtb.11-browser-writecrash writecrash.test' +``` + +Result: fail. The prior snapshot failure at `writecrash-1.6.1` did not +reproduce, but the isolated browser run fails later: + +```text +writecrash-1.52.1 expected: [0 {}] +writecrash-1.52.1 got: [1 {couldn't execute "/usr/bin/testfixture": no such file or directory}] +``` + +This is browser-specific. The same rebuilt `testfixture.wasm` under the Node +host completed `writecrash.test` with 995 cases and 0 errors. + +```bash +scripts/dev-shell.sh bash -c \ + 'bash scripts/run-sqlite-official-tests.sh --host browser \ + --permutation full --jobs 1 --timeout-ms 900000 \ + --results-dir test-runs/kad-wtb.11-browser-walfault walfault.test' +``` + +Result: fail after about 6 minutes, not a silent hang. The failure is late in +the `walfault-9-oom-transient.*` sequence. 
The Tcl output ends with: + +```text +UpdateStringProc should not be invoked for type (null) +Aborted +``` + +The browser console also reported a kernel trap while handling syscall 47 +(`munmap`) for pid 102: + +```text +[handleSyscall] kernel threw for pid=102 syscall=47 args=[180092928,32768,0,0,0,0]: RuntimeError: unreachable +``` + +The same isolated Node command did not hit this browser abort path before the +900000 ms outer timeout; its job remained `running` with no case errors in the +exported `testrunner.db`. + +## Classification + +The actionable browser set is now narrower than the original full-run snapshot: + +- `sysfault.test`: passes in isolation with current rebuilt artifacts. +- `writecrash.test`: browser-only executable resolution/VFS visibility failure + for `/usr/bin/testfixture` after repeated crash-child iterations. +- `walfault.test`: browser run reaches a late Tcl abort and browser kernel + `munmap` trap; the snapshot's `running` state was an interrupted run, not the + isolated terminal behavior. + +Both remaining failures involve browser-only behavior under repeated child +process crash/abort paths. The next fix should instrument browser +`resolveExecutableForLaunch()`/`readFileFromFs()` and process teardown around +these tests to determine whether `/usr/bin/testfixture` is actually unlinked +from the shared VFS, hidden by path-resolution state, or missed because teardown +left the browser host with stale process/VFS metadata. 
diff --git a/test-runs/gastown-sqlite-epic-synthesis/final-hard-counts.md b/test-runs/gastown-sqlite-epic-synthesis/final-hard-counts.md new file mode 100644 index 000000000..5c7a4089b --- /dev/null +++ b/test-runs/gastown-sqlite-epic-synthesis/final-hard-counts.md @@ -0,0 +1,132 @@ +# SQLite Final Hard Counts + +Issue: `kad-wtb.14` +Epic: `kad-wtb` +Integration branch: `integration/kad-wtb-sqlite-testing` +Integration head inspected: `40a9df5c947ca79c1952c451abfbe122deba75e4` +Date: 2026-06-14 + +This report records the hard pass/fail/skip counts from the preserved official +SQLite project-unit `full` snapshots plus focused reruns that supersede the +original browser failure rows. No SQLite test was skipped or xfailed as part of +this epic. The SQLite testrunner schema records `done`, `failed`, `omit`, +`running`, and `ready`; it does not record XFAIL/XPASS/flaky fields. + +## Official Full Snapshots + +### Node + +Command: + +```bash +/bin/bash scripts/dev-shell.sh /bin/bash scripts/run-sqlite-project-unit-tests.sh --host node --permutation full --jobs 2 --timeout-ms 21600000 --results-root test-runs/gastown-sqlite-node-full-pr5 +``` + +Artifacts: + +- `test-runs/gastown-sqlite-node-full-pr5/command.log` +- `test-runs/gastown-sqlite-node-full-pr5/host-status.tsv` +- `test-runs/gastown-sqlite-node-full-pr5/node/summary.txt` +- `test-runs/gastown-sqlite-node-full-pr5/node/failures.tsv` +- `test-runs/gastown-sqlite-node-full-pr5/node/testrunner.db` +- `test-runs/gastown-sqlite-node-full-pr5/node/testrunner.log` + +Runner status: `host-status.tsv` recorded `node 143`; `command.log` recorded +`exit_status=1` after writing the DB summary. The host failure was the old +Mach-O executable compile wedge later fixed by `kad-36g`; there is no later +full-suite Node DB in this artifact set. 
+ +| Host | Total jobs | PASS/done jobs | FAIL jobs | SKIP/OMIT jobs | RUNNING jobs | READY/not-run jobs | SQLite cases | Case errors | XFAIL | XPASS/flaky | +|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:| +| node | 1394 | 0 | 0 | 0 | 1 | 1393 | 0 | 0 | not recorded | not recorded | + +Node running job in the preserved full snapshot: + +| Job | State | Cases | Errors | Classification | +|---|---|---:|---:|---| +| `ext/fts5/test/fts5optimize2.test` | running | 0 | 0 | Scheduler casualty of the old exec-resolution wedge, not a focused SQLite test failure. | + +### Browser + +Command: + +```bash +bash scripts/run-sqlite-project-unit-tests.sh --host browser --permutation full --jobs 2 --timeout-ms 21600000 --results-root test-runs/gastown-sqlite-browser-full-pr5-snapshot +``` + +Artifacts: + +- `test-runs/gastown-sqlite-browser-full-pr5-snapshot/run.log` +- `test-runs/gastown-sqlite-browser-full-pr5-snapshot/host-status.tsv` +- `test-runs/gastown-sqlite-browser-full-pr5-snapshot/combined-summary.md` +- `test-runs/gastown-sqlite-browser-full-pr5-snapshot/browser/summary.txt` +- `test-runs/gastown-sqlite-browser-full-pr5-snapshot/browser/failures.tsv` +- `test-runs/gastown-sqlite-browser-full-pr5-snapshot/browser/testrunner.db` +- `test-runs/gastown-sqlite-browser-full-pr5-snapshot/browser/testrunner.log` + +Runner status: `host-status.tsv` recorded `browser 1`. The page navigated or +reloaded while Playwright was waiting in `page.evaluate()`, but the testrunner +DB was preserved and readable. 
+ +| Host | Total jobs | PASS/done jobs | FAIL jobs | SKIP/OMIT jobs | RUNNING jobs | READY/not-run jobs | SQLite cases | Case errors | XFAIL | XPASS/flaky | +|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:| +| browser | 1393 | 58 | 4 | 0 | 2 | 1329 | 20066 | 1004 | not recorded | not recorded | + +Browser failed/running rows from the preserved full snapshot: + +| Job | Full-snapshot state | Cases | Errors | Superseding focused status | +|---|---|---:|---:|---| +| `test/sysfault.test` | failed | 1360 | 2 | Browser focused PASS, 1365 cases / 0 errors. | +| `test/writecrash.test` | failed | 20 | 1 | Browser focused FAIL, 158 cases / 1 error; tracked by `kad-wtb.19`. | +| `test/like.test` | failed | 159 | 1 | Node focused PASS and browser default focused PASS twice, 159 cases / 0 errors. | +| `test/savepoint6.test` | failed | 3325 | 1000 | Browser focused PASS, 8007 cases / 0 errors. | +| `test/walfault.test` | running | 0 | 0 | Browser focused FAIL, 1 case / 1 error; tracked by `kad-wtb.20`. | +| `test/sort4.test` | running | 0 | 0 | Browser focused PASS, 11 cases / 0 errors; Node focused FAIL tracked by `kad-wtb.21`. | + +## Focused Rerun Counts + +| Scope | Command / artifact | PASS/done jobs | FAIL jobs | SKIP/OMIT jobs | RUNNING/TIME jobs | Cases | Errors | Classification | +|---|---|---:|---:|---:|---:|---:|---:|---| +| Browser `sysfault.test` | `scripts/run-sqlite-official-tests.sh --host browser --permutation full --jobs 1 --timeout-ms 900000 --results-dir test-runs/kad-wtb.11-browser-sysfault sysfault.test`; committed report `test-runs/gastown-sqlite-browser-full-pr5-snapshot/browser-fault-recheck.md` | 1 | 0 | 0 | 0 | 1365 | 0 | Original WAL/open-path failures did not reproduce after rebuild. 
| +| Browser `writecrash.test` | `scripts/run-sqlite-official-tests.sh --host browser --permutation full --jobs 1 --timeout-ms 900000 --results-dir test-runs/kad-wtb.11-browser-writecrash writecrash.test`; committed report `test-runs/gastown-sqlite-browser-full-pr5-snapshot/browser-fault-recheck.md` | 0 | 1 | 0 | 0 | 158 | 1 | Browser-only executable visibility/materialization failure for `/usr/bin/testfixture`; follow-up `kad-wtb.19`. | +| Node `writecrash.test` | Same committed fault/crash report | 1 | 0 | 0 | 0 | 995 | 0 | Node comparison passes. | +| Browser `walfault.test` | `scripts/run-sqlite-official-tests.sh --host browser --permutation full --jobs 1 --timeout-ms 900000 --results-dir test-runs/kad-wtb.11-browser-walfault walfault.test`; committed report `test-runs/gastown-sqlite-browser-full-pr5-snapshot/browser-fault-recheck.md` | 0 | 1 | 0 | 0 | 1 | 1 | Late Tcl abort plus browser kernel `munmap` trap; follow-up `kad-wtb.20`. | +| Node `walfault.test` | Same committed fault/crash report | 0 | 0 | 0 | 1 | 0 | 0 | Timed out/running without reaching the browser abort path. | +| Node `like.test` | `bash scripts/run-sqlite-official-tests.sh --host node --permutation full --jobs 1 --timeout-ms 600000 --results-dir test-runs/kad-wtb13-like-node-full like.test`; committed summary `test-runs/kad-wtb13-like-node-full/summary.txt` | 1 | 0 | 0 | 0 | 159 | 0 | Pass. | +| Browser `like.test` default run 1 | `bash scripts/run-sqlite-official-tests.sh --host browser --permutation full --jobs 1 --timeout-ms 600000 --results-dir test-runs/kad-wtb13-like-browser-full like.test`; committed summary `test-runs/kad-wtb13-like-browser-full/summary.txt` | 1 | 0 | 0 | 0 | 159 | 0 | Pass. 
| +| Browser `like.test` default run 2 | `bash scripts/run-sqlite-official-tests.sh --host browser --permutation full --jobs 1 --timeout-ms 600000 --results-dir test-runs/kad-wtb13-like-browser-full-rerun-default like.test`; committed summary `test-runs/kad-wtb13-like-browser-full-rerun-default/summary.txt` | 1 | 0 | 0 | 0 | 159 | 0 | Pass. | +| Browser `like.test` 16384-page diagnostic | `SQLITE_TEST_VITE_PORT=5260 SQLITE_BROWSER_MAX_MEMORY_PAGES=16384 bash scripts/run-sqlite-official-tests.sh --host browser --permutation full --jobs 1 --timeout-ms 600000 --results-dir test-runs/kad-wtb13-like-browser-full-1g like.test`; committed summary `test-runs/kad-wtb13-like-browser-full-1g/summary.txt` | 0 | 1 | 0 | 0 | 159 | 1 | Diagnostic failure on adjacent timing case `like-14.1`; original `like-14.2` passes. | +| Browser `savepoint6.test` | `scripts/run-sqlite-project-unit-tests.sh --host browser --permutation full --jobs 1 --timeout-ms 900000 --results-root test-runs/kad-wtb.12-savepoint6-focused-after-fix savepoint6.test`; local artifact `/Users/brandon/gt/kandelo/polecats/warboy/kandelo/test-runs/kad-wtb.12-savepoint6-focused-after-fix/browser/summary.txt`; durable close note `kad-wtb.12` | 1 | 0 | 0 | 0 | 8007 | 0 | Pass after SharedFS open-unlink/rename-over fix. | +| Browser `sort4.test` | `scripts/run-browser-sqlite-official-tests.sh --permutation full --jobs 1 --timeout-ms 300000 sort4`; durable close note `kad-wtb.9` | 1 | 0 | 0 | 0 | 11 | 0 | Pass after browser threaded-sorter stabilization; no preserved final results-dir was found. 
| +| Node `sort4.test` | `/Users/brandon/gt/kandelo/polecats/dag/kandelo/test-runs/sqlite-node-sort4-dag-trace-20260613/{summary.txt,failures.tsv,testrunner.db,testrunner.log}` and `/Users/brandon/gt/kandelo/polecats/dag/kandelo/test-runs/sqlite-official-node-full/20260613-095215/{summary.txt,failures.tsv,testrunner.db,testrunner.log}` | 0 | 1 | 0 | 0 | 11 | 5 | Fails `sort4-2.3/2.4/2.5/2.6/2.8` with `unable to open database file`; follow-up `kad-wtb.21`. | + +## Final Failure Inventory + +Active follow-up bugs filed from `kad-wtb.18`: + +| Bead | Host | Test | Classification | Current hard count | +|---|---|---|---|---| +| `kad-wtb.19` | browser | `writecrash.test` | Browser process teardown / executable materialization / VFS visibility after repeated crash-child iterations. | 1 failed job, 158 cases, 1 error. | +| `kad-wtb.20` | browser | `walfault.test` | Browser crash/abort cleanup path reaches `munmap` trap. | 1 failed job, 1 recorded case, 1 error. | +| `kad-wtb.21` | node | `sort4.test` | Node filesystem/temp database open failure in SQLite sorter coverage. | 1 failed job, 11 cases, 5 errors. | + +Resolved or superseded rows: + +| Host | Test | Resolution | +|---|---|---| +| node | Full-run Mach-O compile wedge | Fixed by `kad-36g`; preserved full DB remains the hard snapshot because no later full-suite Node DB is present. | +| browser | `sysfault.test` | Focused rebuilt browser run passes, 1365 cases / 0 errors. | +| browser | `like.test` / `like-14.2` | Focused Node and browser default runs pass; diagnostic 16384-page comparison fails adjacent timing case `like-14.1`, not the original string/collation concern. | +| browser | `savepoint6.test` | Focused browser run passes after SharedFS fix, 8007 cases / 0 errors. | +| browser | `sort4.test` | Focused browser run passes after threaded-sorter stabilization, 11 cases / 0 errors. 
| + +## Superseded Or Excluded Artifacts + +- `test-runs/gastown-sqlite-node-full-pr5/attempt1/combined-summary.md` has no + usable testrunner DB and is superseded by + `test-runs/gastown-sqlite-node-full-pr5/node/testrunner.db`. +- Early browser sort4 diagnostic directories under Dag's worktree are + intermediate failed/running probes and are superseded by the `kad-wtb.9` + focused browser pass close note. +- `test-runs/kad-wtb13-like-browser-full-1g` is a memory-cap diagnostic only; + it should not replace the two passing default browser focused runs. diff --git a/test-runs/gastown-sqlite-epic-synthesis/pr-body.md b/test-runs/gastown-sqlite-epic-synthesis/pr-body.md index 77b767a37..39583a2c7 100644 --- a/test-runs/gastown-sqlite-epic-synthesis/pr-body.md +++ b/test-runs/gastown-sqlite-epic-synthesis/pr-body.md @@ -1,54 +1,69 @@ ## Summary Adopts the SQLite project-unit harness work from PR #5 into Kandelo and records -current both-host validation status against SQLite's official `full` -permutation. +both-host validation status against SQLite's official `full` permutation. This PR adds `scripts/run-sqlite-project-unit-tests.sh`, documents the harness in the porting guide, disables accidental default browser syscall tracing for the -SQLite demo runner, and improves browser artifact snapshotting so partial -SQLite testrunner databases survive page reloads. +SQLite demo runner, improves browser artifact snapshotting, fixes browser VFS +open-unlink lifetime behavior, and stabilizes the browser threaded-sorter path +used by `sort4.test`. ## Validation Status Current completion target: SQLite official `full` permutation on both Node and browser. The larger `all` permutation is tracked separately as `kad-29m`. -Node full run: +Full hard-count report: `test-runs/gastown-sqlite-epic-synthesis/final-hard-counts.md`. 
+ +Node full snapshot: - Command: `/bin/bash scripts/dev-shell.sh /bin/bash scripts/run-sqlite-project-unit-tests.sh --host node --permutation full --jobs 2 --timeout-ms 21600000 --results-root test-runs/gastown-sqlite-node-full-pr5` -- Result: did not complete. -- Snapshot: 1394 jobs total, 0 done, 0 failed, 1 running, 1393 ready, 0 cases. -- Blocker: `kad-36g`, Node scheduler/exec path feeds a Mach-O host executable to `WebAssembly.compile()` and leaves the SQLite scheduler wedged. Focused `fts5optimize2.test` reproductions pass, so that test name is not the root cause. +- Artifacts: `test-runs/gastown-sqlite-node-full-pr5/{command.log,host-status.tsv,node/summary.txt,node/failures.tsv,node/testrunner.db,node/testrunner.log}` +- Runner status: `node 143`, command `exit_status=1` after summary write. +- Hard counts: 1394 jobs total, 0 done, 0 failed, 0 omit/skip, 1 running, 1393 ready, 0 cases, 0 case errors. +- `kad-36g` fixed the Mach-O exec-resolution wedge that caused this snapshot. No later full-suite Node DB is present in the final artifact set. -Browser full run: +Browser full snapshot: - Command: `bash scripts/run-sqlite-project-unit-tests.sh --host browser --permutation full --jobs 2 --timeout-ms 21600000 --results-root test-runs/gastown-sqlite-browser-full-pr5-snapshot` -- Result: exited after browser page navigation/reload while Playwright was waiting in `page.evaluate()`. -- Snapshot: 1393 jobs total, 58 done, 4 failed, 2 running, 1329 ready, 20066 cases, 1004 case errors. -- Blockers: - - `kad-wtb.10`: browser runtime stability/page reload during `walfault`/`sort4`. - - `kad-wtb.9`: focused browser `sort4.test` threaded sorter memory OOB or CPU-bound stall. - - `kad-wtb.11`: browser fault/crash filesystem behavior in `sysfault`, `writecrash`, and likely `walfault`. - - `kad-wtb.12`: browser `savepoint6` tempdb/savepoint state divergence. - - `kad-wtb.13`: browser `like-14.2` string/collation/LIKE mismatch. 
+- Artifacts: `test-runs/gastown-sqlite-browser-full-pr5-snapshot/{run.log,host-status.tsv,combined-summary.md,browser/summary.txt,browser/failures.tsv,browser/testrunner.db,browser/testrunner.log}` +- Runner status: `browser 1`, page navigation/reload while Playwright was waiting in `page.evaluate()`. +- Hard counts: 1393 jobs total, 58 done, 4 failed, 0 omit/skip, 2 running, 1329 ready, 20066 cases, 1004 case errors. +- The SQLite testrunner records `done`, `failed`, `omit`, `running`, and `ready`; it does not record XFAIL/XPASS/flaky fields. -## Artifacts +## Focused Superseding Results -- Node: `test-runs/gastown-sqlite-node-full-pr5/` -- Browser: `test-runs/gastown-sqlite-browser-full-pr5-snapshot/` -- Synthesis: `test-runs/gastown-sqlite-epic-synthesis/summary.md` +The browser full snapshot's failed/running rows were followed by focused reruns: -## Test Verification +| Host | Test | Focused result | Follow-up | +|---|---|---:|---| +| browser | `sysfault.test` | PASS, 1365 cases / 0 errors | Original full-snapshot failures did not reproduce after rebuild. | +| browser | `writecrash.test` | FAIL, 158 cases / 1 error | `kad-wtb.19`: browser executable visibility/materialization after repeated crash-child iterations. | +| node | `writecrash.test` | PASS, 995 cases / 0 errors | Node comparison passes. | +| browser | `walfault.test` | FAIL, 1 recorded case / 1 error | `kad-wtb.20`: browser Tcl abort plus kernel `munmap` trap. | +| node | `walfault.test` | TIME/RUNNING, 0 cases / 0 errors | Did not hit the browser abort path before timeout. | +| node | `like.test` | PASS, 159 cases / 0 errors | Original browser `like-14.2` concern is timing-threshold behavior. | +| browser | `like.test` default cap | PASS twice, 159 cases / 0 errors each | Diagnostic 16384-page comparison fails adjacent timing case `like-14.1`, not `like-14.2`. | +| browser | `savepoint6.test` | PASS, 8007 cases / 0 errors | Fixed by SharedFS open-unlink/rename-over lifetime handling. 
| +| browser | `sort4.test` | PASS, 11 cases / 0 errors | Browser threaded-sorter crash/stall fixed by `kad-wtb.9`. | +| node | `sort4.test` | FAIL, 11 cases / 5 errors | `kad-wtb.21`: Node temp database open failures in `sort4-2.3/2.4/2.5/2.6/2.8`. | -Prior child branches recorded the full Kandelo gate suite against the harness and -browser changes: +No SQLite test was skipped or xfailed as a substitute for runtime/platform work. -- `cargo test -p kandelo --target aarch64-apple-darwin --lib` -- `cd host && npx vitest run` -- `scripts/run-libc-tests.sh` -- `scripts/run-posix-tests.sh` -- `bash scripts/check-abi-version.sh` +## Artifacts + +- Node full snapshot: `test-runs/gastown-sqlite-node-full-pr5/` +- Browser full snapshot: `test-runs/gastown-sqlite-browser-full-pr5-snapshot/` +- Epic synthesis: `test-runs/gastown-sqlite-epic-synthesis/summary.md` +- Final hard counts: `test-runs/gastown-sqlite-epic-synthesis/final-hard-counts.md` +- LIKE focused artifacts: `test-runs/kad-wtb13-like-*` +- Fault/crash focused report: `test-runs/gastown-sqlite-browser-full-pr5-snapshot/browser-fault-recheck.md` + +## Test Verification -This PR intentionally preserves the remaining SQLite failures as actionable -blockers instead of skipping or xfail-ing upstream tests. +Latest child branches recorded the full Kandelo gate suite before merge into +`integration/kad-wtb-sqlite-testing`: `cargo test -p kandelo --target +aarch64-apple-darwin --lib`, `cd host && npx vitest run`, +`scripts/run-libc-tests.sh`, `scripts/run-posix-tests.sh`, and +`scripts/dev-shell.sh bash scripts/check-abi-version.sh`. 
diff --git a/test-runs/kad-wtb13-like-browser-full-1g/failures.tsv b/test-runs/kad-wtb13-like-browser-full-1g/failures.tsv new file mode 100644 index 000000000..c8bf52413 --- /dev/null +++ b/test-runs/kad-wtb13-like-browser-full-1g/failures.tsv @@ -0,0 +1,2 @@ +jobid state displaytype displayname cases errors ms +1 failed tcl test/like.test 159 1 1395 diff --git a/test-runs/kad-wtb13-like-browser-full-1g/summary.txt b/test-runs/kad-wtb13-like-browser-full-1g/summary.txt new file mode 100644 index 000000000..245965032 --- /dev/null +++ b/test-runs/kad-wtb13-like-browser-full-1g/summary.txt @@ -0,0 +1,23 @@ +SQLite official testrunner summary +host=browser +permutation=full +jobs=1 +results_dir=test-runs/kad-wtb13-like-browser-full-1g + +total_jobs done_jobs failed_jobs omitted_jobs running_jobs ready_jobs total_cases total_case_errors +---------- --------- ----------- ------------ ------------ ---------- ----------- ----------------- +1 0 1 0 0 0 159 1 + +state jobs cases case_errors +------ ---- ----- ----------- +failed 1 159 1 + +Jobs by SQLite testrunner config: +config jobs cases case_errors +------ ---- ----- ----------- +full 1 159 1 + +Failed, running, and omitted jobs: +jobid state displaytype displayname cases errors ms +----- ------ ----------- -------------- ----- ------ ---- +1 failed tcl test/like.test 159 1 1395 diff --git a/test-runs/kad-wtb13-like-browser-full-1g/testrunner.log b/test-runs/kad-wtb13-like-browser-full-1g/testrunner.log new file mode 100644 index 000000000..81173ecdb --- /dev/null +++ b/test-runs/kad-wtb13-like-browser-full-1g/testrunner.log @@ -0,0 +1,173 @@ +### test/like.test 1395ms (failed) +like-1.0... Ok +like-1.1... Ok +like-1.2... Ok +like-1.3... Ok +like-1.4... Ok +like-1.5.1... Ok +like-1.5.2... Ok +like-1.5.3... Ok +like-1.6... Ok +like-1.7... Ok +like-1.8... Ok +like-1.9... Ok +like-1.10... Ok +like-2.1... Ok +like-2.2... Ok +like-2.3... Ok +like-2.4... Ok +like-3.1... Ok +like-3.2... Ok +like-3.3.100... 
Ok +like-3.3.100.cnt... Ok +like-3.3.102... Ok +like-3.3.103... Ok +like-3.3.104... Ok +like-3.3.105... Ok +like-3.3.105... Ok +like-3.3.106... Ok +like-3.4.2... Ok +like-3.4.3... Ok +like-3.4.4... Ok +like-3.4.5... Ok +like-3.5... Ok +like-3.6... Ok +like-3.7... Ok +like-3.8... Ok +like-3.9... Ok +like-3.10... Ok +like-3.11... Ok +like-3.12... Ok +like-3.13... Ok +like-3.14... Ok +like-3.15... Ok +like-3.16... Ok +like-3.17... Ok +like-3.18... Ok +like-3.19... Ok +like-3.20... Ok +like-3.21... Ok +like-3.22... Ok +like-3.23... Ok +like-3.24... Ok +like-3.25... Ok +like-3.26... Ok +like-3.27... Ok +like-4.1... Ok +like-4.2... Ok +like-4.3... Ok +like-4.4... Ok +like-4.5... Ok +like-4.6... Ok +like-5.1... Ok +like-5.2... Ok +like-5.3... Ok +like-5.4... Ok +like-5.5... Ok +like-5.6... Ok +like-5.7... Ok +like-5.8... Ok +like-5.11... Ok +like-5.12... Ok +like-5.13... Ok +like-5.14... Ok +like-5.15... Ok +like-5.16... Ok +like-5.17... Ok +like-5.18... Ok +like-5.21... Ok +like-5.22... Ok +like-5.23... Ok +like-5.24... Ok +like-5.25... Ok +like-5.26... Ok +like-5.27... Ok +like-5.28... Ok +like-6.1... Ok +like-7.1... Ok +like-8.1... Ok +like-8.2... Ok +like-8.3... Ok +like-8.4... Ok +like-9.1... Ok +like-9.2... Ok +like-9.3.1... Ok +like-9.3.2... Ok +like-9.4.1... Ok +like-9.4.2... Ok +like-9.4.3... Ok +like-9.5.1... Ok +like-9.5.2... Ok +like-10.1... Ok +like-10.2... Ok +like-10.3... Ok +like-10.4... Ok +like-10.5a... Ok +like-10.6... Ok +like-10.10... Ok +like-10.11... Ok +like-10.12... Ok +like-10.13... Ok +like-10.14... Ok +like-10.15... Ok +like-11.0... Ok +like-11.1... Ok +like-11.2... Ok +like-11.3... Ok +like-11.4... Ok +like-11.5... Ok +like-11.6... Ok +like-11.7... Ok +like-11.8... Ok +like-11.9... Ok +like-11.10... Ok +like-12.1... Ok +like-12.2... Ok +like-12.3... Ok +like-12.4... Ok +like-12.5... Ok +like-12.6... Ok +like-12.11... Ok +like-12.12... Ok +like-12.13... Ok +like-12.14... Ok +like-12.15... Ok +like-12.16... Ok +like-13.1... Ok +like-13.2... 
Ok +like-13.3... Ok +like-13.4... Ok +like-14.1... (1000 ms - want less than 1000.0) +! like-14.1 expected: [1] +! like-14.1 got: [0] +like-14.2... (0 ms - want less than 1000.0) Ok +like-15.100... Ok +like-15.101... Ok +like-15.102... Ok +like-15.103... Ok +like-15.110... Ok +like-15.111... Ok +like-15.112... Ok +like-15.120... Ok +like-15.121... Ok +like-16.0... Ok +like-16.1... Ok +like-16.2... Ok +like-17.0... Ok +like-17.1... Ok +like-17.1... Ok +like-18.0... Ok +like-18.1... Ok +like-18.2... Ok +SQLite 2025-02-18 13:38:58 873d4e274b4988d260ba8354a9718324a1c26187a4ab4c1cc0227c03d0f10e70 +1 errors out of 159 tests on 32-bit +!Failures on these tests: like-14.1 +WARNING: Multi-threaded tests skipped: Linked against a non-threadsafe Tcl build +All memory allocations freed - no leaks +Memory used: now 0 max 144056 max-size 48000 +Allocation count: now 0 max 404 +Page-cache used: now 0 max 0 max-size 1192 +Page-cache overflow: now 0 max 6824 +Maximum memory usage: 144056 bytes +Current memory usage: 0 bytes +Number of malloc() : -1 calls diff --git a/test-runs/kad-wtb13-like-browser-full-rerun-default/failures.tsv b/test-runs/kad-wtb13-like-browser-full-rerun-default/failures.tsv new file mode 100644 index 000000000..e69de29bb diff --git a/test-runs/kad-wtb13-like-browser-full-rerun-default/summary.txt b/test-runs/kad-wtb13-like-browser-full-rerun-default/summary.txt new file mode 100644 index 000000000..2ee20c817 --- /dev/null +++ b/test-runs/kad-wtb13-like-browser-full-rerun-default/summary.txt @@ -0,0 +1,20 @@ +SQLite official testrunner summary +host=browser +permutation=full +jobs=1 +results_dir=test-runs/kad-wtb13-like-browser-full-rerun-default + +total_jobs done_jobs failed_jobs omitted_jobs running_jobs ready_jobs total_cases total_case_errors +---------- --------- ----------- ------------ ------------ ---------- ----------- ----------------- +1 1 0 0 0 0 159 0 + +state jobs cases case_errors +----- ---- ----- ----------- +done 1 159 0 + +Jobs by SQLite 
testrunner config: +config jobs cases case_errors +------ ---- ----- ----------- +full 1 159 0 + +Failed, running, and omitted jobs: diff --git a/test-runs/kad-wtb13-like-browser-full-rerun-default/testrunner.log b/test-runs/kad-wtb13-like-browser-full-rerun-default/testrunner.log new file mode 100644 index 000000000..33e4671f9 --- /dev/null +++ b/test-runs/kad-wtb13-like-browser-full-rerun-default/testrunner.log @@ -0,0 +1,170 @@ +### test/like.test 1376ms (done) +like-1.0... Ok +like-1.1... Ok +like-1.2... Ok +like-1.3... Ok +like-1.4... Ok +like-1.5.1... Ok +like-1.5.2... Ok +like-1.5.3... Ok +like-1.6... Ok +like-1.7... Ok +like-1.8... Ok +like-1.9... Ok +like-1.10... Ok +like-2.1... Ok +like-2.2... Ok +like-2.3... Ok +like-2.4... Ok +like-3.1... Ok +like-3.2... Ok +like-3.3.100... Ok +like-3.3.100.cnt... Ok +like-3.3.102... Ok +like-3.3.103... Ok +like-3.3.104... Ok +like-3.3.105... Ok +like-3.3.105... Ok +like-3.3.106... Ok +like-3.4.2... Ok +like-3.4.3... Ok +like-3.4.4... Ok +like-3.4.5... Ok +like-3.5... Ok +like-3.6... Ok +like-3.7... Ok +like-3.8... Ok +like-3.9... Ok +like-3.10... Ok +like-3.11... Ok +like-3.12... Ok +like-3.13... Ok +like-3.14... Ok +like-3.15... Ok +like-3.16... Ok +like-3.17... Ok +like-3.18... Ok +like-3.19... Ok +like-3.20... Ok +like-3.21... Ok +like-3.22... Ok +like-3.23... Ok +like-3.24... Ok +like-3.25... Ok +like-3.26... Ok +like-3.27... Ok +like-4.1... Ok +like-4.2... Ok +like-4.3... Ok +like-4.4... Ok +like-4.5... Ok +like-4.6... Ok +like-5.1... Ok +like-5.2... Ok +like-5.3... Ok +like-5.4... Ok +like-5.5... Ok +like-5.6... Ok +like-5.7... Ok +like-5.8... Ok +like-5.11... Ok +like-5.12... Ok +like-5.13... Ok +like-5.14... Ok +like-5.15... Ok +like-5.16... Ok +like-5.17... Ok +like-5.18... Ok +like-5.21... Ok +like-5.22... Ok +like-5.23... Ok +like-5.24... Ok +like-5.25... Ok +like-5.26... Ok +like-5.27... Ok +like-5.28... Ok +like-6.1... Ok +like-7.1... Ok +like-8.1... Ok +like-8.2... Ok +like-8.3... Ok +like-8.4... 
Ok +like-9.1... Ok +like-9.2... Ok +like-9.3.1... Ok +like-9.3.2... Ok +like-9.4.1... Ok +like-9.4.2... Ok +like-9.4.3... Ok +like-9.5.1... Ok +like-9.5.2... Ok +like-10.1... Ok +like-10.2... Ok +like-10.3... Ok +like-10.4... Ok +like-10.5a... Ok +like-10.6... Ok +like-10.10... Ok +like-10.11... Ok +like-10.12... Ok +like-10.13... Ok +like-10.14... Ok +like-10.15... Ok +like-11.0... Ok +like-11.1... Ok +like-11.2... Ok +like-11.3... Ok +like-11.4... Ok +like-11.5... Ok +like-11.6... Ok +like-11.7... Ok +like-11.8... Ok +like-11.9... Ok +like-11.10... Ok +like-12.1... Ok +like-12.2... Ok +like-12.3... Ok +like-12.4... Ok +like-12.5... Ok +like-12.6... Ok +like-12.11... Ok +like-12.12... Ok +like-12.13... Ok +like-12.14... Ok +like-12.15... Ok +like-12.16... Ok +like-13.1... Ok +like-13.2... Ok +like-13.3... Ok +like-13.4... Ok +like-14.1... (0 ms - want less than 1000.0) Ok +like-14.2... (0 ms - want less than 1000.0) Ok +like-15.100... Ok +like-15.101... Ok +like-15.102... Ok +like-15.103... Ok +like-15.110... Ok +like-15.111... Ok +like-15.112... Ok +like-15.120... Ok +like-15.121... Ok +like-16.0... Ok +like-16.1... Ok +like-16.2... Ok +like-17.0... Ok +like-17.1... Ok +like-17.1... Ok +like-18.0... Ok +like-18.1... Ok +like-18.2... 
Ok +SQLite 2025-02-18 13:38:58 873d4e274b4988d260ba8354a9718324a1c26187a4ab4c1cc0227c03d0f10e70 +0 errors out of 159 tests on 32-bit +WARNING: Multi-threaded tests skipped: Linked against a non-threadsafe Tcl build +All memory allocations freed - no leaks +Memory used: now 0 max 144056 max-size 48000 +Allocation count: now 0 max 404 +Page-cache used: now 0 max 0 max-size 1192 +Page-cache overflow: now 0 max 6824 +Maximum memory usage: 144056 bytes +Current memory usage: 0 bytes +Number of malloc() : -1 calls diff --git a/test-runs/kad-wtb13-like-browser-full/failures.tsv b/test-runs/kad-wtb13-like-browser-full/failures.tsv new file mode 100644 index 000000000..e69de29bb diff --git a/test-runs/kad-wtb13-like-browser-full/summary.txt b/test-runs/kad-wtb13-like-browser-full/summary.txt new file mode 100644 index 000000000..daec05842 --- /dev/null +++ b/test-runs/kad-wtb13-like-browser-full/summary.txt @@ -0,0 +1,20 @@ +SQLite official testrunner summary +host=browser +permutation=full +jobs=1 +results_dir=test-runs/kad-wtb13-like-browser-full + +total_jobs done_jobs failed_jobs omitted_jobs running_jobs ready_jobs total_cases total_case_errors +---------- --------- ----------- ------------ ------------ ---------- ----------- ----------------- +1 1 0 0 0 0 159 0 + +state jobs cases case_errors +----- ---- ----- ----------- +done 1 159 0 + +Jobs by SQLite testrunner config: +config jobs cases case_errors +------ ---- ----- ----------- +full 1 159 0 + +Failed, running, and omitted jobs: diff --git a/test-runs/kad-wtb13-like-browser-full/testrunner.log b/test-runs/kad-wtb13-like-browser-full/testrunner.log new file mode 100644 index 000000000..1c2a08cfb --- /dev/null +++ b/test-runs/kad-wtb13-like-browser-full/testrunner.log @@ -0,0 +1,170 @@ +### test/like.test 1429ms (done) +like-1.0... Ok +like-1.1... Ok +like-1.2... Ok +like-1.3... Ok +like-1.4... Ok +like-1.5.1... Ok +like-1.5.2... Ok +like-1.5.3... Ok +like-1.6... Ok +like-1.7... Ok +like-1.8... Ok +like-1.9... 
Ok +like-1.10... Ok +like-2.1... Ok +like-2.2... Ok +like-2.3... Ok +like-2.4... Ok +like-3.1... Ok +like-3.2... Ok +like-3.3.100... Ok +like-3.3.100.cnt... Ok +like-3.3.102... Ok +like-3.3.103... Ok +like-3.3.104... Ok +like-3.3.105... Ok +like-3.3.105... Ok +like-3.3.106... Ok +like-3.4.2... Ok +like-3.4.3... Ok +like-3.4.4... Ok +like-3.4.5... Ok +like-3.5... Ok +like-3.6... Ok +like-3.7... Ok +like-3.8... Ok +like-3.9... Ok +like-3.10... Ok +like-3.11... Ok +like-3.12... Ok +like-3.13... Ok +like-3.14... Ok +like-3.15... Ok +like-3.16... Ok +like-3.17... Ok +like-3.18... Ok +like-3.19... Ok +like-3.20... Ok +like-3.21... Ok +like-3.22... Ok +like-3.23... Ok +like-3.24... Ok +like-3.25... Ok +like-3.26... Ok +like-3.27... Ok +like-4.1... Ok +like-4.2... Ok +like-4.3... Ok +like-4.4... Ok +like-4.5... Ok +like-4.6... Ok +like-5.1... Ok +like-5.2... Ok +like-5.3... Ok +like-5.4... Ok +like-5.5... Ok +like-5.6... Ok +like-5.7... Ok +like-5.8... Ok +like-5.11... Ok +like-5.12... Ok +like-5.13... Ok +like-5.14... Ok +like-5.15... Ok +like-5.16... Ok +like-5.17... Ok +like-5.18... Ok +like-5.21... Ok +like-5.22... Ok +like-5.23... Ok +like-5.24... Ok +like-5.25... Ok +like-5.26... Ok +like-5.27... Ok +like-5.28... Ok +like-6.1... Ok +like-7.1... Ok +like-8.1... Ok +like-8.2... Ok +like-8.3... Ok +like-8.4... Ok +like-9.1... Ok +like-9.2... Ok +like-9.3.1... Ok +like-9.3.2... Ok +like-9.4.1... Ok +like-9.4.2... Ok +like-9.4.3... Ok +like-9.5.1... Ok +like-9.5.2... Ok +like-10.1... Ok +like-10.2... Ok +like-10.3... Ok +like-10.4... Ok +like-10.5a... Ok +like-10.6... Ok +like-10.10... Ok +like-10.11... Ok +like-10.12... Ok +like-10.13... Ok +like-10.14... Ok +like-10.15... Ok +like-11.0... Ok +like-11.1... Ok +like-11.2... Ok +like-11.3... Ok +like-11.4... Ok +like-11.5... Ok +like-11.6... Ok +like-11.7... Ok +like-11.8... Ok +like-11.9... Ok +like-11.10... Ok +like-12.1... Ok +like-12.2... Ok +like-12.3... Ok +like-12.4... Ok +like-12.5... Ok +like-12.6... 
Ok +like-12.11... Ok +like-12.12... Ok +like-12.13... Ok +like-12.14... Ok +like-12.15... Ok +like-12.16... Ok +like-13.1... Ok +like-13.2... Ok +like-13.3... Ok +like-13.4... Ok +like-14.1... (0 ms - want less than 1000.0) Ok +like-14.2... (0 ms - want less than 1000.0) Ok +like-15.100... Ok +like-15.101... Ok +like-15.102... Ok +like-15.103... Ok +like-15.110... Ok +like-15.111... Ok +like-15.112... Ok +like-15.120... Ok +like-15.121... Ok +like-16.0... Ok +like-16.1... Ok +like-16.2... Ok +like-17.0... Ok +like-17.1... Ok +like-17.1... Ok +like-18.0... Ok +like-18.1... Ok +like-18.2... Ok +SQLite 2025-02-18 13:38:58 873d4e274b4988d260ba8354a9718324a1c26187a4ab4c1cc0227c03d0f10e70 +0 errors out of 159 tests on 32-bit +WARNING: Multi-threaded tests skipped: Linked against a non-threadsafe Tcl build +All memory allocations freed - no leaks +Memory used: now 0 max 144056 max-size 48000 +Allocation count: now 0 max 404 +Page-cache used: now 0 max 0 max-size 1192 +Page-cache overflow: now 0 max 6824 +Maximum memory usage: 144056 bytes +Current memory usage: 0 bytes +Number of malloc() : -1 calls diff --git a/test-runs/kad-wtb13-like-focused-summary.md b/test-runs/kad-wtb13-like-focused-summary.md new file mode 100644 index 000000000..07b9b7355 --- /dev/null +++ b/test-runs/kad-wtb13-like-focused-summary.md @@ -0,0 +1,84 @@ +# SQLite LIKE Focused Classification + +Issue: `kad-wtb.13` +Base branch: `integration/kad-wtb-sqlite-testing` + +## Scope + +The browser full-suite snapshot at +`test-runs/gastown-sqlite-browser-full-pr5-snapshot` recorded one +`test/like.test` failure: `like-14.2` expected `1` and got `0`. + +`like-14.2` is a timing assertion, not a LIKE result mismatch. The test runs: + +```sql +SELECT 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaz' LIKE '%a%a%a%a%a%a%a%a%y' +``` + +and expects the elapsed Tcl `time` measurement to be less than +`1000 * $::sqlite_options(configslower)` ms. 
+ +## Focused Commands + +```bash +bash scripts/run-sqlite-official-tests.sh --host node --permutation full --jobs 1 --timeout-ms 600000 --results-dir test-runs/kad-wtb13-like-node-full like.test +``` + +```bash +bash scripts/run-sqlite-official-tests.sh --host browser --permutation full --jobs 1 --timeout-ms 600000 --results-dir test-runs/kad-wtb13-like-browser-full like.test +``` + +```bash +bash scripts/run-sqlite-official-tests.sh --host browser --permutation full --jobs 1 --timeout-ms 600000 --results-dir test-runs/kad-wtb13-like-browser-full-rerun-default like.test +``` + +```bash +SQLITE_TEST_VITE_PORT=5260 SQLITE_BROWSER_MAX_MEMORY_PAGES=16384 bash scripts/run-sqlite-official-tests.sh --host browser --permutation full --jobs 1 --timeout-ms 600000 --results-dir test-runs/kad-wtb13-like-browser-full-1g like.test +``` + +## Result + +| Host/config | Result | Cases passed | `like-14.1` timing | `like-14.2` timing | +|---|---:|---:|---:|---:| +| Node | pass | 159/159 | 194 ms | 175 ms | +| Browser default cap, run 1 | pass | 159/159 | 0 ms | 0 ms | +| Browser default cap, run 2 | pass | 159/159 | 0 ms | 0 ms | +| Browser 16384-page cap | fail | 158/159 | 1000 ms | 0 ms | + +The original browser snapshot failed because `like-14.2` measured exactly +`1000 ms`, and the upstream assertion is strictly `< 1000`. + +```text +like-14.2... (1000 ms - want less than 1000.0) +! like-14.2 expected: [1] +! like-14.2 got: [0] +``` + +## Classification + +The focused `test/like.test` reruns classify the original `like-14.2` failure as +a browser timing-threshold miss, not a string comparison, collation, or locale +semantic mismatch. On the final rebased branch, `like-14.2` passed in every +focused browser rerun. The adjacent pathological GLOB timing assertion, +`like-14.1`, can hit the same exact `1000 ms` strict threshold in the 16384-page +browser memory-cap diagnostic and make the focused `test/like.test` job fail +even though `like-14.2` passes.
+ +The default 4096-page browser cap passed twice on the final branch, while the +16384-page comparison failed on `like-14.1`. That separates the original +`like-14.2` report from any string semantics bug and shows this class of failure +belongs to strict browser timing thresholds around the pathological LIKE/GLOB +performance tests. + +No harness skip or workaround was introduced. + +## Artifacts + +- `test-runs/kad-wtb13-like-node-full/summary.txt` +- `test-runs/kad-wtb13-like-node-full/testrunner.log` +- `test-runs/kad-wtb13-like-browser-full/summary.txt` +- `test-runs/kad-wtb13-like-browser-full/testrunner.log` +- `test-runs/kad-wtb13-like-browser-full-rerun-default/summary.txt` +- `test-runs/kad-wtb13-like-browser-full-rerun-default/testrunner.log` +- `test-runs/kad-wtb13-like-browser-full-1g/summary.txt` +- `test-runs/kad-wtb13-like-browser-full-1g/testrunner.log` diff --git a/test-runs/kad-wtb13-like-node-full/failures.tsv b/test-runs/kad-wtb13-like-node-full/failures.tsv new file mode 100644 index 000000000..e69de29bb diff --git a/test-runs/kad-wtb13-like-node-full/summary.txt b/test-runs/kad-wtb13-like-node-full/summary.txt new file mode 100644 index 000000000..43c4dc3d2 --- /dev/null +++ b/test-runs/kad-wtb13-like-node-full/summary.txt @@ -0,0 +1,21 @@ +SQLite official testrunner summary +host=node +permutation=full +jobs=1 +workdir=/tmp/kandelo-sqlite-official.xiyxJN +results_dir=test-runs/kad-wtb13-like-node-full + +total_jobs done_jobs failed_jobs omitted_jobs running_jobs ready_jobs total_cases total_case_errors +---------- --------- ----------- ------------ ------------ ---------- ----------- ----------------- +1 1 0 0 0 0 159 0 + +state jobs cases case_errors +----- ---- ----- ----------- +done 1 159 0 + +Jobs by SQLite testrunner config: +config jobs cases case_errors +------ ---- ----- ----------- +full 1 159 0 + +Failed, running, and omitted jobs: diff --git a/test-runs/kad-wtb13-like-node-full/testrunner.log 
b/test-runs/kad-wtb13-like-node-full/testrunner.log new file mode 100644 index 000000000..b1d4db8f1 --- /dev/null +++ b/test-runs/kad-wtb13-like-node-full/testrunner.log @@ -0,0 +1,170 @@ +### test/like.test 6108ms (done) +like-1.0... Ok +like-1.1... Ok +like-1.2... Ok +like-1.3... Ok +like-1.4... Ok +like-1.5.1... Ok +like-1.5.2... Ok +like-1.5.3... Ok +like-1.6... Ok +like-1.7... Ok +like-1.8... Ok +like-1.9... Ok +like-1.10... Ok +like-2.1... Ok +like-2.2... Ok +like-2.3... Ok +like-2.4... Ok +like-3.1... Ok +like-3.2... Ok +like-3.3.100... Ok +like-3.3.100.cnt... Ok +like-3.3.102... Ok +like-3.3.103... Ok +like-3.3.104... Ok +like-3.3.105... Ok +like-3.3.105... Ok +like-3.3.106... Ok +like-3.4.2... Ok +like-3.4.3... Ok +like-3.4.4... Ok +like-3.4.5... Ok +like-3.5... Ok +like-3.6... Ok +like-3.7... Ok +like-3.8... Ok +like-3.9... Ok +like-3.10... Ok +like-3.11... Ok +like-3.12... Ok +like-3.13... Ok +like-3.14... Ok +like-3.15... Ok +like-3.16... Ok +like-3.17... Ok +like-3.18... Ok +like-3.19... Ok +like-3.20... Ok +like-3.21... Ok +like-3.22... Ok +like-3.23... Ok +like-3.24... Ok +like-3.25... Ok +like-3.26... Ok +like-3.27... Ok +like-4.1... Ok +like-4.2... Ok +like-4.3... Ok +like-4.4... Ok +like-4.5... Ok +like-4.6... Ok +like-5.1... Ok +like-5.2... Ok +like-5.3... Ok +like-5.4... Ok +like-5.5... Ok +like-5.6... Ok +like-5.7... Ok +like-5.8... Ok +like-5.11... Ok +like-5.12... Ok +like-5.13... Ok +like-5.14... Ok +like-5.15... Ok +like-5.16... Ok +like-5.17... Ok +like-5.18... Ok +like-5.21... Ok +like-5.22... Ok +like-5.23... Ok +like-5.24... Ok +like-5.25... Ok +like-5.26... Ok +like-5.27... Ok +like-5.28... Ok +like-6.1... Ok +like-7.1... Ok +like-8.1... Ok +like-8.2... Ok +like-8.3... Ok +like-8.4... Ok +like-9.1... Ok +like-9.2... Ok +like-9.3.1... Ok +like-9.3.2... Ok +like-9.4.1... Ok +like-9.4.2... Ok +like-9.4.3... Ok +like-9.5.1... Ok +like-9.5.2... Ok +like-10.1... Ok +like-10.2... Ok +like-10.3... Ok +like-10.4... Ok +like-10.5a... 
Ok +like-10.6... Ok +like-10.10... Ok +like-10.11... Ok +like-10.12... Ok +like-10.13... Ok +like-10.14... Ok +like-10.15... Ok +like-11.0... Ok +like-11.1... Ok +like-11.2... Ok +like-11.3... Ok +like-11.4... Ok +like-11.5... Ok +like-11.6... Ok +like-11.7... Ok +like-11.8... Ok +like-11.9... Ok +like-11.10... Ok +like-12.1... Ok +like-12.2... Ok +like-12.3... Ok +like-12.4... Ok +like-12.5... Ok +like-12.6... Ok +like-12.11... Ok +like-12.12... Ok +like-12.13... Ok +like-12.14... Ok +like-12.15... Ok +like-12.16... Ok +like-13.1... Ok +like-13.2... Ok +like-13.3... Ok +like-13.4... Ok +like-14.1... (194 ms - want less than 1000.0) Ok +like-14.2... (175 ms - want less than 1000.0) Ok +like-15.100... Ok +like-15.101... Ok +like-15.102... Ok +like-15.103... Ok +like-15.110... Ok +like-15.111... Ok +like-15.112... Ok +like-15.120... Ok +like-15.121... Ok +like-16.0... Ok +like-16.1... Ok +like-16.2... Ok +like-17.0... Ok +like-17.1... Ok +like-17.1... Ok +like-18.0... Ok +like-18.1... Ok +like-18.2... Ok +SQLite 2025-02-18 13:38:58 873d4e274b4988d260ba8354a9718324a1c26187a4ab4c1cc0227c03d0f10e70 +0 errors out of 159 tests on 32-bit +WARNING: Multi-threaded tests skipped: Linked against a non-threadsafe Tcl build +All memory allocations freed - no leaks +Memory used: now 0 max 144160 max-size 48000 +Allocation count: now 0 max 404 +Page-cache used: now 0 max 0 max-size 1192 +Page-cache overflow: now 0 max 6824 +Maximum memory usage: 144160 bytes +Current memory usage: 0 bytes +Number of malloc() : -1 calls