From 1b306c94a64ecb4c1e0e2ed0ea2806028d2699b3 Mon Sep 17 00:00:00 2001 From: shijiashuai Date: Fri, 22 May 2026 10:29:25 +0800 Subject: [PATCH 1/3] refactor: deepen inference model loading Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- openspec/specs/api/spec.md | 8 +- openspec/specs/architecture/spec.md | 4 + openspec/specs/product/spec.md | 4 + openspec/specs/testing/spec.md | 31 ++++ src/core/GPUContext.ts | 51 +++---- src/engine/InferenceEngine.ts | 129 +++++++++------- src/engine/ModelCompiler.ts | 78 ++++++++++ src/operators/AddOperator.ts | 19 +-- src/operators/convValidation.ts | 5 +- src/operators/tensorContracts.ts | 48 ++++++ tests/core/GPUContext.test.ts | 35 +++++ tests/engine/InferenceEngine.test.ts | 211 ++++++++++++++++++++++++--- tests/operators/AddOperator.test.ts | 10 ++ 13 files changed, 512 insertions(+), 121 deletions(-) create mode 100644 src/engine/ModelCompiler.ts create mode 100644 src/operators/tensorContracts.ts diff --git a/openspec/specs/api/spec.md b/openspec/specs/api/spec.md index 357b8a0..24e6f5a 100644 --- a/openspec/specs/api/spec.md +++ b/openspec/specs/api/spec.md @@ -568,7 +568,7 @@ const output = await add.forward([inputA, inputB]); | Input | Shape | Description | |-------|-------|-------------| | inputA | any | First input tensor | -| inputB | any | Second input tensor (must match inputA shape exactly) | +| inputB | any | Second input tensor (must match inputA shape and layout exactly) | ### Output @@ -669,6 +669,12 @@ Load a model definition. **Parameters:** - `modelDef`: Model definition with layers and weights +**Throws:** +- `Error`: If the model has no layers +- `Error`: If a layer type is unknown +- `Error`: If a layer input cannot be resolved from `input`, prior layers, or weights +- `Error`: If layer names are duplicated + #### tensorFromArray ```typescript diff --git a/openspec/specs/architecture/spec.md b/openspec/specs/architecture/spec.md index 2f0fd75..78d966e 100644 --- a/openspec/specs/architecture/spec.md +++ b/openspec/specs/architecture/spec.md @@ -99,6 +99,10 @@ The system SHALL provide high-level inference orchestration. - **WHEN** model is loaded - **THEN** operators are mapped by type name for dynamic dispatch +#### Scenario: Model graph compilation +- **WHEN** model is loaded +- **THEN** layer names, operator types, and tensor references are validated before inference begins + #### Scenario: Intermediate cleanup - **WHEN** inference completes - **THEN** intermediate tensors are destroyed to free GPU memory diff --git a/openspec/specs/product/spec.md b/openspec/specs/product/spec.md index d68d3e8..bcb8f97 100644 --- a/openspec/specs/product/spec.md +++ b/openspec/specs/product/spec.md @@ -227,6 +227,10 @@ The system SHALL provide model loading and end-to-end inference. - **WHEN** loading model definition with layers and weights - **THEN** weights are allocated as GPU tensors +#### Scenario: Reject invalid graph definitions +- **WHEN** model definition contains duplicate layer names, unknown operators, or unresolved tensor references +- **THEN** loading fails before inference starts + #### Scenario: Run inference - **WHEN** calling infer() with input tensor - **THEN** output tensor is returned with correct shape diff --git a/openspec/specs/testing/spec.md b/openspec/specs/testing/spec.md index 02cd102..960dd55 100644 --- a/openspec/specs/testing/spec.md +++ b/openspec/specs/testing/spec.md @@ -191,6 +191,27 @@ Feature: Dense Operator --- +## Feature: Add Operator + +```gherkin +Feature: Add Operator + As a deep learning developer + I want to add residual tensors element-wise + So that I can express skip connections safely +``` + +### Scenario: Add basic execution + +- **WHEN** I execute Add with two tensors of the same shape and layout +- **THEN** the output shape should equal the input shape + +### Scenario: Add rejects layout mismatch + +- **WHEN** I execute Add with tensors that share the same shape but use different layouts +- **THEN** it should throw an error "same layout" + +--- + ## Feature: Flatten Operator ```gherkin @@ -254,6 +275,16 @@ Feature: Inference Engine - **WHEN** I load a model with layers and weights into initialized InferenceEngine - **THEN** the weights should be allocated as GPU tensors +### Scenario: Reject invalid model graph at load time + +- **WHEN** I load a model with duplicate layer names, unknown operator types, or missing tensor references +- **THEN** `loadModel()` should throw before inference starts + +### Scenario: Preserve previous model on failed reload + +- **WHEN** I load a valid model and then attempt to load an invalid replacement model +- **THEN** the previously loaded model should remain executable + ### Scenario: Run inference - **WHEN** I run inference on loaded model with correct input tensor shape diff --git a/src/core/GPUContext.ts b/src/core/GPUContext.ts index 5c6ace0..622547e 100644 --- a/src/core/GPUContext.ts +++ b/src/core/GPUContext.ts @@ -1,4 +1,4 @@ -import { WebGPUNotSupportedError, DeviceInitializationError, ShaderCompilationError } from './errors'; +import { WebGPUNotSupportedError, DeviceInitializationError } from './errors'; export interface GPUContextConfig { powerPreference?: 'low-power' | 'high-performance'; @@ -12,42 +12,32 @@ export class GPUContext { private adapter: GPUAdapter | null = null; private device: GPUDevice | null = null; private _isInitialized = false; - private pendingResourceCleanup = new Set>(); - - private trackCleanup(cleanup: Promise): void { - this.pendingResourceCleanup.add(cleanup); - void cleanup.finally(() => { - this.pendingResourceCleanup.delete(cleanup); - }); - } + private deferredBuffers = new Set(); deferDestroy(buffer: GPUBuffer | null | undefined): void { if (!buffer) return; - const cleanup = this.waitForSubmittedWork() - .then(() => { - buffer.destroy(); - }) - .catch(() => { - try { - buffer.destroy(); - } catch { - // Ignore cleanup failures after device loss/destroy. - } - }); - - this.trackCleanup(cleanup); + this.deferredBuffers.add(buffer); } async flushDeferredDestroys(): Promise { - await Promise.allSettled([...this.pendingResourceCleanup]); + for (const buffer of this.deferredBuffers) { + buffer.destroy(); + } + this.deferredBuffers.clear(); } async waitForSubmittedWork(): Promise { - // Yield to the event loop to allow pending GPU work to complete. - // The deprecated onSubmittedWorkDone() was removed from the WebGPU spec. - // In practice, yielding briefly is sufficient for most testing scenarios. - return new Promise(resolve => setTimeout(resolve, 0)); + const queue = this.getDevice().queue as GPUQueue & { + onSubmittedWorkDone?: () => Promise; + }; + + if (typeof queue.onSubmittedWorkDone === 'function') { + await queue.onSubmittedWorkDone(); + return; + } + + await new Promise(resolve => setTimeout(resolve, 0)); } async sync(): Promise { @@ -213,15 +203,12 @@ export class GPUContext { * Release all GPU resources. */ destroy(): void { - for (const cleanup of this.pendingResourceCleanup) { - void cleanup.catch(() => {}); - } - this.pendingResourceCleanup.clear(); - if (this.device) { this.device.destroy(); this.device = null; } + + this.deferredBuffers.clear(); this.adapter = null; this._isInitialized = false; } diff --git a/src/engine/InferenceEngine.ts b/src/engine/InferenceEngine.ts index 0d2cd56..c8d8a95 100644 --- a/src/engine/InferenceEngine.ts +++ b/src/engine/InferenceEngine.ts @@ -11,16 +11,19 @@ import { DenseOperator } from '../operators/DenseOperator'; import { AddOperator } from '../operators/AddOperator'; import { BatchNorm2dOperator } from '../operators/BatchNorm2dOperator'; import { ModelDefinition } from './ModelLoader'; +import { CompiledModel, ModelCompiler } from './ModelCompiler'; export class InferenceEngine { private context: GPUContext; private operators: Map; private weights: Map = new Map(); - private modelDef: ModelDefinition | null = null; + private compiledModel: CompiledModel | null = null; + private readonly compiler: ModelCompiler; constructor() { this.context = new GPUContext(); this.operators = new Map(); + this.compiler = new ModelCompiler(); } async initialize(): Promise { @@ -39,20 +42,14 @@ export class InferenceEngine { } async loadModel(modelDef: ModelDefinition): Promise { - this.modelDef = modelDef; + const compiledModel = this.compiler.compile(modelDef, this.operators.keys()); + const nextWeights = this.materializeWeights(modelDef); for (const tensor of this.weights.values()) { tensor.destroy(); } - this.weights.clear(); - - for (const [name, weightDef] of Object.entries(modelDef.weights)) { - if (!weightDef.shape || weightDef.shape.length === 0) { - throw new Error(`Weight "${name}" is missing shape metadata`); - } - const tensor = Tensor.fromArray(this.context, weightDef.data, weightDef.shape); - this.weights.set(name, tensor); - } + this.weights = nextWeights; + this.compiledModel = compiledModel; } tensorFromArray( @@ -63,63 +60,92 @@ export class InferenceEngine { return Tensor.fromArray(this.context, data, shape, options); } - async infer(input: Tensor): Promise { - if (!this.modelDef) { - throw new Error('Model not loaded'); - } - if (!input.usesContext(this.context)) { - throw new Error('Input tensor must be created from the same GPUContext as the inference engine'); - } + private materializeWeights(modelDef: ModelDefinition): Map { + const nextWeights = new Map(); - const activations = new Map(); - activations.set('input', input); - - // Execute layers in order - for (const layer of this.modelDef.layers) { - const operator = this.operators.get(layer.type); - if (!operator) { - throw new Error(`Unknown operator type: ${layer.type}`); - } - - // Get inputs - const inputs: Tensor[] = []; - for (const inputName of layer.inputs) { - const tensor = activations.get(inputName) ?? this.weights.get(inputName); - if (!tensor) { - throw new Error(`Missing input: ${inputName}`); + try { + for (const [name, weightDef] of Object.entries(modelDef.weights)) { + if (!weightDef.shape || weightDef.shape.length === 0) { + throw new Error(`Weight "${name}" is missing shape metadata`); } - inputs.push(tensor); + const tensor = Tensor.fromArray(this.context, weightDef.data, weightDef.shape); + nextWeights.set(name, tensor); } - // Execute - const output = await operator.forward(inputs, layer.params); - activations.set(layer.name, output); - } - - // Return final output - const lastLayer = this.modelDef.layers[this.modelDef.layers.length - 1]; - const finalOutput = activations.get(lastLayer.name); - if (!finalOutput) { - throw new Error(`Final output not found for layer: ${lastLayer.name}`); + return nextWeights; + } catch (error) { + for (const tensor of nextWeights.values()) { + tensor.destroy(); + } + throw error; } + } - // Ensure queued GPU work sees all intermediate activations before releasing them. + private async cleanupActivations( + activations: Map, + retainedBuffer: GPUBuffer | null + ): Promise { await this.context.sync(); - // Destroy intermediate activations to free GPU memory. - // If the final output is a view (e.g. flatten/reshape), keep any tensor sharing its buffer alive. for (const [name, tensor] of activations.entries()) { if ( name !== 'input' && - name !== lastLayer.name && !this.weights.has(name) && - tensor.buffer !== finalOutput.buffer + tensor.buffer !== retainedBuffer ) { tensor.destroy(); } } + } + + async infer(input: Tensor): Promise { + if (!this.compiledModel) { + throw new Error('Model not loaded'); + } + if (!input.usesContext(this.context)) { + throw new Error('Input tensor must be created from the same GPUContext as the inference engine'); + } - return finalOutput; + const activations = new Map(); + activations.set('input', input); + let retainedBuffer: GPUBuffer | null = null; + + try { + // Execute layers in order + for (const layer of this.compiledModel.layers) { + const operator = this.operators.get(layer.type); + if (!operator) { + throw new Error(`Unknown operator type: ${layer.type}`); + } + + // Get inputs + const inputs: Tensor[] = []; + for (const source of layer.inputs) { + const tensor = source.kind === 'weight' + ? this.weights.get(source.name) + : activations.get(source.name); + if (!tensor) { + throw new Error(`Missing input: ${source.name}`); + } + inputs.push(tensor); + } + + // Execute + const output = await operator.forward(inputs, layer.params); + activations.set(layer.name, output); + } + // Return final output + // Return final output + const finalOutput = activations.get(this.compiledModel.outputName); + if (!finalOutput) { + throw new Error(`Final output not found for layer: ${this.compiledModel.outputName}`); + } + + retainedBuffer = finalOutput.buffer; + return finalOutput; + } finally { + await this.cleanupActivations(activations, retainedBuffer); + } } destroy(): void { @@ -127,6 +153,7 @@ export class InferenceEngine { tensor.destroy(); } this.weights.clear(); + this.compiledModel = null; for (const operator of this.operators.values()) { operator.destroy(); diff --git a/src/engine/ModelCompiler.ts b/src/engine/ModelCompiler.ts new file mode 100644 index 0000000..8749bfb --- /dev/null +++ b/src/engine/ModelCompiler.ts @@ -0,0 +1,78 @@ +import { ModelLoadError } from '../core/errors'; +import { ModelDefinition, LayerDefinition } from './ModelLoader'; + +export type TensorSourceKind = 'input' | 'weight' | 'activation'; + +export interface TensorSource { + kind: TensorSourceKind; + name: string; +} + +export interface CompiledLayer { + name: string; + type: LayerDefinition['type']; + params: LayerDefinition['params']; + inputs: TensorSource[]; +} + +export interface CompiledModel { + name: string; + layers: CompiledLayer[]; + outputName: string; +} + +function toTensorSource(name: string, weights: ModelDefinition['weights']): TensorSource { + if (name === 'input') { + return { kind: 'input', name }; + } + + if (Object.hasOwn(weights, name)) { + return { kind: 'weight', name }; + } + + return { kind: 'activation', name }; +} + +export class ModelCompiler { + compile(modelDef: ModelDefinition, operatorTypes: Iterable): CompiledModel { + if (modelDef.layers.length === 0) { + throw new ModelLoadError('Model must contain at least one layer'); + } + + const knownOperatorTypes = new Set(operatorTypes); + const availableNames = new Set(['input', ...Object.keys(modelDef.weights)]); + const compiledLayers: CompiledLayer[] = []; + + for (const layer of modelDef.layers) { + if (!knownOperatorTypes.has(layer.type)) { + throw new ModelLoadError(`Unknown operator type: ${layer.type}`); + } + + if (availableNames.has(layer.name)) { + throw new ModelLoadError(`Duplicate layer name: ${layer.name}`); + } + + const inputs = layer.inputs.map((inputName) => { + if (!availableNames.has(inputName)) { + throw new ModelLoadError(`Missing input: ${inputName} for layer: ${layer.name}`); + } + + return toTensorSource(inputName, modelDef.weights); + }); + + compiledLayers.push({ + name: layer.name, + type: layer.type, + params: layer.params, + inputs + }); + availableNames.add(layer.name); + } + + return { + name: modelDef.name, + layers: compiledLayers, + outputName: compiledLayers[compiledLayers.length - 1].name + }; + } +} diff --git a/src/operators/AddOperator.ts b/src/operators/AddOperator.ts index 6d09817..7bf2fdb 100644 --- a/src/operators/AddOperator.ts +++ b/src/operators/AddOperator.ts @@ -3,6 +3,7 @@ import { GPUContext } from '../core/GPUContext'; import { Tensor, TensorShape } from '../core/Tensor'; import { DEFAULT_WORKGROUP_SIZE } from './constants'; import { OperatorExecutionError } from '../core/errors'; +import { assertSameShapeAndLayout } from './tensorContracts'; // AddParams uses the base OperatorParams - no additional parameters needed for element-wise addition @@ -78,23 +79,7 @@ export class AddOperator extends Operator { } const [inputA, inputB] = inputs; - - // Validate shapes match - if (inputA.shape.length !== inputB.shape.length) { - throw new OperatorExecutionError( - 'Add', - `inputs must have same rank: got ${inputA.shape.length} and ${inputB.shape.length}` - ); - } - - for (let i = 0; i < inputA.shape.length; i++) { - if (inputA.shape[i] !== inputB.shape[i]) { - throw new OperatorExecutionError( - 'Add', - `inputs must have same shape: got [${inputA.shape.join(', ')}] and [${inputB.shape.join(', ')}]` - ); - } - } + assertSameShapeAndLayout('Add', inputA, inputB, 'inputs', 'inputs'); const outputShape = this.computeOutputShape(inputA.shape); const output = new Tensor(this.context, outputShape, { layout: inputA.layout }); diff --git a/src/operators/convValidation.ts b/src/operators/convValidation.ts index 2f47c05..9974ae7 100644 --- a/src/operators/convValidation.ts +++ b/src/operators/convValidation.ts @@ -1,5 +1,6 @@ import { Tensor, TensorShape } from '../core/Tensor'; import { OperatorExecutionError } from '../core/errors'; +import { assertTensorLayout } from './tensorContracts'; export type ConvShape = { batchSize: number; @@ -26,9 +27,7 @@ export function validateNonNegativePair(name: string, pair: [number, number]): v } export function validateNchwInput(name: string, input: Tensor): void { - if (input.layout !== 'NCHW') { - throw new OperatorExecutionError(name, 'currently supports NCHW layout only'); - } + assertTensorLayout(name, input, 'NCHW', 'input', 'currently supports NCHW layout only'); if (input.shape.length !== 4) { throw new OperatorExecutionError(name, 'expects a 4D input tensor'); } diff --git a/src/operators/tensorContracts.ts b/src/operators/tensorContracts.ts new file mode 100644 index 0000000..c91e0f1 --- /dev/null +++ b/src/operators/tensorContracts.ts @@ -0,0 +1,48 @@ +import { DataLayout, Tensor } from '../core/Tensor'; +import { OperatorExecutionError } from '../core/errors'; + +export function assertTensorLayout( + operatorName: string, + tensor: Tensor, + expectedLayout: DataLayout, + label = 'input', + details?: string +): void { + if (tensor.layout !== expectedLayout) { + throw new OperatorExecutionError( + operatorName, + details ?? `${label} must use ${expectedLayout} layout, got ${tensor.layout}` + ); + } +} + +export function assertSameShapeAndLayout( + operatorName: string, + first: Tensor, + second: Tensor, + firstLabel = 'inputA', + secondLabel = 'inputB' +): void { + if (first.shape.length !== second.shape.length) { + throw new OperatorExecutionError( + operatorName, + `${firstLabel} and ${secondLabel} must have same rank: got ${first.shape.length} and ${second.shape.length}` + ); + } + + for (let i = 0; i < first.shape.length; i++) { + if (first.shape[i] !== second.shape[i]) { + throw new OperatorExecutionError( + operatorName, + `${firstLabel} and ${secondLabel} must have same shape: got [${first.shape.join(', ')}] and [${second.shape.join(', ')}]` + ); + } + } + + if (first.layout !== second.layout) { + throw new OperatorExecutionError( + operatorName, + `${firstLabel} and ${secondLabel} must have same layout: got ${first.layout} and ${second.layout}` + ); + } +} diff --git a/tests/core/GPUContext.test.ts b/tests/core/GPUContext.test.ts index 275c2fa..d01c06d 100644 --- a/tests/core/GPUContext.test.ts +++ b/tests/core/GPUContext.test.ts @@ -48,6 +48,7 @@ describe('GPUContext', () => { }); afterEach(() => { + vi.useRealTimers(); setNavigator(originalNavigator); }); @@ -174,5 +175,39 @@ describe('GPUContext', () => { context.submit([mockCommandBuffer]); expect(mockDevice.queue.submit).toHaveBeenCalledWith([mockCommandBuffer]); }); + + it('should wait for queue completion during sync', async () => { + const { mockGPU, mockDevice } = createMockGPU(); + setNavigator({ gpu: mockGPU }); + + const context = new GPUContext(); + await context.initialize(); + + await context.sync(); + + expect(mockDevice.queue.onSubmittedWorkDone).toHaveBeenCalledTimes(1); + }); + + it('should not destroy deferred buffers before sync flushes them', async () => { + vi.useFakeTimers(); + const { mockGPU } = createMockGPU(); + setNavigator({ gpu: mockGPU }); + + const context = new GPUContext(); + await context.initialize(); + + const buffer = { + destroy: vi.fn() + } as unknown as GPUBuffer; + + context.deferDestroy(buffer); + await vi.runAllTimersAsync(); + + expect(buffer.destroy).not.toHaveBeenCalled(); + + await context.sync(); + + expect(buffer.destroy).toHaveBeenCalledTimes(1); + }); }); }); diff --git a/tests/engine/InferenceEngine.test.ts b/tests/engine/InferenceEngine.test.ts index cf965f9..918c40d 100644 --- a/tests/engine/InferenceEngine.test.ts +++ b/tests/engine/InferenceEngine.test.ts @@ -100,7 +100,14 @@ describe('InferenceEngine', () => { const invalidModel = { name: 'invalid', - layers: [], + layers: [ + { + name: 'output', + type: 'relu', + inputs: ['input'], + params: {} + } + ], weights: { conv_weight: { data: new Float32Array([1, 2, 3, 4]) @@ -158,7 +165,7 @@ describe('InferenceEngine', () => { const engine = new InferenceEngine(); await engine.initialize(); - await engine.loadModel({ + await expect(engine.loadModel({ name: 'bad-model', layers: [ { @@ -169,12 +176,7 @@ describe('InferenceEngine', () => { } ], weights: {} - }); - - const input = Tensor.zeros(engine['context'], [1, 3]); - await expect(engine.infer(input)).rejects.toThrow('Unknown operator type'); - - input.destroy(); + })).rejects.toThrow('Unknown operator type'); }); it('throws error for missing input tensor', async () => { @@ -183,7 +185,7 @@ describe('InferenceEngine', () => { const engine = new InferenceEngine(); await engine.initialize(); - await engine.loadModel({ + await expect(engine.loadModel({ name: 'bad-model', layers: [ { @@ -194,12 +196,48 @@ describe('InferenceEngine', () => { } ], weights: {} - }); + })).rejects.toThrow('Missing input'); + }); - const input = Tensor.zeros(engine['context'], [1, 3]); - await expect(engine.infer(input)).rejects.toThrow('Missing input'); + it('rejects empty model graphs at load time', async () => { + const { mockGPU } = createMockGPU(); + setNavigator({ gpu: mockGPU }); - input.destroy(); + const engine = new InferenceEngine(); + await engine.initialize(); + + await expect(engine.loadModel({ + name: 'empty-model', + layers: [], + weights: {} + })).rejects.toThrow('at least one layer'); + }); + + it('rejects duplicate layer names at load time', async () => { + const { mockGPU } = createMockGPU(); + setNavigator({ gpu: mockGPU }); + + const engine = new InferenceEngine(); + await engine.initialize(); + + await expect(engine.loadModel({ + name: 'duplicate-layer-model', + layers: [ + { + name: 'hidden', + type: 'relu', + inputs: ['input'], + params: {} + }, + { + name: 'hidden', + type: 'softmax', + inputs: ['hidden'], + params: { axis: -1 } + } + ], + weights: {} + })).rejects.toThrow('Duplicate layer name'); }); it('executes single layer model correctly', async () => { @@ -264,6 +302,47 @@ describe('InferenceEngine', () => { output.destroy(); }); + it('cleans intermediate activations when a later layer fails', async () => { + const { mockGPU } = createMockGPU(); + setNavigator({ gpu: mockGPU }); + + const engine = new InferenceEngine(); + await engine.initialize(); + await engine.loadModel({ + name: 'failing-multi-layer', + layers: [ + { + name: 'relu1', + type: 'relu', + inputs: ['input'], + params: {} + }, + { + name: 'output', + type: 'softmax', + inputs: ['relu1'], + params: { axis: -1 } + } + ], + weights: {} + }); + + const intermediate = Tensor.zeros(engine['context'], [1, 3]); + const destroySpy = vi.spyOn(intermediate, 'destroy'); + const relu = engine['operators'].get('relu'); + const softmax = engine['operators'].get('softmax'); + + vi.spyOn(relu!, 'forward').mockResolvedValue(intermediate); + vi.spyOn(softmax!, 'forward').mockRejectedValue(new Error('softmax boom')); + + const input = Tensor.zeros(engine['context'], [1, 3]); + + await expect(engine.infer(input)).rejects.toThrow('softmax boom'); + expect(destroySpy).toHaveBeenCalledTimes(1); + + input.destroy(); + }); + it('executes add layers through the inference engine', async () => { const { mockGPU } = createMockGPU(); setNavigator({ gpu: mockGPU }); @@ -345,7 +424,14 @@ describe('InferenceEngine', () => { const weightData = new Float32Array([1, 2, 3, 4, 5, 6]); await engine.loadModel({ name: 'with-weights', - layers: [], + layers: [ + { + name: 'output', + type: 'relu', + inputs: ['input'], + params: {} + } + ], weights: { my_weight: { data: weightData, @@ -370,7 +456,14 @@ describe('InferenceEngine', () => { await engine.loadModel({ name: 'model1', - layers: [], + layers: [ + { + name: 'output', + type: 'relu', + inputs: ['input'], + params: {} + } + ], weights: { weight1: { data: new Float32Array([1, 2, 3]), @@ -383,7 +476,14 @@ describe('InferenceEngine', () => { await engine.loadModel({ name: 'model2', - layers: [], + layers: [ + { + name: 'output', + type: 'relu', + inputs: ['input'], + params: {} + } + ], weights: { weight2: { data: new Float32Array([4, 5]), @@ -396,6 +496,51 @@ describe('InferenceEngine', () => { expect(engine['weights'].has('weight2')).toBe(true); }); + it('keeps previous model when replacement load fails', async () => { + const { mockGPU } = createMockGPU(); + setNavigator({ gpu: mockGPU }); + + const engine = new InferenceEngine(); + await engine.initialize(); + await engine.loadModel({ + name: 'stable-model', + layers: [ + { + name: 'output', + type: 'flatten', + inputs: ['input'], + params: {} + } + ], + weights: {} + }); + + await expect(engine.loadModel({ + name: 'broken-model', + layers: [ + { + name: 'output', + type: 'relu', + inputs: ['input'], + params: {} + } + ], + weights: { + broken_weight: { + data: new Float32Array([1, 2, 3]) + } + } + } as unknown as ModelDefinition)).rejects.toThrow('missing shape metadata'); + + const input = Tensor.zeros(engine['context'], [1, 1, 2, 2]); + const output = await engine.infer(input); + + expect(output.shape).toEqual([1, 4]); + + input.destroy(); + output.destroy(); + }); + it('destroys all resources on destroy', async () => { const { mockGPU, mockDevice } = createMockGPU(); setNavigator({ gpu: mockGPU }); @@ -404,7 +549,14 @@ describe('InferenceEngine', () => { await engine.initialize(); await engine.loadModel({ name: 'test', - layers: [], + layers: [ + { + name: 'output', + type: 'relu', + inputs: ['input'], + params: {} + } + ], weights: { w: { data: new Float32Array([1, 2, 3]), @@ -420,6 +572,31 @@ describe('InferenceEngine', () => { expect(engine['operators'].size).toBe(0); }); + it('unloads compiled model on destroy', async () => { + const { mockGPU } = createMockGPU(); + setNavigator({ gpu: mockGPU }); + + const engine = new InferenceEngine(); + await engine.initialize(); + await engine.loadModel({ + name: 'test', + layers: [ + { + name: 'output', + type: 'relu', + inputs: ['input'], + params: {} + } + ], + weights: {} + }); + + const input = Tensor.zeros(engine['context'], [1, 3]); + engine.destroy(); + + await expect(engine.infer(input)).rejects.toThrow('Model not loaded'); + }); + it('creates tensors via tensorFromArray', async () => { const { mockGPU } = createMockGPU(); setNavigator({ gpu: mockGPU }); diff --git a/tests/operators/AddOperator.test.ts b/tests/operators/AddOperator.test.ts index a6ff5f3..3ac0809 100644 --- a/tests/operators/AddOperator.test.ts +++ b/tests/operators/AddOperator.test.ts @@ -36,6 +36,16 @@ describe('AddOperator', () => { await expect(add.forward([inputA, inputB])).rejects.toThrow(/same shape/); }); + it('should throw error for different layouts', async () => { + const context = createMockContext(); + const add = new AddOperator(context); + + const inputA = Tensor.fromArray(context, new Float32Array(4).fill(1), [1, 1, 2, 2], { layout: 'NCHW' }); + const inputB = Tensor.fromArray(context, new Float32Array(4).fill(1), [1, 1, 2, 2], { layout: 'NHWC' }); + + await expect(add.forward([inputA, inputB])).rejects.toThrow(/same layout/); + }); + it('should throw error for wrong number of inputs', async () => { const context = createMockContext(); const add = new AddOperator(context); From f0fd347e811de4a6447ede0f77e9a6ebf7ed51f2 Mon Sep 17 00:00:00 2001 From: shijiashuai Date: Fri, 22 May 2026 10:39:16 +0800 Subject: [PATCH 2/3] fix: destroy deferred GPU buffers on teardown Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/core/GPUContext.ts | 7 +++++-- tests/core/GPUContext.test.ts | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/core/GPUContext.ts b/src/core/GPUContext.ts index 622547e..5c95e24 100644 --- a/src/core/GPUContext.ts +++ b/src/core/GPUContext.ts @@ -203,12 +203,15 @@ export class GPUContext { * Release all GPU resources. */ destroy(): void { + for (const buffer of this.deferredBuffers) { + buffer.destroy(); + } + this.deferredBuffers.clear(); + if (this.device) { this.device.destroy(); this.device = null; } - - this.deferredBuffers.clear(); this.adapter = null; this._isInitialized = false; } diff --git a/tests/core/GPUContext.test.ts b/tests/core/GPUContext.test.ts index d01c06d..6733536 100644 --- a/tests/core/GPUContext.test.ts +++ b/tests/core/GPUContext.test.ts @@ -209,5 +209,22 @@ describe('GPUContext', () => { expect(buffer.destroy).toHaveBeenCalledTimes(1); }); + + it('should destroy deferred buffers during teardown', async () => { + const { mockGPU } = createMockGPU(); + setNavigator({ gpu: mockGPU }); + + const context = new GPUContext(); + await context.initialize(); + + const buffer = { + destroy: vi.fn() + } as unknown as GPUBuffer; + + context.deferDestroy(buffer); + context.destroy(); + + expect(buffer.destroy).toHaveBeenCalledTimes(1); + }); }); }); From c7ca039afb480efdab77f20e1b35459daea22427 Mon Sep 17 00:00:00 2001 From: shijiashuai Date: Fri, 22 May 2026 10:51:49 +0800 Subject: [PATCH 3/3] fix: preserve deferred cleanup semantics Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/core/GPUContext.ts | 34 +++++++++++++++++++++++++++++----- tests/core/GPUContext.test.ts | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 5 deletions(-) diff --git a/src/core/GPUContext.ts b/src/core/GPUContext.ts index 5c95e24..6891bc8 100644 --- a/src/core/GPUContext.ts +++ b/src/core/GPUContext.ts @@ -20,13 +20,17 @@ export class GPUContext { this.deferredBuffers.add(buffer); } - async flushDeferredDestroys(): Promise { + private destroyDeferredBuffers(): void { for (const buffer of this.deferredBuffers) { buffer.destroy(); } this.deferredBuffers.clear(); } + async flushDeferredDestroys(): Promise { + this.destroyDeferredBuffers(); + } + async waitForSubmittedWork(): Promise { const queue = this.getDevice().queue as GPUQueue & { onSubmittedWorkDone?: () => Promise; @@ -41,8 +45,18 @@ export class GPUContext { } async sync(): Promise { - await this.waitForSubmittedWork(); + let waitError: unknown; + try { + await this.waitForSubmittedWork(); + } catch (error) { + waitError = error; + } + await this.flushDeferredDestroys(); + + if (waitError) { + throw waitError; + } } /** @@ -203,10 +217,20 @@ export class GPUContext { * Release all GPU resources. */ destroy(): void { - for (const buffer of this.deferredBuffers) { - buffer.destroy(); + const queue = this.device?.queue as (GPUQueue & { + onSubmittedWorkDone?: () => Promise; + }) | undefined; + + if (this.deferredBuffers.size > 0) { + if (typeof queue?.onSubmittedWorkDone === 'function') { + void queue.onSubmittedWorkDone().then( + () => this.destroyDeferredBuffers(), + () => this.destroyDeferredBuffers() + ); + } else { + this.destroyDeferredBuffers(); + } } - this.deferredBuffers.clear(); if (this.device) { this.device.destroy(); diff --git a/tests/core/GPUContext.test.ts b/tests/core/GPUContext.test.ts index 6733536..53ba9d4 100644 --- a/tests/core/GPUContext.test.ts +++ b/tests/core/GPUContext.test.ts @@ -211,11 +211,20 @@ describe('GPUContext', () => { }); it('should destroy deferred buffers during teardown', async () => { + let settleQueue: (() => void) | undefined; const { mockGPU } = createMockGPU(); setNavigator({ gpu: mockGPU }); const context = new GPUContext(); await context.initialize(); + const mockDevice = context.getDevice() as unknown as { + queue: { onSubmittedWorkDone: ReturnType }; + }; + mockDevice.queue.onSubmittedWorkDone.mockImplementation( + () => new Promise((resolve) => { + settleQueue = resolve; + }) + ); const buffer = { destroy: vi.fn() @@ -224,6 +233,32 @@ describe('GPUContext', () => { context.deferDestroy(buffer); context.destroy(); + expect(buffer.destroy).not.toHaveBeenCalled(); + + settleQueue?.(); + await Promise.resolve(); + + expect(buffer.destroy).toHaveBeenCalledTimes(1); + }); + + it('should flush deferred buffers even if queue completion rejects', async () => { + const { mockGPU } = createMockGPU(); + setNavigator({ gpu: mockGPU }); + + const context = new GPUContext(); + await context.initialize(); + const mockDevice = context.getDevice() as unknown as { + queue: { onSubmittedWorkDone: ReturnType }; + }; + mockDevice.queue.onSubmittedWorkDone.mockRejectedValueOnce(new Error('queue lost')); + + const buffer = { + destroy: vi.fn() + } as unknown as GPUBuffer; + + context.deferDestroy(buffer); + + await expect(context.sync()).rejects.toThrow('queue lost'); expect(buffer.destroy).toHaveBeenCalledTimes(1); }); });