From f04b35f88e19ec39318b4fb1d50d3e10c1bae15c Mon Sep 17 00:00:00 2001 From: makachanm Date: Tue, 7 Apr 2026 02:49:48 -0400 Subject: [PATCH 01/10] Implement Tail Call Optimize into ReturnCall ReturnCallIndirect ReturnCallRef instruction --- .gitignore | 7 + src/interpreter/ByteCode.cpp | 15 ++ src/interpreter/ByteCode.h | 172 ++++++++++++++++++ src/interpreter/Interpreter.cpp | 91 +++++++++ src/interpreter/Interpreter.h | 22 ++- src/parser/WASMParser.cpp | 57 +++++- src/runtime/ExecutionState.h | 18 ++ src/runtime/Function.cpp | 5 + test/wasi/write_to_this.txt | 1 - .../wabt/walrus/binary-reader-walrus.h | 3 + .../wabt/src/walrus/binary-reader-walrus.cc | 15 ++ 11 files changed, 401 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index aa056f2d5..4c7e9450a 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,10 @@ rules.ninja node_modules /test/wasmBenchmarker/ctests/wasm /test/wasmBenchmarker/emsdk +/.cache +/build/.cache +/build/.cmake +/build/_deps +/build/third_party +/build/walrus +compile_commands.json diff --git a/src/interpreter/ByteCode.cpp b/src/interpreter/ByteCode.cpp index 3794f75ba..5115a34a7 100644 --- a/src/interpreter/ByteCode.cpp +++ b/src/interpreter/ByteCode.cpp @@ -71,6 +71,21 @@ size_t ByteCode::getSize() const return ByteCode::pointerAlignedSize(sizeof(CallRef) + sizeof(ByteCodeStackOffset) * callRef->parameterOffsetsSize() + sizeof(ByteCodeStackOffset) * callRef->resultOffsetsSize()); } + case ReturnCallOpcode: { + const ReturnCall* returnCall = reinterpret_cast(this); + return ByteCode::pointerAlignedSize(sizeof(ReturnCall) + sizeof(ByteCodeStackOffset) * returnCall->parameterOffsetsSize() + + sizeof(ByteCodeStackOffset) * returnCall->resultOffsetsSize()); + } + case ReturnCallIndirectOpcode: { + const ReturnCallIndirect* returnCallIndirect = reinterpret_cast(this); + return ByteCode::pointerAlignedSize(sizeof(ReturnCallIndirect) + sizeof(ByteCodeStackOffset) * returnCallIndirect->parameterOffsetsSize() + + sizeof(ByteCodeStackOffset) * returnCallIndirect->resultOffsetsSize()); + } + case ReturnCallRefOpcode: { + const ReturnCallRef* returnCallRef = reinterpret_cast(this); + return ByteCode::pointerAlignedSize(sizeof(ReturnCallRef) + sizeof(ByteCodeStackOffset) * returnCallRef->parameterOffsetsSize() + + sizeof(ByteCodeStackOffset) * returnCallRef->resultOffsetsSize()); + } case EndOpcode: { const End* end = reinterpret_cast(this); return ByteCode::pointerAlignedSize(sizeof(End) + sizeof(ByteCodeStackOffset) * end->offsetsSize()); diff --git a/src/interpreter/ByteCode.h b/src/interpreter/ByteCode.h index 4ced54101..010dfabeb 100644 --- a/src/interpreter/ByteCode.h +++ b/src/interpreter/ByteCode.h @@ -47,6 +47,9 @@ class FunctionType; F(Call) \ F(CallIndirect) \ F(CallRef) \ + F(ReturnCall) \ + F(ReturnCallIndirect) \ + F(ReturnCallRef) \ F(Select) \ F(MemorySize) \ F(MemorySizeM64) \ @@ -2128,6 +2131,175 @@ class CallRef : public ByteCode { uint16_t m_resultOffsetsSize; }; +class ReturnCall : public ByteCode { +public: + ReturnCall(uint32_t index, uint16_t parameterOffsetsSize, uint16_t resultOffsetsSize, + FunctionType* functionType) + : ByteCode(Opcode::ReturnCallOpcode) + , m_index(index) + , m_parameterOffsetsSize(parameterOffsetsSize) + , m_resultOffsetsSize(resultOffsetsSize) + { + } + + uint32_t index() const { return m_index; } + ByteCodeStackOffset* stackOffsets() const + { + return reinterpret_cast(reinterpret_cast(this) + sizeof(ReturnCall)); + } + + uint16_t parameterOffsetsSize() const + { + return m_parameterOffsetsSize; + } + + uint16_t resultOffsetsSize() const + { + return m_resultOffsetsSize; + } + +#if !defined(NDEBUG) + void dump(size_t pos) + { + printf("return_call "); + printf("index: %" PRId32 " ", m_index); + size_t c = 0; + auto arr = stackOffsets(); + printf("paramOffsets: "); + for (size_t i = 0; i < m_parameterOffsetsSize; i++) { + printf("%" PRIu32 " ", (uint32_t)arr[c++]); + } + printf(" "); + + printf("resultOffsets: "); + for (size_t i = 0; i < m_resultOffsetsSize; i++) { + printf("%" PRIu32 " ", (uint32_t)arr[c++]); + } + } +#endif + +protected: + uint32_t m_index; + uint16_t m_parameterOffsetsSize; + uint16_t m_resultOffsetsSize; +}; + +class ReturnCallIndirect : public ByteCode { +public: + ReturnCallIndirect(ByteCodeStackOffset stackOffset, uint32_t tableIndex, FunctionType* functionType, + uint16_t parameterOffsetsSize, uint16_t resultOffsetsSize) + : ByteCode(Opcode::ReturnCallIndirectOpcode) + , m_calleeOffset(stackOffset) + , m_tableIndex(tableIndex) + , m_functionType(functionType) + , m_parameterOffsetsSize(parameterOffsetsSize) + , m_resultOffsetsSize(resultOffsetsSize) + { + } + + ByteCodeStackOffset calleeOffset() const { return m_calleeOffset; } + uint32_t tableIndex() const { return m_tableIndex; } + FunctionType* functionType() const { return m_functionType; } + ByteCodeStackOffset* stackOffsets() const + { + return reinterpret_cast(reinterpret_cast(this) + sizeof(ReturnCallIndirect)); + } + + uint16_t parameterOffsetsSize() const + { + return m_parameterOffsetsSize; + } + + uint16_t resultOffsetsSize() const + { + return m_resultOffsetsSize; + } + +#if !defined(NDEBUG) + void dump(size_t pos) + { + printf("return_call_indirect "); + printf("tableIndex: %" PRId32 " ", m_tableIndex); + DUMP_BYTECODE_OFFSET(calleeOffset); + + size_t c = 0; + auto arr = stackOffsets(); + printf("paramOffsets: "); + for (size_t i = 0; i < m_parameterOffsetsSize; i++) { + printf("%" PRIu32 " ", (uint32_t)arr[c++]); + } + printf(" "); + + printf("resultOffsets: "); + for (size_t i = 0; i < m_resultOffsetsSize; i++) { + printf("%" PRIu32 " ", (uint32_t)arr[c++]); + } + } +#endif + +protected: + ByteCodeStackOffset m_calleeOffset; + uint32_t m_tableIndex; + FunctionType* m_functionType; + uint16_t m_parameterOffsetsSize; + uint16_t m_resultOffsetsSize; +}; + +class ReturnCallRef : public ByteCode { +public: + ReturnCallRef(ByteCodeStackOffset stackOffset, FunctionType* functionType, + uint16_t parameterOffsetsSize, uint16_t resultOffsetsSize) + : ByteCode(Opcode::ReturnCallRefOpcode) + , m_calleeOffset(stackOffset) + , m_functionType(functionType) + , m_parameterOffsetsSize(parameterOffsetsSize) + , m_resultOffsetsSize(resultOffsetsSize) + { + } + + ByteCodeStackOffset calleeOffset() const { return m_calleeOffset; } + FunctionType* functionType() const { return m_functionType; } + ByteCodeStackOffset* stackOffsets() const + { + return reinterpret_cast(reinterpret_cast(this) + sizeof(ReturnCallRef)); + } + + uint16_t parameterOffsetsSize() const + { + return m_parameterOffsetsSize; + } + + uint16_t resultOffsetsSize() const + { + return m_resultOffsetsSize; + } + +#if !defined(NDEBUG) + void dump(size_t pos) + { + printf("return_call_ref "); + size_t c = 0; + auto arr = stackOffsets(); + printf("paramOffsets: "); + for (size_t i = 0; i < m_parameterOffsetsSize; i++) { + printf("%" PRIu32 " ", (uint32_t)arr[c++]); + } + printf(" "); + + printf("resultOffsets: "); + for (size_t i = 0; i < m_resultOffsetsSize; i++) { + printf("%" PRIu32 " ", (uint32_t)arr[c++]); + } + } +#endif + +protected: + ByteCodeStackOffset m_calleeOffset; + FunctionType* m_functionType; + uint16_t m_parameterOffsetsSize; + uint16_t m_resultOffsetsSize; +}; + #define DEFINE_LOAD_OP(className, opcodeType, opStr) \ class className : public ByteCodeOffset2 { \ public: \ diff --git a/src/interpreter/Interpreter.cpp b/src/interpreter/Interpreter.cpp index e9459938b..1c1d23b59 100644 --- a/src/interpreter/Interpreter.cpp +++ b/src/interpreter/Interpreter.cpp @@ -17,6 +17,7 @@ #include "Walrus.h" +#include "interpreter/ByteCode.h" #include "interpreter/Interpreter.h" #include "runtime/Instance.h" #include "runtime/Function.h" @@ -1711,6 +1712,75 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, NEXT_INSTRUCTION(); } + DEFINE_OPCODE(ReturnCall) + : + { + ReturnCall* code = (ReturnCall*)programCounter; + auto target = instance->function(code->index()); + + auto paramSize = code->parameterOffsetsSize(); + auto offsets = code->stackOffsets(); + + state.m_tcoParamStore.reserve(paramSize); + memcpy(state.m_tcoParamStore.data(), bp + offsets[0], paramSize * sizeof(size_t)); + state.m_tcoFunctionTarget = target; + + return nullptr; + } + + DEFINE_OPCODE(ReturnCallIndirect) + : + { + ReturnCallIndirect* code = (ReturnCallIndirect*)programCounter; + Table* table = instance->table(code->tableIndex()); + + uint32_t idx = readValue(bp, code->calleeOffset()); + if (idx >= table->size()) { + Trap::throwException(state, "undefined element"); + } + auto target = reinterpret_cast(table->uncheckedGetElement(idx)); + if (UNLIKELY(Value::isNull(target))) { + Trap::throwException(state, "uninitialized element " + std::to_string(idx)); + } + const FunctionType* ft = target->functionType(); + if (!ft->equals(code->functionType())) { + Trap::throwException(state, "indirect call type mismatch"); + } + + auto paramSize = code->parameterOffsetsSize(); + auto offsets = code->stackOffsets(); + + state.m_tcoParamStore.reserve(paramSize); + memcpy(state.m_tcoParamStore.data(), bp + offsets[0], paramSize * sizeof(size_t)); + state.m_tcoFunctionTarget = target; + + return nullptr; + } + + DEFINE_OPCODE(ReturnCallRef) + : + { + ReturnCallRef* code = (ReturnCallRef*)programCounter; + + auto target = readValue(bp, code->calleeOffset()); + if (UNLIKELY(Value::isNull(target))) { + Trap::throwException(state, "null function reference"); + } + const FunctionType* ft = target->functionType(); + if (!ft->equals(code->functionType())) { + Trap::throwException(state, "call by reference type mismatch"); + } + + auto paramSize = code->parameterOffsetsSize(); + auto offsets = code->stackOffsets(); + + state.m_tcoParamStore.reserve(paramSize); + memcpy(state.m_tcoParamStore.data(), bp + offsets[0], paramSize * sizeof(size_t)); + state.m_tcoFunctionTarget = target; + + return nullptr; + } + DEFINE_OPCODE(Select) : { @@ -3042,6 +3112,13 @@ NEVER_INLINE void Interpreter::callOperation( Call* code = (Call*)programCounter; Function* target = instance->function(code->index()); target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); + + while (UNLIKELY(state.hasTCO())) { + target = state.m_tcoFunctionTarget; + state.destroyTCO(); + target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); + } + programCounter += ByteCode::pointerAlignedSize(sizeof(Call) + sizeof(ByteCodeStackOffset) * code->parameterOffsetsSize() + sizeof(ByteCodeStackOffset) * code->resultOffsetsSize()); } @@ -3069,6 +3146,13 @@ NEVER_INLINE void Interpreter::callIndirectOperation( } target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); + + while (UNLIKELY(state.hasTCO())) { + target = state.m_tcoFunctionTarget; + state.destroyTCO(); + target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); + } + programCounter += ByteCode::pointerAlignedSize(sizeof(CallIndirect) + sizeof(ByteCodeStackOffset) * code->parameterOffsetsSize() + sizeof(ByteCodeStackOffset) * code->resultOffsetsSize()); } @@ -3091,6 +3175,13 @@ NEVER_INLINE void Interpreter::callRefOperation( } target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); + + while (UNLIKELY(state.hasTCO())) { + target = state.m_tcoFunctionTarget; + state.destroyTCO(); + target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); + } + programCounter += ByteCode::pointerAlignedSize(sizeof(CallRef) + sizeof(ByteCodeStackOffset) * code->parameterOffsetsSize() + sizeof(ByteCodeStackOffset) * code->resultOffsetsSize()); } diff --git a/src/interpreter/Interpreter.h b/src/interpreter/Interpreter.h index 35fc7b0e3..b422df173 100644 --- a/src/interpreter/Interpreter.h +++ b/src/interpreter/Interpreter.h @@ -52,9 +52,13 @@ class Interpreter { auto moduleFunction = function->moduleFunction(); ALLOCA(uint8_t, functionStackBase, moduleFunction->requiredStackSize()); - // init parameter space - for (size_t i = 0; i < parameterOffsetCount; i++) { - ((size_t*)functionStackBase)[i] = *((size_t*)(bp + offsets[i])); + if (state.hasTCO()) { + VectorCopier::copy((size_t*)functionStackBase, state.m_tcoParamStore.data(), state.m_tcoParamStore.size()); + state.destroyTCO(); + } else { + for (size_t i = 0; i < parameterOffsetCount; i++) { + ((size_t*)functionStackBase)[i] = *((size_t*)(bp + offsets[i])); + } } size_t programCounter = reinterpret_cast(moduleFunction->byteCode()); @@ -106,6 +110,13 @@ class Interpreter { resultOffsets = interpret(newState, programCounter, functionStackBase, function->instance()); } + if (newState.hasTCO()) { + state.m_tcoParamStore.reserve(newState.m_tcoParamStore.size()); + VectorCopier::copy(state.m_tcoParamStore.data(), newState.m_tcoParamStore.data(), newState.m_tcoParamStore.size()); + state.m_tcoFunctionTarget = newState.m_tcoFunctionTarget; + return; + } + offsets += parameterOffsetCount; for (size_t i = 0; i < resultOffsetCount; i++) { *((size_t*)(bp + offsets[i])) = *((size_t*)(functionStackBase + resultOffsets[i])); @@ -132,6 +143,11 @@ class Interpreter { uint8_t* bp, Instance* instance); + static void returnCallOperation(ExecutionState& state, + size_t& programCounter, + uint8_t* bp, + Instance* instance); + static bool testRefGeneric(void* refPtr, Value::Type type); static bool testRefDefined(void* refPtr, const CompositeType** typeInfo); }; diff --git a/src/parser/WASMParser.cpp b/src/parser/WASMParser.cpp index 28cafbe6b..0629b3879 100644 --- a/src/parser/WASMParser.cpp +++ b/src/parser/WASMParser.cpp @@ -1505,7 +1505,7 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { template void generateCallExpr(CodeType* code, uint16_t parameterCount, uint16_t resultCount, - Walrus::FunctionType* functionType) + Walrus::FunctionType* functionType, bool isReturnCall = false) { size_t offsetIndex = 0; const Walrus::TypeVector::Types& param = functionType->param().types(); @@ -1524,6 +1524,11 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { offsetIndex += subIndexCount; } + if (isReturnCall) { + ASSERT(offsetIndex == code->parameterOffsetsSize()); + return; + } + const Walrus::TypeVector::Types& result = functionType->result().types(); siz = result.size(); for (size_t i = 0; i < siz; i++) { @@ -1582,6 +1587,56 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { generateCallExpr(code, parameterCount, resultCount, functionType); } + virtual void OnReturnCallExpr(uint32_t index) override + { + m_preprocessData.seenBranch(); + auto functionType = m_result.m_functions[index]->functionType(); + auto callPos = m_currentByteCode.size(); + auto parameterCount = computeFunctionParameterOrResultOffsetCount(functionType->param()); + auto resultCount = computeFunctionParameterOrResultOffsetCount(functionType->result()); + pushByteCode(Walrus::ReturnCall(index, parameterCount, resultCount, functionType), WASMOpcode::ReturnCallOpcode); + + expandByteCode(Walrus::ByteCode::pointerAlignedSize(sizeof(Walrus::ByteCodeStackOffset) * (parameterCount + resultCount))); + ASSERT(m_currentByteCode.size() % sizeof(void*) == 0); + auto code = peekByteCode(callPos); + + generateCallExpr(code, parameterCount, resultCount, functionType, true); + stopToGenerateByteCodeWhileBlockEnd(); + } + + virtual void OnReturnCallIndirectExpr(Index sigIndex, Index tableIndex) override + { + m_preprocessData.seenBranch(); + auto functionType = getFunctionType(sigIndex); + auto callPos = m_currentByteCode.size(); + auto parameterCount = computeFunctionParameterOrResultOffsetCount(functionType->param()); + auto resultCount = computeFunctionParameterOrResultOffsetCount(functionType->result()); + pushByteCode(Walrus::ReturnCallIndirect(popVMStack(), tableIndex, functionType, parameterCount, resultCount), + WASMOpcode::ReturnCallIndirectOpcode); + expandByteCode(Walrus::ByteCode::pointerAlignedSize(sizeof(Walrus::ByteCodeStackOffset) * (parameterCount + resultCount))); + ASSERT(m_currentByteCode.size() % sizeof(void*) == 0); + + auto code = peekByteCode(callPos); + generateCallExpr(code, parameterCount, resultCount, functionType, true); + stopToGenerateByteCodeWhileBlockEnd(); + } + + virtual void OnReturnCallRefExpr(Type sig_type) override + { + m_preprocessData.seenBranch(); + auto functionType = getFunctionType(sig_type.GetReferenceIndex()); + auto callPos = m_currentByteCode.size(); + auto parameterCount = computeFunctionParameterOrResultOffsetCount(functionType->param()); + auto resultCount = computeFunctionParameterOrResultOffsetCount(functionType->result()); + pushByteCode(Walrus::ReturnCallRef(popVMStack(), functionType, parameterCount, resultCount), WASMOpcode::ReturnCallRefOpcode); + expandByteCode(Walrus::ByteCode::pointerAlignedSize(sizeof(Walrus::ByteCodeStackOffset) * (parameterCount + resultCount))); + ASSERT(m_currentByteCode.size() % sizeof(void*) == 0); + + auto code = peekByteCode(callPos); + generateCallExpr(code, parameterCount, resultCount, functionType, true); + stopToGenerateByteCodeWhileBlockEnd(); + } + bool processConstValue(const Walrus::Value& value) { if (!m_inInitExpr) { diff --git a/src/runtime/ExecutionState.h b/src/runtime/ExecutionState.h index 07dbb1d0b..bf44fd80f 100644 --- a/src/runtime/ExecutionState.h +++ b/src/runtime/ExecutionState.h @@ -19,6 +19,7 @@ #include "util/Optional.h" #include "util/Util.h" +#include "util/Vector.h" namespace Walrus { @@ -33,6 +34,7 @@ class ExecutionState { ExecutionState(ExecutionState& parent) : m_parent(&parent) , m_stackLimit(parent.m_stackLimit) + , m_tcoFunctionTarget(nullptr) { } @@ -40,6 +42,7 @@ class ExecutionState { : m_parent(&parent) , m_currentFunction(currentFunction) , m_stackLimit(parent.m_stackLimit) + , m_tcoFunctionTarget(nullptr) { } @@ -53,9 +56,24 @@ class ExecutionState { return m_stackLimit; } + void destroyTCO() + { + m_tcoParamStore.clear(); + m_tcoFunctionTarget = nullptr; + } + + bool hasTCO() + { + return m_tcoFunctionTarget != nullptr; + } + + VectorWithFixedSize> m_tcoParamStore; + Function* m_tcoFunctionTarget; + private: friend class ByteCodeTable; ExecutionState() + : m_tcoFunctionTarget(nullptr) { m_stackLimit = (size_t)currentStackPointer(); diff --git a/src/runtime/Function.cpp b/src/runtime/Function.cpp index 92980c724..a3c3d401f 100644 --- a/src/runtime/Function.cpp +++ b/src/runtime/Function.cpp @@ -92,6 +92,11 @@ void DefinedFunction::call(ExecutionState& state, Value* argv, Value* result) ASSERT(offsetIndex == parameterOffsetSize + resultOffsetSize); interpreterCall(state, valueBuffer, offsetBuffer, parameterOffsetSize, resultOffsetSize); + while (UNLIKELY(state.hasTCO())) { + auto target = state.m_tcoFunctionTarget; + target->interpreterCall(state, valueBuffer, offsetBuffer, parameterOffsetSize, resultOffsetSize); + } + size_t resultOffsetIndex = 0; for (size_t i = 0; i < resultTypeInfo.size(); i++) { result[i] = Value(resultTypeInfo[i], valueBuffer + offsetBuffer[resultOffsetIndex + parameterOffsetSize]); diff --git a/test/wasi/write_to_this.txt b/test/wasi/write_to_this.txt index 980a0d5f1..e69de29bb 100644 --- a/test/wasi/write_to_this.txt +++ b/test/wasi/write_to_this.txt @@ -1 +0,0 @@ -Hello World! diff --git a/third_party/wabt/include/wabt/walrus/binary-reader-walrus.h b/third_party/wabt/include/wabt/walrus/binary-reader-walrus.h index 716334129..a8fdd1713 100644 --- a/third_party/wabt/include/wabt/walrus/binary-reader-walrus.h +++ b/third_party/wabt/include/wabt/walrus/binary-reader-walrus.h @@ -119,6 +119,9 @@ class WASMBinaryReaderDelegate { virtual void OnCallExpr(Index index) = 0; virtual void OnCallIndirectExpr(Index sigIndex, Index tableIndex) = 0; virtual void OnCallRefExpr(Type sig_type) = 0; + virtual void OnReturnCallExpr(Index index) = 0; + virtual void OnReturnCallIndirectExpr(Index sigIndex, Index tableIndex) = 0; + virtual void OnReturnCallRefExpr(Type sig_type) = 0; virtual void OnI32ConstExpr(uint32_t value) = 0; virtual void OnI64ConstExpr(uint64_t value) = 0; virtual void OnF32ConstExpr(uint32_t value) = 0; diff --git a/third_party/wabt/src/walrus/binary-reader-walrus.cc b/third_party/wabt/src/walrus/binary-reader-walrus.cc index 5f0a9651e..59c5bb3f2 100644 --- a/third_party/wabt/src/walrus/binary-reader-walrus.cc +++ b/third_party/wabt/src/walrus/binary-reader-walrus.cc @@ -924,8 +924,13 @@ class BinaryReaderDelegateWalrus: public BinaryReaderDelegate { CHECK_RESULT(m_validator.OnReturnCall(GetLocation(), Var(func_index, GetLocation()))); SHOULD_GENERATE_BYTECODE; + +#if defined(WALRUS_ENABLE_JIT) m_externalDelegate->OnCallExpr(func_index); m_externalDelegate->OnReturnExpr(); +#else + m_externalDelegate->OnReturnCallExpr(func_index); +#endif return Result::Ok; } Result OnReturnCallIndirectExpr(Index sig_index, Index table_index) override { @@ -939,8 +944,13 @@ class BinaryReaderDelegateWalrus: public BinaryReaderDelegate { CHECK_RESULT(m_validator.OnReturnCallIndirect(GetLocation(), Var(sig_index, GetLocation()), Var(table_index, GetLocation()))); SHOULD_GENERATE_BYTECODE; + +#if defined(WALRUS_ENABLE_JIT) m_externalDelegate->OnCallIndirectExpr(sig_index, table_index); m_externalDelegate->OnReturnExpr(); +#else + m_externalDelegate->OnReturnCallIndirectExpr(sig_index, table_index); +#endif return Result::Ok; } Result OnReturnCallRefExpr(Type sig_type) override @@ -955,8 +965,13 @@ class BinaryReaderDelegateWalrus: public BinaryReaderDelegate { CHECK_RESULT(m_validator.OnReturnCallRef(GetLocation(), Var(sig_type, GetLocation()))); SHOULD_GENERATE_BYTECODE; + +#if defined(WALRUS_ENABLE_JIT) m_externalDelegate->OnCallRefExpr(sig_type); m_externalDelegate->OnReturnExpr(); +#else + m_externalDelegate->OnReturnCallRefExpr(sig_type); +#endif return Result::Ok; } Result OnReturnExpr() override { From eab6fcbd5397382ff1e1be216b0349b3ec73f3e5 Mon Sep 17 00:00:00 2001 From: makachanm Date: Mon, 13 Apr 2026 22:40:56 -0400 Subject: [PATCH 02/10] Reorder parameter order --- src/runtime/ExecutionState.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/runtime/ExecutionState.h b/src/runtime/ExecutionState.h index bf44fd80f..d14c73ec1 100644 --- a/src/runtime/ExecutionState.h +++ b/src/runtime/ExecutionState.h @@ -32,17 +32,18 @@ class ExecutionState { friend class Interpreter; ExecutionState(ExecutionState& parent) - : m_parent(&parent) + : m_tcoFunctionTarget(nullptr) + , m_parent(&parent) + , m_currentFunction(nullptr) , m_stackLimit(parent.m_stackLimit) - , m_tcoFunctionTarget(nullptr) { } ExecutionState(ExecutionState& parent, Function* currentFunction) - : m_parent(&parent) + : m_tcoFunctionTarget(nullptr) + , m_parent(&parent) , m_currentFunction(currentFunction) , m_stackLimit(parent.m_stackLimit) - , m_tcoFunctionTarget(nullptr) { } @@ -74,6 +75,8 @@ class ExecutionState { friend class ByteCodeTable; ExecutionState() : m_tcoFunctionTarget(nullptr) + , m_parent(nullptr) + , m_currentFunction(nullptr) { m_stackLimit = (size_t)currentStackPointer(); From 6d9ecab88a6c06e1fc300a6c5c2137504fe8a4c3 Mon Sep 17 00:00:00 2001 From: makachanm Date: Thu, 16 Apr 2026 00:43:10 -0400 Subject: [PATCH 03/10] Add UNLIKELY --- src/interpreter/Interpreter.cpp | 6 +++--- src/interpreter/Interpreter.h | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/interpreter/Interpreter.cpp b/src/interpreter/Interpreter.cpp index 1c1d23b59..dd89eb903 100644 --- a/src/interpreter/Interpreter.cpp +++ b/src/interpreter/Interpreter.cpp @@ -1735,7 +1735,7 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, Table* table = instance->table(code->tableIndex()); uint32_t idx = readValue(bp, code->calleeOffset()); - if (idx >= table->size()) { + if (UNLIKELY(idx >= table->size())) { Trap::throwException(state, "undefined element"); } auto target = reinterpret_cast(table->uncheckedGetElement(idx)); @@ -1743,7 +1743,7 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, Trap::throwException(state, "uninitialized element " + std::to_string(idx)); } const FunctionType* ft = target->functionType(); - if (!ft->equals(code->functionType())) { + if (UNLIKELY(!ft->equals(code->functionType()))) { Trap::throwException(state, "indirect call type mismatch"); } @@ -1767,7 +1767,7 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, Trap::throwException(state, "null function reference"); } const FunctionType* ft = target->functionType(); - if (!ft->equals(code->functionType())) { + if (UNLIKELY(!ft->equals(code->functionType()))) { Trap::throwException(state, "call by reference type mismatch"); } diff --git a/src/interpreter/Interpreter.h b/src/interpreter/Interpreter.h index b422df173..95eb85f39 100644 --- a/src/interpreter/Interpreter.h +++ b/src/interpreter/Interpreter.h @@ -54,7 +54,6 @@ class Interpreter { if (state.hasTCO()) { VectorCopier::copy((size_t*)functionStackBase, state.m_tcoParamStore.data(), state.m_tcoParamStore.size()); - state.destroyTCO(); } else { for (size_t i = 0; i < parameterOffsetCount; i++) { ((size_t*)functionStackBase)[i] = *((size_t*)(bp + offsets[i])); From 8272c22accb8f2d99e94772d1112226b5a175ce9 Mon Sep 17 00:00:00 2001 From: makachanm Date: Thu, 16 Apr 2026 00:43:41 -0400 Subject: [PATCH 04/10] Remove unused code --- src/interpreter/Interpreter.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/interpreter/Interpreter.h b/src/interpreter/Interpreter.h index 95eb85f39..7e881bfa8 100644 --- a/src/interpreter/Interpreter.h +++ b/src/interpreter/Interpreter.h @@ -142,11 +142,6 @@ class Interpreter { uint8_t* bp, Instance* instance); - static void returnCallOperation(ExecutionState& state, - size_t& programCounter, - uint8_t* bp, - Instance* instance); - static bool testRefGeneric(void* refPtr, Value::Type type); static bool testRefDefined(void* refPtr, const CompositeType** typeInfo); }; From cd8f1ef6919e47435a9d43c3e15772f7d3e1c930 Mon Sep 17 00:00:00 2001 From: makachanm Date: Tue, 21 Apr 2026 17:28:56 +0900 Subject: [PATCH 05/10] Fix for wrong destroy of TCO buffer --- src/interpreter/Interpreter.cpp | 3 --- src/interpreter/Interpreter.h | 3 +++ .../wabt/src/walrus/binary-reader-walrus.cc | 19 ++----------------- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/src/interpreter/Interpreter.cpp b/src/interpreter/Interpreter.cpp index dd89eb903..1241256ce 100644 --- a/src/interpreter/Interpreter.cpp +++ b/src/interpreter/Interpreter.cpp @@ -3115,7 +3115,6 @@ NEVER_INLINE void Interpreter::callOperation( while (UNLIKELY(state.hasTCO())) { target = state.m_tcoFunctionTarget; - state.destroyTCO(); target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); } @@ -3149,7 +3148,6 @@ NEVER_INLINE void Interpreter::callIndirectOperation( while (UNLIKELY(state.hasTCO())) { target = state.m_tcoFunctionTarget; - state.destroyTCO(); target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); } @@ -3178,7 +3176,6 @@ NEVER_INLINE void Interpreter::callRefOperation( while (UNLIKELY(state.hasTCO())) { target = state.m_tcoFunctionTarget; - state.destroyTCO(); target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); } diff --git a/src/interpreter/Interpreter.h b/src/interpreter/Interpreter.h index 7e881bfa8..809ace83d 100644 --- a/src/interpreter/Interpreter.h +++ b/src/interpreter/Interpreter.h @@ -110,12 +110,15 @@ class Interpreter { } if (newState.hasTCO()) { + state.m_tcoParamStore.clear(); state.m_tcoParamStore.reserve(newState.m_tcoParamStore.size()); VectorCopier::copy(state.m_tcoParamStore.data(), newState.m_tcoParamStore.data(), newState.m_tcoParamStore.size()); state.m_tcoFunctionTarget = newState.m_tcoFunctionTarget; return; } + state.destroyTCO(); + offsets += parameterOffsetCount; for (size_t i = 0; i < resultOffsetCount; i++) { *((size_t*)(bp + offsets[i])) = *((size_t*)(functionStackBase + resultOffsets[i])); diff --git a/third_party/wabt/src/walrus/binary-reader-walrus.cc b/third_party/wabt/src/walrus/binary-reader-walrus.cc index 59c5bb3f2..73dc30a5d 100644 --- a/third_party/wabt/src/walrus/binary-reader-walrus.cc +++ b/third_party/wabt/src/walrus/binary-reader-walrus.cc @@ -925,12 +925,7 @@ class BinaryReaderDelegateWalrus: public BinaryReaderDelegate { SHOULD_GENERATE_BYTECODE; -#if defined(WALRUS_ENABLE_JIT) - m_externalDelegate->OnCallExpr(func_index); - m_externalDelegate->OnReturnExpr(); -#else m_externalDelegate->OnReturnCallExpr(func_index); -#endif return Result::Ok; } Result OnReturnCallIndirectExpr(Index sig_index, Index table_index) override { @@ -944,13 +939,8 @@ class BinaryReaderDelegateWalrus: public BinaryReaderDelegate { CHECK_RESULT(m_validator.OnReturnCallIndirect(GetLocation(), Var(sig_index, GetLocation()), Var(table_index, GetLocation()))); SHOULD_GENERATE_BYTECODE; - -#if defined(WALRUS_ENABLE_JIT) - m_externalDelegate->OnCallIndirectExpr(sig_index, table_index); - m_externalDelegate->OnReturnExpr(); -#else + m_externalDelegate->OnReturnCallIndirectExpr(sig_index, table_index); -#endif return Result::Ok; } Result OnReturnCallRefExpr(Type sig_type) override @@ -965,13 +955,8 @@ class BinaryReaderDelegateWalrus: public BinaryReaderDelegate { CHECK_RESULT(m_validator.OnReturnCallRef(GetLocation(), Var(sig_type, GetLocation()))); SHOULD_GENERATE_BYTECODE; - -#if defined(WALRUS_ENABLE_JIT) - m_externalDelegate->OnCallRefExpr(sig_type); - m_externalDelegate->OnReturnExpr(); -#else + m_externalDelegate->OnReturnCallRefExpr(sig_type); -#endif return Result::Ok; } Result OnReturnExpr() override { From a48f69b77756e7ff8aa5ea7027e86dc762e008a0 Mon Sep 17 00:00:00 2001 From: makachanm Date: Tue, 21 Apr 2026 17:46:19 +0900 Subject: [PATCH 06/10] Migrate TCO buffer into Store --- src/interpreter/Interpreter.cpp | 30 +++++++++++++++--------------- src/interpreter/Interpreter.h | 15 ++++++--------- src/runtime/ExecutionState.h | 24 +++--------------------- src/runtime/Function.cpp | 7 ++++--- src/runtime/Store.cpp | 1 + src/runtime/Store.h | 23 +++++++++++++++++++++++ 6 files changed, 52 insertions(+), 48 deletions(-) diff --git a/src/interpreter/Interpreter.cpp b/src/interpreter/Interpreter.cpp index 1241256ce..5d90ef143 100644 --- a/src/interpreter/Interpreter.cpp +++ b/src/interpreter/Interpreter.cpp @@ -1721,9 +1721,8 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, auto paramSize = code->parameterOffsetsSize(); auto offsets = code->stackOffsets(); - state.m_tcoParamStore.reserve(paramSize); - memcpy(state.m_tcoParamStore.data(), bp + offsets[0], paramSize * sizeof(size_t)); - state.m_tcoFunctionTarget = target; + auto store = instance->module()->store(); + store->setTCO(bp + offsets[0], paramSize, target); return nullptr; } @@ -1750,9 +1749,8 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, auto paramSize = code->parameterOffsetsSize(); auto offsets = code->stackOffsets(); - state.m_tcoParamStore.reserve(paramSize); - memcpy(state.m_tcoParamStore.data(), bp + offsets[0], paramSize * sizeof(size_t)); - state.m_tcoFunctionTarget = target; + auto store = instance->module()->store(); + store->setTCO(bp + offsets[0], paramSize, target); return nullptr; } @@ -1774,9 +1772,8 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, auto paramSize = code->parameterOffsetsSize(); auto offsets = code->stackOffsets(); - state.m_tcoParamStore.reserve(paramSize); - memcpy(state.m_tcoParamStore.data(), bp + offsets[0], paramSize * sizeof(size_t)); - state.m_tcoFunctionTarget = target; + auto store = instance->module()->store(); + store->setTCO(bp + offsets[0], paramSize, target); return nullptr; } @@ -3111,10 +3108,11 @@ NEVER_INLINE void Interpreter::callOperation( { Call* code = (Call*)programCounter; Function* target = instance->function(code->index()); + auto store = instance->module()->store(); target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); - while (UNLIKELY(state.hasTCO())) { - target = state.m_tcoFunctionTarget; + while (UNLIKELY(store->hasTCO())) { + target = store->tcoFunctionTarget(); target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); } @@ -3144,10 +3142,11 @@ NEVER_INLINE void Interpreter::callIndirectOperation( Trap::throwException(state, "indirect call type mismatch"); } + auto store = instance->module()->store(); target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); - while (UNLIKELY(state.hasTCO())) { - target = state.m_tcoFunctionTarget; + while (UNLIKELY(store->hasTCO())) { + target = store->tcoFunctionTarget(); target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); } @@ -3172,10 +3171,11 @@ NEVER_INLINE void Interpreter::callRefOperation( Trap::throwException(state, "call by reference type mismatch"); } + auto store = instance->module()->store(); target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); - while (UNLIKELY(state.hasTCO())) { - target = state.m_tcoFunctionTarget; + while (UNLIKELY(store->hasTCO())) { + target = store->tcoFunctionTarget(); target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); } diff --git a/src/interpreter/Interpreter.h b/src/interpreter/Interpreter.h index 809ace83d..00fce4ef0 100644 --- a/src/interpreter/Interpreter.h +++ b/src/interpreter/Interpreter.h @@ -22,6 +22,7 @@ #include "runtime/Instance.h" #include "runtime/JITExec.h" #include "runtime/Module.h" +#include "runtime/Store.h" #include "runtime/Tag.h" #include "interpreter/ByteCode.h" @@ -50,10 +51,12 @@ class Interpreter { CHECK_STACK_LIMIT(newState); auto moduleFunction = function->moduleFunction(); + auto store = function->instance()->module()->store(); ALLOCA(uint8_t, functionStackBase, moduleFunction->requiredStackSize()); - if (state.hasTCO()) { - VectorCopier::copy((size_t*)functionStackBase, state.m_tcoParamStore.data(), state.m_tcoParamStore.size()); + if (store->hasTCO()) { + VectorCopier::copy((size_t*)functionStackBase, store->tcoBuffer(), store->tcoBufferSize()); + store->clearTCO(); } else { for (size_t i = 0; i < parameterOffsetCount; i++) { ((size_t*)functionStackBase)[i] = *((size_t*)(bp + offsets[i])); @@ -109,16 +112,10 @@ class Interpreter { resultOffsets = interpret(newState, programCounter, functionStackBase, function->instance()); } - if (newState.hasTCO()) { - state.m_tcoParamStore.clear(); - state.m_tcoParamStore.reserve(newState.m_tcoParamStore.size()); - VectorCopier::copy(state.m_tcoParamStore.data(), newState.m_tcoParamStore.data(), newState.m_tcoParamStore.size()); - state.m_tcoFunctionTarget = newState.m_tcoFunctionTarget; + if (store->hasTCO()) { return; } - state.destroyTCO(); - offsets += parameterOffsetCount; for (size_t i = 0; i < resultOffsetCount; i++) { *((size_t*)(bp + offsets[i])) = *((size_t*)(functionStackBase + resultOffsets[i])); diff --git a/src/runtime/ExecutionState.h b/src/runtime/ExecutionState.h index d14c73ec1..4b5f3bf3c 100644 --- a/src/runtime/ExecutionState.h +++ b/src/runtime/ExecutionState.h @@ -19,7 +19,6 @@ #include "util/Optional.h" #include "util/Util.h" -#include "util/Vector.h" namespace Walrus { @@ -32,16 +31,14 @@ class ExecutionState { friend class Interpreter; ExecutionState(ExecutionState& parent) - : m_tcoFunctionTarget(nullptr) - , m_parent(&parent) + : m_parent(&parent) , m_currentFunction(nullptr) , m_stackLimit(parent.m_stackLimit) { } ExecutionState(ExecutionState& parent, Function* currentFunction) - : m_tcoFunctionTarget(nullptr) - , m_parent(&parent) + : m_parent(&parent) , m_currentFunction(currentFunction) , m_stackLimit(parent.m_stackLimit) { @@ -57,25 +54,10 @@ class ExecutionState { return m_stackLimit; } - void destroyTCO() - { - m_tcoParamStore.clear(); - m_tcoFunctionTarget = nullptr; - } - - bool hasTCO() - { - return m_tcoFunctionTarget != nullptr; - } - - VectorWithFixedSize> m_tcoParamStore; - Function* m_tcoFunctionTarget; - private: friend class ByteCodeTable; ExecutionState() - : m_tcoFunctionTarget(nullptr) - , m_parent(nullptr) + : m_parent(nullptr) , m_currentFunction(nullptr) { m_stackLimit = (size_t)currentStackPointer(); diff --git a/src/runtime/Function.cpp b/src/runtime/Function.cpp index a3c3d401f..b48a44d4c 100644 --- a/src/runtime/Function.cpp +++ b/src/runtime/Function.cpp @@ -17,9 +17,9 @@ #include "Walrus.h" #include "runtime/Function.h" +#include "runtime/Module.h" #include "runtime/Store.h" #include "interpreter/Interpreter.h" -#include "runtime/Module.h" #include "runtime/Tag.h" #include "runtime/Instance.h" #include "runtime/Value.h" @@ -92,8 +92,9 @@ void DefinedFunction::call(ExecutionState& state, Value* argv, Value* result) ASSERT(offsetIndex == parameterOffsetSize + resultOffsetSize); interpreterCall(state, valueBuffer, offsetBuffer, parameterOffsetSize, resultOffsetSize); - while (UNLIKELY(state.hasTCO())) { - auto target = state.m_tcoFunctionTarget; + auto store = m_instance->module()->store(); + while (UNLIKELY(store->hasTCO())) { + auto target = store->tcoFunctionTarget(); target->interpreterCall(state, valueBuffer, offsetBuffer, parameterOffsetSize, resultOffsetSize); } diff --git a/src/runtime/Store.cpp b/src/runtime/Store.cpp index ed4cbdfe8..90831c0f6 100644 --- a/src/runtime/Store.cpp +++ b/src/runtime/Store.cpp @@ -38,6 +38,7 @@ static const FunctionType g_defaultFunctionTypes[] = { Store::Store(Engine* engine) : m_engine(engine) + , m_tcoFunctionTarget(nullptr) { #ifdef ENABLE_GC GC_INIT(); diff --git a/src/runtime/Store.h b/src/runtime/Store.h index 9bd7ac801..ba16e1bf0 100644 --- a/src/runtime/Store.h +++ b/src/runtime/Store.h @@ -26,6 +26,7 @@ namespace Walrus { class Engine; +class Function; class Module; class Instance; class Extern; @@ -89,6 +90,25 @@ class Store { Waiter* getWaiter(void* address); + bool hasTCO() const { return m_tcoFunctionTarget != nullptr; } + + Function* tcoFunctionTarget() const { return m_tcoFunctionTarget; } + + void setTCO(const void* src, size_t paramCount, Function* target) + { + m_tcoBuffer.resize(paramCount); + memcpy(m_tcoBuffer.data(), src, paramCount * sizeof(size_t)); + m_tcoFunctionTarget = target; + } + + size_t* tcoBuffer() { return m_tcoBuffer.data(); } + size_t tcoBufferSize() const { return m_tcoBuffer.size(); } + + void clearTCO() + { + m_tcoFunctionTarget = nullptr; + } + private: Engine* m_engine; TypeStore m_typeStore; @@ -99,6 +119,9 @@ class Store { std::mutex m_waiterListLock; std::vector m_waiterList; + + std::vector m_tcoBuffer; + Function* m_tcoFunctionTarget; }; } // namespace Walrus From 57d549db9adf733708644cd52fcac35c0be289f6 Mon Sep 17 00:00:00 2001 From: makachanm Date: Wed, 22 Apr 2026 13:00:43 +0900 Subject: [PATCH 07/10] Disable ReturnCall generation in JIT mode --- src/parser/WASMParser.cpp | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/parser/WASMParser.cpp b/src/parser/WASMParser.cpp index 0629b3879..4665d20fc 100644 --- a/src/parser/WASMParser.cpp +++ b/src/parser/WASMParser.cpp @@ -648,6 +648,7 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { // i32.eqz and JumpIf can be unified in some cases static const size_t s_noI32Eqz = SIZE_MAX - sizeof(Walrus::I32Eqz); size_t m_lastI32EqzPos; + bool m_useJIT; Walrus::FunctionType* getFunctionType(Index index) { @@ -860,7 +861,7 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { } public: - WASMBinaryReader(Walrus::TypeStore& typeStore) + WASMBinaryReader(Walrus::TypeStore& typeStore, bool useJIT = false) : m_readerOffsetPointer(nullptr) , m_readerDataPointer(nullptr) , m_codeEndOffset(0) @@ -876,6 +877,7 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { , m_segmentMode(Walrus::SegmentMode::None) , m_preprocessData(*this) , m_lastI32EqzPos(s_noI32Eqz) + , m_useJIT(useJIT) { } @@ -1590,6 +1592,11 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { virtual void OnReturnCallExpr(uint32_t index) override { m_preprocessData.seenBranch(); + if (UNLIKELY(m_useJIT)) { + OnCallExpr(index); + generateFunctionReturnCode(); + return; + } auto functionType = m_result.m_functions[index]->functionType(); auto callPos = m_currentByteCode.size(); auto parameterCount = computeFunctionParameterOrResultOffsetCount(functionType->param()); @@ -1607,6 +1614,11 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { virtual void OnReturnCallIndirectExpr(Index sigIndex, Index tableIndex) override { m_preprocessData.seenBranch(); + if (UNLIKELY(m_useJIT)) { + OnCallIndirectExpr(sigIndex, tableIndex); + generateFunctionReturnCode(); + return; + } auto functionType = getFunctionType(sigIndex); auto callPos = m_currentByteCode.size(); auto parameterCount = computeFunctionParameterOrResultOffsetCount(functionType->param()); @@ -1624,6 +1636,11 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { virtual void OnReturnCallRefExpr(Type sig_type) override { m_preprocessData.seenBranch(); + if (UNLIKELY(m_useJIT)) { + OnCallRefExpr(sig_type); + generateFunctionReturnCode(); + return; + } auto functionType = getFunctionType(sig_type.GetReferenceIndex()); auto callPos = m_currentByteCode.size(); auto parameterCount = computeFunctionParameterOrResultOffsetCount(functionType->param()); @@ -3953,7 +3970,7 @@ void WASMParsingResult::clear() std::pair, std::string> WASMParser::parseBinary(Store* store, const std::string& filename, const uint8_t* data, size_t len, const uint32_t JITFlags, const uint32_t featureFlags) { - wabt::WASMBinaryReader delegate(store->getTypeStore()); + wabt::WASMBinaryReader delegate(store->getTypeStore(), JITFlags & JITFlagValue::useJIT); std::string error = ReadWasmBinary(filename, data, len, &delegate, featureFlags); if (error.length()) { From 577bef842b5d16dc98e4073f976fac5533f86cea Mon Sep 17 00:00:00 2001 From: makachanm Date: Thu, 23 Apr 2026 14:56:08 +0900 Subject: [PATCH 08/10] Original parameter space is not a linear --- src/interpreter/Interpreter.cpp | 6 +++--- src/runtime/Store.h | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/interpreter/Interpreter.cpp b/src/interpreter/Interpreter.cpp index 5d90ef143..14d8b85a2 100644 --- a/src/interpreter/Interpreter.cpp +++ b/src/interpreter/Interpreter.cpp @@ -1722,7 +1722,7 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, auto offsets = code->stackOffsets(); auto store = instance->module()->store(); - store->setTCO(bp + offsets[0], paramSize, target); + store->setTCO(bp, offsets, paramSize, target); return nullptr; } @@ -1750,7 +1750,7 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, auto offsets = code->stackOffsets(); auto store = instance->module()->store(); - store->setTCO(bp + offsets[0], paramSize, target); + store->setTCO(bp, offsets, paramSize, target); return nullptr; } @@ -1773,7 +1773,7 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, auto offsets = code->stackOffsets(); auto store = instance->module()->store(); - store->setTCO(bp + offsets[0], paramSize, target); + store->setTCO(bp, offsets, paramSize, target); return nullptr; } diff --git a/src/runtime/Store.h b/src/runtime/Store.h index ba16e1bf0..cd554dc19 100644 --- a/src/runtime/Store.h +++ b/src/runtime/Store.h @@ -94,10 +94,12 @@ class Store { Function* tcoFunctionTarget() const { return m_tcoFunctionTarget; } - void setTCO(const void* src, size_t paramCount, Function* target) - { + void setTCO(const uint8_t* bp, const ByteCodeStackOffset* offsets, size_t paramCount, Function* target) + { m_tcoBuffer.resize(paramCount); - memcpy(m_tcoBuffer.data(), src, paramCount * sizeof(size_t)); + for (size_t i = 0; i < paramCount; i++) { + m_tcoBuffer[i] = *((size_t*)(bp + offsets[i])); + } m_tcoFunctionTarget = target; } From 79a79abccd64a83cbf1c6e5a1b275393984394b8 Mon Sep 17 00:00:00 2001 From: makachanm Date: Fri, 24 Apr 2026 15:53:39 +0900 Subject: [PATCH 09/10] Implement ReturnOffset copy --- src/interpreter/Interpreter.cpp | 15 +++++++++------ src/parser/WASMParser.cpp | 13 ++++--------- src/runtime/Function.cpp | 3 ++- src/runtime/Store.cpp | 1 + src/runtime/Store.h | 7 +++++-- 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/interpreter/Interpreter.cpp b/src/interpreter/Interpreter.cpp index 14d8b85a2..1d79c217e 100644 --- a/src/interpreter/Interpreter.cpp +++ b/src/interpreter/Interpreter.cpp @@ -1722,7 +1722,7 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, auto offsets = code->stackOffsets(); auto store = instance->module()->store(); - store->setTCO(bp, offsets, paramSize, target); + store->setTCO(bp, offsets, paramSize, code->resultOffsetsSize(), target); return nullptr; } @@ -1750,7 +1750,7 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, auto offsets = code->stackOffsets(); auto store = instance->module()->store(); - store->setTCO(bp, offsets, paramSize, target); + store->setTCO(bp, offsets, paramSize, code->resultOffsetsSize(), target); return nullptr; } @@ -1773,7 +1773,7 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, auto offsets = code->stackOffsets(); auto store = instance->module()->store(); - store->setTCO(bp, offsets, paramSize, target); + store->setTCO(bp, offsets, paramSize, code->resultOffsetsSize(), target); return nullptr; } @@ -3112,8 +3112,9 @@ NEVER_INLINE void Interpreter::callOperation( target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); while (UNLIKELY(store->hasTCO())) { + auto resultOffsetCount = store->tcoResultOffsetCount(); target = store->tcoFunctionTarget(); - target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); + target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), resultOffsetCount); } programCounter += ByteCode::pointerAlignedSize(sizeof(Call) + sizeof(ByteCodeStackOffset) * code->parameterOffsetsSize() @@ -3146,8 +3147,9 @@ NEVER_INLINE void Interpreter::callIndirectOperation( target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); while (UNLIKELY(store->hasTCO())) { + auto resultOffsetCount = store->tcoResultOffsetCount(); target = store->tcoFunctionTarget(); - target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); + target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), resultOffsetCount); } programCounter += ByteCode::pointerAlignedSize(sizeof(CallIndirect) + sizeof(ByteCodeStackOffset) * code->parameterOffsetsSize() @@ -3175,8 +3177,9 @@ NEVER_INLINE void Interpreter::callRefOperation( target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); while (UNLIKELY(store->hasTCO())) { + auto resultOffsetCount = store->tcoResultOffsetCount(); target = store->tcoFunctionTarget(); - target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), code->resultOffsetsSize()); + target->interpreterCall(state, bp, code->stackOffsets(), code->parameterOffsetsSize(), resultOffsetCount); } programCounter += ByteCode::pointerAlignedSize(sizeof(CallRef) + sizeof(ByteCodeStackOffset) * code->parameterOffsetsSize() diff --git a/src/parser/WASMParser.cpp b/src/parser/WASMParser.cpp index 4665d20fc..d10595d34 100644 --- a/src/parser/WASMParser.cpp +++ b/src/parser/WASMParser.cpp @@ -1507,7 +1507,7 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { template void generateCallExpr(CodeType* code, uint16_t parameterCount, uint16_t resultCount, - Walrus::FunctionType* functionType, bool isReturnCall = false) + Walrus::FunctionType* functionType) { size_t offsetIndex = 0; const Walrus::TypeVector::Types& param = functionType->param().types(); @@ -1526,11 +1526,6 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { offsetIndex += subIndexCount; } - if (isReturnCall) { - ASSERT(offsetIndex == code->parameterOffsetsSize()); - return; - } - const Walrus::TypeVector::Types& result = functionType->result().types(); siz = result.size(); for (size_t i = 0; i < siz; i++) { @@ -1607,7 +1602,7 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { ASSERT(m_currentByteCode.size() % sizeof(void*) == 0); auto code = peekByteCode(callPos); - generateCallExpr(code, parameterCount, resultCount, functionType, true); + generateCallExpr(code, parameterCount, resultCount, functionType); stopToGenerateByteCodeWhileBlockEnd(); } @@ -1629,7 +1624,7 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { ASSERT(m_currentByteCode.size() % sizeof(void*) == 0); auto code = peekByteCode(callPos); - generateCallExpr(code, parameterCount, resultCount, functionType, true); + generateCallExpr(code, parameterCount, resultCount, functionType); stopToGenerateByteCodeWhileBlockEnd(); } @@ -1650,7 +1645,7 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { ASSERT(m_currentByteCode.size() % sizeof(void*) == 0); auto code = peekByteCode(callPos); - generateCallExpr(code, parameterCount, resultCount, functionType, true); + generateCallExpr(code, parameterCount, resultCount, functionType); stopToGenerateByteCodeWhileBlockEnd(); } diff --git a/src/runtime/Function.cpp b/src/runtime/Function.cpp index b48a44d4c..e078f8e98 100644 --- a/src/runtime/Function.cpp +++ b/src/runtime/Function.cpp @@ -94,8 +94,9 @@ void DefinedFunction::call(ExecutionState& state, Value* argv, Value* result) auto store = m_instance->module()->store(); while (UNLIKELY(store->hasTCO())) { + auto resultOffsetCount = store->tcoResultOffsetCount(); auto target = store->tcoFunctionTarget(); - target->interpreterCall(state, valueBuffer, offsetBuffer, parameterOffsetSize, resultOffsetSize); + target->interpreterCall(state, valueBuffer, offsetBuffer, parameterOffsetSize, resultOffsetCount); } size_t resultOffsetIndex = 0; diff --git a/src/runtime/Store.cpp b/src/runtime/Store.cpp index 90831c0f6..8a3fd9b47 100644 --- a/src/runtime/Store.cpp +++ b/src/runtime/Store.cpp @@ -38,6 +38,7 @@ static const FunctionType g_defaultFunctionTypes[] = { Store::Store(Engine* engine) : m_engine(engine) + , m_tcoResultOffsetCount(0) , m_tcoFunctionTarget(nullptr) { #ifdef ENABLE_GC diff --git a/src/runtime/Store.h b/src/runtime/Store.h index cd554dc19..c798d12a7 100644 --- a/src/runtime/Store.h +++ b/src/runtime/Store.h @@ -94,17 +94,19 @@ class Store { Function* tcoFunctionTarget() const { return m_tcoFunctionTarget; } - void setTCO(const uint8_t* bp, const ByteCodeStackOffset* offsets, size_t paramCount, Function* target) - { + void setTCO(const uint8_t* bp, const ByteCodeStackOffset* offsets, size_t paramCount, uint16_t resultOffsetCount, Function* target) + { m_tcoBuffer.resize(paramCount); for (size_t i = 0; i < paramCount; i++) { m_tcoBuffer[i] = *((size_t*)(bp + offsets[i])); } + m_tcoResultOffsetCount = resultOffsetCount; m_tcoFunctionTarget = target; } size_t* tcoBuffer() { return m_tcoBuffer.data(); } size_t tcoBufferSize() const { return m_tcoBuffer.size(); } + uint16_t tcoResultOffsetCount() const { return m_tcoResultOffsetCount; } void clearTCO() { @@ -123,6 +125,7 @@ class Store { std::vector m_waiterList; std::vector m_tcoBuffer; + uint16_t m_tcoResultOffsetCount; Function* m_tcoFunctionTarget; }; From a4d9172f194981fd40ed5aecf865a80caf5f4e58 Mon Sep 17 00:00:00 2001 From: makachanm Date: Fri, 24 Apr 2026 16:20:58 +0900 Subject: [PATCH 10/10] Add exception handle in TCO --- src/interpreter/Interpreter.cpp | 20 ++++++++++++++++---- src/interpreter/Interpreter.h | 1 + 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/interpreter/Interpreter.cpp b/src/interpreter/Interpreter.cpp index 1d79c217e..33c4efabc 100644 --- a/src/interpreter/Interpreter.cpp +++ b/src/interpreter/Interpreter.cpp @@ -22,6 +22,7 @@ #include "runtime/Instance.h" #include "runtime/Function.h" #include "runtime/Memory.h" +#include "runtime/Store.h" #include "runtime/Table.h" #include "runtime/GCArray.h" #include "runtime/GCStruct.h" @@ -3043,11 +3044,22 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, uint8_t* ptr = userExceptionData.data(); auto& param = tag->functionType()->param().types(); - for (size_t i = 0; i < param.size(); i++) { - auto sz = valueStackAllocatedSize(param[i]); - memcpy(ptr, bp + code->dataOffsets()[i], sz); - ptr += sz; + auto store = instance->module()->store(); + + if (!store->hasTCO()) { + for (size_t i = 0; i < param.size(); i++) { + auto sz = valueStackAllocatedSize(param[i]); + memcpy(ptr, bp + code->dataOffsets()[i], sz); + ptr += sz; + } + } else { + for (size_t i = 0; i < param.size(); i++) { + auto sz = valueStackAllocatedSize(param[i]); + memcpy(ptr, bp + store->tcoBuffer()[i], sz); + ptr += sz; + } } + Trap::throwException(state, tag, std::move(userExceptionData)); ASSERT_NOT_REACHED(); NEXT_INSTRUCTION(); diff --git a/src/interpreter/Interpreter.h b/src/interpreter/Interpreter.h index 00fce4ef0..7905c8198 100644 --- a/src/interpreter/Interpreter.h +++ b/src/interpreter/Interpreter.h @@ -78,6 +78,7 @@ class Interpreter { resultOffsets = interpret(newState, programCounter, functionStackBase, function->instance()); break; } catch (std::unique_ptr& e) { + store->clearTCO(); for (size_t i = e->m_programCounterInfo.size(); i > 0; i--) { if (e->m_programCounterInfo[i - 1].first == &newState) { programCounter = e->m_programCounterInfo[i - 1].second;