diff --git a/ps2xAnalyzer/include/ps2recomp/elf_analyzer.h b/ps2xAnalyzer/include/ps2recomp/elf_analyzer.h index 25175c36..4b757a66 100644 --- a/ps2xAnalyzer/include/ps2recomp/elf_analyzer.h +++ b/ps2xAnalyzer/include/ps2recomp/elf_analyzer.h @@ -37,6 +37,7 @@ namespace ps2recomp static bool isReliableSymbolNameForHeuristics(const std::string &name); static bool isSystemSymbolNameForHeuristics(const std::string &name); static bool shouldAutoSkipNameForHeuristics(const std::string &name); + static bool shouldSkipSystemSymbolForHeuristics(const std::string &name, const std::unordered_set &forcedRecompileNames); static int findEntryFunctionIndexForHeuristics(const std::vector &functions, uint32_t entryAddress); static int findFallbackEntryFunctionIndexForHeuristics(const std::vector &functions); static bool hasHardwareIOSignalForHeuristics(const std::vector &instructions); diff --git a/ps2xAnalyzer/src/elf_analyzer.cpp b/ps2xAnalyzer/src/elf_analyzer.cpp index df7d40d5..de700d7f 100644 --- a/ps2xAnalyzer/src/elf_analyzer.cpp +++ b/ps2xAnalyzer/src/elf_analyzer.cpp @@ -24,6 +24,7 @@ namespace ps2recomp static bool hasPs2ApiPrefix(const std::string &name); static bool hasReliableSymbolName(const std::string &name); static bool isDoNotSkipOrStub(const std::string &name); + static bool matchesKernelRuntimeName(const std::string &name); static uint32_t decodeAbsoluteJumpTarget(uint32_t instructionAddress, uint32_t targetField); static bool tryReadWord(const ElfParser *parser, uint32_t address, uint32_t &outWord); @@ -444,6 +445,16 @@ namespace ps2recomp void ElfAnalyzer::analyzeLibraryFunctions() { + std::unordered_set forcedRecompileNames; + forcedRecompileNames.reserve(m_forceRecompileStarts.size()); + for (const auto &func : m_functions) + { + if (m_forceRecompileStarts.contains(func.start)) + { + forcedRecompileNames.insert(func.name); + } + } + for (const auto &symbol : m_symbols) { if (symbol.isFunction) @@ -457,7 +468,7 @@ namespace ps2recomp { 
m_libFunctions.insert(symbol.name); } - else if (isSystemFunction(symbol.name)) + else if (shouldSkipSystemSymbolForHeuristics(symbol.name, forcedRecompileNames)) { m_skipFunctions.insert(symbol.name); } @@ -475,7 +486,7 @@ namespace ps2recomp { m_libFunctions.insert(func.name); } - else if (isSystemFunction(func.name)) + else if (shouldSkipSystemSymbolForHeuristics(func.name, forcedRecompileNames)) { m_skipFunctions.insert(func.name); } @@ -1606,6 +1617,10 @@ namespace ps2recomp if (funcIt != m_functions.end()) { const Function &func = *funcIt; + if (m_forceRecompileStarts.contains(func.start)) + { + continue; + } if (patchAddrs.size() > 3) { @@ -2032,6 +2047,18 @@ namespace ps2recomp return false; } + static bool matchesKernelRuntimeName(const std::string &name) + { + if (name.empty()) + { + return false; + } + + static const std::regex kernelRuntimePattern( + "^(?:(?:Create|Delete|Start|ExitDelete|Exit|Terminate|Suspend|Resume|Sleep|Wakeup|CancelWakeup|Change|Rotate|Release|Setup|Register|Query|Get|Set|Refer|Poll|Wait|Signal|Enable|Disable|Flush|Reset|Add|Init)(?:Thread|Sema|EventFlag|Alarm|Intc|IntcHandler2|Dmac|DmacHandler2|OsdConfigParam|MemorySize|VSyncFlag|Heap|TLS|Status|Cache|Syscall|TLB|TLBEntry|GsCrt)|EndOfHeap|GsGetIMR|GsPutIMR|Deci2Call|Sif[A-Za-z0-9_]+|i(?:SignalSema|PollSema|ReferSemaStatus|SetEventFlag|ClearEventFlag|PollEventFlag|ReferEventFlagStatus|WakeupThread|CancelWakeupThread|ReleaseWaitThread|SetAlarm|CancelAlarm|FlushCache|sceSifSetDma|sceSifSetDChain))$"); + return std::regex_match(name, kernelRuntimePattern); + } + static bool isDoNotSkipOrStub(const std::string &name) { static const std::unordered_set kDoNotSkipOrStub = { @@ -2131,6 +2158,17 @@ namespace ps2recomp return isSystemSymbolNameForHeuristics(name); } + bool ElfAnalyzer::shouldSkipSystemSymbolForHeuristics( + const std::string &name, + const std::unordered_set &forcedRecompileNames) + { + if (forcedRecompileNames.contains(name)) + { + return false; + } + return 
isSystemSymbolNameForHeuristics(name); + } + bool ElfAnalyzer::isSystemFunction(const std::string &name) const { return isSystemSymbolNameForHeuristics(name); @@ -2144,15 +2182,27 @@ namespace ps2recomp if (!hasReliableSymbolName(name)) return false; + std::string normalizedName = name; + if (normalizedName[0] == '_' && normalizedName.size() > 1) + { + normalizedName = normalizedName.substr(1); + } + + if (matchesKernelRuntimeName(normalizedName)) + return true; + if (m_knownLibNames.find(name) != m_knownLibNames.end()) return true; + if (m_knownLibNames.find(normalizedName) != m_knownLibNames.end()) + return true; + if (hasPs2ApiPrefix(name)) return true; // Check for common C/C++ library function names static const std::regex cLibPattern("^_*(mem|str|time|f?printf|f?scanf|malloc|free|calloc|realloc|atoi|itoa|rand|srand|abort|exit|atexit|getenv|system|bsearch|qsort|abs|labs|div|ldiv|mblen|mbtowc|wctomb|mbstowcs|wcstombs).*"); - if (std::regex_match(name, cLibPattern)) + if (std::regex_match(normalizedName, cLibPattern)) { return true; } diff --git a/ps2xRecomp/include/ps2recomp/code_generator.h b/ps2xRecomp/include/ps2recomp/code_generator.h index 9d4960aa..04adfdea 100644 --- a/ps2xRecomp/include/ps2recomp/code_generator.h +++ b/ps2xRecomp/include/ps2recomp/code_generator.h @@ -11,6 +11,7 @@ namespace ps2recomp { struct JumpTableEntry; + struct JumpTable; struct Instruction; struct Function; struct Symbol; @@ -47,6 +48,7 @@ namespace ps2recomp void setRenamedFunctions(const std::unordered_map &renames); void setBootstrapInfo(const BootstrapInfo &info); void setRelocationCallNames(const std::unordered_map &callNames); + void setConfiguredJumpTables(const std::vector &jumpTables); AnalysisResult collectInternalBranchTargets(const Function &function, const std::vector &instructions); @@ -55,6 +57,7 @@ namespace ps2recomp std::unordered_map m_symbols; std::unordered_map m_renamedFunctions; std::unordered_map m_relocationCallNames; + std::unordered_map> 
m_configJumpTableTargetsByAddress; const std::vector
& m_sections; BootstrapInfo m_bootstrapInfo; diff --git a/ps2xRecomp/include/ps2recomp/types.h b/ps2xRecomp/include/ps2recomp/types.h index 0e825f08..4b667e68 100644 --- a/ps2xRecomp/include/ps2recomp/types.h +++ b/ps2xRecomp/include/ps2recomp/types.h @@ -180,6 +180,7 @@ namespace ps2recomp std::unordered_map patches; std::vector stubImplementations; std::unordered_map mmioByInstructionAddress; + std::vector jumpTables; }; } // namespace ps2recomp diff --git a/ps2xRecomp/src/lib/code_generator.cpp b/ps2xRecomp/src/lib/code_generator.cpp index dde376c1..6f02e2f9 100644 --- a/ps2xRecomp/src/lib/code_generator.cpp +++ b/ps2xRecomp/src/lib/code_generator.cpp @@ -123,6 +123,26 @@ namespace ps2recomp m_relocationCallNames = callNames; } + void CodeGenerator::setConfiguredJumpTables(const std::vector &jumpTables) + { + m_configJumpTableTargetsByAddress.clear(); + for (const auto &table : jumpTables) + { + auto &targets = m_configJumpTableTargetsByAddress[table.address]; + for (const auto &entry : table.entries) + { + targets.push_back(entry.target); + } + } + + for (auto &[address, targets] : m_configJumpTableTargetsByAddress) + { + (void)address; + std::sort(targets.begin(), targets.end()); + targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); + } + } + std::string CodeGenerator::getFunctionName(uint32_t address) const { auto it = m_renamedFunctions.find(address); @@ -376,6 +396,13 @@ namespace ps2recomp ss << " " << delaySlotPrefix << delaySlotCode << delaySlotSuffix << "\n"; } + if (branchInst.function == SPECIAL_JALR) + { + ss << " if (jumpTarget == 0u) {\n"; + ss << fmt::format(" ctx->pc = 0x{:X}u;\n", fallthroughPc); + ss << " } else {\n"; + } + ss << " ctx->pc = jumpTarget;\n"; if (!sortedInternalTargets.empty()) @@ -404,6 +431,11 @@ namespace ps2recomp ss << " }\n"; } + if (branchInst.function == SPECIAL_JALR) + { + ss << " }\n"; + } + ss << " }\n"; } // ------------------------- @@ -658,7 +690,7 @@ namespace ps2recomp if 
(hasIndirectRegisterJump) { - bool hasFallback = false; + bool needsJrFallback = false; for (const Instruction* jrInst : indirectJumps) { bool foundTable = false; @@ -726,6 +758,33 @@ namespace ps2recomp if (foundTableAddress) { tableAddress += lwOffset; + const auto configuredTableIt = m_configJumpTableTargetsByAddress.find(tableAddress); + if (configuredTableIt != m_configJumpTableTargetsByAddress.end()) + { + std::vector jrTargets; + jrTargets.reserve(configuredTableIt->second.size()); + for (uint32_t target : configuredTableIt->second) + { + if (target >= function.start && target < function.end && + instructionAddresses.contains(target)) + { + jrTargets.push_back(target); + } + } + + if (!jrTargets.empty()) + { + std::sort(jrTargets.begin(), jrTargets.end()); + jrTargets.erase(std::unique(jrTargets.begin(), jrTargets.end()), jrTargets.end()); + result.jumpTableTargets[jrInst->address] = jrTargets; + for (uint32_t target : jrTargets) + { + result.entryPoints.insert(target); + } + foundTable = true; + } + } + uint32_t unshiftedIndexReg = 0; for (int i = adduIndex - 1; i >= 0 && i >= adduIndex - 10; --i) { const auto& inst = instructions[i]; @@ -746,7 +805,7 @@ namespace ps2recomp } } - if (numCases > 0 && numCases <= 1000) { + if (!foundTable && numCases > 0 && numCases <= 1000) { const Section* rodata = nullptr; for (const auto& sec : m_sections) { if (tableAddress >= sec.address && tableAddress < sec.address + sec.size) { @@ -788,11 +847,14 @@ namespace ps2recomp } } if (!foundTable) { - hasFallback = true; + if (!(jrInst->function == SPECIAL_JALR)) + { + needsJrFallback = true; + } } } - if (hasFallback) { + if (needsJrFallback) { for (uint32_t addr : instructionAddresses) { if (addr >= function.start && addr < function.end) @@ -821,7 +883,7 @@ namespace ps2recomp ss << "#include \"ps2_recompiled_stubs.h\"\n\n"; ss << "#include \"ps2_syscalls.h\"\n"; ss << "#include \"ps2_stubs.h\"\n\n"; - ss << "#ifdef _DEBUG\n"; + ss << "#ifdef PS2_FUNCTION_LOG_TRACKER\n"; 
ss << "#include \"ps2_log.h\"\n"; ss << "#endif\n\n"; } @@ -840,7 +902,7 @@ namespace ps2recomp } ss << "void " << sanitizedName << "(uint8_t* rdram, R5900Context* ctx, PS2Runtime *runtime) {\n"; - ss << "#ifdef _DEBUG\n"; + ss << "#ifdef PS2_FUNCTION_LOG_TRACKER\n"; ss << " PS_LOG_ENTRY(\"" << sanitizedName << "\");\n"; ss << "#endif\n"; ss << "\n"; @@ -964,11 +1026,11 @@ namespace ps2recomp case OPCODE_SLTIU: return fmt::format("SET_GPR_U64(ctx, {}, ((uint64_t)GPR_U64(ctx, {}) < (uint64_t)(int64_t)(int32_t){}) ? 1 : 0);", inst.rt, inst.rs, inst.simmediate); case OPCODE_ANDI: - return fmt::format("SET_GPR_VEC(ctx, {}, PS2_PAND(GPR_VEC(ctx, {}), _mm_cvtsi32_si128((int){}{})));", inst.rt, inst.rs, inst.immediate, "u"); + return fmt::format("SET_GPR_U64(ctx, {}, GPR_U64(ctx, {}) & (uint64_t)(uint16_t){});", inst.rt, inst.rs, inst.immediate); case OPCODE_ORI: - return fmt::format("SET_GPR_VEC(ctx, {}, PS2_POR(GPR_VEC(ctx, {}), _mm_cvtsi32_si128((int){}{})));", inst.rt, inst.rs, inst.immediate, "u"); + return fmt::format("SET_GPR_U64(ctx, {}, GPR_U64(ctx, {}) | (uint64_t)(uint16_t){});", inst.rt, inst.rs, inst.immediate); case OPCODE_XORI: - return fmt::format("SET_GPR_VEC(ctx, {}, PS2_PXOR(GPR_VEC(ctx, {}), _mm_cvtsi32_si128((int){}{})));", inst.rt, inst.rs, inst.immediate, "u"); + return fmt::format("SET_GPR_U64(ctx, {}, GPR_U64(ctx, {}) ^ (uint64_t)(uint16_t){});", inst.rt, inst.rs, inst.immediate); case OPCODE_LUI: return fmt::format("SET_GPR_S32(ctx, {}, (int32_t)((uint32_t){} << 16));", inst.rt, inst.immediate); case OPCODE_LB: @@ -1140,10 +1202,10 @@ namespace ps2recomp case OPCODE_SC: return fmt::format( "{{ uint32_t addr = ADD32(GPR_U32(ctx, {}), {}); " - "if (ctx->llbit) {{ WRITE32(addr, GPR_U32(ctx, {})); " + "if (ctx->llbit && ctx->lladdr == addr) {{ WRITE32(addr, GPR_U32(ctx, {})); " "SET_GPR_S32(ctx, {}, 1); }} " "else {{ SET_GPR_S32(ctx, {}, 0); }} " - "ctx->llbit = 0; }}", + "ctx->llbit = 0; ctx->lladdr = 0; }}", inst.rs, inst.simmediate, inst.rt, 
inst.rt, inst.rt); default: return fmt::format("// Unhandled opcode: 0x{:X}", inst.opcode); @@ -1189,8 +1251,16 @@ namespace ps2recomp case SPECIAL_MTLO: return fmt::format("ctx->lo = GPR_U64(ctx, {});", inst.rs); case SPECIAL_MULT: + if (inst.rd != 0) + { + return fmt::format("{{ int64_t result = (int64_t)GPR_S32(ctx, {}) * (int64_t)GPR_S32(ctx, {}); ctx->lo = (uint64_t)(int64_t)(int32_t)result; ctx->hi = (uint64_t)(int64_t)(int32_t)(result >> 32); SET_GPR_S32(ctx, {}, (int32_t)result); }}", inst.rs, inst.rt, inst.rd); + } return fmt::format("{{ int64_t result = (int64_t)GPR_S32(ctx, {}) * (int64_t)GPR_S32(ctx, {}); ctx->lo = (uint64_t)(int64_t)(int32_t)result; ctx->hi = (uint64_t)(int64_t)(int32_t)(result >> 32); }}", inst.rs, inst.rt); case SPECIAL_MULTU: + if (inst.rd != 0) + { + return fmt::format("{{ uint64_t result = (uint64_t)GPR_U32(ctx, {}) * (uint64_t)GPR_U32(ctx, {}); ctx->lo = (uint64_t)(int64_t)(int32_t)result; ctx->hi = (uint64_t)(int64_t)(int32_t)(result >> 32); SET_GPR_S32(ctx, {}, (int32_t)result); }}", inst.rs, inst.rt, inst.rd); + } return fmt::format("{{ uint64_t result = (uint64_t)GPR_U32(ctx, {}) * (uint64_t)GPR_U32(ctx, {}); ctx->lo = (uint64_t)(int64_t)(int32_t)result; ctx->hi = (uint64_t)(int64_t)(int32_t)(result >> 32); }}", inst.rs, inst.rt); case SPECIAL_DIV: return fmt::format("{{ int32_t divisor = GPR_S32(ctx, {}); " @@ -1233,13 +1303,13 @@ namespace ps2recomp case SPECIAL_SUBU: return fmt::format("SET_GPR_S32(ctx, {}, (int32_t)SUB32(GPR_U32(ctx, {}), GPR_U32(ctx, {})));", inst.rd, inst.rs, inst.rt); case SPECIAL_AND: - return fmt::format("SET_GPR_VEC(ctx, {}, PS2_PAND(GPR_VEC(ctx, {}), GPR_VEC(ctx, {})));", inst.rd, inst.rs, inst.rt); + return fmt::format("SET_GPR_U64(ctx, {}, GPR_U64(ctx, {}) & GPR_U64(ctx, {}));", inst.rd, inst.rs, inst.rt); case SPECIAL_OR: - return fmt::format("SET_GPR_VEC(ctx, {}, PS2_POR(GPR_VEC(ctx, {}), GPR_VEC(ctx, {})));", inst.rd, inst.rs, inst.rt); + return fmt::format("SET_GPR_U64(ctx, {}, GPR_U64(ctx, 
{}) | GPR_U64(ctx, {}));", inst.rd, inst.rs, inst.rt); case SPECIAL_XOR: - return fmt::format("SET_GPR_VEC(ctx, {}, PS2_PXOR(GPR_VEC(ctx, {}), GPR_VEC(ctx, {})));", inst.rd, inst.rs, inst.rt); + return fmt::format("SET_GPR_U64(ctx, {}, GPR_U64(ctx, {}) ^ GPR_U64(ctx, {}));", inst.rd, inst.rs, inst.rt); case SPECIAL_NOR: - return fmt::format("SET_GPR_VEC(ctx, {}, PS2_PNOR(GPR_VEC(ctx, {}), GPR_VEC(ctx, {})));", inst.rd, inst.rs, inst.rt); + return fmt::format("SET_GPR_U64(ctx, {}, ~(GPR_U64(ctx, {}) | GPR_U64(ctx, {})));", inst.rd, inst.rs, inst.rt); case SPECIAL_SLT: return fmt::format("SET_GPR_U64(ctx, {}, ((int64_t)GPR_S64(ctx, {}) < (int64_t)GPR_S64(ctx, {})) ? 1 : 0);", inst.rd, inst.rs, inst.rt); case SPECIAL_SLTU: @@ -1641,8 +1711,16 @@ namespace ps2recomp case MMI_MTLO1: return fmt::format("ctx->lo1 = GPR_U64(ctx, {});", rs); case MMI_MULT1: + if (rd != 0) + { + return fmt::format("{{ int64_t result = (int64_t)GPR_S32(ctx, {}) * (int64_t)GPR_S32(ctx, {}); ctx->lo1 = (uint64_t)(int64_t)(int32_t)result; ctx->hi1 = (uint64_t)(int64_t)(int32_t)(result >> 32); SET_GPR_S32(ctx, {}, (int32_t)result); }}", rs, rt, rd); + } return fmt::format("{{ int64_t result = (int64_t)GPR_S32(ctx, {}) * (int64_t)GPR_S32(ctx, {}); ctx->lo1 = (uint64_t)(int64_t)(int32_t)result; ctx->hi1 = (uint64_t)(int64_t)(int32_t)(result >> 32); }}", rs, rt); case MMI_MULTU1: + if (rd != 0) + { + return fmt::format("{{ uint64_t result = (uint64_t)GPR_U32(ctx, {}) * (uint64_t)GPR_U32(ctx, {}); ctx->lo1 = (uint64_t)(int64_t)(int32_t)result; ctx->hi1 = (uint64_t)(int64_t)(int32_t)(result >> 32); SET_GPR_S32(ctx, {}, (int32_t)result); }}", rs, rt, rd); + } return fmt::format("{{ uint64_t result = (uint64_t)GPR_U32(ctx, {}) * (uint64_t)GPR_U32(ctx, {}); ctx->lo1 = (uint64_t)(int64_t)(int32_t)result; ctx->hi1 = (uint64_t)(int64_t)(int32_t)(result >> 32); }}", rs, rt); case MMI_DIV1: return fmt::format("{{ int32_t divisor = GPR_S32(ctx, {}); " @@ -1661,16 +1739,40 @@ namespace ps2recomp case MMI_DIVU1: 
return fmt::format("{{ uint32_t divisor = GPR_U32(ctx, {}); if (divisor != 0) {{ ctx->lo1 = (uint64_t)(int64_t)(int32_t)(GPR_U32(ctx, {}) / divisor); ctx->hi1 = (uint64_t)(int64_t)(int32_t)(GPR_U32(ctx, {}) % divisor); }} else {{ ctx->lo1=0xFFFFFFFFFFFFFFFFull; ctx->hi1=(uint64_t)(int64_t)(int32_t)GPR_U32(ctx,{}); }} }}", rt, rs, rs, rs); case MMI_MADD: + if (rd != 0) + { + return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi, ctx->lo); int64_t prod = (int64_t)GPR_S32(ctx, {}) * (int64_t)GPR_S32(ctx, {}); int64_t result = acc + prod; ctx->lo = Ps2SignExt32ToU64((uint32_t)result); ctx->hi = Ps2SignExt32ToU64((uint32_t)(result >> 32)); SET_GPR_S32(ctx, {}, (int32_t)result); }}", rs, rt, rd); + } return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi, ctx->lo); int64_t prod = (int64_t)GPR_S32(ctx, {}) * (int64_t)GPR_S32(ctx, {}); int64_t result = acc + prod; ctx->lo = Ps2SignExt32ToU64((uint32_t)result); ctx->hi = Ps2SignExt32ToU64((uint32_t)(result >> 32)); }}", rs, rt); case MMI_MADDU: + if (rd != 0) + { + return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi, ctx->lo); uint64_t prod = (uint64_t)GPR_U32(ctx, {}) * (uint64_t)GPR_U32(ctx, {}); uint64_t result = acc + prod; ctx->lo = Ps2SignExt32ToU64((uint32_t)result); ctx->hi = Ps2SignExt32ToU64((uint32_t)(result >> 32)); SET_GPR_S32(ctx, {}, (int32_t)result); }}", rs, rt, rd); + } return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi, ctx->lo); uint64_t prod = (uint64_t)GPR_U32(ctx, {}) * (uint64_t)GPR_U32(ctx, {}); uint64_t result = acc + prod; ctx->lo = Ps2SignExt32ToU64((uint32_t)result); ctx->hi = Ps2SignExt32ToU64((uint32_t)(result >> 32)); }}", rs, rt); case MMI_MSUB: + if (rd != 0) + { + return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi, ctx->lo); int64_t prod = (int64_t)GPR_S32(ctx, {}) * (int64_t)GPR_S32(ctx, {}); int64_t result = acc - prod; ctx->lo = Ps2SignExt32ToU64((uint32_t)result); ctx->hi = Ps2SignExt32ToU64((uint32_t)(result >> 32)); SET_GPR_S32(ctx, {}, 
(int32_t)result); }}", rs, rt, rd); + } return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi, ctx->lo); int64_t prod = (int64_t)GPR_S32(ctx, {}) * (int64_t)GPR_S32(ctx, {}); int64_t result = acc - prod; ctx->lo = Ps2SignExt32ToU64((uint32_t)result); ctx->hi = Ps2SignExt32ToU64((uint32_t)(result >> 32)); }}", rs, rt); case MMI_MSUBU: + if (rd != 0) + { + return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi, ctx->lo); uint64_t prod = (uint64_t)GPR_U32(ctx, {}) * (uint64_t)GPR_U32(ctx, {}); uint64_t result = acc - prod; ctx->lo = Ps2SignExt32ToU64((uint32_t)result); ctx->hi = Ps2SignExt32ToU64((uint32_t)(result >> 32)); SET_GPR_S32(ctx, {}, (int32_t)result); }}", rs, rt, rd); + } return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi, ctx->lo); uint64_t prod = (uint64_t)GPR_U32(ctx, {}) * (uint64_t)GPR_U32(ctx, {}); uint64_t result = acc - prod; ctx->lo = Ps2SignExt32ToU64((uint32_t)result); ctx->hi = Ps2SignExt32ToU64((uint32_t)(result >> 32)); }}", rs, rt); case MMI_MADD1: + if (rd != 0) + { + return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi1, ctx->lo1); int64_t prod = (int64_t)GPR_S32(ctx, {}) * (int64_t)GPR_S32(ctx, {}); int64_t result = acc + prod; ctx->lo1 = Ps2SignExt32ToU64((uint32_t)result); ctx->hi1 = Ps2SignExt32ToU64((uint32_t)(result >> 32)); SET_GPR_S32(ctx, {}, (int32_t)result); }}", rs, rt, rd); + } return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi1, ctx->lo1); int64_t prod = (int64_t)GPR_S32(ctx, {}) * (int64_t)GPR_S32(ctx, {}); int64_t result = acc + prod; ctx->lo1 = Ps2SignExt32ToU64((uint32_t)result); ctx->hi1 = Ps2SignExt32ToU64((uint32_t)(result >> 32)); }}", rs, rt); case MMI_MADDU1: + if (rd != 0) + { + return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi1, ctx->lo1); uint64_t prod = (uint64_t)GPR_U32(ctx, {}) * (uint64_t)GPR_U32(ctx, {}); uint64_t result = acc + prod; ctx->lo1 = Ps2SignExt32ToU64((uint32_t)result); ctx->hi1 = Ps2SignExt32ToU64((uint32_t)(result >> 32)); SET_GPR_S32(ctx, {}, 
(int32_t)result); }}", rs, rt, rd); + } return fmt::format("{{ uint64_t acc = Ps2HiLoToU64(ctx->hi1, ctx->lo1); uint64_t prod = (uint64_t)GPR_U32(ctx, {}) * (uint64_t)GPR_U32(ctx, {}); uint64_t result = acc + prod; ctx->lo1 = Ps2SignExt32ToU64((uint32_t)result); ctx->hi1 = Ps2SignExt32ToU64((uint32_t)(result >> 32)); }}", rs, rt); case MMI_PLZCW: return fmt::format( @@ -2535,9 +2637,9 @@ namespace ps2recomp std::string CodeGenerator::translatePCPYLD(const Instruction &inst) { - // Copies lower 64 of rs to lower 64 of rd, lower 64 of rt to upper 64 of rd - return fmt::format("SET_GPR_VEC(ctx, {}, _mm_unpacklo_epi64(GPR_VEC(ctx, {}), GPR_VEC(ctx, {})));", - inst.rd, inst.rs, inst.rt); // Order matters for unpack + // PCPYLD uses rs as the upper source and rt as the lower source. + return fmt::format("SET_GPR_VEC(ctx, {}, PS2_PCPYLD(GPR_VEC(ctx, {}), GPR_VEC(ctx, {})));", + inst.rd, inst.rs, inst.rt); } std::string CodeGenerator::translatePMADDH(const Instruction &inst) @@ -2663,8 +2765,7 @@ namespace ps2recomp std::string CodeGenerator::translatePEXEW(const Instruction &inst) { - // Swaps words 0<->2 and 1<->3 - return fmt::format("SET_GPR_VEC(ctx, {}, _mm_shuffle_epi32(GPR_VEC(ctx, {}), _MM_SHUFFLE(1,0,3,2)));", + return fmt::format("SET_GPR_VEC(ctx, {}, PS2_PEXEW(GPR_VEC(ctx, {})));", inst.rd, inst.rs); } @@ -3553,42 +3654,9 @@ namespace ps2recomp uint8_t rd = inst.rd; uint8_t rs = inst.rs; uint8_t rt = inst.rt; - // PS2 MMI QFSRV uses the lower 7 bits of the SA register. 
- return fmt::format( - "{{ \n" - " __m128i val_rt = GPR_VEC(ctx, {});\n" // Get rt (higher bits of the 256-bit value) - " __m128i val_rs = GPR_VEC(ctx, {});\n" // Get rs (lower bits of the 256-bit value) - " uint32_t shift_amount = ctx->sa & 0x7F; \n" // Get shift amount (0-127) from SA reg - - // Perform the shift using 64-bit parts for easier SSE2 implementation - " uint64_t rt_hi = _mm_cvtsi128_si64(_mm_srli_si128(val_rt, 8));\n" - " uint64_t rt_lo = _mm_cvtsi128_si64(val_rt);\n" - " uint64_t rs_hi = _mm_cvtsi128_si64(_mm_srli_si128(val_rs, 8));\n" - " uint64_t rs_lo = _mm_cvtsi128_si64(val_rs);\n" - - " __m128i result; \n" - " if (shift_amount == 0) {{ \n" - " result = val_rs; \n" // No shift, result is just rs - " }} else if (shift_amount < 64) {{ \n" - " uint64_t res_lo = (rs_lo >> shift_amount) | (rs_hi << (64 - shift_amount)); \n" - " uint64_t res_hi = (rs_hi >> shift_amount) | (rt_lo << (64 - shift_amount)); \n" - " result = _mm_set_epi64x(res_hi, res_lo); \n" - " }} else if (shift_amount == 64) {{ \n" - " result = _mm_set_epi64x(rt_lo, rs_hi); \n" // Shift exactly 64 bits - " }} else if (shift_amount < 128) {{ \n" // shift_amount > 64 - " uint32_t sub_shift = shift_amount - 64; \n" - " uint64_t res_lo = (rs_hi >> sub_shift) | (rt_lo << (64 - sub_shift)); \n" - " uint64_t res_hi = (rt_lo >> sub_shift) | (rt_hi << (64 - sub_shift)); \n" - " result = _mm_set_epi64x(res_hi, res_lo); \n" - " }} else {{ // shift_amount >= 128 \n" - " uint32_t sub_shift = shift_amount - 128; \n" - " uint64_t res_lo = (rt_lo >> sub_shift) | (rt_hi << (64 - sub_shift)); \n" // Shift rt into result - " uint64_t res_hi = (rt_hi >> sub_shift); \n" // Shift hi part of rt - " result = _mm_set_epi64x(res_hi, res_lo); \n" - " }} \n" - " SET_GPR_VEC(ctx, {}, result); \n" - "}}", - rt, rs, rd); + // QFSRV semantics are centralized in runtime macro helpers. 
+ return fmt::format("SET_GPR_VEC(ctx, {}, PS2_QFSRV(GPR_VEC(ctx, {}), GPR_VEC(ctx, {}), ctx->sa & 0x7F));", + rd, rs, rt); } std::string CodeGenerator::generateFunctionRegistration(const std::vector &functions, diff --git a/ps2xRecomp/src/lib/config_manager.cpp b/ps2xRecomp/src/lib/config_manager.cpp index 7d8c26f4..ab15df17 100644 --- a/ps2xRecomp/src/lib/config_manager.cpp +++ b/ps2xRecomp/src/lib/config_manager.cpp @@ -112,6 +112,109 @@ namespace ps2recomp config.mmioByInstructionAddress[instAddr] = mmioAddr; } } + + if (data.contains("jump_tables") && data.at("jump_tables").is_table()) + { + const auto &jumpTablesNode = data.at("jump_tables"); + if (jumpTablesNode.contains("table") && jumpTablesNode.at("table").is_array()) + { + const auto &tables = jumpTablesNode.at("table").as_array(); + for (const auto &tableNode : tables) + { + if (!tableNode.is_table()) + { + continue; + } + + JumpTable table{}; + + if (tableNode.contains("address")) + { + const auto &addressValue = tableNode.at("address"); + if (addressValue.is_string()) + { + table.address = std::stoul(addressValue.as_string(), nullptr, 0); + } + else if (addressValue.is_integer()) + { + table.address = static_cast(addressValue.as_integer()); + } + } + + if (tableNode.contains("base_register")) + { + const auto &baseRegisterValue = tableNode.at("base_register"); + if (baseRegisterValue.is_string()) + { + table.baseRegister = std::stoul(baseRegisterValue.as_string(), nullptr, 0); + } + else if (baseRegisterValue.is_integer()) + { + table.baseRegister = static_cast(baseRegisterValue.as_integer()); + } + } + + if (table.address == 0u || !tableNode.contains("entries") || !tableNode.at("entries").is_array()) + { + continue; + } + + const auto &entries = tableNode.at("entries").as_array(); + uint32_t fallbackIndex = 0u; + for (const auto &entryNode : entries) + { + if (!entryNode.is_table()) + { + ++fallbackIndex; + continue; + } + + JumpTableEntry entry{}; + entry.index = fallbackIndex; + + if 
(entryNode.contains("index")) + { + const auto &indexValue = entryNode.at("index"); + if (indexValue.is_string()) + { + entry.index = std::stoul(indexValue.as_string(), nullptr, 0); + } + else if (indexValue.is_integer()) + { + entry.index = static_cast(indexValue.as_integer()); + } + } + + bool hasTarget = false; + if (entryNode.contains("target")) + { + const auto &targetValue = entryNode.at("target"); + if (targetValue.is_string()) + { + entry.target = std::stoul(targetValue.as_string(), nullptr, 0); + hasTarget = true; + } + else if (targetValue.is_integer()) + { + entry.target = static_cast(targetValue.as_integer()); + hasTarget = true; + } + } + + if (hasTarget) + { + table.entries.push_back(entry); + } + ++fallbackIndex; + } + + if (!table.entries.empty()) + { + config.jumpTables.push_back(std::move(table)); + } + } + } + } } catch (const std::exception &e) { @@ -152,6 +255,35 @@ namespace ps2recomp data["mmio"] = mmioTable; } + if (!config.jumpTables.empty()) + { + toml::table jumpTables; + toml::array tableArray; + for (const auto &table : config.jumpTables) + { + toml::table tableNode; + std::ostringstream addressStream; + addressStream << "0x" << std::hex << table.address; + tableNode["address"] = addressStream.str(); + tableNode["base_register"] = static_cast(table.baseRegister); + + toml::array entries; + for (const auto &entry : table.entries) + { + toml::table entryNode; + entryNode["index"] = static_cast(entry.index); + std::ostringstream targetStream; + targetStream << "0x" << std::hex << entry.target; + entryNode["target"] = targetStream.str(); + entries.push_back(entryNode); + } + tableNode["entries"] = entries; + tableArray.push_back(tableNode); + } + jumpTables["table"] = tableArray; + data["jump_tables"] = jumpTables; + } + toml::table patches; toml::array instPatches; for (const auto &[addr, value] : config.patches) diff --git a/ps2xRecomp/src/lib/elf_parser.cpp b/ps2xRecomp/src/lib/elf_parser.cpp index bb082088..b96888ea 100644 --- 
a/ps2xRecomp/src/lib/elf_parser.cpp +++ b/ps2xRecomp/src/lib/elf_parser.cpp @@ -143,6 +143,69 @@ namespace return nullptr; } + bool HasAnyExecutableSection(const std::vector §ions) + { + for (const auto §ion : sections) + { + if (section.isCode) + { + return true; + } + } + return false; + } + + const ps2recomp::Section *FindFunctionSectionByAddress(const std::vector §ions, uint32_t address) + { + const ps2recomp::Section *codeSection = FindCodeSectionByAddress(sections, address); + if (codeSection) + { + return codeSection; + } + + // Some malformed/stripped ELFs may not carry executable section flags. + if (!HasAnyExecutableSection(sections)) + { + return FindSectionByAddress(sections, address); + } + + return nullptr; + } + + uint32_t ClampFunctionEndToSection(const ps2recomp::Section *section, uint32_t start, uint32_t requestedEnd) + { + if (!section) + { + return requestedEnd; + } + + const uint64_t sectionEnd64 = static_cast(section->address) + static_cast(section->size); + const uint32_t sectionEnd = (sectionEnd64 > 0xFFFFFFFFull) + ? 0xFFFFFFFFu + : static_cast(sectionEnd64); + + uint32_t end = requestedEnd; + if (end == 0 || end > sectionEnd) + { + end = sectionEnd; + } + + if (end <= start) + { + const uint64_t minimumEnd64 = static_cast(start) + 4ull; + if (minimumEnd64 <= sectionEnd64) + { + end = static_cast(minimumEnd64); + } + else + { + end = sectionEnd; + } + } + + return end; + } + std::string MakeAutoFunctionName(uint32_t address) { char buffer[32]{}; @@ -503,7 +566,22 @@ namespace ps2recomp continue; } - const uint32_t symbolEnd = symbol.address + symbol.size; + const Section *functionSection = FindFunctionSectionByAddress(m_sections, symbol.address); + if (!functionSection) + { + continue; + } + + const uint64_t symbolEnd64 = static_cast(symbol.address) + static_cast(symbol.size); + uint32_t symbolEnd = (symbolEnd64 > 0xFFFFFFFFull) + ? 
0xFFFFFFFFu + : static_cast(symbolEnd64); + symbolEnd = ClampFunctionEndToSection(functionSection, symbol.address, symbolEnd); + if (symbolEnd <= symbol.address) + { + continue; + } + auto inserted = authoritativeEndByStart.emplace(symbol.address, symbolEnd); if (!inserted.second && symbolEnd > inserted.first->second) { @@ -519,10 +597,22 @@ namespace ps2recomp continue; } - auto inserted = authoritativeEndByStart.emplace(extra.start, extra.end); - if (!inserted.second && extra.end > inserted.first->second) + const Section *functionSection = FindFunctionSectionByAddress(m_sections, extra.start); + if (!functionSection) { - inserted.first->second = extra.end; + continue; + } + + const uint32_t clampedEnd = ClampFunctionEndToSection(functionSection, extra.start, extra.end); + if (clampedEnd <= extra.start) + { + continue; + } + + auto inserted = authoritativeEndByStart.emplace(extra.start, clampedEnd); + if (!inserted.second && clampedEnd > inserted.first->second) + { + inserted.first->second = clampedEnd; } } @@ -566,6 +656,11 @@ namespace ps2recomp return; } + if (!FindFunctionSectionByAddress(m_sections, newFunction.start)) + { + return; + } + const bool insideAuthoritativeRange = isInsideAuthoritativeRange(newFunction.start); const bool hasOwnAuthoritativeRange = authoritativeEndByStart.contains(newFunction.start); const bool hasAutoName = newFunction.name.empty() || IsAutoGeneratedName(newFunction.name); @@ -585,7 +680,10 @@ namespace ps2recomp auto authoritativeIt = authoritativeEndByStart.find(insertedFunction.start); if (authoritativeIt != authoritativeEndByStart.end()) { - insertedFunction.end = authoritativeIt->second; + insertedFunction.end = ClampFunctionEndToSection( + FindFunctionSectionByAddress(m_sections, insertedFunction.start), + insertedFunction.start, + authoritativeIt->second); } return; } @@ -603,7 +701,10 @@ namespace ps2recomp auto authoritativeIt = authoritativeEndByStart.find(existing.start); if (authoritativeIt != 
authoritativeEndByStart.end()) { - existing.end = authoritativeIt->second; + existing.end = ClampFunctionEndToSection( + FindFunctionSectionByAddress(m_sections, existing.start), + existing.start, + authoritativeIt->second); } else if (newFunction.end > existing.end) { @@ -620,10 +721,24 @@ namespace ps2recomp { continue; } + + if (!FindFunctionSectionByAddress(m_sections, symbol.address)) + { + continue; + } + Function func; func.name = symbol.name; func.start = symbol.address; - func.end = (symbol.size > 0) ? (symbol.address + symbol.size) : 0; + if (symbol.size > 0) + { + const uint64_t end64 = static_cast(symbol.address) + static_cast(symbol.size); + func.end = (end64 > 0xFFFFFFFFull) ? 0xFFFFFFFFu : static_cast(end64); + } + else + { + func.end = 0; + } func.isRecompiled = false; func.isStub = false; func.isSkipped = false; @@ -656,7 +771,7 @@ namespace ps2recomp continue; } - const Section *section = FindSectionByAddress(m_sections, func.start); + const Section *section = FindFunctionSectionByAddress(m_sections, func.start); uint32_t sectionEnd = section ? 
(section->address + section->size) : (func.start + 4); uint32_t nextStart = sectionEnd; @@ -846,6 +961,8 @@ namespace ps2recomp } int count = 0; + int skippedNonExecutable = 0; + int skippedInvalidRange = 0; while (std::getline(file, line)) { if (line.empty()) @@ -867,6 +984,20 @@ namespace ps2recomp uint32_t start = std::stoul(startStr, nullptr, 0); uint32_t end = std::stoul(endStr, nullptr, 0); + const Section *section = FindFunctionSectionByAddress(m_sections, start); + if (!section) + { + ++skippedNonExecutable; + continue; + } + + end = ClampFunctionEndToSection(section, start, end); + if (end <= start) + { + ++skippedInvalidRange; + continue; + } + Function func{}; func.name = name; func.start = start; @@ -887,6 +1018,16 @@ namespace ps2recomp if (count > 0) { std::cout << "Loaded " << count << " functions from Ghidra map" << std::endl; + if (skippedNonExecutable > 0) + { + std::cout << "Ignored " << skippedNonExecutable + << " Ghidra function(s) outside executable sections." << std::endl; + } + if (skippedInvalidRange > 0) + { + std::cout << "Ignored " << skippedInvalidRange + << " Ghidra function(s) with invalid ranges after section clamping." << std::endl; + } std::sort(m_extraFunctions.begin(), m_extraFunctions.end(), [](const Function &a, const Function &b) @@ -908,6 +1049,13 @@ namespace ps2recomp return true; } + if (skippedNonExecutable > 0 || skippedInvalidRange > 0) + { + std::cout << "Loaded 0 functions from Ghidra map after filtering (" + << skippedNonExecutable << " non-executable, " + << skippedInvalidRange << " invalid range)." 
<< std::endl; + } + return false; } diff --git a/ps2xRecomp/src/lib/ps2_recompiler.cpp b/ps2xRecomp/src/lib/ps2_recompiler.cpp index e52400b9..a85cb2a9 100644 --- a/ps2xRecomp/src/lib/ps2_recompiler.cpp +++ b/ps2xRecomp/src/lib/ps2_recompiler.cpp @@ -285,6 +285,13 @@ namespace ps2recomp return std::nullopt; }; + auto isSimpleReturnThunkStart = [](const Instruction &inst) -> bool + { + return inst.opcode == OPCODE_SPECIAL && + inst.function == SPECIAL_JR && + inst.rs == 31; + }; + auto findContainingFunction = [&](uint32_t address) -> const Function * { const Function *best = nullptr; @@ -460,6 +467,16 @@ namespace ps2recomp sliceEndAddress = nextStartOpt.value(); } + if (isSimpleReturnThunkStart(*sliceIt) && + target <= (std::numeric_limits::max() - 8u)) + { + const uint32_t returnThunkEnd = target + 8u; + if (returnThunkEnd < sliceEndAddress) + { + sliceEndAddress = returnThunkEnd; + } + } + if (sliceEndAddress <= target) { continue; @@ -837,6 +854,7 @@ namespace ps2recomp } m_codeGenerator->setRelocationCallNames(relocationCallNames); m_codeGenerator->setBootstrapInfo(m_bootstrapInfo); + m_codeGenerator->setConfiguredJumpTables(m_config.jumpTables); fs::create_directories(m_config.outputPath); diff --git a/ps2xRecomp/src/lib/r5900_decoder.cpp b/ps2xRecomp/src/lib/r5900_decoder.cpp index 99e1b507..463bddae 100644 --- a/ps2xRecomp/src/lib/r5900_decoder.cpp +++ b/ps2xRecomp/src/lib/r5900_decoder.cpp @@ -162,6 +162,15 @@ namespace ps2recomp inst.isMultimedia = true; } + if (inst.opcode == OPCODE_SPECIAL) + { + decodeSpecial(inst); + } + else if (inst.opcode == OPCODE_MMI) + { + decodeMMI(inst); + } + if (inst.isMMI || inst.isVU) { inst.isMultimedia = true; @@ -258,16 +267,18 @@ namespace ps2recomp inst.modificationInfo.modifiesGPR = false; // Doesn't modify rd inst.modificationInfo.modifiesControl = true; // HI/LO break; - - case SPECIAL_MULT: - case SPECIAL_MULTU: case SPECIAL_DIV: case SPECIAL_DIVU: // Multiplication and division operations 
inst.modificationInfo.modifiesGPR = false; // Doesn't modify rd inst.modificationInfo.modifiesControl = true; // HI/LO break; - + case SPECIAL_MULT: + case SPECIAL_MULTU: + // R5900 MULT/MULTU also write rd when rd != 0. + inst.modificationInfo.modifiesGPR = (inst.rd != 0); + inst.modificationInfo.modifiesControl = true; // HI/LO + break; case SPECIAL_ADD: case SPECIAL_ADDU: case SPECIAL_SUB: @@ -473,11 +484,17 @@ namespace ps2recomp case MMI_MSUBU: case MMI_MADD1: case MMI_MADDU1: + inst.modificationInfo.modifiesGPR = (inst.rd != 0); // Also writes rd on R5900 I checkd on EE manual + inst.modificationInfo.modifiesControl = true; + break; case MMI_MULT1: case MMI_MULTU1: + inst.modificationInfo.modifiesGPR = (inst.rd != 0); // same + inst.modificationInfo.modifiesControl = true; + break; case MMI_DIV1: case MMI_DIVU1: - inst.modificationInfo.modifiesGPR = false; // Writes to HI/LO or HI1/LO1 + inst.modificationInfo.modifiesGPR = false; // Writes to HI1/LO1 inst.modificationInfo.modifiesControl = true; break; case MMI_PMTHL: @@ -490,7 +507,6 @@ namespace ps2recomp decodePMFHL(inst); break; default: - // Unknown or unsupported MMI function std::cerr << "Unknown MMI function: " << std::hex << mmiFunction << std::endl; break; } diff --git a/ps2xRecomp/tools/ghidra/ExportPS2Functions.java b/ps2xRecomp/tools/ghidra/ExportPS2Functions.java index 41982581..c6474788 100644 --- a/ps2xRecomp/tools/ghidra/ExportPS2Functions.java +++ b/ps2xRecomp/tools/ghidra/ExportPS2Functions.java @@ -1,4 +1,4 @@ -// Exports function addresses and names to CSV for PS2Recomp +// Exports PS2Recomp TOML config (+ optional CSV) from Ghidra // @category PS2Recomp import ghidra.app.script.GhidraScript; @@ -9,46 +9,429 @@ import java.io.File; import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; +import 
java.util.regex.Pattern; public class ExportPS2Functions extends GhidraScript { + private static final Set SYSTEM_FUNCTION_NAMES = new HashSet<>(Arrays.asList( + "entry", "_start", "_init", "_fini", + "abort", "exit", "_exit", + "_profiler_start", "_profiler_stop", + "__main", "__do_global_ctors", "__do_global_dtors", + "_GLOBAL__sub_I_", "_GLOBAL__sub_D_", + "__ctor_list", "__dtor_list", "_edata", "_end", + "etext", "__exidx_start", "__exidx_end", + "_ftext", "__bss_start", "__bss_start__", + "__bss_end__", "__end__", "_stack", "_dso_handle" + )); + + private static final Set DO_NOT_SKIP_OR_STUB = new HashSet<>(Arrays.asList( + "entry", + "_start", + "_init", + "topThread", + "cmd_sem_init" + )); + + private static final Set PS2_API_PREFIXES = new HashSet<>(Arrays.asList( + "sce", "sif", "pad", "gs", "dma", "iop", "vif", "spu", "mc", "libc" + )); + + private static final Set KNOWN_STDLIB_NAMES = new HashSet<>(Arrays.asList( + "printf", "sprintf", "snprintf", "fprintf", "vprintf", "vfprintf", "vsprintf", "vsnprintf", + "puts", "putchar", "getchar", "gets", "fgets", "fputs", "scanf", "fscanf", "sscanf", + "sprint", "sbprintf", + "malloc", "free", "calloc", "realloc", "aligned_alloc", "posix_memalign", + "memcpy", "memset", "memmove", "memcmp", "memcpy2", "memchr", "bcopy", "bzero", + "strcpy", "strncpy", "strcat", "strncat", "strcmp", "strncmp", "strlen", "strstr", + "strchr", "strrchr", "strdup", "strtok", "strtok_r", "strerror", + "fopen", "fclose", "fread", "fwrite", "fseek", "ftell", "rewind", "fflush", + "fgetc", "feof", "ferror", "clearerr", "fileno", "tmpfile", "remove", "rename", + "open", "close", "read", "write", "lseek", "stat", "fstat", + "atoi", "atol", "atoll", "atof", "strtol", "strtoul", "strtoll", "strtoull", "strtod", "strtof", + "rand", "srand", "random", "srandom", "drand48", "sqrt", "pow", "exp", "log", "log10", + "sin", "cos", "tan", "asin", "acos", "atan", "atan2", "sinh", "cosh", "tanh", + "floor", "ceil", "fabs", "fmod", "frexp", "ldexp", 
"modf", + "time", "ctime", "clock", "difftime", "mktime", "localtime", "gmtime", "asctime", "strftime", + "gettimeofday", "nanosleep", "usleep", + "atexit", "system", "getpid", "fork", "waitpid", + "qsort", "bsearch", "abs", "div", "labs", "ldiv", "llabs", "lldiv", + "isalnum", "isalpha", "isdigit", "islower", "isupper", "isspace", "tolower", "toupper", + "setjmp", "longjmp", "getenv", "setenv", "unsetenv", + "perror", "fputc", "getc", "ungetc", "freopen", "setvbuf", "setbuf", + "strnlen", "strspn", "strcspn", "strcasecmp", "strncasecmp" + )); + + private static final Pattern C_LIB_PATTERN = Pattern.compile( + "^_*(mem|str|time|f?printf|f?scanf|malloc|free|calloc|realloc|atoi|itoa|rand|srand|abort|exit|atexit|getenv|system|bsearch|qsort|abs|labs|div|ldiv|mblen|mbtowc|wctomb|mbstowcs|wcstombs).*" + ); + + private static final Pattern KERNEL_RUNTIME_NAME_PATTERN = Pattern.compile( + "^(?:" + + "(?:Create|Delete|Start|ExitDelete|Exit|Terminate|Suspend|Resume|Sleep|Wakeup|CancelWakeup|Change|Rotate|Release|Setup|Register|Query|Get|Set|Refer|Poll|Wait|Signal|Enable|Disable|Flush|Reset|Add|Init)" + + "(?:Thread|Sema|EventFlag|Alarm|Intc|IntcHandler2|Dmac|DmacHandler2|OsdConfigParam|MemorySize|VSyncFlag|Heap|TLS|Status|Cache|Syscall|TLB|TLBEntry|GsCrt)" + + "|EndOfHeap" + + "|GsGetIMR|GsPutIMR" + + "|Deci2Call" + + "|Sif[A-Za-z0-9_]+" + + "|i(?:SignalSema|PollSema|ReferSemaStatus|SetEventFlag|ClearEventFlag|PollEventFlag|ReferEventFlagStatus|WakeupThread|CancelWakeupThread|ReleaseWaitThread|SetAlarm|CancelAlarm|FlushCache|sceSifSetDma|sceSifSetDChain)" + + ")$" + ); + + private static final class FunctionRecord { + String name; + long start; + long endExclusive; + long size; + } + + private enum ClassificationKind { + STUB, + SKIP, + NONE + } + + private static final class ClassificationResult { + final ClassificationKind kind; + final String name; + + ClassificationResult(ClassificationKind kind, String name) { + this.kind = kind; + this.name = name; + } + } + + private 
static String hex(long value) { + return String.format("0x%08X", value & 0xFFFFFFFFL); + } + + private static String tomlString(String value) { + if (value == null) { + return "\"\""; + } + return "\"" + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\""; + } + + private static String normalizeOptionalLeadingUnderscore(String value) { + if (value == null || value.isEmpty()) { + return ""; + } + return value.startsWith("_") && value.length() > 1 ? value.substring(1) : value; + } + + private static boolean hasReliableSymbolName(String name) { + if (name == null || name.isEmpty()) { + return false; + } + + if (name.startsWith("sub_") || name.startsWith("FUN_") || name.startsWith("func_") || + name.startsWith("entry_") || name.startsWith("function_") || name.startsWith("LAB_")) { + return false; + } + + boolean hasAlpha = false; + boolean allHexOrPrefix = true; + for (int i = 0; i < name.length(); ++i) { + char c = name.charAt(i); + if (Character.isAlphabetic(c)) { + hasAlpha = true; + } + if (!(Character.digit(c, 16) >= 0 || c == 'x' || c == 'X' || c == '_')) { + allHexOrPrefix = false; + } + } + + if (!hasAlpha) { + return false; + } + + if ((name.startsWith("0x") || name.startsWith("0X")) && allHexOrPrefix) { + return false; + } + + return true; + } + + private static boolean hasPs2ApiPrefix(String name) { + if (name == null || name.isEmpty()) { + return false; + } + + String base = normalizeOptionalLeadingUnderscore(name).toLowerCase(); + for (String prefix : PS2_API_PREFIXES) { + if (base.startsWith(prefix)) { + return true; + } + } + return false; + } + + private static boolean isSystemSymbolNameForHeuristics(String name) { + if (!hasReliableSymbolName(name)) { + return false; + } + + return SYSTEM_FUNCTION_NAMES.contains(name) || name.startsWith("__") || name.startsWith("."); + } + + private static boolean matchesWithOptionalLeadingUnderscoreAlias(String candidate, Set names) { + if (candidate == null || candidate.isEmpty() || names == null || 
names.isEmpty()) { + return false; + } + + if (names.contains(candidate)) { + return true; + } + + String normalized = normalizeOptionalLeadingUnderscore(candidate); + if (!normalized.equals(candidate) && names.contains(normalized)) { + return true; + } + + if (!candidate.startsWith("_") && names.contains("_" + candidate)) { + return true; + } + + return false; + } + + private static boolean isLibraryFunctionName(String name) { + if (name == null || name.isEmpty() || !hasReliableSymbolName(name)) { + return false; + } + + String normalized = normalizeOptionalLeadingUnderscore(name); + if (KERNEL_RUNTIME_NAME_PATTERN.matcher(normalized).matches()) { + return true; + } + + if (matchesWithOptionalLeadingUnderscoreAlias(normalized, KNOWN_STDLIB_NAMES)) { + return true; + } + + if (hasPs2ApiPrefix(normalized)) { + return true; + } + + return C_LIB_PATTERN.matcher(normalized).matches(); + } + + private static ClassificationResult classifyFunction(Function function) { + if (function == null) { + return new ClassificationResult(ClassificationKind.NONE, ""); + } + + String name = function.getName(); + if (name == null || name.isEmpty() || DO_NOT_SKIP_OR_STUB.contains(name)) { + return new ClassificationResult(ClassificationKind.NONE, name == null ? 
"" : name); + } + + if (function.isThunk()) { + if (isLibraryFunctionName(name)) { + return new ClassificationResult(ClassificationKind.STUB, name); + } + + Function target = function.getThunkedFunction(true); + if (target != null) { + String targetName = target.getName(); + if (isLibraryFunctionName(targetName)) { + return new ClassificationResult(ClassificationKind.STUB, targetName); + } + } + + if (isSystemSymbolNameForHeuristics(name)) { + return new ClassificationResult(ClassificationKind.SKIP, name); + } + + return new ClassificationResult(ClassificationKind.NONE, name); + } + + if (isLibraryFunctionName(name)) { + return new ClassificationResult(ClassificationKind.STUB, name); + } + + if (isSystemSymbolNameForHeuristics(name)) { + return new ClassificationResult(ClassificationKind.SKIP, name); + } + + return new ClassificationResult(ClassificationKind.NONE, name); + } + + private static String makeSelector(String name, long start, boolean includeAddress) { + if (includeAddress) { + return name + "@" + hex(start); + } + return name; + } + + private static List collectFunctionSelectors( + Set names, + List records, + boolean includeAddress + ) { + List ordered = new ArrayList<>(records); + ordered.sort(Comparator.comparingLong(r -> r.start)); + + List selectors = new ArrayList<>(); + Set seenSelectors = new LinkedHashSet<>(); + Set coveredNames = new HashSet<>(); + + for (FunctionRecord record : ordered) { + if (record.name == null || !names.contains(record.name)) { + continue; + } + + coveredNames.add(record.name); + String selector = makeSelector(record.name, record.start, includeAddress); + if (seenSelectors.add(selector)) { + selectors.add(selector); + } + } + + if (includeAddress) { + List unresolved = new ArrayList<>(); + for (String name : names) { + if (!coveredNames.contains(name)) { + unresolved.add(name); + } + } + Collections.sort(unresolved); + for (String name : unresolved) { + System.out.println("Warning: unresolved selector name without 
address, omitting from TOML: " + name); + } + } else { + Collections.sort(selectors); + } + + return selectors; + } + @Override public void run() throws Exception { - File file = askFile("Choose output CSV file", "Save"); - - if (file == null) { + File tomlFile = askFile("Choose output TOML config file", "Save"); + if (tomlFile == null) { return; } - int count = 0; - try (PrintWriter writer = new PrintWriter(file)) { - writer.println("Name,Start,End,Size"); - - FunctionManager fm = currentProgram.getFunctionManager(); - FunctionIterator it = fm.getFunctions(true); - - while (it.hasNext() && !monitor.isCancelled()) { - Function func = it.next(); - - String name = func.getName(); - long start = func.getEntryPoint().getOffset(); - - AddressSetView body = func.getBody(); - long maxAddr = body.getMaxAddress().getOffset(); - - long size = body.getNumAddresses(); - - writer.printf("%s,0x%08X,0x%08X,%d%n", - name, - start, - maxAddr + 1, // End address is exclusive - size - ); - - count++; - } - } - - println(String.format("Exported %d functions to %s", count, file.getAbsolutePath())); + boolean exportCsv = askYesNo("Export CSV", "Also export compatibility CSV function map?"); + File csvFile = null; + if (exportCsv) { + csvFile = askFile("Choose output CSV file", "Save"); + if (csvFile == null) { + exportCsv = false; + } + } + + FunctionManager fm = currentProgram.getFunctionManager(); + FunctionIterator it = fm.getFunctions(true); + + List functionRecords = new ArrayList<>(); + Set stubNames = new LinkedHashSet<>(); + Set skipNames = new LinkedHashSet<>(); + int uncategorizedCount = 0; + + while (it.hasNext() && !monitor.isCancelled()) { + Function func = it.next(); + + AddressSetView body = func.getBody(); + if (body == null || body.getNumAddresses() == 0) { + continue; + } + + FunctionRecord record = new FunctionRecord(); + record.name = func.getName(); + record.start = func.getEntryPoint().getOffset(); + record.endExclusive = body.getMaxAddress().getOffset() + 1L; + 
record.size = body.getNumAddresses(); + functionRecords.add(record); + + ClassificationResult classification = classifyFunction(func); + if (classification.kind == ClassificationKind.STUB) { + stubNames.add(classification.name); + } else if (classification.kind == ClassificationKind.SKIP) { + skipNames.add(classification.name); + } else { + uncategorizedCount++; + } + } + + List stubSelectors = collectFunctionSelectors(stubNames, functionRecords, true); + List skipSelectors = collectFunctionSelectors(skipNames, functionRecords, true); + + if (exportCsv && csvFile != null) { + try (PrintWriter writer = new PrintWriter(csvFile)) { + writer.println("Name,Start,End,Size"); + functionRecords.sort(Comparator.comparingLong(r -> r.start)); + for (FunctionRecord record : functionRecords) { + writer.printf("%s,0x%08X,0x%08X,%d%n", + record.name, + record.start, + record.endExclusive, + record.size + ); + } + } + } + + String programPath = currentProgram.getExecutablePath(); + if (programPath == null) { + programPath = ""; + } + + File outputDir = tomlFile.getParentFile() == null ? new File("output") : new File(tomlFile.getParentFile(), "output"); + String ghidraCsvPath = (exportCsv && csvFile != null) ? 
csvFile.getAbsolutePath() : ""; + + try (PrintWriter writer = new PrintWriter(tomlFile)) { + writer.println("# Auto-generated by ExportPS2Functions.java"); + writer.println("#"); + writer.println("# Classification policy (aligned with analyzer intent):"); + writer.println("# - library/runtime names -> [general].stubs"); + writer.println("# - system names -> [general].skip"); + writer.println("# - others are left for recompilation"); + writer.println(); + + writer.println("[general]"); + writer.println("input = " + tomlString(programPath)); + writer.println("output = " + tomlString(outputDir.getAbsolutePath())); + writer.println("ghidra_output = " + tomlString(ghidraCsvPath)); + writer.println("single_file_output = false"); + writer.println("patch_syscalls = false"); + writer.println("patch_cop0 = true"); + writer.println("patch_cache = true"); + writer.println("stubs = ["); + for (String selector : stubSelectors) { + writer.println(" " + tomlString(selector) + ","); + } + writer.println("]"); + writer.println("skip = ["); + for (String selector : skipSelectors) { + writer.println(" " + tomlString(selector) + ","); + } + writer.println("]"); + writer.println(); + + writer.println("[ghidra_export]"); + writer.println("function_count = " + functionRecords.size()); + writer.println("stub_count = " + stubSelectors.size()); + writer.println("skip_count = " + skipSelectors.size()); + writer.println("uncategorized_count = " + uncategorizedCount); + writer.println("runtime_call_name_count = 0"); + writer.println("runtime_call_source = \"regex_only\""); + } + + if (exportCsv && csvFile != null) { + println(String.format("Exported %d functions to %s", functionRecords.size(), csvFile.getAbsolutePath())); + } + + println("Using regex-only runtime/library classification (no ps2_call_list.h)."); + println(String.format("Exported TOML config to %s", tomlFile.getAbsolutePath())); } } diff --git a/ps2xRuntime/include/ps2_call_list.h b/ps2xRuntime/include/ps2_call_list.h index 
87b56a37..869ad2e6 100644 --- a/ps2xRuntime/include/ps2_call_list.h +++ b/ps2xRuntime/include/ps2_call_list.h @@ -4,6 +4,7 @@ #define PS2_SYSCALL_LIST(X) \ X(FlushCache) \ + X(iFlushCache) \ X(ResetEE) \ X(SetMemoryMode) \ \ @@ -17,13 +18,16 @@ X(ResumeThread) \ X(GetThreadId) \ X(ReferThreadStatus) \ + X(iReferThreadStatus) \ X(SleepThread) \ X(WakeupThread) \ X(iWakeupThread) \ X(CancelWakeupThread) \ X(iCancelWakeupThread) \ X(ChangeThreadPriority) \ + X(iChangeThreadPriority) \ X(RotateThreadReadyQueue) \ + X(iRotateThreadReadyQueue)\ X(ReleaseWaitThread) \ X(iReleaseWaitThread) \ \ @@ -54,10 +58,20 @@ X(CancelAlarm) \ X(iCancelAlarm) \ \ + X(AddIntcHandler) \ + X(AddIntcHandler2) \ + X(RemoveIntcHandler) \ + X(AddDmacHandler) \ + X(AddDmacHandler2) \ + X(RemoveDmacHandler) \ X(EnableIntc) \ + X(iEnableIntc) \ X(DisableIntc) \ + X(iDisableIntc) \ X(EnableDmac) \ + X(iEnableDmac) \ X(DisableDmac) \ + X(iDisableDmac) \ \ X(SifStopModule) \ X(SifLoadModule) \ @@ -84,9 +98,13 @@ X(fioGetstat) \ X(fioRemove) \ \ + X(SetGsCrt) \ X(GsSetCrt) \ X(GsGetIMR) \ + X(iGsGetIMR) \ X(GsPutIMR) \ + X(iGsPutIMR) \ + X(SetVSyncFlag) \ X(GsSetVideoMode) \ \ X(GetOsdConfigParam) \ @@ -99,6 +117,9 @@ X(sceSifLoadModuleBuffer) \ \ X(SetupThread) \ + X(EndOfHeap) \ + X(GetMemorySize) \ + X(Deci2Call) \ X(QueryBootMode) \ X(GetThreadTLS) \ X(RegisterExitHandler) @@ -621,4 +642,5 @@ X(syHwInit2) \ X(syMallocInit) \ X(syRtcInit) \ + X(InitThread) \ /* Game/middleware */ diff --git a/ps2xRuntime/include/ps2_gif_arbiter.h b/ps2xRuntime/include/ps2_gif_arbiter.h index 6fc4a523..600076b5 100644 --- a/ps2xRuntime/include/ps2_gif_arbiter.h +++ b/ps2xRuntime/include/ps2_gif_arbiter.h @@ -15,6 +15,8 @@ enum class GifPathId : uint8_t struct GifArbiterPacket { GifPathId pathId; + bool path2DirectHl = false; + bool path3Image = false; std::vector data; }; @@ -28,7 +30,7 @@ class GifArbiter void setProcessPacketFn(ProcessPacketFn fn) { m_processFn = std::move(fn); } - void submit(GifPathId pathId, 
const uint8_t *data, uint32_t sizeBytes); + void submit(GifPathId pathId, const uint8_t *data, uint32_t sizeBytes, bool path2DirectHl = false); void drain(); @@ -36,6 +38,7 @@ class GifArbiter ProcessPacketFn m_processFn; std::vector m_queue; + static bool isImagePacket(const uint8_t *data, uint32_t sizeBytes); static uint8_t pathPriority(GifPathId id); }; diff --git a/ps2xRuntime/include/ps2_iop.h b/ps2xRuntime/include/ps2_iop.h index b531eaeb..4f7aac10 100644 --- a/ps2xRuntime/include/ps2_iop.h +++ b/ps2xRuntime/include/ps2_iop.h @@ -5,11 +5,11 @@ constexpr uint32_t IOP_SID_LIBSD = 0x80000701u; -class IOP +class ps2_iop { public: - IOP(); - ~IOP() = default; + ps2_iop(); + ~ps2_iop() = default; void init(uint8_t *rdram); void reset(); diff --git a/ps2xRuntime/include/ps2_memory.h b/ps2xRuntime/include/ps2_memory.h index 85642fef..c95187bd 100644 --- a/ps2xRuntime/include/ps2_memory.h +++ b/ps2xRuntime/include/ps2_memory.h @@ -283,8 +283,9 @@ class PS2Memory const uint8_t *getVU1Data() const { return m_vu1Data; } bool isPath3Masked() const { return m_path3Masked; } + void flushMaskedPath3Packets(bool drainImmediately = true); - void submitGifPacket(GifPathId pathId, const uint8_t *data, uint32_t sizeBytes, bool drainImmediately = true); + void submitGifPacket(GifPathId pathId, const uint8_t *data, uint32_t sizeBytes, bool drainImmediately = true, bool path2DirectHl = false); void processGIFPacket(uint32_t srcPhysAddr, uint32_t qwCount); void processGIFPacket(const uint8_t *data, uint32_t sizeBytes); void processVIF1Data(uint32_t srcPhysAddr, uint32_t sizeBytes); @@ -295,6 +296,7 @@ class PS2Memory // Track code modifications for self-modifying code void registerCodeRegion(uint32_t start, uint32_t end); + bool isCodeAddress(uint32_t address) const; bool isCodeModified(uint32_t address, uint32_t size); void clearModifiedFlag(uint32_t address, uint32_t size); @@ -346,6 +348,7 @@ class PS2Memory uint8_t *m_vu1Code = nullptr; uint8_t *m_vu1Data = nullptr; bool 
m_path3Masked = false; + std::vector> m_path3MaskedFifo; struct PendingTransfer { diff --git a/ps2xRuntime/include/ps2_runtime.h b/ps2xRuntime/include/ps2_runtime.h index 27340dd6..4480b64b 100644 --- a/ps2xRuntime/include/ps2_runtime.h +++ b/ps2xRuntime/include/ps2_runtime.h @@ -202,9 +202,9 @@ inline void setReturnU64(R5900Context *ctx, uint64_t value) ctx->r[3] = _mm_set_epi64x(0, static_cast(static_cast(value >> 32))); } -inline constexpr uint32_t PS2_PATH_WATCH_ADDR = 0x00369F2Fu; -inline constexpr uint32_t PS2_PATH_WATCH_BYTES = 32u; -inline constexpr uint32_t PS2_PATH_WATCH_MAX_LOGS = 512u; +inline constexpr uint32_t PS2_PATH_WATCH_ADDR = 0x01EFFFA0u; +inline constexpr uint32_t PS2_PATH_WATCH_BYTES = 0x200u; +inline constexpr uint32_t PS2_PATH_WATCH_MAX_LOGS = 4096u; inline std::atomic g_ps2PathWatchLogCount{0}; inline uint32_t ps2PathWatchPhysAddr() @@ -470,8 +470,8 @@ class PS2Runtime inline VU1Interpreter &vu1() { return m_vu1; } inline const VU1Interpreter &vu1() const { return m_vu1; } - inline IOP &iop() { return m_iop; } - inline const IOP &iop() const { return m_iop; } + inline ps2_iop &iop() { return m_iop; } + inline const ps2_iop &iop() const { return m_iop; } inline PS2AudioBackend &audioBackend() { return m_audioBackend; } inline const PS2AudioBackend &audioBackend() const { return m_audioBackend; } inline PSPadBackend &padBackend() { return m_padBackend; } @@ -503,7 +503,7 @@ class PS2Runtime PS2Memory m_memory; GifArbiter m_gifArbiter; GS m_gs; - IOP m_iop; + ps2_iop m_iop; PS2AudioBackend m_audioBackend; PSPadBackend m_padBackend; VU1Interpreter m_vu1; diff --git a/ps2xRuntime/include/ps2_runtime_macros.h b/ps2xRuntime/include/ps2_runtime_macros.h index 5f24d107..27c66839 100644 --- a/ps2xRuntime/include/ps2_runtime_macros.h +++ b/ps2xRuntime/include/ps2_runtime_macros.h @@ -152,6 +152,11 @@ static inline uint32_t ps2_plzcw32(uint32_t x) // Fast path: Direct RDRAM access (masked). 
// Slow path: Full runtime->Load/Store +static inline bool Ps2FastRangeIsContiguous(uint32_t offset, uint32_t bytes) +{ + return offset <= (PS2_RAM_SIZE - bytes); +} + static inline uint8_t Ps2FastRead8(const uint8_t *rdram, uint32_t addr) { return rdram[addr & PS2_RAM_MASK]; @@ -159,29 +164,81 @@ static inline uint8_t Ps2FastRead8(const uint8_t *rdram, uint32_t addr) static inline uint16_t Ps2FastRead16(const uint8_t *rdram, uint32_t addr) { + const uint32_t offset = addr & PS2_RAM_MASK; + if (!Ps2FastRangeIsContiguous(offset, sizeof(uint16_t))) + { + uint8_t wrapped[sizeof(uint16_t)]; + for (uint32_t i = 0; i < sizeof(uint16_t); ++i) + { + wrapped[i] = rdram[(offset + i) & PS2_RAM_MASK]; + } + uint16_t value; + std::memcpy(&value, wrapped, sizeof(value)); + return value; + } + uint16_t value; - std::memcpy(&value, rdram + (addr & PS2_RAM_MASK), sizeof(value)); + std::memcpy(&value, rdram + offset, sizeof(value)); return value; } static inline uint32_t Ps2FastRead32(const uint8_t *rdram, uint32_t addr) { + const uint32_t offset = addr & PS2_RAM_MASK; + if (!Ps2FastRangeIsContiguous(offset, sizeof(uint32_t))) + { + uint8_t wrapped[sizeof(uint32_t)]; + for (uint32_t i = 0; i < sizeof(uint32_t); ++i) + { + wrapped[i] = rdram[(offset + i) & PS2_RAM_MASK]; + } + uint32_t value; + std::memcpy(&value, wrapped, sizeof(value)); + return value; + } + uint32_t value; - std::memcpy(&value, rdram + (addr & PS2_RAM_MASK), sizeof(value)); + std::memcpy(&value, rdram + offset, sizeof(value)); return value; } static inline uint64_t Ps2FastRead64(const uint8_t *rdram, uint32_t addr) { + const uint32_t offset = addr & PS2_RAM_MASK; + if (!Ps2FastRangeIsContiguous(offset, sizeof(uint64_t))) + { + uint8_t wrapped[sizeof(uint64_t)]; + for (uint32_t i = 0; i < sizeof(uint64_t); ++i) + { + wrapped[i] = rdram[(offset + i) & PS2_RAM_MASK]; + } + uint64_t value; + std::memcpy(&value, wrapped, sizeof(value)); + return value; + } + uint64_t value; - std::memcpy(&value, rdram + (addr & 
PS2_RAM_MASK), sizeof(value)); + std::memcpy(&value, rdram + offset, sizeof(value)); return value; } static inline __m128i Ps2FastRead128(const uint8_t *rdram, uint32_t addr) { + const uint32_t offset = addr & PS2_RAM_MASK; + if (!Ps2FastRangeIsContiguous(offset, sizeof(__m128i))) + { + alignas(16) uint8_t wrapped[sizeof(__m128i)]; + for (uint32_t i = 0; i < sizeof(__m128i); ++i) + { + wrapped[i] = rdram[(offset + i) & PS2_RAM_MASK]; + } + __m128i value; + std::memcpy(&value, wrapped, sizeof(value)); + return value; + } + __m128i value; - std::memcpy(&value, rdram + (addr & PS2_RAM_MASK), sizeof(value)); + std::memcpy(&value, rdram + offset, sizeof(value)); return value; } @@ -192,22 +249,66 @@ static inline void Ps2FastWrite8(uint8_t *rdram, uint32_t addr, uint8_t value) static inline void Ps2FastWrite16(uint8_t *rdram, uint32_t addr, uint16_t value) { - std::memcpy(rdram + (addr & PS2_RAM_MASK), &value, sizeof(value)); + const uint32_t offset = addr & PS2_RAM_MASK; + if (!Ps2FastRangeIsContiguous(offset, sizeof(uint16_t))) + { + uint8_t wrapped[sizeof(uint16_t)]; + std::memcpy(wrapped, &value, sizeof(value)); + for (uint32_t i = 0; i < sizeof(uint16_t); ++i) + { + rdram[(offset + i) & PS2_RAM_MASK] = wrapped[i]; + } + return; + } + std::memcpy(rdram + offset, &value, sizeof(value)); } static inline void Ps2FastWrite32(uint8_t *rdram, uint32_t addr, uint32_t value) { - std::memcpy(rdram + (addr & PS2_RAM_MASK), &value, sizeof(value)); + const uint32_t offset = addr & PS2_RAM_MASK; + if (!Ps2FastRangeIsContiguous(offset, sizeof(uint32_t))) + { + uint8_t wrapped[sizeof(uint32_t)]; + std::memcpy(wrapped, &value, sizeof(value)); + for (uint32_t i = 0; i < sizeof(uint32_t); ++i) + { + rdram[(offset + i) & PS2_RAM_MASK] = wrapped[i]; + } + return; + } + std::memcpy(rdram + offset, &value, sizeof(value)); } static inline void Ps2FastWrite64(uint8_t *rdram, uint32_t addr, uint64_t value) { - std::memcpy(rdram + (addr & PS2_RAM_MASK), &value, sizeof(value)); + const 
uint32_t offset = addr & PS2_RAM_MASK; + if (!Ps2FastRangeIsContiguous(offset, sizeof(uint64_t))) + { + uint8_t wrapped[sizeof(uint64_t)]; + std::memcpy(wrapped, &value, sizeof(value)); + for (uint32_t i = 0; i < sizeof(uint64_t); ++i) + { + rdram[(offset + i) & PS2_RAM_MASK] = wrapped[i]; + } + return; + } + std::memcpy(rdram + offset, &value, sizeof(value)); } static inline void Ps2FastWrite128(uint8_t *rdram, uint32_t addr, __m128i value) { - std::memcpy(rdram + (addr & PS2_RAM_MASK), &value, sizeof(value)); + const uint32_t offset = addr & PS2_RAM_MASK; + if (!Ps2FastRangeIsContiguous(offset, sizeof(__m128i))) + { + alignas(16) uint8_t wrapped[sizeof(__m128i)]; + std::memcpy(wrapped, &value, sizeof(value)); + for (uint32_t i = 0; i < sizeof(__m128i); ++i) + { + rdram[(offset + i) & PS2_RAM_MASK] = wrapped[i]; + } + return; + } + std::memcpy(rdram + offset, &value, sizeof(value)); } #define FAST_READ8(addr) Ps2FastRead8(rdram, (uint32_t)(addr)) @@ -304,16 +405,20 @@ static inline void Ps2FastWrite128(uint8_t *rdram, uint32_t addr, __m128i value) } \ } while (0) -#define WRITE128(addr, val) \ - do \ - { \ - uint32_t _addr = (addr); \ - if (PS2Runtime::isSpecialAddress(_addr)) \ - runtime->Store128(rdram, ctx, _addr, (val)); \ - else \ - { \ - FAST_WRITE128(_addr, (val)); \ - } \ +#define WRITE128(addr, val) \ + do \ + { \ + uint32_t _addr = (addr); \ + __m128i _value = (val); \ + if (PS2Runtime::isSpecialAddress(_addr)) \ + runtime->Store128(rdram, ctx, _addr, _value); \ + else \ + { \ + const uint64_t _lo = static_cast(PS2_EXTRACT_EPI64_0(_value)); \ + const uint64_t _hi = static_cast(PS2_EXTRACT_EPI64_1(_value)); \ + ps2TraceGuestWrite(rdram, _addr, 16u, _lo, _hi, "WRITE128", ctx); \ + FAST_WRITE128(_addr, _value); \ + } \ } while (0) // Packed Compare Greater Than (PCGT) @@ -330,13 +435,13 @@ static inline void Ps2FastWrite128(uint8_t *rdram, uint32_t addr, __m128i value) #define PS2_PABSW(a) _mm_abs_epi32((__m128i)(a)) #define PS2_PABSH(a) 
_mm_abs_epi16((__m128i)(a)) #define PS2_PABSB(a) _mm_abs_epi8((__m128i)(a)) - + // Packed Pack (PPAC) - Packs larger elements into smaller ones inline __m128i ps2_paddu32(__m128i a, __m128i b) { __m128i sum = _mm_add_epi32(a, b); __m128i overflow = _mm_cmpgt_epi32(_mm_xor_si128(a, _mm_set1_epi32(INT32_MIN)), - _mm_xor_si128(sum, _mm_set1_epi32(INT32_MIN))); + _mm_xor_si128(sum, _mm_set1_epi32(INT32_MIN))); return _mm_or_si128(sum, overflow); // overflow lanes become all-1s } inline __m128i ps2_psubu32(__m128i a, __m128i b) @@ -344,7 +449,7 @@ inline __m128i ps2_psubu32(__m128i a, __m128i b) __m128i diff = _mm_sub_epi32(a, b); // Underflow if a < b (unsigned). Clamp to 0. __m128i underflow = _mm_cmpgt_epi32(_mm_xor_si128(b, _mm_set1_epi32(INT32_MIN)), - _mm_xor_si128(a, _mm_set1_epi32(INT32_MIN))); + _mm_xor_si128(a, _mm_set1_epi32(INT32_MIN))); return _mm_andnot_si128(underflow, diff); // underflow lanes become 0 } @@ -358,8 +463,8 @@ inline __m128i ps2_ppacw(__m128i rs, __m128i rt) inline __m128i ps2_ppach(__m128i rs, __m128i rt) { const __m128i mask = _mm_setr_epi8( - 0, 1, 4, 5, 8, 9, 12, 13, // from rt: halfwords 0,2,4,6 - 0, 1, 4, 5, 8, 9, 12, 13); // from rs: halfwords 0,2,4,6 + 0, 1, 4, 5, 8, 9, 12, 13, // from rt: halfwords 0,2,4,6 + 0, 1, 4, 5, 8, 9, 12, 13); // from rs: halfwords 0,2,4,6 __m128i lo = _mm_shuffle_epi8(rt, mask); __m128i hi = _mm_shuffle_epi8(rs, mask); return _mm_unpacklo_epi64(lo, hi); @@ -486,21 +591,25 @@ inline __m128i ps2_u64_to_epi64_pair(uint64_t value) // Concatenates rs || rt (256 bits) and right-shifts by SA bits, taking lower 128 bits. 
inline __m128i ps2_qfsrv(__m128i rs, __m128i rt, uint32_t sa) { - if (sa == 0) return rt; - if (sa >= 128) { - if (sa >= 256) return _mm_setzero_si128(); + if (sa == 0) + return rt; + if (sa >= 128) + { + if (sa >= 256) + return _mm_setzero_si128(); uint32_t shift = sa - 128; - if (shift == 0) return rs; + if (shift == 0) + return rs; // Shift rs right by (sa-128) bits uint32_t byteShift = shift / 8; uint32_t bitShift = shift % 8; // Byte shift rs right alignas(16) uint8_t buf[16] = {}; alignas(16) uint8_t src[16]; - _mm_store_si128((__m128i*)src, rs); + _mm_store_si128((__m128i *)src, rs); for (uint32_t i = 0; i + byteShift < 16; i++) buf[i] = src[i + byteShift]; - __m128i result = _mm_load_si128((__m128i*)buf); + __m128i result = _mm_load_si128((__m128i *)buf); if (bitShift > 0) result = _mm_or_si128(_mm_srli_epi64(result, bitShift), _mm_slli_epi64(_mm_bsrli_si128(result, 8), 64 - bitShift)); @@ -510,18 +619,20 @@ inline __m128i ps2_qfsrv(__m128i rs, __m128i rt, uint32_t sa) uint32_t byteShift = sa / 8; uint32_t bitShift = sa % 8; alignas(16) uint8_t combined[32]; - _mm_store_si128((__m128i*)(combined), rt); // low 128 bits - _mm_store_si128((__m128i*)(combined + 16), rs); // high 128 bits + _mm_store_si128((__m128i *)(combined), rt); // low 128 bits + _mm_store_si128((__m128i *)(combined + 16), rs); // high 128 bits // Shift right by byteShift bytes alignas(16) uint8_t shifted[16]; for (uint32_t i = 0; i < 16; i++) shifted[i] = (i + byteShift < 32) ? combined[i + byteShift] : 0; - __m128i result = _mm_load_si128((__m128i*)shifted); - if (bitShift > 0) { + __m128i result = _mm_load_si128((__m128i *)shifted); + if (bitShift > 0) + { uint8_t extra = (byteShift + 16 < 32) ? 
combined[byteShift + 16] : 0; __m128i hi_byte = _mm_insert_epi8(_mm_setzero_si128(), extra, 15); alignas(16) uint8_t src32[32]; - for (uint32_t i = 0; i < 32; i++) src32[i] = combined[i]; + for (uint32_t i = 0; i < 32; i++) + src32[i] = combined[i]; uint64_t lo0, lo1, hi0, hi1; std::memcpy(&lo0, src32, 8); std::memcpy(&lo1, src32 + 8, 8); @@ -529,15 +640,29 @@ inline __m128i ps2_qfsrv(__m128i rs, __m128i rt, uint32_t sa) std::memcpy(&hi1, src32 + 24, 8); // 256-bit right shift by sa bits uint64_t r0, r1; - if (sa < 64) { + if (sa < 64) + { r0 = (lo0 >> sa) | (lo1 << (64 - sa)); r1 = (lo1 >> sa) | (hi0 << (64 - sa)); - } else if (sa < 128) { + } + else if (sa < 128) + { uint32_t s = sa - 64; - if (s == 0) { r0 = lo1; r1 = hi0; } - else { r0 = (lo1 >> s) | (hi0 << (64 - s)); r1 = (hi0 >> s) | (hi1 << (64 - s)); } - } else { - r0 = 0; r1 = 0; // handled above + if (s == 0) + { + r0 = lo1; + r1 = hi0; + } + else + { + r0 = (lo1 >> s) | (hi0 << (64 - s)); + r1 = (hi0 >> s) | (hi1 << (64 - s)); + } + } + else + { + r0 = 0; + r1 = 0; // handled above } result = _mm_set_epi64x((long long)r1, (long long)r0); } @@ -567,15 +692,15 @@ static inline void Ps2SetGprLow64(R5900Context *ctx, int reg, __m128i new_low) } } -#define SET_GPR_U32(ctx_ptr, reg_idx, val) \ - do \ - { \ - if ((reg_idx) != 0) \ - { \ +#define SET_GPR_U32(ctx_ptr, reg_idx, val) \ + do \ + { \ + if ((reg_idx) != 0) \ + { \ __m128i _newVal = _mm_cvtsi64_si128((int64_t)(int32_t)(val)); \ - \ - Ps2SetGprLow64(ctx_ptr, reg_idx, _newVal); \ - } \ + \ + Ps2SetGprLow64(ctx_ptr, reg_idx, _newVal); \ + } \ } while (0) #define SET_GPR_S32(ctx_ptr, reg_idx, val) \ diff --git a/ps2xRuntime/include/ps2_stubs.h b/ps2xRuntime/include/ps2_stubs.h index 963caa94..f9386917 100644 --- a/ps2xRuntime/include/ps2_stubs.h +++ b/ps2xRuntime/include/ps2_stubs.h @@ -11,6 +11,9 @@ namespace ps2_stubs PS2_STUB_LIST(PS2_DECLARE_STUB) #undef PS2_DECLARE_STUB + void resetGsSyncVCallbackState(); + void dispatchGsSyncVCallback(uint8_t 
*rdram, PS2Runtime *runtime); + void syMalloc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime); void sndr_trans_func(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime); diff --git a/ps2xRuntime/src/lib/ps2_gif_arbiter.cpp b/ps2xRuntime/src/lib/ps2_gif_arbiter.cpp index 3a137b19..ca983d93 100644 --- a/ps2xRuntime/src/lib/ps2_gif_arbiter.cpp +++ b/ps2xRuntime/src/lib/ps2_gif_arbiter.cpp @@ -7,13 +7,26 @@ GifArbiter::GifArbiter(ProcessPacketFn processFn) { } -void GifArbiter::submit(GifPathId pathId, const uint8_t *data, uint32_t sizeBytes) +bool GifArbiter::isImagePacket(const uint8_t *data, uint32_t sizeBytes) +{ + if (!data || sizeBytes < 16u) + return false; + + uint64_t tagLo = 0; + std::memcpy(&tagLo, data, sizeof(tagLo)); + const uint8_t flg = static_cast((tagLo >> 58) & 0x3u); + return flg == 2u; +} + +void GifArbiter::submit(GifPathId pathId, const uint8_t *data, uint32_t sizeBytes, bool path2DirectHl) { if (!data || sizeBytes < 16 || !m_processFn) return; GifArbiterPacket pkt; pkt.pathId = pathId; + pkt.path2DirectHl = (pathId == GifPathId::Path2) && path2DirectHl; + pkt.path3Image = (pathId == GifPathId::Path3) && isImagePacket(data, sizeBytes); pkt.data.resize(sizeBytes); std::memcpy(pkt.data.data(), data, sizeBytes); m_queue.push_back(std::move(pkt)); @@ -26,6 +39,14 @@ void GifArbiter::drain() std::stable_sort(m_queue.begin(), m_queue.end(), [](const GifArbiterPacket &a, const GifArbiterPacket &b) { + // DIRECTHL cannot preempt PATH3 IMAGE transfers. 
+ if (a.path2DirectHl != b.path2DirectHl || a.path3Image != b.path3Image) + { + if (a.path3Image && b.path2DirectHl) + return true; + if (a.path2DirectHl && b.path3Image) + return false; + } return pathPriority(a.pathId) < pathPriority(b.pathId); }); diff --git a/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp b/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp index 4cdc407d..71c39c51 100644 --- a/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp +++ b/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp @@ -221,6 +221,13 @@ void GSRasterizer::drawSprite(GS *gs) if (x0 > x1) std::swap(x0, x1); if (y0 > y1) std::swap(y0, y1); + // If the sprite rectangle is fully outside scissor, nothing should render. + if (x1 < ctx.scissor.x0 || x0 > ctx.scissor.x1 || + y1 < ctx.scissor.y0 || y0 > ctx.scissor.y1) + { + return; + } + x0 = clampInt(x0, ctx.scissor.x0, ctx.scissor.x1); y0 = clampInt(y0, ctx.scissor.y0, ctx.scissor.y1); x1 = clampInt(x1, ctx.scissor.x0, ctx.scissor.x1); diff --git a/ps2xRuntime/src/lib/ps2_iop.cpp b/ps2xRuntime/src/lib/ps2_iop.cpp index 0a903564..41ce06d6 100644 --- a/ps2xRuntime/src/lib/ps2_iop.cpp +++ b/ps2xRuntime/src/lib/ps2_iop.cpp @@ -1,20 +1,20 @@ #include "ps2_iop.h" -IOP::IOP() +ps2_iop::ps2_iop() { reset(); } -void IOP::init(uint8_t *rdram) +void ps2_iop::init(uint8_t *rdram) { m_rdram = rdram; } -void IOP::reset() +void ps2_iop::reset() { } -bool IOP::handleRPC(uint32_t /*sid*/, uint32_t /*rpcNum*/, +bool ps2_iop::handleRPC(uint32_t /*sid*/, uint32_t /*rpcNum*/, uint32_t /*sendBufAddr*/, uint32_t /*sendSize*/, uint32_t /*recvBufAddr*/, uint32_t /*recvSize*/) { diff --git a/ps2xRuntime/src/lib/ps2_memory.cpp b/ps2xRuntime/src/lib/ps2_memory.cpp index afb73e32..4f64b62b 100644 --- a/ps2xRuntime/src/lib/ps2_memory.cpp +++ b/ps2xRuntime/src/lib/ps2_memory.cpp @@ -39,7 +39,8 @@ namespace inline uint64_t *gsRegPtr(GSRegisters &gs, uint32_t addr) { - uint32_t off = addr - PS2_GS_PRIV_REG_BASE; + // Support both 64-bit base offsets and +4 dword aliases. 
+ uint32_t off = (addr - PS2_GS_PRIV_REG_BASE) & ~0x7u; switch (off) { case 0x0000: @@ -234,6 +235,14 @@ uint32_t PS2Memory::translateAddress(uint32_t virtualAddress) return virtualAddress - PS2_SCRATCHPAD_BASE; } + // EE uncached aliases of main RAM (per PS2 memory map): + // 0x20000000-0x3FFFFFFF -> 32MB mirror of RDRAM + // This includes the accelerated window rooted at 0x30100000. + if (virtualAddress >= 0x20000000u && virtualAddress < 0x40000000u) + { + return virtualAddress & PS2_RAM_MASK; + } + // KSEG0/KSEG1 direct-mapped window. if (virtualAddress >= 0x80000000 && virtualAddress < 0xC0000000) { @@ -539,9 +548,24 @@ void PS2Memory::write32(uint32_t address, uint32_t value) if (reg) { uint32_t off = address & 7; - uint64_t mask = 0xFFFFFFFFULL << (off * 8); - uint64_t newVal = (*reg & ~mask) | ((uint64_t)value << (off * 8)); - *reg = newVal; + const uint32_t regOff = (address - PS2_GS_PRIV_REG_BASE) & ~0x7u; + if (regOff == 0x1000u && off == 0u) + { + // CSR low dword: bits 0..1 are write-one-to-clear status bits. + constexpr uint32_t kW1cMask = 0x3u; + uint64_t current = *reg; + uint32_t oldLow = static_cast(current & 0xFFFFFFFFull); + uint32_t mergedLow = (oldLow & kW1cMask) | (value & ~kW1cMask); + current = (current & 0xFFFFFFFF00000000ull) | static_cast(mergedLow); + current &= ~static_cast(value & kW1cMask); + *reg = current; + } + else + { + uint64_t mask = 0xFFFFFFFFULL << (off * 8); + uint64_t newVal = (*reg & ~mask) | ((uint64_t)value << (off * 8)); + *reg = newVal; + } } return; } @@ -578,7 +602,19 @@ void PS2Memory::write64(uint32_t address, uint64_t value) uint64_t *reg = gsRegPtr(gs_regs, address); if (reg) { - *reg = value; + const uint32_t regOff = (address - PS2_GS_PRIV_REG_BASE) & ~0x7u; + if (regOff == 0x1000u) + { + // CSR: bits 0..1 are write-one-to-clear status bits. 
+ constexpr uint64_t kW1cMask = 0x3ull; + uint64_t next = (*reg & kW1cMask) | (value & ~kW1cMask); + next &= ~(value & kW1cMask); + *reg = next; + } + else + { + *reg = value; + } } return; } @@ -636,6 +672,33 @@ void PS2Memory::write128(uint32_t address, __m128i value) bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) { + if (isGsPrivReg(address)) + { + m_ioRegisters[address] = value; + if (uint64_t *reg = gsRegPtr(gs_regs, address)) + { + const uint32_t off = address & 7u; + const uint32_t regOff = (address - PS2_GS_PRIV_REG_BASE) & ~0x7u; + if (regOff == 0x1000u && off == 0u) + { + constexpr uint32_t kW1cMask = 0x3u; + uint64_t current = *reg; + uint32_t oldLow = static_cast(current & 0xFFFFFFFFull); + uint32_t mergedLow = (oldLow & kW1cMask) | (value & ~kW1cMask); + current = (current & 0xFFFFFFFF00000000ull) | static_cast(mergedLow); + current &= ~static_cast(value & kW1cMask); + *reg = current; + } + else + { + const uint64_t mask = 0xFFFFFFFFull << (off * 8u); + *reg = (*reg & ~mask) | (static_cast(value) << (off * 8u)); + } + } + m_gsWriteCount.fetch_add(1, std::memory_order_relaxed); + return true; + } + if (address >= 0x10002000 && address <= 0x10002030) { if (address == 0x10002010) @@ -659,12 +722,103 @@ bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) return true; } + if (address == 0x1000E010u) + { + const uint32_t current = m_ioRegisters.count(address) ? m_ioRegisters[address] : 0u; + uint32_t status = current & 0x3FFu; + uint32_t mask = (current >> 16) & 0x3FFu; + + // D_STAT low bits are W1C status, high bits [16..25] toggle masks on write-one. 
+ status &= ~(value & 0x3FFu); + mask ^= ((value >> 16) & 0x3FFu); + + uint32_t next = (current & ~((0x3FFu) | (0x3FFu << 16) | (1u << 31))); + next |= status | (mask << 16); + if ((status & mask) != 0u) + next |= (1u << 31); + m_ioRegisters[address] = next; + return true; + } + m_ioRegisters[address] = value; + if (address >= 0x10003C00u && address < 0x10003E00u) + { + m_vifWriteCount.fetch_add(1, std::memory_order_relaxed); + + switch (address) + { + case 0x10003C10u: // VIF1_FBRST + if (value & 0x1u) // RST + { + std::memset(&vif1_regs, 0, sizeof(vif1_regs)); + } + if (value & 0x8u) // STC + { + vif1_regs.stat &= ~((1u << 8) | (1u << 9) | (1u << 10) | (1u << 11) | (1u << 12) | (1u << 13)); + } + break; + case 0x10003C30u: + vif1_regs.mark = value & 0xFFFFu; + vif1_regs.stat &= ~(1u << 6); // clear MRK flag on CPU write + break; + case 0x10003C40u: + vif1_regs.cycle = value & 0xFFFFu; + break; + case 0x10003C50u: + vif1_regs.mode = value & 0x3u; + break; + case 0x10003C60u: + vif1_regs.num = value & 0xFFu; + break; + case 0x10003C70u: + vif1_regs.mask = value; + break; + case 0x10003C80u: + vif1_regs.code = value; + break; + case 0x10003C90u: + vif1_regs.itops = value & 0x3FFu; + break; + case 0x10003CA0u: + vif1_regs.base = value & 0x3FFu; + break; + case 0x10003CB0u: + vif1_regs.ofst = value & 0x3FFu; + break; + case 0x10003CC0u: + vif1_regs.tops = value & 0x3FFu; + break; + case 0x10003CD0u: + vif1_regs.itop = value & 0x3FFu; + break; + case 0x10003CE0u: + vif1_regs.top = value & 0x3FFu; + break; + default: + break; + } + + return true; + } + + if (address >= 0x10003800u && address < 0x10003A00u) + { + m_vifWriteCount.fetch_add(1, std::memory_order_relaxed); + return true; + } + if (address >= 0x10008000 && address < 0x1000F000) { if ((address & 0xFF) == 0x00 && (value & 0x100)) { + const auto dctrlIt = m_ioRegisters.find(0x1000E000u); + const bool dmacEnabled = (dctrlIt == m_ioRegisters.end()) || ((dctrlIt->second & 0x1u) != 0u); + if (!dmacEnabled) + { + 
return true; + } + const uint32_t channelBase = address & 0xFFFFFF00; const uint32_t madr = m_ioRegisters[channelBase + 0x10]; const uint32_t qwc = m_ioRegisters[channelBase + 0x20]; @@ -697,6 +851,10 @@ bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) else if (mode == 1) { uint32_t tagAddr = m_ioRegisters[channelBase + 0x30]; + uint32_t asr0 = m_ioRegisters[channelBase + 0x40]; + uint32_t asr1 = m_ioRegisters[channelBase + 0x50]; + uint32_t asp = (chcr >> 4) & 0x3u; + const bool tieEnabled = (chcr & (1u << 7)) != 0u; const int kMaxChainTags = 4096; std::vector chainBuf; @@ -735,8 +893,6 @@ bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) chainBuf.insert(chainBuf.end(), base2 + src, base2 + src + bytes); }; - std::vector retStack; - retStack.reserve(8); int tagsProcessed = 0; while (tagsProcessed < kMaxChainTags) @@ -770,56 +926,92 @@ bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) uint64_t tag = loadScalar(tp, 0, 16, "dma chain tag", tagAddr); uint16_t tagQwc = static_cast(tag & 0xFFFF); uint32_t id = static_cast((tag >> 28) & 0x7); + const bool irq = ((tag >> 31) & 0x1ull) != 0ull; uint32_t addr = static_cast((tag >> 32) & 0x7FFFFFFF); ++tagsProcessed; + uint32_t dataAddr = 0; + bool hasPayload = (tagQwc > 0); + bool endChain = false; + switch (id) { case 0: - if (tagQwc > 0) - appendData(addr, tagQwc); - goto chain_done; + dataAddr = addr; + tagAddr = tagAddr + 16; + endChain = true; + break; case 1: - if (tagQwc > 0) - appendData(tagAddr + 16, tagQwc); - tagAddr = tagAddr + 16 + tagQwc * 16; + dataAddr = tagAddr + 16; + tagAddr = dataAddr + static_cast(tagQwc) * 16u; break; case 2: - if (tagQwc > 0) - appendData(tagAddr + 16, tagQwc); + dataAddr = tagAddr + 16; tagAddr = addr; break; case 3: case 4: - if (tagQwc > 0) - appendData(addr, tagQwc); + dataAddr = addr; tagAddr = tagAddr + 16; break; case 5: - if (tagQwc > 0) - appendData(addr, tagQwc); - if (retStack.size() < 16) - retStack.push_back(tagAddr + 16); 
+ dataAddr = tagAddr + 16; + { + const uint32_t retAddr = dataAddr + static_cast(tagQwc) * 16u; + if (asp == 0u) + { + asr0 = retAddr; + asp = 1u; + } + else if (asp == 1u) + { + asr1 = retAddr; + asp = 2u; + } + } tagAddr = addr; break; case 6: - if (!retStack.empty()) + dataAddr = tagAddr + 16; + if (asp == 2u) + { + tagAddr = asr1; + asp = 1u; + } + else if (asp == 1u) { - tagAddr = retStack.back(); - retStack.pop_back(); + tagAddr = asr0; + asp = 0u; } else - goto chain_done; + { + endChain = true; + } break; case 7: - if (tagQwc > 0) - appendData(tagAddr + 16, tagQwc); - goto chain_done; + dataAddr = tagAddr + 16; + endChain = true; + break; default: - goto chain_done; + hasPayload = false; + endChain = true; + break; } + + if (hasPayload) + appendData(dataAddr, tagQwc); + if (irq && tieEnabled) + endChain = true; + if (endChain) + break; } - chain_done: + + m_ioRegisters[channelBase + 0x30] = tagAddr; + m_ioRegisters[channelBase + 0x40] = asr0; + m_ioRegisters[channelBase + 0x50] = asr1; + chcr = (chcr & ~(0x3u << 4)) | ((asp & 0x3u) << 4); + m_ioRegisters[channelBase + 0x00] = chcr; + if (!chainBuf.empty()) { PendingTransfer pt; @@ -844,14 +1036,6 @@ bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) if (address >= 0x10000000 && address < 0x10010000) { - if (address >= 0x10003800 && address < 0x10003A00) - { - m_vifWriteCount.fetch_add(1, std::memory_order_relaxed); - } - if (address >= 0x10003C00 && address < 0x10003E00) - { - m_vifWriteCount.fetch_add(1, std::memory_order_relaxed); - } if (address >= 0x10000200 && address < 0x10000300) { return true; @@ -862,12 +1046,6 @@ bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) } } - if (address >= 0x12000000 && address < 0x12001000) - { - m_gsWriteCount.fetch_add(1, std::memory_order_relaxed); - return true; - } - return false; } @@ -956,34 +1134,83 @@ void PS2Memory::processPendingTransfers() static constexpr uint32_t GIF_CHANNEL = 0x1000A000; static constexpr uint32_t 
VIF1_CHANNEL = 0x10009000; + static constexpr uint32_t D_STAT = 0x1000E010u; + + auto raiseDStatChannel = [&](uint32_t channelBit) + { + uint32_t dstat = m_ioRegisters.count(D_STAT) ? m_ioRegisters[D_STAT] : 0u; + dstat |= (1u << channelBit); + + const uint32_t status = dstat & 0x3FFu; + const uint32_t mask = (dstat >> 16) & 0x3FFu; + if ((status & mask) != 0u) + dstat |= (1u << 31); + else + dstat &= ~(1u << 31); + + m_ioRegisters[D_STAT] = dstat; + }; + if (hadGif) { + raiseDStatChannel(2u); // GIF channel m_ioRegisters[GIF_CHANNEL + 0x00] &= ~0x100u; m_ioRegisters[GIF_CHANNEL + 0x20] = 0; } if (hadVif1) { + raiseDStatChannel(1u); // VIF1 channel m_ioRegisters[VIF1_CHANNEL + 0x00] &= ~0x100u; m_ioRegisters[VIF1_CHANNEL + 0x20] = 0; } } -void PS2Memory::submitGifPacket(GifPathId pathId, const uint8_t *data, uint32_t sizeBytes, bool drainImmediately) +void PS2Memory::flushMaskedPath3Packets(bool drainImmediately) { - if (!data || sizeBytes < 16) + if (m_path3Masked || m_path3MaskedFifo.empty()) return; - if (pathId == GifPathId::Path3 && m_path3Masked) + + auto emit = [&](const uint8_t *packetData, uint32_t packetSize) + { + if (m_gifArbiter) + m_gifArbiter->submit(GifPathId::Path3, packetData, packetSize, false); + else if (m_gifPacketCallback) + m_gifPacketCallback(packetData, packetSize); + }; + + for (const auto &packet : m_path3MaskedFifo) + { + if (packet.size() >= 16u) + emit(packet.data(), static_cast(packet.size())); + } + m_path3MaskedFifo.clear(); + + if (m_gifArbiter && drainImmediately) + m_gifArbiter->drain(); +} + +void PS2Memory::submitGifPacket(GifPathId pathId, const uint8_t *data, uint32_t sizeBytes, bool drainImmediately, bool path2DirectHl) +{ + if (!data || sizeBytes < 16) return; - if (m_gifArbiter) + + if (pathId == GifPathId::Path3) { - m_gifArbiter->submit(pathId, data, sizeBytes); - if (drainImmediately) - m_gifArbiter->drain(); + if (m_path3Masked) + { + m_path3MaskedFifo.emplace_back(data, data + sizeBytes); + return; + } + 
flushMaskedPath3Packets(false); } + + if (m_gifArbiter) + m_gifArbiter->submit(pathId, data, sizeBytes, path2DirectHl); else if (m_gifPacketCallback) - { m_gifPacketCallback(data, sizeBytes); - } + + if (m_gifArbiter && drainImmediately) + m_gifArbiter->drain(); } void PS2Memory::processGIFPacket(uint32_t srcPhysAddr, uint32_t qwCount) @@ -1018,6 +1245,16 @@ int PS2Memory::pollDmaRegisters() uint32_t PS2Memory::readIORegister(uint32_t address) { + if (isGsPrivReg(address)) + { + if (uint64_t *reg = gsRegPtr(gs_regs, address)) + { + const uint32_t off = address & 7u; + return static_cast((*reg >> (off * 8u)) & 0xFFFFFFFFull); + } + return 0u; + } + if (address >= 0x10002000 && address <= 0x10002030) { uint32_t val = 0; @@ -1127,6 +1364,18 @@ bool PS2Memory::isAddressInRegion(uint32_t address, const CodeRegion ®ion) return (address >= region.start && address < region.end); } +bool PS2Memory::isCodeAddress(uint32_t address) const +{ + for (const auto ®ion : m_codeRegions) + { + if (address >= region.start && address < region.end) + { + return true; + } + } + return false; +} + void PS2Memory::markModified(uint32_t address, uint32_t size) { if (size == 0) diff --git a/ps2xRuntime/src/lib/ps2_runtime.cpp b/ps2xRuntime/src/lib/ps2_runtime.cpp index 394d1e52..713d6ecc 100644 --- a/ps2xRuntime/src/lib/ps2_runtime.cpp +++ b/ps2xRuntime/src/lib/ps2_runtime.cpp @@ -1,5 +1,6 @@ #include "ps2_runtime.h" #include "ps2_syscalls.h" +#include "ps2_stubs.h" #include "game_overrides.h" #include "ps2_runtime_macros.h" #include @@ -13,6 +14,7 @@ #include #include #include +#include #include "raylib.h" #include "ps2_gs_gpu.h" #include @@ -77,6 +79,85 @@ namespace constexpr uint32_t EXCEPTION_VECTOR_TLB_REFILL = 0x80000000u; constexpr uint32_t EXCEPTION_VECTOR_BOOT = 0xBFC00200u; + struct DispatchHistory + { + std::array pcs{}; + uint32_t next = 0u; + bool wrapped = false; + }; + + thread_local DispatchHistory g_dispatchHistory; + + void pushDispatchPc(uint32_t pc) + { + DispatchHistory 
&h = g_dispatchHistory; + h.pcs[h.next] = pc; + h.next = (h.next + 1u) % static_cast(h.pcs.size()); + if (h.next == 0u) + { + h.wrapped = true; + } + } + + std::string formatDispatchHistory() + { + const DispatchHistory &h = g_dispatchHistory; + const uint32_t count = h.wrapped ? static_cast(h.pcs.size()) : h.next; + if (count == 0u) + { + return "(empty)"; + } + + std::ostringstream oss; + bool first = true; + for (uint32_t i = 0u; i < count; ++i) + { + const uint32_t idx = (h.next + h.pcs.size() - count + i) % static_cast(h.pcs.size()); + if (!first) + { + oss << " -> "; + } + first = false; + oss << "0x" << std::hex << h.pcs[idx]; + } + return oss.str(); + } + + uint32_t selectDispatchRecoveryPc(const PS2Runtime *runtime) + { + const DispatchHistory &h = g_dispatchHistory; + const uint32_t count = h.wrapped ? static_cast(h.pcs.size()) : h.next; + if (count == 0u) + { + return 0u; + } + + uint32_t firstHigh = 0u; + for (uint32_t step = 1u; step <= count; ++step) + { + const uint32_t idx = (h.next + h.pcs.size() - step) % static_cast(h.pcs.size()); + const uint32_t pc = h.pcs[idx]; + if (pc < 0x00100000u) + { + continue; + } + if (runtime && !runtime->hasFunction(pc)) + { + continue; + } + + if (firstHigh == 0u) + { + firstHigh = pc; + continue; + } + + return pc; + } + + return firstHigh; + } + uint32_t selectExceptionVector(const R5900Context *ctx, bool tlbRefill) { if (ctx->cop0_status & COP0_STATUS_BEV) @@ -155,6 +236,56 @@ namespace return value; } + uint64_t readGuestU64Wrapped(const uint8_t *rdram, uint32_t addr) + { + const uint64_t lo = readGuestU32Wrapped(rdram, addr); + const uint64_t hi = readGuestU32Wrapped(rdram, addr + 4u); + return lo | (hi << 32); + } + + uint32_t selectStackRecoveryPc(const uint8_t *rdram, const R5900Context *ctx, const PS2Runtime *runtime) + { + if (!rdram || !ctx || !runtime) + { + return 0u; + } + + const uint32_t sp = static_cast(_mm_extract_epi32(ctx->r[29], 0)); + constexpr uint32_t kScanBytes = 0x200u; + + for (uint32_t 
offset = 0u; offset < kScanBytes; offset += 8u) + { + const uint32_t slotAddr = sp + offset; + const uint32_t ra32 = static_cast(readGuestU64Wrapped(rdram, slotAddr)); + if (ra32 < 0x00100000u) + { + continue; + } + if (!runtime->hasFunction(ra32)) + { + continue; + } + return ra32; + } + + for (uint32_t offset = 0u; offset < kScanBytes; offset += 4u) + { + const uint32_t slotAddr = sp + offset; + const uint32_t ra32 = readGuestU32Wrapped(rdram, slotAddr); + if (ra32 < 0x00100000u) + { + continue; + } + if (!runtime->hasFunction(ra32)) + { + continue; + } + return ra32; + } + + return 0u; + } + std::string readGuestPrintableString(const uint8_t *rdram, uint32_t addr, size_t maxLen) { std::string out; @@ -186,6 +317,9 @@ namespace static void UploadFrame(Texture2D &tex, PS2Runtime *rt) { + // For now lets keep the display snapshot in sync with rasterized VRAM so the host frame + rt->gs().refreshDisplaySnapshot(); + const GSRegisters &gs = rt->memory().gs(); uint32_t dispfb = static_cast(gs.dispfb1 & 0xFFFFFFFFULL); @@ -650,17 +784,149 @@ bool PS2Runtime::hasFunction(uint32_t address) const PS2Runtime::RecompiledFunction PS2Runtime::lookupFunction(uint32_t address) { + pushDispatchPc(address); + auto it = m_functionTable.find(address); if (it != m_functionTable.end()) { return it->second; } + // Some games dispatch to internal basic-block addresses that belong to a + // larger recompiled function. Map known hot-path aliases to their parent + // function entry so execution can resume from the current ctx->pc. 
+ if (address == 0x2913E4u) + { + auto parent = m_functionTable.find(0x2913B0u); + if (parent != m_functionTable.end()) + { + return parent->second; + } + } + std::cerr << "Warning: Function at address 0x" << std::hex << address << std::dec << " not found" << std::endl; static RecompiledFunction defaultFunction = [](uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - std::cerr << "Error: Called unimplemented function at address 0x" << std::hex << ctx->pc << std::dec << std::endl; + const uint32_t ra = ctx ? static_cast(_mm_extract_epi32(ctx->r[31], 0)) : 0u; + const uint32_t sp = ctx ? static_cast(_mm_extract_epi32(ctx->r[29], 0)) : 0u; + const uint32_t gp = ctx ? static_cast(_mm_extract_epi32(ctx->r[28], 0)) : 0u; + const uint32_t a0 = ctx ? static_cast(_mm_extract_epi32(ctx->r[4], 0)) : 0u; + const uint32_t a1 = ctx ? static_cast(_mm_extract_epi32(ctx->r[5], 0)) : 0u; + const uint32_t v0 = ctx ? static_cast(_mm_extract_epi32(ctx->r[2], 0)) : 0u; + const uint32_t v1 = ctx ? static_cast(_mm_extract_epi32(ctx->r[3], 0)) : 0u; + + if (ctx && runtime) + { + thread_local uint32_t s_recoverCount = 0u; + thread_local bool s_loggedContext = false; + const uint32_t pc = ctx->pc; + const bool hasPcFunction = runtime->hasFunction(pc); + + if (!hasPcFunction && s_recoverCount < 8192u) + { + if (!s_loggedContext) + { + std::ostringstream stackDump; + if (rdram) + { + stackDump << " [stack]"; + for (uint32_t off = 0u; off < 0x40u; off += 4u) + { + const uint32_t slot = readGuestU32Wrapped(rdram, sp + off); + stackDump << " +" << std::hex << off << "=0x" << slot; + } + } + std::cerr << "[dispatch:first-bad-pc] bad=0x" << std::hex << pc + << " ra=0x" << ra + << " sp=0x" << sp + << " gp=0x" << gp + << " v0=0x" << v0 + << " v1=0x" << v1 + << " a0=0x" << a0 + << " a1=0x" << a1 + << " trace=" << formatDispatchHistory() + << stackDump.str() + << std::dec << std::endl; + s_loggedContext = true; + } + + uint32_t recoveryPc = 0u; + if (ra != 0u && runtime->hasFunction(ra)) + { + 
recoveryPc = ra; + } + + if (recoveryPc == 0u) + { + recoveryPc = selectStackRecoveryPc(rdram, ctx, runtime); + } + + if (recoveryPc == 0u) + { + recoveryPc = selectDispatchRecoveryPc(runtime); + } + + if (recoveryPc != 0u && recoveryPc != pc) + { + if (s_recoverCount < 256u) + { + std::cerr << "[dispatch:recover-pc] bad=0x" << std::hex << pc + << " ra=0x" << ra + << " fallback=0x" << recoveryPc + << " sp=0x" << sp + << std::dec << std::endl; + } + ++s_recoverCount; + ctx->pc = recoveryPc; + return; + } + } + + if (hasPcFunction) + { + s_recoverCount = 0u; + s_loggedContext = false; + } + else if (pc < 0x00100000u && ra == pc && s_recoverCount < 4096u) + { + uint32_t recoveryPc = selectStackRecoveryPc(rdram, ctx, runtime); + if (recoveryPc == 0u) + { + recoveryPc = selectDispatchRecoveryPc(runtime); + } + if (recoveryPc != 0u && recoveryPc != pc) + { + if (s_recoverCount < 128u) + { + std::cerr << "[dispatch:recover-low-pc] bad=0x" << std::hex << pc + << " ra=0x" << ra + << " fallback=0x" << recoveryPc + << " sp=0x" << sp + << std::dec << std::endl; + } + ++s_recoverCount; + ctx->pc = recoveryPc; + return; + } + } + } + + std::ostringstream oss; + oss << "Error: Called unimplemented function at address 0x" << std::hex << (ctx ? 
ctx->pc : 0u) + << " ra=0x" << ra + << " sp=0x" << sp + << " gp=0x" << gp + << " a0=0x" << a0 + << " hostTid=" << std::this_thread::get_id() + << " pcTrace=" << formatDispatchHistory() + << std::dec; + + static std::mutex s_defaultFnLogMutex; + { + std::lock_guard lock(s_defaultFnLogMutex); + std::cerr << oss.str() << std::endl; + } runtime->requestStop(); }; @@ -1259,12 +1525,26 @@ void PS2Runtime::dispatchLoop(uint8_t *rdram, R5900Context *ctx) m_debugGp.store(static_cast(_mm_extract_epi32(ctx->r[28], 0)), std::memory_order_relaxed); RecompiledFunction fn = lookupFunction(pc); + const uint32_t dispatchedPc = pc; + const uint32_t dispatchedRa = static_cast(_mm_extract_epi32(ctx->r[31], 0)); fn(rdram, ctx, this); if (ctx->pc == 0u) { - requestStop(); + const uint32_t ra = static_cast(_mm_extract_epi32(ctx->r[31], 0)); + const uint32_t sp = static_cast(_mm_extract_epi32(ctx->r[29], 0)); + const uint32_t gp = static_cast(_mm_extract_epi32(ctx->r[28], 0)); + std::cerr << "[dispatch:pc-zero] from=0x" << std::hex << dispatchedPc + << " fromRa=0x" << dispatchedRa + << " ra=0x" << ra + << " sp=0x" << sp + << " gp=0x" << gp + << " trace=" << formatDispatchHistory() + << std::dec << std::endl; + + // PC=0 means this guest thread returned (usually via jr $ra with RA=0). + // Do not request a global runtime stop here: other guest threads may still run. 
break; } } @@ -1404,11 +1684,8 @@ void PS2Runtime::Store128(uint8_t *rdram, R5900Context *ctx, uint32_t vaddr, __m void PS2Runtime::requestStop() { - const bool alreadyRequested = m_stopRequested.exchange(true, std::memory_order_relaxed); - if (!alreadyRequested) - { - ps2_syscalls::notifyRuntimeStop(); - } + m_stopRequested.store(true, std::memory_order_relaxed); + ps2_syscalls::notifyRuntimeStop(); } bool PS2Runtime::isStopRequested() const @@ -1424,6 +1701,7 @@ void PS2Runtime::HandleIntegerOverflow(R5900Context *ctx) void PS2Runtime::run() { m_stopRequested.store(false, std::memory_order_relaxed); + ps2_stubs::resetGsSyncVCallbackState(); m_cpuContext.r[4] = _mm_setzero_si128(); m_cpuContext.r[5] = _mm_setzero_si128(); m_cpuContext.r[29] = _mm_set_epi64x(0, static_cast(PS2_RAM_SIZE - 0x10u)); @@ -1464,23 +1742,81 @@ void PS2Runtime::run() gameThreadFinished.store(true, std::memory_order_release); }); uint64_t tick = 0; - while (!gameThreadFinished.load(std::memory_order_acquire)) + while (!isStopRequested() && g_activeThreads.load(std::memory_order_relaxed) > 0) { tick++; - if ((tick % 600) == 0) + ps2_stubs::dispatchGsSyncVCallback(m_memory.getRDRAM(), this); + if ((tick % 120) == 0) { - static uint64_t lastDma = 0, lastGif = 0, lastGs = 0, lastVif = 0; uint64_t curDma = m_memory.dmaStartCount(); uint64_t curGif = m_memory.gifCopyCount(); uint64_t curGs = m_memory.gsWriteCount(); uint64_t curVif = m_memory.vifWriteCount(); - if (curDma != lastDma || curGif != lastGif || curGs != lastGs || curVif != lastVif) + const GSRegisters &gs = m_memory.gs(); + const uint32_t dbgPc = m_debugPc.load(std::memory_order_relaxed); + const uint32_t dbgRa = m_debugRa.load(std::memory_order_relaxed); + const uint32_t dbgSp = m_debugSp.load(std::memory_order_relaxed); + const uint32_t dbgGp = m_debugGp.load(std::memory_order_relaxed); + const int activeThreads = g_activeThreads.load(std::memory_order_relaxed); + + constexpr uint32_t kSndTransTypeAddr = 0x01E0E1C0u; + constexpr 
uint32_t kSndTransBankAddr = 0x01E0E1C8u; + constexpr uint32_t kSndTransLevelAddr = 0x01E0E1B8u; + constexpr uint32_t kSndGetAdrsAddr = 0x01E212D8u; + constexpr uint32_t kSndStatusMirrorAddr = 0x01E213C0u; + constexpr uint32_t kSndSeCheckAddr = 0x01E0EF10u; + constexpr uint32_t kSndMidiCheckAddr = 0x01E0EF20u; + + const uint32_t sndTransType = readGuestU32Wrapped(m_memory.getRDRAM(), kSndTransTypeAddr); + const uint32_t sndTransLevel = readGuestU32Wrapped(m_memory.getRDRAM(), kSndTransLevelAddr); + const uint32_t sndTransBank = readGuestU32Wrapped(m_memory.getRDRAM(), kSndTransBankAddr); + const uint32_t sndGetAdrs = readGuestU32Wrapped(m_memory.getRDRAM(), kSndGetAdrsAddr); + auto readGuestS16 = [&](uint32_t addr) -> int32_t + { + const uint8_t *rdram = m_memory.getRDRAM(); + if (!rdram) + { + return 0; + } + const uint16_t raw = static_cast( + static_cast(rdram[(addr + 0u) & PS2_RAM_MASK]) | + (static_cast(rdram[(addr + 1u) & PS2_RAM_MASK]) << 8)); + return static_cast(raw); + }; + const int32_t sndMirrorMidi0 = readGuestS16(kSndStatusMirrorAddr + 0x1Eu); + const int32_t sndMirrorSe0 = readGuestS16(kSndStatusMirrorAddr + 0x26u); + int32_t sndBankMidiCheck = 0; + int32_t sndBankSeCheck = 0; + if (sndTransBank < 4u) { - lastDma = curDma; - lastGif = curGif; - lastGs = curGs; - lastVif = curVif; + sndBankMidiCheck = readGuestS16(kSndMidiCheckAddr + (sndTransBank * 2u)); } + if (sndTransBank < 5u) + { + sndBankSeCheck = readGuestS16(kSndSeCheckAddr + (sndTransBank * 2u)); + } + std::cout << "[run:tick] tick=" << tick + << " pc=0x" << std::hex << dbgPc + << " ra=0x" << dbgRa + << " sp=0x" << dbgSp + << " gp=0x" << dbgGp + << " dispfb1=0x" << gs.dispfb1 + << " display1=0x" << gs.display1 + << std::dec + << " activeThreads=" << activeThreads + << " dma=" << curDma + << " gif=" << curGif + << " gsw=" << curGs + << " vif=" << curVif + << " sndType=" << sndTransType + << " sndLvl=" << sndTransLevel + << " sndBank=" << sndTransBank + << " getAdrs=0x" << std::hex << 
sndGetAdrs << std::dec + << " sndMirrorMidi0=" << sndMirrorMidi0 + << " sndMirrorSe0=" << sndMirrorSe0 + << " sndChkMidi=" << sndBankMidiCheck + << " sndChkSe=" << sndBankSeCheck + << std::endl; } UploadFrame(frameTex, this); @@ -1519,13 +1855,24 @@ void PS2Runtime::run() } } - const auto workerDeadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(250); + const auto workerDeadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(1000); while (g_activeThreads.load(std::memory_order_relaxed) > 0 && std::chrono::steady_clock::now() < workerDeadline) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + if (g_activeThreads.load(std::memory_order_relaxed) > 0) + { + requestStop(); + const auto finalWorkerDeadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(1000); + while (g_activeThreads.load(std::memory_order_relaxed) > 0 && + std::chrono::steady_clock::now() < finalWorkerDeadline) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } + UnloadTexture(frameTex); CloseWindow(); diff --git a/ps2xRuntime/src/lib/ps2_stubs.cpp b/ps2xRuntime/src/lib/ps2_stubs.cpp index 527681f3..3d80578f 100644 --- a/ps2xRuntime/src/lib/ps2_stubs.cpp +++ b/ps2xRuntime/src/lib/ps2_stubs.cpp @@ -39,6 +39,7 @@ namespace ps2_stubs void TODO_NAMED(const char *name, uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { const std::string stubName = name ? name : "unknown"; + uint32_t callCount = 0; { std::lock_guard lock(g_stubWarningMutex); diff --git a/ps2xRuntime/src/lib/ps2_syscalls.cpp b/ps2xRuntime/src/lib/ps2_syscalls.cpp index 2bdefc3c..67647c59 100644 --- a/ps2xRuntime/src/lib/ps2_syscalls.cpp +++ b/ps2xRuntime/src/lib/ps2_syscalls.cpp @@ -328,7 +328,10 @@ namespace ps2_syscalls threads.push_back(entry.second); } } + g_threads.clear(); + g_nextThreadId = 2; // Reserve id 1 for main thread. 
} + g_currentThreadId = 1; for (const auto &threadInfo : threads) { @@ -340,6 +343,8 @@ namespace ps2_syscalls threadInfo->cv.notify_all(); } + joinAllHostThreads(); + std::vector> semas; { std::lock_guard lock(g_sema_map_mutex); @@ -351,6 +356,8 @@ namespace ps2_syscalls semas.push_back(entry.second); } } + g_semas.clear(); + g_nextSemaId = 1; } for (const auto &sema : semas) { @@ -368,6 +375,8 @@ namespace ps2_syscalls eventFlags.push_back(entry.second); } } + g_eventFlags.clear(); + g_nextEventFlagId = 1; } for (const auto &eventFlag : eventFlags) { diff --git a/ps2xRuntime/src/lib/ps2_vif1_interpreter.cpp b/ps2xRuntime/src/lib/ps2_vif1_interpreter.cpp index 28a0b4cb..5e97254b 100644 --- a/ps2xRuntime/src/lib/ps2_vif1_interpreter.cpp +++ b/ps2xRuntime/src/lib/ps2_vif1_interpreter.cpp @@ -46,6 +46,14 @@ void PS2Memory::processVIF1Data(const uint8_t *data, uint32_t sizeBytes) if (!data || !m_gsVRAM || sizeBytes == 0u) return; + auto recomputeVif1Tops = [&]() + { + const bool dbf = (vif1_regs.stat & (1u << 7)) != 0u; + const uint32_t base = vif1_regs.base & 0x3FFu; + const uint32_t ofst = vif1_regs.ofst & 0x3FFu; + vif1_regs.tops = dbf ? ((base + ofst) & 0x3FFu) : base; + }; + uint32_t pos = 0; while (pos + 4 <= sizeBytes) @@ -57,6 +65,13 @@ void PS2Memory::processVIF1Data(const uint8_t *data, uint32_t sizeBytes) uint8_t opcode = (cmd >> 24) & 0x7F; uint16_t imm = cmd & 0xFFFF; uint8_t num = (cmd >> 16) & 0xFF; + const bool irq = (cmd & 0x80000000u) != 0u; + + // Track most-recent command for VIFn_CODE emulation. 
+ vif1_regs.code = cmd; + vif1_regs.num = num; + if (irq) + vif1_regs.stat |= (1u << 11); // INT if (opcode == VIF_NOP) { @@ -69,12 +84,17 @@ void PS2Memory::processVIF1Data(const uint8_t *data, uint32_t sizeBytes) } else if (opcode == VIF_OFFSET) { + const uint32_t oldTops = vif1_regs.tops & 0x3FFu; vif1_regs.ofst = imm & 0x3FFu; + vif1_regs.base = oldTops; + vif1_regs.stat &= ~(1u << 7); // clear DBF + recomputeVif1Tops(); continue; } else if (opcode == VIF_BASE) { vif1_regs.base = imm & 0x3FFu; + recomputeVif1Tops(); continue; } else if (opcode == VIF_ITOP) @@ -89,11 +109,17 @@ void PS2Memory::processVIF1Data(const uint8_t *data, uint32_t sizeBytes) } else if (opcode == VIF_MSKPATH3) { - m_path3Masked = (imm & 1u) != 0; + // VIF command docs: MSKPATH3 uses IMMEDIATE bit 15. + const bool wasMasked = m_path3Masked; + m_path3Masked = (imm & 0x8000u) != 0u; + if (wasMasked && !m_path3Masked) + flushMaskedPath3Packets(); continue; } else if (opcode == VIF_MARK) { + vif1_regs.mark = imm; + vif1_regs.stat |= (1u << 6); // MRK continue; } else if (opcode == VIF_FLUSHE || opcode == VIF_FLUSH || opcode == VIF_FLUSHA) @@ -102,6 +128,9 @@ void PS2Memory::processVIF1Data(const uint8_t *data, uint32_t sizeBytes) } else if (opcode == VIF_MSCAL || opcode == VIF_MSCALF) { + vif1_regs.itops = vif1_regs.itop & 0x3FFu; + vif1_regs.stat ^= (1u << 7); // toggle DBF + recomputeVif1Tops(); uint32_t startPC = (uint32_t)imm * 8u; if (m_vu1MscalCallback) m_vu1MscalCallback(startPC, vif1_regs.itop); @@ -109,34 +138,44 @@ void PS2Memory::processVIF1Data(const uint8_t *data, uint32_t sizeBytes) } else if (opcode == VIF_MSCNT) { + vif1_regs.itops = vif1_regs.itop & 0x3FFu; + vif1_regs.stat ^= (1u << 7); // toggle DBF + recomputeVif1Tops(); continue; } else if (opcode == VIF_STMASK) { - pos += 4; - if (pos > sizeBytes) + if (pos + 4 > sizeBytes) break; + uint32_t maskValue = 0; + std::memcpy(&maskValue, data + pos, sizeof(maskValue)); + vif1_regs.mask = maskValue; + pos += 4; continue; } else 
if (opcode == VIF_STROW) { - pos += 16; - if (pos > sizeBytes) + if (pos + 16 > sizeBytes) break; + std::memcpy(vif1_regs.row, data + pos, 16); + pos += 16; continue; } else if (opcode == VIF_STCOL) { - pos += 16; - if (pos > sizeBytes) + if (pos + 16 > sizeBytes) break; + std::memcpy(vif1_regs.col, data + pos, 16); + pos += 16; continue; } else if (opcode == VIF_MPG) { uint32_t destAddr = (uint32_t)imm * 8u; - uint32_t mpgBytes = (uint32_t)num * 8u; - mpgBytes = (mpgBytes + 15) & ~15u; + // VIF MPG semantics: NUM==0 means 256 instructions (2048 bytes). + // MPG payload is instruction-packed and should not be QW-aligned. + const uint32_t instructionCount = (num == 0u) ? 256u : static_cast(num); + const uint32_t mpgBytes = instructionCount * 8u; if (m_vu1Code && destAddr < PS2_VU1_CODE_SIZE && mpgBytes > 0) { uint32_t copyBytes = mpgBytes; @@ -162,7 +201,8 @@ void PS2Memory::processVIF1Data(const uint8_t *data, uint32_t sizeBytes) if (qwCount > 0) { - submitGifPacket(GifPathId::Path2, data + pos, qwCount * 16); + const bool directHl = (opcode == VIF_DIRECTHL); + submitGifPacket(GifPathId::Path2, data + pos, qwCount * 16, true, directHl); } pos += qwCount * 16; @@ -177,6 +217,7 @@ void PS2Memory::processVIF1Data(const uint8_t *data, uint32_t sizeBytes) { uint8_t vn = (opcode >> 2) & 0x3; uint8_t vl = opcode & 0x3; + const bool maskEnable = (opcode & 0x10u) != 0u; int components = vn + 1; int bitsPerComponent = 32; switch (vl) @@ -189,31 +230,230 @@ void PS2Memory::processVIF1Data(const uint8_t *data, uint32_t sizeBytes) } int bitsPerVector = (vl == 3 && vn == 3) ? 16 : (components * bitsPerComponent); uint32_t bytesPerVector = (bitsPerVector + 7) / 8; - uint32_t totalBytes = (uint32_t)num * bytesPerVector; + // UNPACK semantics: NUM is 8-bit and NUM==0 means 256 vectors (writes). + const uint32_t writeVectorCount = (num == 0u) ? 256u : static_cast(num); + + // STCYCL controls write cycles for UNPACK. 
+ uint32_t cl = vif1_regs.cycle & 0xFFu; + uint32_t wl = (vif1_regs.cycle >> 8) & 0xFFu; + if (cl == 0u) + cl = 1u; + if (wl == 0u) + wl = 1u; + + uint32_t sourceVectorCount = writeVectorCount; + if (cl < wl) + { + const uint32_t fullBlocks = writeVectorCount / wl; + uint32_t remainder = writeVectorCount % wl; + if (remainder > cl) + remainder = cl; + sourceVectorCount = fullBlocks * cl + remainder; + } + + uint32_t totalBytes = sourceVectorCount * bytesPerVector; totalBytes = (totalBytes + 3) & ~3u; uint32_t vuAddr = (uint32_t)imm & 0x3FFu; + if ((imm & 0x8000u) != 0u) + vuAddr = (vuAddr + (vif1_regs.tops & 0x3FFu)) & 0x3FFu; + + const bool zeroExtend = (imm & 0x4000u) != 0u; if (m_vu1Data && totalBytes > 0 && pos + totalBytes <= sizeBytes) { - if (bytesPerVector == 16 && vuAddr * 16u < PS2_VU1_DATA_SIZE) + const uint8_t *srcBase = data + pos; + uint32_t srcIndex = 0u; + for (uint32_t writeIndex = 0; writeIndex < writeVectorCount; ++writeIndex) { - for (uint32_t i = 0; i < num; ++i) + const uint32_t cyclePos = writeIndex % wl; + const bool sourceAvailable = (cl >= wl) || (cyclePos < cl); + + uint32_t destVec = 0; + if (cl >= wl) { - uint32_t destOff = ((vuAddr + i) & 0x3FFu) * 16u; - if (destOff + 16 <= PS2_VU1_DATA_SIZE) - std::memcpy(m_vu1Data + destOff, data + pos + i * 16, 16); + destVec = (vuAddr + (writeIndex / wl) * cl + cyclePos) & 0x3FFu; } - } - else - { - uint32_t destOff = vuAddr * 16u; - if (destOff < PS2_VU1_DATA_SIZE) + else + { + destVec = (vuAddr + writeIndex) & 0x3FFu; + } + + uint32_t destOff = destVec * 16u; + if (destOff + 16u > PS2_VU1_DATA_SIZE) + { + if (sourceAvailable && srcIndex < sourceVectorCount) + ++srcIndex; + continue; + } + + uint32_t lanes[4] = {0u, 0u, 0u, 0u}; + std::memcpy(lanes, m_vu1Data + destOff, sizeof(lanes)); + uint32_t decompressed[4] = {lanes[0], lanes[1], lanes[2], lanes[3]}; + bool decoded = false; + + const uint8_t *srcVec = nullptr; + if (sourceAvailable && srcIndex < sourceVectorCount) { - uint32_t copyBytes = 
totalBytes; - if (destOff + copyBytes > PS2_VU1_DATA_SIZE) - copyBytes = PS2_VU1_DATA_SIZE - destOff; - std::memcpy(m_vu1Data + destOff, data + pos, copyBytes); + srcVec = srcBase + srcIndex * bytesPerVector; + ++srcIndex; + decoded = true; } + + auto extend16 = [&](uint16_t raw) -> uint32_t + { + if (zeroExtend) + return static_cast(raw); + return static_cast(static_cast(static_cast(raw))); + }; + + auto extend8 = [&](uint8_t raw) -> uint32_t + { + if (zeroExtend) + return static_cast(raw); + return static_cast(static_cast(static_cast(raw))); + }; + + bool handledFormat = true; + if (!decoded) + { + handledFormat = false; + } + else if (vl == 0u) + { + if (components == 1) + { + uint32_t scalar = 0; + std::memcpy(&scalar, srcVec, sizeof(scalar)); + decompressed[0] = scalar; + decompressed[1] = scalar; + decompressed[2] = scalar; + decompressed[3] = scalar; + } + else + { + const uint32_t limit = (components > 4) ? 4u : static_cast(components); + for (uint32_t c = 0; c < limit; ++c) + { + uint32_t scalar = 0; + std::memcpy(&scalar, srcVec + c * 4u, sizeof(scalar)); + decompressed[c] = scalar; + } + } + } + else if (vl == 1u) + { + if (components == 1) + { + uint16_t raw = 0; + std::memcpy(&raw, srcVec, sizeof(raw)); + const uint32_t scalar = extend16(raw); + decompressed[0] = scalar; + decompressed[1] = scalar; + decompressed[2] = scalar; + decompressed[3] = scalar; + } + else + { + const uint32_t limit = (components > 4) ? 4u : static_cast(components); + for (uint32_t c = 0; c < limit; ++c) + { + uint16_t raw = 0; + std::memcpy(&raw, srcVec + c * 2u, sizeof(raw)); + decompressed[c] = extend16(raw); + } + } + } + else if (vl == 2u) + { + if (components == 1) + { + const uint32_t scalar = extend8(srcVec[0]); + decompressed[0] = scalar; + decompressed[1] = scalar; + decompressed[2] = scalar; + decompressed[3] = scalar; + } + else + { + const uint32_t limit = (components > 4) ? 
4u : static_cast(components); + for (uint32_t c = 0; c < limit; ++c) + { + decompressed[c] = extend8(srcVec[c]); + } + } + } + else if (vl == 3u && vn == 3u) + { + // V4-5: packed color-like format in a single 16-bit value. + uint16_t packed = 0; + std::memcpy(&packed, srcVec, sizeof(packed)); + decompressed[0] = packed & 0x1Fu; + decompressed[1] = (packed >> 5) & 0x1Fu; + decompressed[2] = (packed >> 10) & 0x1Fu; + decompressed[3] = (packed >> 15) & 0x01u; + } + else + { + handledFormat = false; + } + + // Unknown compressed format fallback: preserve legacy raw-copy behavior. + if (!handledFormat && decoded && !maskEnable && (vif1_regs.mode == 0u || vif1_regs.mode == 3u)) + { + uint32_t copyBytes = (bytesPerVector < 16u) ? bytesPerVector : 16u; + std::memcpy(m_vu1Data + destOff, srcVec, copyBytes); + continue; + } + + const bool canAdd = (vl != 3u || vn != 3u); + const uint32_t mode = vif1_regs.mode & 3u; + const uint32_t colIdx = (cyclePos > 3u) ? 3u : cyclePos; + const uint32_t maskCycle = (cyclePos > 3u) ? 3u : cyclePos; + + for (uint32_t field = 0u; field < 4u; ++field) + { + uint32_t maskSpec = 0u; + if (maskEnable) + { + const uint32_t shift = ((maskCycle * 4u) + field) * 2u; + maskSpec = (vif1_regs.mask >> shift) & 0x3u; + } + + // In fill-write cycles with suspended source reads, treat raw-data selections as row-fill. 
+ if (!decoded && maskSpec == 0u) + maskSpec = 1u; + + uint32_t writeVal = lanes[field]; + if (maskSpec == 0u) + { + if (handledFormat) + { + writeVal = decompressed[field]; + if (canAdd && (mode == 1u || mode == 2u)) + { + writeVal = writeVal + vif1_regs.row[field]; + if (mode == 2u) + vif1_regs.row[field] = writeVal; + } + } + } + else if (maskSpec == 1u) + { + writeVal = vif1_regs.row[field]; + } + else if (maskSpec == 2u) + { + writeVal = vif1_regs.col[colIdx]; + } + else + { + continue; // write-protect + } + + lanes[field] = writeVal; + } + + std::memcpy(m_vu1Data + destOff, lanes, sizeof(lanes)); } } pos += totalBytes; diff --git a/ps2xRuntime/src/lib/ps2_vu1.cpp b/ps2xRuntime/src/lib/ps2_vu1.cpp index d89d98f7..5a5af6bc 100644 --- a/ps2xRuntime/src/lib/ps2_vu1.cpp +++ b/ps2xRuntime/src/lib/ps2_vu1.cpp @@ -6,6 +6,7 @@ #include #include #include +#include // Instruction field extraction helpers static inline uint8_t DEST(uint32_t i) { return (uint8_t)((i >> 21) & 0xF); } @@ -99,8 +100,9 @@ void VU1Interpreter::run(uint8_t *vuCode, uint32_t codeSize, bool mBit = (upper >> 31) & 1; (void)mBit; - // LOI: if bit 31 of lower is set, the upper word is an immediate float loaded into I - bool loi = (lower >> 31) & 1; + // LOI uses a dedicated lower opcode (0x8000033C). Do not key on bit31 alone: + // opHi=0x40 instructions (including XGKICK) also have bit31 set. 
+ bool loi = (lower == 0x8000033Cu); if (loi) { std::memcpy(&m_state.i, &upper, 4); @@ -108,8 +110,8 @@ void VU1Interpreter::run(uint8_t *vuCode, uint32_t codeSize, else { execUpper(upper); + execLower(lower, vuData, dataSize, gs, memory, upper); } - execLower(lower & 0x7FFFFFFF, vuData, dataSize, gs, memory, upper); // Enforce VF0 invariant m_state.vf[0][0] = 0.0f; @@ -966,48 +968,93 @@ void VU1Interpreter::execLower(uint32_t instr, uint8_t *vuData, uint32_t dataSiz return; } } - case 0x3D: // XGKICK — send GIF packet from VU1 data memory + case 0x3D: // XGKICK - send GIF packet from VU1 data memory { + if (!vuData || dataSize < 16u) + return; + + auto wrapOffset = [&](uint32_t off) -> uint32_t + { + return off % dataSize; + }; + + auto read64Wrap = [&](uint32_t off) -> uint64_t + { + uint8_t bytes[8]; + for (uint32_t i = 0; i < 8u; ++i) + { + bytes[i] = vuData[wrapOffset(off + i)]; + } + uint64_t value = 0; + std::memcpy(&value, bytes, sizeof(value)); + return value; + }; + uint32_t addr = ((uint32_t)(uint16_t)m_state.vi[is]) * 16u; - addr &= (dataSize - 1); - // Walk the GIF packet to find its total size + addr = wrapOffset(addr); uint32_t pktOff = addr; - uint32_t totalBytes = 0; + uint32_t totalBytes = 0u; bool done = false; + for (int safety = 0; safety < 256 && !done; ++safety) { - if (pktOff + 16 > dataSize) break; - uint64_t tagLo; - std::memcpy(&tagLo, vuData + pktOff, 8); - uint32_t nloop = (uint32_t)(tagLo & 0x7FFF); - uint8_t flg = (uint8_t)((tagLo >> 58) & 0x3); - uint32_t nreg = (uint32_t)((tagLo >> 60) & 0xF); - if (nreg == 0) nreg = 16; - bool eop = (tagLo >> 15) & 1; + uint64_t tagLo = read64Wrap(pktOff); + uint32_t nloop = (uint32_t)(tagLo & 0x7FFFu); + uint8_t flg = (uint8_t)((tagLo >> 58) & 0x3u); + uint32_t nreg = (uint32_t)((tagLo >> 60) & 0xFu); + if (nreg == 0u) + nreg = 16u; + bool eop = ((tagLo >> 15) & 0x1ull) != 0ull; - uint32_t pktSize = 16; // GIF tag - if (flg == 0) // PACKED - pktSize += nloop * nreg * 16; - else if (flg == 1) // 
REGLIST + uint32_t pktSize = 16u; + if (flg == 0u) + { + pktSize += nloop * nreg * 16u; + } + else if (flg == 1u) { uint32_t regs = nloop * nreg; - pktSize += regs * 8; - if (regs & 1) pktSize += 8; // pad to 128-bit + pktSize += regs * 8u; + if ((regs & 1u) != 0u) + pktSize += 8u; + } + else if (flg == 2u) + { + pktSize += nloop * 16u; } - else if (flg == 2) // IMAGE - pktSize += nloop * 16; - pktOff += pktSize; + if (pktSize == 0u) + break; + totalBytes += pktSize; - if (eop) done = true; + pktOff = wrapOffset(pktOff + pktSize); + if (eop) + done = true; } - if (totalBytes > 0 && addr + totalBytes <= dataSize) + + if (totalBytes == 0u) + return; + + if (addr + totalBytes <= dataSize) { if (memory) memory->submitGifPacket(GifPathId::Path1, vuData + addr, totalBytes); else gs.processGIFPacket(vuData + addr, totalBytes); } + else + { + std::vector wrappedPacket(totalBytes); + for (uint32_t i = 0; i < totalBytes; ++i) + { + wrappedPacket[i] = vuData[wrapOffset(addr + i)]; + } + + if (memory) + memory->submitGifPacket(GifPathId::Path1, wrappedPacket.data(), totalBytes); + else + gs.processGIFPacket(wrappedPacket.data(), totalBytes); + } return; } case 0x3E: // XTOP diff --git a/ps2xRuntime/src/lib/stubs/ps2_stubs_gs.inl b/ps2xRuntime/src/lib/stubs/ps2_stubs_gs.inl index e2466faa..b553dc1f 100644 --- a/ps2xRuntime/src/lib/stubs/ps2_stubs_gs.inl +++ b/ps2xRuntime/src/lib/stubs/ps2_stubs_gs.inl @@ -1,3 +1,120 @@ +namespace +{ + std::mutex g_gs_sync_v_callback_mutex; + uint32_t g_gs_sync_v_callback_func = 0u; + uint32_t g_gs_sync_v_callback_gp = 0u; + uint32_t g_gs_sync_v_callback_sp = 0u; + uint32_t g_gs_sync_v_callback_stack_base = 0u; + uint32_t g_gs_sync_v_callback_stack_top = 0u; + uint64_t g_gs_sync_v_callback_tick = 0u; + uint32_t g_gs_sync_v_callback_bad_pc_logs = 0u; +} + +void resetGsSyncVCallbackState() +{ + std::lock_guard lock(g_gs_sync_v_callback_mutex); + g_gs_sync_v_callback_func = 0u; + g_gs_sync_v_callback_gp = 0u; + g_gs_sync_v_callback_sp = 0u; + 
g_gs_sync_v_callback_stack_base = 0u; + g_gs_sync_v_callback_stack_top = 0u; + g_gs_sync_v_callback_tick = 0u; + g_gs_sync_v_callback_bad_pc_logs = 0u; +} + +void dispatchGsSyncVCallback(uint8_t *rdram, PS2Runtime *runtime) +{ + if (!rdram || !runtime) + { + return; + } + + uint32_t callback = 0u; + uint32_t gp = 0u; + uint32_t sp = 0u; + uint32_t callbackStackTop = 0u; + uint64_t tick = 0u; + { + std::lock_guard lock(g_gs_sync_v_callback_mutex); + callback = g_gs_sync_v_callback_func; + gp = g_gs_sync_v_callback_gp; + sp = g_gs_sync_v_callback_sp; + callbackStackTop = g_gs_sync_v_callback_stack_top; + if (callback == 0u) + { + return; + } + tick = ++g_gs_sync_v_callback_tick; + } + + if (!runtime->hasFunction(callback)) + { + return; + } + + if (callbackStackTop == 0u) + { + constexpr uint32_t kCallbackStackSize = 0x4000u; + const uint32_t stackBase = runtime->guestMalloc(kCallbackStackSize, 16u); + if (stackBase != 0u) + { + std::lock_guard lock(g_gs_sync_v_callback_mutex); + if (g_gs_sync_v_callback_stack_top == 0u) + { + g_gs_sync_v_callback_stack_base = stackBase; + g_gs_sync_v_callback_stack_top = stackBase + kCallbackStackSize - 0x10u; + } + callbackStackTop = g_gs_sync_v_callback_stack_top; + } + } + + try + { + R5900Context callbackCtx{}; + SET_GPR_U32(&callbackCtx, 28, gp); + SET_GPR_U32(&callbackCtx, 29, (callbackStackTop != 0u) ? callbackStackTop : ((sp != 0u) ? 
sp : (PS2_RAM_SIZE - 0x10u))); + SET_GPR_U32(&callbackCtx, 31, 0u); + SET_GPR_U32(&callbackCtx, 4, static_cast(tick)); + callbackCtx.pc = callback; + + uint32_t steps = 0u; + while (callbackCtx.pc != 0u && !runtime->isStopRequested() && steps < 1024u) + { + if (!runtime->hasFunction(callbackCtx.pc)) + { + if (g_gs_sync_v_callback_bad_pc_logs < 16u) + { + std::cerr << "[sceGsSyncVCallback:bad-pc] pc=0x" << std::hex << callbackCtx.pc + << " ra=0x" << getRegU32(&callbackCtx, 31) + << " sp=0x" << getRegU32(&callbackCtx, 29) + << " gp=0x" << getRegU32(&callbackCtx, 28) + << std::dec << std::endl; + ++g_gs_sync_v_callback_bad_pc_logs; + } + callbackCtx.pc = 0u; + break; + } + + auto step = runtime->lookupFunction(callbackCtx.pc); + if (!step) + { + break; + } + ++steps; + step(rdram, &callbackCtx, runtime); + } + } + catch (const std::exception &e) + { + static uint32_t warnCount = 0u; + if (warnCount < 8u) + { + std::cerr << "[sceGsSyncVCallback] callback exception: " << e.what() << std::endl; + ++warnCount; + } + } +} + void sceGsExecLoadImage(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { uint32_t imgAddr = getRegU32(ctx, 4); @@ -465,7 +582,40 @@ void sceGsSyncV(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) void sceGsSyncVCallback(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - setReturnS32(ctx, 0); + (void)rdram; + + const uint32_t newCallback = getRegU32(ctx, 4); + const uint32_t callerPc = ctx ? ctx->pc : 0u; + const uint32_t callerRa = ctx ? 
getRegU32(ctx, 31) : 0u; + const uint32_t gp = getRegU32(ctx, 28); + const uint32_t sp = getRegU32(ctx, 29); + + uint32_t oldCallback = 0u; + { + std::lock_guard lock(g_gs_sync_v_callback_mutex); + oldCallback = g_gs_sync_v_callback_func; + g_gs_sync_v_callback_func = newCallback; + if (newCallback != 0u) + { + g_gs_sync_v_callback_gp = gp; + g_gs_sync_v_callback_sp = sp; + } + } + + static uint32_t s_syncVCallbackLogCount = 0u; + if (s_syncVCallbackLogCount < 128u) + { + std::cout << "[sceGsSyncVCallback:set] new=0x" << std::hex << newCallback + << " old=0x" << oldCallback + << " callerPc=0x" << callerPc + << " callerRa=0x" << callerRa + << " gp=0x" << gp + << " sp=0x" << sp + << std::dec << std::endl; + ++s_syncVCallbackLogCount; + } + + setReturnU32(ctx, oldCallback); } void sceGszbufaddr(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) diff --git a/ps2xRuntime/src/lib/stubs/ps2_stubs_libc.inl b/ps2xRuntime/src/lib/stubs/ps2_stubs_libc.inl index d5d0cead..94ff6599 100644 --- a/ps2xRuntime/src/lib/stubs/ps2_stubs_libc.inl +++ b/ps2xRuntime/src/lib/stubs/ps2_stubs_libc.inl @@ -1,3 +1,45 @@ +namespace +{ + uint32_t sanitizeMemTransferSize(uint32_t size, const char *op) + { + constexpr uint32_t kMaxTransfer = PS2_RAM_SIZE; + if (size <= kMaxTransfer) + { + return size; + } + + static std::mutex s_warnMutex; + static std::unordered_map s_warnCounts; + uint32_t warnCount = 0u; + { + std::lock_guard lock(s_warnMutex); + warnCount = ++s_warnCounts[op ? op : "memop"]; + } + if (warnCount <= 16u) + { + std::cerr << "[" << (op ? op : "memop") << "] size clamp from 0x" + << std::hex << size << " to 0x" << kMaxTransfer + << std::dec << std::endl; + } + return kMaxTransfer; + } + + uint32_t guestContiguousBytes(uint32_t guestAddr) + { + uint32_t offset = 0u; + bool scratch = false; + if (!ps2ResolveGuestPointer(guestAddr, offset, scratch)) + { + return 0u; + } + if (scratch) + { + return (offset < PS2_SCRATCHPAD_SIZE) ? 
(PS2_SCRATCHPAD_SIZE - offset) : 0u; + } + return (offset < PS2_RAM_SIZE) ? (PS2_RAM_SIZE - offset) : 0u; + } +} + void malloc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { const uint32_t size = getRegU32(ctx, 4); // $a0 @@ -34,22 +76,38 @@ void memcpy(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { uint32_t destAddr = getRegU32(ctx, 4); // $a0 uint32_t srcAddr = getRegU32(ctx, 5); // $a1 - size_t size = getRegU32(ctx, 6); // $a2 - - uint8_t *hostDest = getMemPtr(rdram, destAddr); - const uint8_t *hostSrc = getConstMemPtr(rdram, srcAddr); + uint32_t size = getRegU32(ctx, 6); // $a2 + size = sanitizeMemTransferSize(size, "memcpy"); - if (hostDest && hostSrc) + uint32_t copied = 0u; + uint32_t curDst = destAddr; + uint32_t curSrc = srcAddr; + while (copied < size) { - ::memcpy(hostDest, hostSrc, size); - ps2TraceGuestRangeWrite(rdram, destAddr, static_cast(size), "memcpy", ctx); + uint8_t *hostDest = getMemPtr(rdram, curDst); + const uint8_t *hostSrc = getConstMemPtr(rdram, curSrc); + if (!hostDest || !hostSrc) + { + break; + } + + uint32_t chunk = size - copied; + chunk = std::min(chunk, guestContiguousBytes(curDst)); + chunk = std::min(chunk, guestContiguousBytes(curSrc)); + if (chunk == 0u) + { + break; + } + + ::memcpy(hostDest, hostSrc, chunk); + copied += chunk; + curDst += chunk; + curSrc += chunk; } - else + + if (copied != 0u) { - std::cerr << "memcpy error: Attempted copy involving non-RDRAM address (or invalid RDRAM address)." 
- << " Dest: 0x" << std::hex << destAddr << " (host ptr valid: " << (hostDest != nullptr) << ")" - << ", Src: 0x" << srcAddr << " (host ptr valid: " << (hostSrc != nullptr) << ")" << std::dec - << ", Size: " << size << std::endl; + ps2TraceGuestRangeWrite(rdram, destAddr, copied, "memcpy", ctx); } // returns dest pointer ($v0 = $a0) @@ -61,17 +119,33 @@ void memset(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) uint32_t destAddr = getRegU32(ctx, 4); // $a0 int value = (int)(getRegU32(ctx, 5) & 0xFF); // $a1 (char value) uint32_t size = getRegU32(ctx, 6); // $a2 + size = sanitizeMemTransferSize(size, "memset"); - uint8_t *hostDest = getMemPtr(rdram, destAddr); - - if (hostDest) + uint32_t written = 0u; + uint32_t curDst = destAddr; + while (written < size) { - ::memset(hostDest, value, size); - ps2TraceGuestRangeWrite(rdram, destAddr, size, "memset", ctx); + uint8_t *hostDest = getMemPtr(rdram, curDst); + if (!hostDest) + { + break; + } + + uint32_t chunk = size - written; + chunk = std::min(chunk, guestContiguousBytes(curDst)); + if (chunk == 0u) + { + break; + } + + ::memset(hostDest, value, chunk); + written += chunk; + curDst += chunk; } - else + + if (written != 0u) { - std::cerr << "memset error: Invalid address provided." 
<< std::endl; + ps2TraceGuestRangeWrite(rdram, destAddr, written, "memset", ctx); } // returns dest pointer ($v0 = $a0) @@ -82,22 +156,36 @@ void memmove(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { uint32_t destAddr = getRegU32(ctx, 4); // $a0 uint32_t srcAddr = getRegU32(ctx, 5); // $a1 - size_t size = getRegU32(ctx, 6); // $a2 + uint32_t size = getRegU32(ctx, 6); // $a2 + size = sanitizeMemTransferSize(size, "memmove"); - uint8_t *hostDest = getMemPtr(rdram, destAddr); - const uint8_t *hostSrc = getConstMemPtr(rdram, srcAddr); + uint32_t copied = 0u; + std::vector tmp; + tmp.reserve(size); + for (uint32_t i = 0u; i < size; ++i) + { + const uint8_t *src = getConstMemPtr(rdram, srcAddr + i); + if (!src) + { + break; + } + tmp.push_back(*src); + } - if (hostDest && hostSrc) + for (uint32_t i = 0u; i < static_cast(tmp.size()); ++i) { - ::memmove(hostDest, hostSrc, size); - ps2TraceGuestRangeWrite(rdram, destAddr, static_cast(size), "memmove", ctx); + uint8_t *dst = getMemPtr(rdram, destAddr + i); + if (!dst) + { + break; + } + *dst = tmp[i]; + ++copied; } - else + + if (copied != 0u) { - std::cerr << "memmove error: Attempted move involving potentially invalid RDRAM address." 
- << " Dest: 0x" << std::hex << destAddr << " (host ptr valid: " << (hostDest != nullptr) << ")" - << ", Src: 0x" << srcAddr << " (host ptr valid: " << (hostSrc != nullptr) << ")" << std::dec - << ", Size: " << size << std::endl; + ps2TraceGuestRangeWrite(rdram, destAddr, copied, "memmove", ctx); } // returns dest pointer ($v0 = $a0) @@ -109,25 +197,23 @@ void memcmp(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) uint32_t ptr1Addr = getRegU32(ctx, 4); // $a0 uint32_t ptr2Addr = getRegU32(ctx, 5); // $a1 uint32_t size = getRegU32(ctx, 6); // $a2 - - const uint8_t *hostPtr1 = getConstMemPtr(rdram, ptr1Addr); - const uint8_t *hostPtr2 = getConstMemPtr(rdram, ptr2Addr); + size = sanitizeMemTransferSize(size, "memcmp"); int result = 0; - if (hostPtr1 && hostPtr2) - { - result = ::memcmp(hostPtr1, hostPtr2, size); - } - else + for (uint32_t i = 0u; i < size; ++i) { - std::cerr << "memcmp error: Invalid address provided." - << " Ptr1: 0x" << std::hex << ptr1Addr << " (host ptr valid: " << (hostPtr1 != nullptr) << ")" - << ", Ptr2: 0x" << ptr2Addr << " (host ptr valid: " << (hostPtr2 != nullptr) << ")" << std::dec - << std::endl; - - result = (hostPtr1 == nullptr) - (hostPtr2 == nullptr); - if (result == 0) - result = 1; // If both null, still different? Or 0? + const uint8_t *lhs = getConstMemPtr(rdram, ptr1Addr + i); + const uint8_t *rhs = getConstMemPtr(rdram, ptr2Addr + i); + if (!lhs || !rhs) + { + result = (!lhs && !rhs) ? 0 : (lhs ? 1 : -1); + break; + } + if (*lhs != *rhs) + { + result = static_cast(*lhs) - static_cast(*rhs); + break; + } } setReturnS32(ctx, result); } @@ -427,6 +513,7 @@ void sprintf(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { uint32_t str_addr = getRegU32(ctx, 4); // $a0 uint32_t format_addr = getRegU32(ctx, 5); // $a1 + constexpr size_t kSafeSprintfBytes = 256u; // Keep guest stack temporaries from being overwritten. 
const std::string formatOwned = readPs2CStringBounded(rdram, runtime, format_addr, 1024); int ret = -1; @@ -454,9 +541,9 @@ void sprintf(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) } std::string rendered = formatPs2StringWithArgs(rdram, ctx, runtime, formatOwned.c_str(), 2); - if (rendered.size() >= kMaxFormattedOutputBytes) + if (rendered.size() >= kSafeSprintfBytes) { - rendered.resize(kMaxFormattedOutputBytes - 1); + rendered.resize(kSafeSprintfBytes - 1); } const size_t writeLen = rendered.size() + 1u; if (writeGuestBytes(rdram, runtime, str_addr, reinterpret_cast(rendered.c_str()), writeLen)) diff --git a/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl b/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl index 077ff136..bd6f0ebd 100644 --- a/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl +++ b/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl @@ -1606,6 +1606,24 @@ void sceeFontSetScale(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) void sceIoctl(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { + const int32_t cmd = static_cast(getRegU32(ctx, 5)); + const uint32_t argAddr = getRegU32(ctx, 6); + + // HTCI wait paths poll sceIoctl(fd, 1, &state) and expect state to move + // away from 1 once host-side I/O is no longer busy. 
+ if (cmd == 1 && argAddr != 0u) + { + uint8_t *argPtr = getMemPtr(rdram, argAddr); + if (!argPtr) + { + setReturnS32(ctx, -1); + return; + } + + const uint32_t ready = 0u; + std::memcpy(argPtr, &ready, sizeof(ready)); + } + setReturnS32(ctx, 0); } @@ -2550,9 +2568,72 @@ namespace return id; } + bool isCopyableGuestAddress(uint32_t addr) + { + if (addr >= PS2_SCRATCHPAD_BASE && addr < (PS2_SCRATCHPAD_BASE + PS2_SCRATCHPAD_SIZE)) + { + return true; + } + + if (addr < 0x20000000u) + { + return true; + } + + if (addr >= 0x20000000u && addr < 0x40000000u) + { + return true; + } + + if (addr >= 0x80000000u && addr < 0xC0000000u) + { + return true; + } + + return false; + } + + bool canCopyGuestByteRange(const uint8_t *rdram, uint32_t dstAddr, uint32_t srcAddr, uint32_t sizeBytes) + { + if (!rdram) + { + return false; + } + + if (sizeBytes == 0u) + { + return true; + } + + for (uint32_t i = 0u; i < sizeBytes; ++i) + { + const uint32_t srcByteAddr = srcAddr + i; + const uint32_t dstByteAddr = dstAddr + i; + + if (!isCopyableGuestAddress(srcByteAddr) || !isCopyableGuestAddress(dstByteAddr)) + { + return false; + } + + const uint8_t *src = getConstMemPtr(rdram, srcByteAddr); + const uint8_t *dst = getConstMemPtr(rdram, dstByteAddr); + if (!src || !dst) + { + return false; + } + } + + return true; + } + bool copyGuestByteRange(uint8_t *rdram, uint32_t dstAddr, uint32_t srcAddr, uint32_t sizeBytes) { - if (!rdram || sizeBytes == 0u) + if (!canCopyGuestByteRange(rdram, dstAddr, srcAddr, sizeBytes)) + { + return false; + } + + if (sizeBytes == 0u) { return true; } @@ -2704,6 +2785,142 @@ void sceSifGetOtherData(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) return; } + auto readGuestU32Local = [&](uint32_t addr, uint32_t &out) -> bool + { + const uint8_t *ptr = getConstMemPtr(rdram, addr); + if (!ptr) + { + out = 0u; + return false; + } + std::memcpy(&out, ptr, sizeof(out)); + return true; + }; + + auto readGuestS16Local = [&](uint32_t addr, int16_t &out) -> bool + { + 
const uint8_t *b0 = getConstMemPtr(rdram, addr + 0u); + const uint8_t *b1 = getConstMemPtr(rdram, addr + 1u); + if (!b0 || !b1) + { + out = 0; + return false; + } + const uint16_t raw = static_cast(static_cast(*b0) | + (static_cast(*b1) << 8)); + out = static_cast(raw); + return true; + }; + + constexpr uint32_t kSndTransTypeAddr = 0x01E0E1C0u; + constexpr uint32_t kSndTransBankAddr = 0x01E0E1C8u; + constexpr uint32_t kSndTransLevelAddr = 0x01E0E1B8u; + constexpr uint32_t kSndGetAdrsAddr = 0x01E212D8u; + constexpr uint32_t kSndStatusMirrorAddr = 0x01E213C0u; + constexpr uint32_t kSndSeCheckAddr = 0x01E0EF10u; + constexpr uint32_t kSndMidiCheckAddr = 0x01E0EF20u; + + static uint32_t s_sifGetOtherDataStatusLogs = 0u; + const bool isSndStatusTransfer = (size == 0x42u); + const uint32_t statusLogIndex = s_sifGetOtherDataStatusLogs++; + const bool shouldLogStatus = + isSndStatusTransfer && + (statusLogIndex < 96u || (statusLogIndex % 256u) == 0u); + + if (shouldLogStatus) + { + uint32_t transType = 0u; + uint32_t transLevel = 0u; + uint32_t transBank = 0u; + uint32_t getAdrs = 0u; + (void)readGuestU32Local(kSndTransTypeAddr, transType); + (void)readGuestU32Local(kSndTransLevelAddr, transLevel); + (void)readGuestU32Local(kSndTransBankAddr, transBank); + (void)readGuestU32Local(kSndGetAdrsAddr, getAdrs); + std::cout << "[sceSifGetOtherData] src=0x" << std::hex << srcAddr + << " dst=0x" << dstAddr + << " size=0x" << size + << " get_adrs=0x" << getAdrs + << std::dec + << " transType=" << transType + << " transLevel=" << transLevel + << " transBank=" << transBank + << std::endl; + } + + // Keep RECVX SND_STATUS checksums synchronized with EE-side transfer checks. 
+ if (srcAddr == 0x00012000u && size == 0x42u) + { + constexpr uint32_t kPrimarySeAddr = 0x01E0EF10u; + constexpr uint32_t kPrimaryMidiAddr = 0x01E0EF20u; + constexpr uint32_t kFallbackSeAddr = 0x01E1EF10u; + constexpr uint32_t kFallbackMidiAddr = 0x01E1EF20u; + + auto hasAnyNonZero = [](const uint8_t *ptr, size_t bytes) -> bool + { + if (!ptr) + { + return false; + } + for (size_t i = 0; i < bytes; ++i) + { + if (ptr[i] != 0u) + { + return true; + } + } + return false; + }; + + const uint8_t *selectedSe = getConstMemPtr(rdram, kPrimarySeAddr); + const uint8_t *selectedMidi = getConstMemPtr(rdram, kPrimaryMidiAddr); + + const bool primaryLooksLive = + hasAnyNonZero(selectedSe, 5u * sizeof(int16_t)) || + hasAnyNonZero(selectedMidi, 4u * sizeof(int16_t)); + + if ((!selectedSe || !selectedMidi) || !primaryLooksLive) + { + const uint8_t *fallbackSe = getConstMemPtr(rdram, kFallbackSeAddr); + const uint8_t *fallbackMidi = getConstMemPtr(rdram, kFallbackMidiAddr); + const bool fallbackLooksLive = + hasAnyNonZero(fallbackSe, 5u * sizeof(int16_t)) || + hasAnyNonZero(fallbackMidi, 4u * sizeof(int16_t)); + + if (fallbackLooksLive) + { + selectedSe = fallbackSe; + selectedMidi = fallbackMidi; + } + } + + if (selectedSe && selectedMidi) + { + if (uint8_t *status = getMemPtr(rdram, srcAddr)) + { + std::memcpy(status + 0x26u, selectedSe, 5u * sizeof(int16_t)); // se_sum[5] + std::memcpy(status + 0x1Eu, selectedMidi, 4u * sizeof(int16_t)); // midi_sum[4] + } + } + + if (shouldLogStatus) + { + int16_t se0 = 0; + int16_t midi0 = 0; + int16_t seChk0 = 0; + int16_t midiChk0 = 0; + (void)readGuestS16Local(srcAddr + 0x26u, se0); + (void)readGuestS16Local(srcAddr + 0x1Eu, midi0); + (void)readGuestS16Local(kSndSeCheckAddr + 0u, seChk0); + (void)readGuestS16Local(kSndMidiCheckAddr + 0u, midiChk0); + std::cout << "[sceSifGetOtherData:sndstatus] srcSe0=" << se0 + << " srcMidi0=" << midi0 + << " chkSe0=" << seChk0 + << " chkMidi0=" << midiChk0 + << std::endl; + } + } + if 
(!copyGuestByteRange(rdram, dstAddr, srcAddr, size)) { static uint32_t warnCount = 0; @@ -2727,6 +2944,38 @@ void sceSifGetOtherData(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) std::memcpy(rd + 0x18u, &size, sizeof(size)); } + if (shouldLogStatus) + { + uint32_t transBank = 0u; + (void)readGuestU32Local(kSndTransBankAddr, transBank); + int16_t dstSe = 0; + int16_t dstMidi = 0; + int16_t mirrorSe = 0; + int16_t mirrorMidi = 0; + int16_t bankSeChk = 0; + int16_t bankMidiChk = 0; + (void)readGuestS16Local(dstAddr + 0x26u, dstSe); + (void)readGuestS16Local(dstAddr + 0x1Eu, dstMidi); + (void)readGuestS16Local(kSndStatusMirrorAddr + 0x26u, mirrorSe); + (void)readGuestS16Local(kSndStatusMirrorAddr + 0x1Eu, mirrorMidi); + if (transBank < 5u) + { + (void)readGuestS16Local(kSndSeCheckAddr + (transBank * 2u), bankSeChk); + } + if (transBank < 4u) + { + (void)readGuestS16Local(kSndMidiCheckAddr + (transBank * 2u), bankMidiChk); + } + std::cout << "[sceSifGetOtherData:post] bank=" << transBank + << " dstSe=" << dstSe + << " dstMidi=" << dstMidi + << " mirrorSe=" << mirrorSe + << " mirrorMidi=" << mirrorMidi + << " bankSeChk=" << bankSeChk + << " bankMidiChk=" << bankMidiChk + << std::endl; + } + setReturnS32(ctx, 0); } @@ -2877,6 +3126,8 @@ void sceSifSetDma(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) return; } + std::array pending{}; + uint32_t pendingCount = 0u; bool ok = true; for (uint32_t i = 0; i < count; ++i) { @@ -2901,11 +3152,26 @@ void sceSifSetDma(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) ok = false; break; } - if (!copyGuestByteRange(rdram, xfer.dest, xfer.src, sizeBytes)) + if (!canCopyGuestByteRange(rdram, xfer.dest, xfer.src, sizeBytes)) { ok = false; break; } + + pending[pendingCount++] = xfer; + } + + if (ok) + { + for (uint32_t i = 0; i < pendingCount; ++i) + { + const Ps2SifDmaTransfer &xfer = pending[i]; + if (!copyGuestByteRange(rdram, xfer.dest, xfer.src, static_cast(xfer.size))) + { + ok = false; + break; + } + } } 
if (!ok) @@ -3963,12 +4229,17 @@ void vsprintf(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) uint32_t str_addr = getRegU32(ctx, 4); // $a0 uint32_t format_addr = getRegU32(ctx, 5); // $a1 uint32_t va_list_addr = getRegU32(ctx, 6); // $a2 + constexpr size_t kSafeVsprintfBytes = 256u; // Keep guest stack temporaries from being overwritten. const std::string formatOwned = readPs2CStringBounded(rdram, runtime, format_addr, 1024); int ret = -1; if (format_addr != 0) { std::string rendered = formatPs2StringWithVaList(rdram, runtime, formatOwned.c_str(), va_list_addr); + if (rendered.size() >= kSafeVsprintfBytes) + { + rendered.resize(kSafeVsprintfBytes - 1); + } if (writeGuestBytes(rdram, runtime, str_addr, reinterpret_cast(rendered.c_str()), rendered.size() + 1u)) { ret = static_cast(rendered.size()); diff --git a/ps2xRuntime/src/lib/stubs/ps2_stubs_ps2.inl b/ps2xRuntime/src/lib/stubs/ps2_stubs_ps2.inl index 9efde9e0..b7ebf030 100644 --- a/ps2xRuntime/src/lib/stubs/ps2_stubs_ps2.inl +++ b/ps2xRuntime/src/lib/stubs/ps2_stubs_ps2.inl @@ -138,3 +138,13 @@ void builtin_set_imask(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, 0); } +void InitThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + static int logCount = 0; + if (logCount < 8) + { + std::cout << "ps2_stub InitThread" << std::endl; + ++logCount; + } + setReturnS32(ctx, 1); // success +} \ No newline at end of file diff --git a/ps2xRuntime/src/lib/syscalls/helpers/ps2_syscalls_helpers_runtime.inl b/ps2xRuntime/src/lib/syscalls/helpers/ps2_syscalls_helpers_runtime.inl index 834a0a57..4e4ab488 100644 --- a/ps2xRuntime/src/lib/syscalls/helpers/ps2_syscalls_helpers_runtime.inl +++ b/ps2xRuntime/src/lib/syscalls/helpers/ps2_syscalls_helpers_runtime.inl @@ -271,18 +271,66 @@ static bool readStackU32(uint8_t *rdram, uint32_t sp, uint32_t offset, uint32_t static bool rpcInvokeFunction(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime, uint32_t funcAddr, uint32_t a0, 
uint32_t a1, uint32_t a2, uint32_t a3, uint32_t *outV0) { - if (!runtime || !funcAddr || !runtime->hasFunction(funcAddr)) + if (!runtime || !ctx || !funcAddr || !runtime->hasFunction(funcAddr)) return false; + constexpr uint32_t kRpcInvokeStackSize = 0x4000u; + constexpr uint32_t kRpcInvokeReturnSentinel = 0x00FFF000u; + constexpr uint32_t kRpcInvokeMaxSteps = 0x8000u; + R5900Context tmp = *ctx; setRegU32(&tmp, 4, a0); setRegU32(&tmp, 5, a1); setRegU32(&tmp, 6, a2); setRegU32(&tmp, 7, a3); + + thread_local uint32_t s_rpcInvokeStackBase = 0u; + thread_local uint32_t s_rpcInvokeStackTop = 0u; + if (s_rpcInvokeStackTop == 0u) + { + const uint32_t stackBase = runtime->guestMalloc(kRpcInvokeStackSize, 16u); + if (stackBase != 0u) + { + s_rpcInvokeStackBase = stackBase; + s_rpcInvokeStackTop = (stackBase + kRpcInvokeStackSize) & ~0xFu; + } + } + if (s_rpcInvokeStackTop != 0u) + { + setRegU32(&tmp, 29, s_rpcInvokeStackTop); + } + (void)s_rpcInvokeStackBase; + + setRegU32(&tmp, 31, kRpcInvokeReturnSentinel); tmp.pc = funcAddr; - PS2Runtime::RecompiledFunction func = runtime->lookupFunction(funcAddr); - func(rdram, &tmp, runtime); + uint32_t steps = 0u; + uint32_t lastPc = 0xFFFFFFFFu; + uint32_t samePcCount = 0u; + while (tmp.pc != 0u && + tmp.pc != kRpcInvokeReturnSentinel && + runtime->hasFunction(tmp.pc) && + steps < kRpcInvokeMaxSteps) + { + const uint32_t pc = tmp.pc; + if (pc == lastPc) + { + ++samePcCount; + if (samePcCount > 0x2000u) + { + break; + } + } + else + { + lastPc = pc; + samePcCount = 0u; + } + + PS2Runtime::RecompiledFunction func = runtime->lookupFunction(pc); + func(rdram, &tmp, runtime); + ++steps; + } if (outV0) { diff --git a/ps2xRuntime/src/lib/syscalls/helpers/ps2_syscalls_helpers_state.inl b/ps2xRuntime/src/lib/syscalls/helpers/ps2_syscalls_helpers_state.inl index 5df47eb8..12d4f781 100644 --- a/ps2xRuntime/src/lib/syscalls/helpers/ps2_syscalls_helpers_state.inl +++ b/ps2xRuntime/src/lib/syscalls/helpers/ps2_syscalls_helpers_state.inl @@ -201,6 
+201,8 @@ static std::unordered_map> g_threads; static int g_nextThreadId = 2; // Reserve 1 for the main thread static thread_local int g_currentThreadId = 1; static std::mutex g_thread_map_mutex; +static std::unordered_map g_hostThreads; +static std::mutex g_host_thread_mutex; static std::unordered_map> g_semas; static int g_nextSemaId = 1; @@ -216,6 +218,92 @@ static std::once_flag g_alarm_worker_once; std::atomic g_activeThreads{0}; static std::mutex g_fd_mutex; +static void registerHostThread(int tid, std::thread worker) +{ + std::thread stale; + { + std::lock_guard lock(g_host_thread_mutex); + auto it = g_hostThreads.find(tid); + if (it != g_hostThreads.end()) + { + stale = std::move(it->second); + g_hostThreads.erase(it); + } + g_hostThreads.emplace(tid, std::move(worker)); + } + + if (stale.joinable()) + { + if (stale.get_id() == std::this_thread::get_id()) + { + stale.detach(); + } + else + { + stale.join(); + } + } +} + +static void joinHostThreadById(int tid) +{ + std::thread worker; + { + std::lock_guard lock(g_host_thread_mutex); + auto it = g_hostThreads.find(tid); + if (it != g_hostThreads.end()) + { + worker = std::move(it->second); + g_hostThreads.erase(it); + } + } + + if (!worker.joinable()) + { + return; + } + + if (worker.get_id() == std::this_thread::get_id()) + { + worker.detach(); + } + else + { + worker.join(); + } +} + +static void joinAllHostThreads() +{ + std::vector workers; + { + std::lock_guard lock(g_host_thread_mutex); + workers.reserve(g_hostThreads.size()); + const std::thread::id selfId = std::this_thread::get_id(); + for (auto it = g_hostThreads.begin(); it != g_hostThreads.end();) + { + std::thread &worker = it->second; + if (worker.joinable() && worker.get_id() == selfId) + { + ++it; + continue; + } + + workers.push_back(std::move(worker)); + it = g_hostThreads.erase(it); + } + } + + for (auto &worker : workers) + { + if (!worker.joinable()) + { + continue; + } + worker.join(); + } +} + struct RpcServerState { uint32_t sid = 0; 
@@ -232,6 +320,7 @@ struct RpcClientState static std::unordered_map g_rpc_servers; static std::unordered_map g_rpc_clients; static std::mutex g_rpc_mutex; +static std::recursive_mutex g_sif_call_rpc_mutex; static bool g_rpc_initialized = false; static uint32_t g_rpc_next_id = 1; static uint32_t g_rpc_packet_index = 0; diff --git a/ps2xRuntime/src/lib/syscalls/ps2_syscalls_flags.inl b/ps2xRuntime/src/lib/syscalls/ps2_syscalls_flags.inl index 03bda44d..8df21672 100644 --- a/ps2xRuntime/src/lib/syscalls/ps2_syscalls_flags.inl +++ b/ps2xRuntime/src/lib/syscalls/ps2_syscalls_flags.inl @@ -232,8 +232,11 @@ void SignalSema(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) } int ret = KE_OK; + int beforeCount = 0; + int afterCount = 0; { std::lock_guard lock(sema->m); + beforeCount = sema->count; if (sema->count >= sema->maxCount) { ret = KE_SEMA_OVF; @@ -243,6 +246,18 @@ void SignalSema(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) sema->count++; sema->cv.notify_one(); } + afterCount = sema->count; + } + + static std::atomic s_signalSemaLogs{0}; + const uint32_t sigLog = s_signalSemaLogs.fetch_add(1, std::memory_order_relaxed); + if (sigLog < 256u) + { + std::cout << "[SignalSema] tid=" << g_currentThreadId + << " sid=" << sid + << " count=" << beforeCount << "->" << afterCount + << " ret=" << ret + << std::endl; } setReturnS32(ctx, ret); @@ -270,6 +285,18 @@ void WaitSema(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) if (sema->count == 0) { + static std::atomic s_waitSemaBlockLogs{0}; + const uint32_t blockLog = s_waitSemaBlockLogs.fetch_add(1, std::memory_order_relaxed); + if (blockLog < 256u) + { + std::cout << "[WaitSema:block] tid=" << g_currentThreadId + << " sid=" << sid + << " pc=0x" << std::hex << ctx->pc + << " ra=0x" << getRegU32(ctx, 31) + << std::dec + << std::endl; + } + if (info) { std::lock_guard tLock(info->m); @@ -315,6 +342,17 @@ void WaitSema(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { sema->count--; } + + static 
std::atomic s_waitSemaWakeLogs{0}; + const uint32_t wakeLog = s_waitSemaWakeLogs.fetch_add(1, std::memory_order_relaxed); + if (wakeLog < 256u) + { + std::cout << "[WaitSema:wake] tid=" << g_currentThreadId + << " sid=" << sid + << " ret=" << ret + << " count=" << sema->count + << std::endl; + } lock.unlock(); waitWhileSuspended(info); setReturnS32(ctx, ret); @@ -456,9 +494,22 @@ void SetEventFlag(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) return; } + uint32_t newBits = 0u; { std::lock_guard lock(info->m); info->bits |= bits; + newBits = info->bits; + } + + static std::atomic s_setEventFlagLogs{0}; + const uint32_t setLog = s_setEventFlagLogs.fetch_add(1, std::memory_order_relaxed); + if (setLog < 256u) + { + std::cout << "[SetEventFlag] tid=" << g_currentThreadId + << " eid=" << eid + << " bits=0x" << std::hex << bits + << " newBits=0x" << newBits + << std::dec << std::endl; } info->cv.notify_all(); setReturnS32(ctx, 0); @@ -551,6 +602,21 @@ void WaitEventFlag(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) if (!satisfied()) { + static std::atomic s_waitEventBlockLogs{0}; + const uint32_t evBlockLog = s_waitEventBlockLogs.fetch_add(1, std::memory_order_relaxed); + if (evBlockLog < 256u) + { + std::cout << "[WaitEventFlag:block] tid=" << g_currentThreadId + << " eid=" << eid + << " waitBits=0x" << std::hex << waitBits + << " mode=0x" << mode + << " bits=0x" << info->bits + << " pc=0x" << ctx->pc + << " ra=0x" << getRegU32(ctx, 31) + << std::dec + << std::endl; + } + if (tInfo) { std::lock_guard tLock(tInfo->m); @@ -610,6 +676,18 @@ void WaitEventFlag(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) } } + static std::atomic s_waitEventWakeLogs{0}; + const uint32_t evWakeLog = s_waitEventWakeLogs.fetch_add(1, std::memory_order_relaxed); + if (evWakeLog < 256u) + { + std::cout << "[WaitEventFlag:wake] tid=" << g_currentThreadId + << " eid=" << eid + << " ret=" << ret + << " bits=0x" << std::hex << info->bits + << std::dec + << std::endl; + 
} + lock.unlock(); waitWhileSuspended(tInfo); setReturnS32(ctx, ret); @@ -671,10 +749,14 @@ void PollEventFlag(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) *resBitsPtr = info->bits; } - if (mode & (WEF_CLEAR | WEF_CLEAR_ALL)) + if (mode & WEF_CLEAR_ALL) { info->bits = 0; } + else if (mode & WEF_CLEAR) + { + info->bits &= ~waitBits; + } setReturnS32(ctx, KE_OK); } diff --git a/ps2xRuntime/src/lib/syscalls/ps2_syscalls_interrupt.inl b/ps2xRuntime/src/lib/syscalls/ps2_syscalls_interrupt.inl index 00fe7ce4..6f482e2d 100644 --- a/ps2xRuntime/src/lib/syscalls/ps2_syscalls_interrupt.inl +++ b/ps2xRuntime/src/lib/syscalls/ps2_syscalls_interrupt.inl @@ -274,6 +274,11 @@ void EnableIntc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, KE_OK); } +void iEnableIntc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + EnableIntc(rdram, ctx, runtime); +} + void DisableIntc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { const uint32_t cause = getRegU32(ctx, 4); @@ -285,6 +290,11 @@ void DisableIntc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, KE_OK); } +void iDisableIntc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + DisableIntc(rdram, ctx, runtime); +} + void AddIntcHandler(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { IrqHandlerInfo info{}; @@ -309,6 +319,11 @@ void AddIntcHandler(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, handlerId); } +void AddIntcHandler2(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + AddIntcHandler(rdram, ctx, runtime); +} + void RemoveIntcHandler(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { const uint32_t cause = getRegU32(ctx, 4); @@ -347,6 +362,11 @@ void AddDmacHandler(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, handlerId); } +void AddDmacHandler2(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + AddDmacHandler(rdram, ctx, runtime); +} + void 
RemoveDmacHandler(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { const uint32_t cause = getRegU32(ctx, 4); @@ -426,6 +446,11 @@ void EnableDmac(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, KE_OK); } +void iEnableDmac(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + EnableDmac(rdram, ctx, runtime); +} + void DisableDmac(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { const uint32_t cause = getRegU32(ctx, 4); @@ -436,3 +461,8 @@ void DisableDmac(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) } setReturnS32(ctx, KE_OK); } + +void iDisableDmac(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + DisableDmac(rdram, ctx, runtime); +} diff --git a/ps2xRuntime/src/lib/syscalls/ps2_syscalls_rpc.inl b/ps2xRuntime/src/lib/syscalls/ps2_syscalls_rpc.inl index d190b0b5..6524667e 100644 --- a/ps2xRuntime/src/lib/syscalls/ps2_syscalls_rpc.inl +++ b/ps2xRuntime/src/lib/syscalls/ps2_syscalls_rpc.inl @@ -156,6 +156,8 @@ void SifBindRpc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) void SifCallRpc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { + std::lock_guard rpcCallLock(g_sif_call_rpc_mutex); + uint32_t clientPtr = getRegU32(ctx, 4); uint32_t rpcNum = getRegU32(ctx, 5); uint32_t mode = getRegU32(ctx, 6); @@ -198,7 +200,7 @@ void SifCallRpc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) auto looksLikeSize = [&](uint32_t v) -> bool { - return v <= 0x100000u; + return v <= 0x2000000u; }; auto looksLikeFunc = [&](uint32_t v) -> bool @@ -211,10 +213,42 @@ void SifCallRpc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) return looksLikeSize(sendSz) && looksLikeGuestPtr(rbuf) && looksLikeSize(rsz) && looksLikeFunc(endFn); }; + const bool regPackPlausible = plausiblePack(sendSizeReg, recvBufReg, recvSizeReg, endFuncReg); + const bool stackPackPlausible = plausiblePack(sendSizeStk, recvBufStk, recvSizeStk, endFuncStk); + + uint32_t boundSidHint = 0u; + { + std::lock_guard 
lock(g_rpc_mutex); + auto it = g_rpc_clients.find(clientPtr); + if (it != g_rpc_clients.end()) + { + boundSidHint = it->second.sid; + } + } + + auto looksLikeDtxCreatePack = [&](uint32_t sendSz, uint32_t rbuf, uint32_t rsz) -> bool + { + return rbuf != 0u && rsz >= 4u && rsz <= 0x40u && + sendSz >= 12u && sendSz <= 0x1000u; + }; + + const bool isDtxCreate34Call = (boundSidHint == kDtxRpcSid) && (rpcNum == 0x422u); + const bool forceStackForDtxCreate34 = + isDtxCreate34Call && + stackPackPlausible && + looksLikeDtxCreatePack(sendSizeStk, recvBufStk, recvSizeStk) && + !looksLikeDtxCreatePack(sendSizeReg, recvBufReg, recvSizeReg); + bool useRegConvention = true; - if (!plausiblePack(sendSizeReg, recvBufReg, recvSizeReg, endFuncReg)) + if (forceStackForDtxCreate34) { - if (plausiblePack(sendSizeStk, recvBufStk, recvSizeStk, endFuncStk)) + useRegConvention = false; + } + else if (!regPackPlausible && stackPackPlausible) + { + const bool regHasValidCallback = (endFuncReg != 0u) && looksLikeFunc(endFuncReg); + const bool stackHasValidCallback = (endFuncStk != 0u) && looksLikeFunc(endFuncStk); + if (!(regHasValidCallback && !stackHasValidCallback)) { useRegConvention = false; } @@ -226,6 +260,22 @@ void SifCallRpc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) endFunc = useRegConvention ? endFuncReg : endFuncStk; endParam = useRegConvention ? endParamReg : endParamStk; + const bool isDtxLikeRpc = (boundSidHint == kDtxRpcSid) || ((rpcNum & 0xFF00u) == 0x0400u); + static uint32_t dtxAbiLogCount = 0u; + if (isDtxLikeRpc && dtxAbiLogCount < 96u) + { + std::cout << "[SifCallRpc:ABI] client=0x" << std::hex << clientPtr + << " rpc=0x" << rpcNum + << " sidHint=0x" << boundSidHint + << " useReg=" << (useRegConvention ? 
1 : 0) + << " reg=(" << sendSizeReg << "," << recvBufReg << "," << recvSizeReg << "," << endFuncReg << "," << endParamReg << ")" + << " stk=(" << sendSizeStk << "," << recvBufStk << "," << recvSizeStk << "," << endFuncStk << "," << endParamStk << ")" + << " plausible=(" << (regPackPlausible ? 1 : 0) << "," << (stackPackPlausible ? 1 : 0) << ")" + << " force34=" << (forceStackForDtxCreate34 ? 1 : 0) + << std::dec << std::endl; + ++dtxAbiLogCount; + } + t_SifRpcClientData *client = reinterpret_cast(getMemPtr(rdram, clientPtr)); if (!client) @@ -425,6 +475,16 @@ void SifCallRpc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { rpcZeroRdram(rdram, recvBuf + sizeof(uint32_t), recvSize - sizeof(uint32_t)); } + static uint32_t dtxCreateLogCount = 0; + if (dtxCreateLogCount < 64u) + { + std::cout << "[SifCallRpc:DTX_CREATE] dtxId=0x" << std::hex << dtxId + << " remote=0x" << remoteHandle + << " recvBuf=0x" << recvBuf + << " recvSize=0x" << recvSize + << std::dec << std::endl; + ++dtxCreateLogCount; + } handled = true; resultPtr = recvBuf; } @@ -818,24 +878,22 @@ void SifCallRpc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) if (sid == 1u && (rpcNum == 0x12u || rpcNum == 0x13u)) { - uint32_t responseWord = 1u; - if (rpcNum == 0x13u) - { - static uint32_t sdrStateBlobAddr = 0u; - if (sdrStateBlobAddr == 0u) - { - sdrStateBlobAddr = rpcAllocPacketAddr(rdram); - if (sdrStateBlobAddr == 0u) - { - sdrStateBlobAddr = kRpcPacketPoolBase; - } - } - - rpcZeroRdram(rdram, sdrStateBlobAddr, 64u); - (void)writeRpcU32(sdrStateBlobAddr + 0u, 1u); - responseWord = sdrStateBlobAddr; - } - + // RECVX snddrv expects: + // cmd 0x12 -> SND_STATUS* (get_adrs) + // cmd 0x13 -> int[16]* (iop_data_adr_top) + constexpr uint32_t kSdrStatusAddr = 0x00012000u; + constexpr uint32_t kSdrAddrTableAddr = 0x00012100u; + constexpr uint32_t kSdrHdBaseAddr = 0x00014000u; + constexpr uint32_t kSdrSqBaseAddr = 0x00018000u; + constexpr uint32_t kSdrDataBaseAddr = 0x00030000u; + + 
rpcZeroRdram(rdram, kSdrStatusAddr, 0x42u); + rpcZeroRdram(rdram, kSdrAddrTableAddr, 16u * sizeof(uint32_t)); + (void)writeRpcU32(kSdrAddrTableAddr + (0u * sizeof(uint32_t)), kSdrHdBaseAddr); + (void)writeRpcU32(kSdrAddrTableAddr + (1u * sizeof(uint32_t)), kSdrSqBaseAddr); + (void)writeRpcU32(kSdrAddrTableAddr + (2u * sizeof(uint32_t)), kSdrDataBaseAddr); + + const uint32_t responseWord = (rpcNum == 0x12u) ? kSdrStatusAddr : kSdrAddrTableAddr; if (recvBuf && recvSize >= sizeof(uint32_t)) { (void)writeRpcU32(recvBuf, responseWord); diff --git a/ps2xRuntime/src/lib/syscalls/ps2_syscalls_system.inl b/ps2xRuntime/src/lib/syscalls/ps2_syscalls_system.inl index d9c9ad3f..bba0b959 100644 --- a/ps2xRuntime/src/lib/syscalls/ps2_syscalls_system.inl +++ b/ps2xRuntime/src/lib/syscalls/ps2_syscalls_system.inl @@ -9,6 +9,11 @@ void GsSetCrt(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) << ", frameMode=" << frameMode << std::endl; } +void SetGsCrt(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + GsSetCrt(rdram, ctx, runtime); +} + void GsGetIMR(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { uint64_t imr = 0; @@ -22,6 +27,11 @@ void GsGetIMR(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnU64(ctx, imr); // Return in $v0/$v1 } +void iGsGetIMR(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + GsGetIMR(rdram, ctx, runtime); +} + void GsPutIMR(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { uint64_t newImr = getRegU32(ctx, 4) | ((uint64_t)getRegU32(ctx, 5) << 32); // $a0 = lower 32 bits, $a1 = upper 32 bits @@ -35,6 +45,11 @@ void GsPutIMR(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnU64(ctx, oldImr); } +void iGsPutIMR(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + GsPutIMR(rdram, ctx, runtime); +} + void GsSetVideoMode(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { int mode = getRegU32(ctx, 4); // $a0 - video mode (various flags) @@ -249,14 +264,54 @@ void TODO(uint8_t 
*rdram, R5900Context *ctx, PS2Runtime *runtime, uint32_t encod setReturnS32(ctx, 0); } -// 0x3C SetupThread: returns stack pointer (stack + stack_size) -// args: $a0 = stack base, $a1 = stack size, $a2 = gp, $a3 = entry point +// 0x3C SetupThread +// args: $a0 = gp, $a1 = stack, $a2 = stack_size, $a3 = args, $t0 = root_func void SetupThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - uint32_t stackBase = getRegU32(ctx, 4); - uint32_t stackSize = getRegU32(ctx, 5); - uint32_t sp = stackBase + stackSize; - setReturnS32(ctx, sp); + const uint32_t gp = getRegU32(ctx, 4); + const uint32_t stack = getRegU32(ctx, 5); + const int32_t stackSizeSigned = static_cast(getRegU32(ctx, 6)); + const uint32_t currentSp = getRegU32(ctx, 29); + + if (gp != 0u) + { + setRegU32(ctx, 28, gp); + } + + uint32_t sp = currentSp; + if (stack == 0xFFFFFFFFu) + { + if (stackSizeSigned > 0) + { + const uint32_t requestedSize = static_cast(stackSizeSigned); + if (requestedSize < PS2_RAM_SIZE) + { + sp = PS2_RAM_SIZE - requestedSize; + } + else + { + sp = PS2_RAM_SIZE; + } + } + else + { + sp = PS2_RAM_SIZE; + } + } + else if (stack != 0u) + { + if (stackSizeSigned > 0) + { + sp = stack + static_cast(stackSizeSigned); + } + else + { + sp = stack; + } + } + + sp &= ~0xFu; + setReturnU32(ctx, sp); } // 0x3D SetupHeap: returns heap base/start pointer @@ -293,6 +348,20 @@ void EndOfHeap(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnU32(ctx, getRegU32(ctx, 4)); } +void GetMemorySize(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + (void)rdram; + (void)runtime; + setReturnU32(ctx, PS2_RAM_SIZE); +} + +void Deci2Call(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + (void)rdram; + (void)runtime; + setReturnS32(ctx, KE_OK); +} + // 0x5A QueryBootMode (stub): return 0 for now void QueryBootMode(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { diff --git a/ps2xRuntime/src/lib/syscalls/ps2_syscalls_thread.inl 
b/ps2xRuntime/src/lib/syscalls/ps2_syscalls_thread.inl index 6efe05f8..78b08f3c 100644 --- a/ps2xRuntime/src/lib/syscalls/ps2_syscalls_thread.inl +++ b/ps2xRuntime/src/lib/syscalls/ps2_syscalls_thread.inl @@ -48,10 +48,15 @@ void FlushCache(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, KE_OK); } +void iFlushCache(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + FlushCache(rdram, ctx, runtime); +} + void ResetEE(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - std::cerr << "Syscall: ResetEE - requesting runtime stop" << std::endl; - runtime->requestStop(); + std::cerr << "Syscall: ResetEE - requesting runtime stop" << std::endl; + // runtime->requestStop(); setReturnS32(ctx, KE_OK); } @@ -60,6 +65,12 @@ void SetMemoryMode(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, KE_OK); } +void InitThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + // This is a common ps2sdk helper that some games link against. 
+ setReturnS32(ctx, 1); +} + void CreateThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { uint32_t paramAddr = getRegU32(ctx, 4); // $a0 points to ThreadParam @@ -251,6 +262,13 @@ void StartThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, KE_ERROR); return; } + if (runtime->isStopRequested()) + { + setReturnS32(ctx, KE_ERROR); + return; + } + + joinHostThreadById(tid); const uint32_t callerSp = getRegU32(ctx, 29); const uint32_t callerGp = getRegU32(ctx, 28); @@ -296,7 +314,8 @@ void StartThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) g_activeThreads.fetch_add(1, std::memory_order_relaxed); try { - std::thread worker([=]() mutable { + std::thread worker([=]() mutable + { { std::string name = "PS2Thread_" + std::to_string(tid); ThreadNaming::SetCurrentThreadName(name); @@ -342,10 +361,12 @@ void StartThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) uint32_t lastPc = 0xFFFFFFFFu; uint32_t samePcCount = 0; constexpr uint32_t kSamePcYieldMask = 0x3FFFu; - constexpr uint32_t kSamePcWarnInterval = 0x400000u; + constexpr uint32_t kSamePcWarnInterval = 0x20000u; + uint64_t stepCount = 0u; while (runtime && !runtime->isStopRequested()) { + ++stepCount; if (info->terminated.load(std::memory_order_relaxed)) { throw ThreadExitException(); @@ -359,6 +380,16 @@ void StartThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) break; } + if ((stepCount & 0x1FFFFFu) == 0u) + { + std::cout << "[StartThread] id=" << tid + << " heartbeat pc=0x" << std::hex << pc + << " ra=0x" << GPR_U32(threadCtx, 31) + << " sp=0x" << GPR_U32(threadCtx, 29) + << " gp=0x" << GPR_U32(threadCtx, 28) + << std::dec << std::endl; + } + if (pc == lastPc) { ++samePcCount; @@ -380,6 +411,33 @@ void StartThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) lastPc = pc; } + thread_local uint32_t s_adxProbeLogs = 0u; + if (s_adxProbeLogs < 256u) + { + const uint32_t raProbe = GPR_U32(threadCtx, 31); + const bool 
probeAdxSetCmd = (pc == 0x2F22E0u) && + ((raProbe < 0x00100000u) || (raProbe == 0x2F45B0u)); + const bool probeAdxUnlock = (pc == 0x2F45B0u) && + (raProbe < 0x00100000u); + const bool probeLowPc = (pc < 0x00100000u); + if (probeAdxSetCmd || probeAdxUnlock || probeLowPc) + { + auto flags = std::cerr.flags(); + std::cerr << "[StartThread:adx-probe] tid=" << tid + << " pc=0x" << std::hex << pc + << " ra=0x" << raProbe + << " sp=0x" << GPR_U32(threadCtx, 29) + << " gp=0x" << GPR_U32(threadCtx, 28) + << " a0=0x" << GPR_U32(threadCtx, 4) + << " a1=0x" << GPR_U32(threadCtx, 5) + << " a2=0x" << GPR_U32(threadCtx, 6) + << " a3=0x" << GPR_U32(threadCtx, 7) + << std::dec << std::endl; + std::cerr.flags(flags); + ++s_adxProbeLogs; + } + } + PS2Runtime::RecompiledFunction step = runtime->lookupFunction(pc); if (!step) { @@ -446,9 +504,8 @@ void StartThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) // Notify anybody waiting for termination (like TerminateThread) info->cv.notify_all(); - g_activeThreads.fetch_sub(1, std::memory_order_relaxed); - }); - worker.detach(); + g_activeThreads.fetch_sub(1, std::memory_order_relaxed); }); + registerHostThread(tid, std::move(worker)); } catch (const std::exception &e) { @@ -549,9 +606,8 @@ void TerminateThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { // Block until the target thread actually finishes unwinding and becomes dormant std::unique_lock lock(info->m); - info->cv.wait(lock, [&]() { - return !info->started && info->status == THS_DORMANT; - }); + info->cv.wait(lock, [&]() + { return !info->started && info->status == THS_DORMANT; }); } setReturnS32(ctx, KE_OK); @@ -684,6 +740,11 @@ void ReferThreadStatus(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, KE_OK); } +void iReferThreadStatus(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + ReferThreadStatus(rdram, ctx, runtime); +} + void SleepThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { auto info = 
ensureCurrentThreadInfo(ctx); @@ -708,6 +769,16 @@ void SleepThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) } else { + static std::atomic s_sleepBlockLogs{0}; + const uint32_t sleepBlockLog = s_sleepBlockLogs.fetch_add(1, std::memory_order_relaxed); + if (sleepBlockLog < 256u) + { + std::cout << "[SleepThread:block] tid=" << g_currentThreadId + << " pc=0x" << std::hex << ctx->pc + << " ra=0x" << getRegU32(ctx, 31) + << std::dec << std::endl; + } + info->status = THS_WAIT; info->waitType = TSW_SLEEP; info->waitId = 0; @@ -738,6 +809,16 @@ void SleepThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) } } + static std::atomic s_sleepWakeLogs{0}; + const uint32_t sleepWakeLog = s_sleepWakeLogs.fetch_add(1, std::memory_order_relaxed); + if (sleepWakeLog < 256u) + { + std::cout << "[SleepThread:wake] tid=" << g_currentThreadId + << " ret=" << ret + << " wakeupCount=" << info->wakeupCount + << std::endl; + } + lock.unlock(); waitWhileSuspended(info); setReturnS32(ctx, ret); @@ -764,6 +845,8 @@ void WakeupThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) return; } + int newWakeupCount = 0; + int statusAfter = THS_DORMANT; { std::lock_guard lock(info->m); if (info->status == THS_DORMANT) @@ -790,6 +873,19 @@ void WakeupThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { info->wakeupCount++; } + newWakeupCount = info->wakeupCount; + statusAfter = info->status; + } + + static std::atomic s_wakeupLogs{0}; + const uint32_t wakeupLog = s_wakeupLogs.fetch_add(1, std::memory_order_relaxed); + if (wakeupLog < 256u) + { + std::cout << "[WakeupThread] tid=" << g_currentThreadId + << " target=" << tid + << " status=" << statusAfter + << " wakeupCount=" << newWakeupCount + << std::endl; } setReturnS32(ctx, KE_OK); } @@ -885,6 +981,11 @@ void ChangeThreadPriority(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime setReturnS32(ctx, KE_OK); } +void iChangeThreadPriority(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + 
ChangeThreadPriority(rdram, ctx, runtime); +} + void RotateThreadReadyQueue(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { static int logCount = 0; @@ -914,6 +1015,11 @@ void RotateThreadReadyQueue(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runti setReturnS32(ctx, KE_OK); } +void iRotateThreadReadyQueue(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + RotateThreadReadyQueue(rdram, ctx, runtime); +} + void ReleaseWaitThread(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { int tid = static_cast(getRegU32(ctx, 4)); diff --git a/ps2xTest/CMakeLists.txt b/ps2xTest/CMakeLists.txt index 2cd08bbb..fb897849 100644 --- a/ps2xTest/CMakeLists.txt +++ b/ps2xTest/CMakeLists.txt @@ -11,7 +11,14 @@ add_executable(ps2x_tests src/r5900_decoder_tests.cpp src/elf_analyzer_tests.cpp src/ps2_runtime_io_tests.cpp + src/ps2_runtime_kernel_tests.cpp + src/ps2_runtime_interrupt_tests.cpp + src/ps2_memory_tests.cpp + src/ps2_gs_tests.cpp + src/ps2_sif_rpc_tests.cpp + src/ps2_sif_dma_tests.cpp src/ps2_recompiler_tests.cpp + src/ps2_runtime_expansion_tests.cpp ) option(PRINT_GENERATED_CODE "Print generated code in tests" OFF) diff --git a/ps2xTest/src/code_generator_tests.cpp b/ps2xTest/src/code_generator_tests.cpp index ee19eb2a..f2e62f9f 100644 --- a/ps2xTest/src/code_generator_tests.cpp +++ b/ps2xTest/src/code_generator_tests.cpp @@ -112,6 +112,46 @@ void register_code_generator_tests() { MiniTest::Case("CodeGenerator", [](TestCase &tc) { + tc.Run("R5900 MULT writes rd when rd is non-zero", [](TestCase &t) { + CodeGenerator gen({}, {}); + + Instruction mult{}; + mult.opcode = OPCODE_SPECIAL; + mult.function = SPECIAL_MULT; + mult.rs = 4; + mult.rt = 5; + mult.rd = 3; + + std::string generated = gen.translateInstruction(mult); + printGeneratedCode("R5900 MULT writes rd when rd is non-zero", generated); + + t.IsTrue(generated.find("SET_GPR_S32(ctx, 3, (int32_t)result);") != std::string::npos, + "MULT should write low product to rd on R5900"); + + mult.rd = 
0; + generated = gen.translateInstruction(mult); + t.IsTrue(generated.find("SET_GPR_S32(") == std::string::npos, + "MULT should not write rd when rd is zero"); + }); + + tc.Run("R5900 MMI MULT1 writes rd when rd is non-zero", [](TestCase &t) { + CodeGenerator gen({}, {}); + + Instruction mult1{}; + mult1.opcode = OPCODE_MMI; + mult1.isMMI = true; + mult1.function = MMI_MULT1; + mult1.rs = 8; + mult1.rt = 9; + mult1.rd = 10; + + std::string generated = gen.translateInstruction(mult1); + printGeneratedCode("R5900 MMI MULT1 writes rd when rd is non-zero", generated); + + t.IsTrue(generated.find("SET_GPR_S32(ctx, 10, (int32_t)result);") != std::string::npos, + "MULT1 should write low product to rd on R5900"); + }); + tc.Run("emits labels and gotos for internal branches", [](TestCase &t) { Function func; func.name = "test_func"; @@ -393,6 +433,158 @@ void register_code_generator_tests() t.IsTrue(ctc2Code.find("Unimplemented CTC2 VU CReg") == std::string::npos, "CTC2 should not hit unimplemented CReg path"); }); + tc.Run("scalar logical immediates emit low64 operations", [](TestCase &t) { + CodeGenerator gen({}, {}); + + Instruction andi{}; + andi.opcode = OPCODE_ANDI; + andi.rs = 4; + andi.rt = 5; + andi.immediate = 0xABCD; + + std::string andiCode = gen.translateInstruction(andi); + t.IsTrue(andiCode.find("SET_GPR_U64(ctx, 5, GPR_U64(ctx, 4) & (uint64_t)(uint16_t)43981);") != std::string::npos, + "ANDI should use low64 scalar emission"); + t.IsTrue(andiCode.find("SET_GPR_VEC") == std::string::npos, + "ANDI should not use vector emission"); + + Instruction ori{}; + ori.opcode = OPCODE_ORI; + ori.rs = 6; + ori.rt = 7; + ori.immediate = 0x1234; + + std::string oriCode = gen.translateInstruction(ori); + t.IsTrue(oriCode.find("SET_GPR_U64(ctx, 7, GPR_U64(ctx, 6) | (uint64_t)(uint16_t)4660);") != std::string::npos, + "ORI should use low64 scalar emission"); + t.IsTrue(oriCode.find("SET_GPR_VEC") == std::string::npos, + "ORI should not use vector emission"); + + Instruction 
xori{}; + xori.opcode = OPCODE_XORI; + xori.rs = 8; + xori.rt = 9; + xori.immediate = 0x00FF; + + std::string xoriCode = gen.translateInstruction(xori); + t.IsTrue(xoriCode.find("SET_GPR_U64(ctx, 9, GPR_U64(ctx, 8) ^ (uint64_t)(uint16_t)255);") != std::string::npos, + "XORI should use low64 scalar emission"); + t.IsTrue(xoriCode.find("SET_GPR_VEC") == std::string::npos, + "XORI should not use vector emission"); + }); + + tc.Run("scalar logical register ops emit low64 operations", [](TestCase &t) { + CodeGenerator gen({}, {}); + + Instruction andInst{}; + andInst.opcode = OPCODE_SPECIAL; + andInst.function = SPECIAL_AND; + andInst.rs = 2; + andInst.rt = 3; + andInst.rd = 1; + + std::string andCode = gen.translateInstruction(andInst); + t.IsTrue(andCode.find("SET_GPR_U64(ctx, 1, GPR_U64(ctx, 2) & GPR_U64(ctx, 3));") != std::string::npos, + "AND should use low64 scalar emission"); + + Instruction orInst{}; + orInst.opcode = OPCODE_SPECIAL; + orInst.function = SPECIAL_OR; + orInst.rs = 4; + orInst.rt = 5; + orInst.rd = 6; + + std::string orCode = gen.translateInstruction(orInst); + t.IsTrue(orCode.find("SET_GPR_U64(ctx, 6, GPR_U64(ctx, 4) | GPR_U64(ctx, 5));") != std::string::npos, + "OR should use low64 scalar emission"); + + Instruction xorInst{}; + xorInst.opcode = OPCODE_SPECIAL; + xorInst.function = SPECIAL_XOR; + xorInst.rs = 7; + xorInst.rt = 8; + xorInst.rd = 9; + + std::string xorCode = gen.translateInstruction(xorInst); + t.IsTrue(xorCode.find("SET_GPR_U64(ctx, 9, GPR_U64(ctx, 7) ^ GPR_U64(ctx, 8));") != std::string::npos, + "XOR should use low64 scalar emission"); + + Instruction norInst{}; + norInst.opcode = OPCODE_SPECIAL; + norInst.function = SPECIAL_NOR; + norInst.rs = 10; + norInst.rt = 11; + norInst.rd = 12; + + std::string norCode = gen.translateInstruction(norInst); + t.IsTrue(norCode.find("SET_GPR_U64(ctx, 12, ~(GPR_U64(ctx, 10) | GPR_U64(ctx, 11)));") != std::string::npos, + "NOR should use low64 scalar emission"); + 
t.IsTrue(norCode.find("SET_GPR_VEC") == std::string::npos, + "SPECIAL logical ops should not use vector emission"); + }); + + tc.Run("SC requires matching LL reservation address", [](TestCase &t) { + CodeGenerator gen({}, {}); + + Instruction sc{}; + sc.opcode = OPCODE_SC; + sc.rs = 9; + sc.rt = 10; + sc.simmediate = static_cast(static_cast(4)); + + std::string out = gen.translateInstruction(sc); + t.IsTrue(out.find("ctx->llbit && ctx->lladdr == addr") != std::string::npos, + "SC must require both llbit and matching lladdr"); + t.IsTrue(out.find("ctx->llbit = 0; ctx->lladdr = 0;") != std::string::npos, + "SC must clear reservation state after attempting the store"); + }); + + tc.Run("QFSRV translation uses runtime helper macro", [](TestCase &t) { + CodeGenerator gen({}, {}); + + Instruction qfsrv{}; + qfsrv.isMMI = true; + qfsrv.opcode = OPCODE_MMI; + qfsrv.function = MMI_MMI1; + qfsrv.sa = MMI1_QFSRV; + qfsrv.rd = 3; + qfsrv.rs = 4; + qfsrv.rt = 5; + + std::string out = gen.translateInstruction(qfsrv); + t.IsTrue(out.find("PS2_QFSRV(GPR_VEC(ctx, 4), GPR_VEC(ctx, 5), ctx->sa & 0x7F)") != std::string::npos, + "QFSRV should map to PS2_QFSRV with rs/rt ordering"); + }); + + tc.Run("PCPYLD and PEXEW use runtime helper macros", [](TestCase &t) { + CodeGenerator gen({}, {}); + + Instruction pcpyld{}; + pcpyld.isMMI = true; + pcpyld.opcode = OPCODE_MMI; + pcpyld.function = MMI_MMI2; + pcpyld.sa = MMI2_PCPYLD; + pcpyld.rd = 6; + pcpyld.rs = 7; + pcpyld.rt = 8; + + std::string pcpyldOut = gen.translateInstruction(pcpyld); + t.IsTrue(pcpyldOut.find("PS2_PCPYLD(GPR_VEC(ctx, 7), GPR_VEC(ctx, 8))") != std::string::npos, + "PCPYLD should use PS2_PCPYLD helper"); + + Instruction pexew{}; + pexew.isMMI = true; + pexew.opcode = OPCODE_MMI; + pexew.function = MMI_MMI2; + pexew.sa = MMI2_PEXEW; + pexew.rd = 9; + pexew.rs = 10; + + std::string pexewOut = gen.translateInstruction(pexew); + t.IsTrue(pexewOut.find("PS2_PEXEW(GPR_VEC(ctx, 10))") != std::string::npos, + "PEXEW should use 
PS2_PEXEW helper"); + }); + tc.Run("VU0 macro mappings cover all S1/S2 enums", [](TestCase &t) { const std::vector candidates = { "ps2xRecomp/include/ps2recomp/instructions.h", @@ -737,6 +929,82 @@ void register_code_generator_tests() "switch should include other in-function labels"); }); + tc.Run("configured jump table addresses drive JR dispatch targets", [](TestCase &t) { + Function func; + func.name = "jr_configured_jump_table"; + func.start = 0x1600; + func.end = 0x1640; + func.isRecompiled = true; + func.isStub = false; + + constexpr uint32_t tableAddress = 0x00200000u; + + Instruction lui{}; + lui.address = 0x1600; + lui.opcode = OPCODE_LUI; + lui.rt = 9; + lui.immediate = static_cast((tableAddress >> 16) & 0xFFFFu); + + Instruction addiu{}; + addiu.address = 0x1604; + addiu.opcode = OPCODE_ADDIU; + addiu.rs = 9; + addiu.rt = 9; + addiu.immediate = static_cast(tableAddress & 0xFFFFu); + addiu.simmediate = addiu.immediate; + + Instruction sll{}; + sll.address = 0x1608; + sll.opcode = OPCODE_SPECIAL; + sll.function = SPECIAL_SLL; + sll.rd = 8; + sll.rt = 4; + sll.sa = 2; + + Instruction addu{}; + addu.address = 0x160C; + addu.opcode = OPCODE_SPECIAL; + addu.function = SPECIAL_ADDU; + addu.rs = 9; + addu.rt = 8; + addu.rd = 9; + + Instruction lw{}; + lw.address = 0x1610; + lw.opcode = OPCODE_LW; + lw.rs = 9; + lw.rt = 10; + lw.immediate = 0; + lw.simmediate = 0; + + Instruction jr = makeJr(0x1614, 10); + Instruction jrDelay = makeNop(0x1618); + Instruction target0 = makeNop(0x1620); + Instruction target1 = makeNop(0x1630); + + JumpTable configured{}; + configured.address = tableAddress; + configured.entries.push_back({0u, 0x1620u}); + configured.entries.push_back({1u, 0x1630u}); + + CodeGenerator gen({}, {}); + gen.setConfiguredJumpTables({configured}); + std::string generated = gen.generateFunction( + func, + {lui, addiu, sll, addu, lw, jr, jrDelay, target0, target1}, + false); + printGeneratedCode("configured jump table addresses drive JR dispatch targets", 
generated); + + t.IsTrue(generated.find("switch (jumpTarget)") != std::string::npos, + "JR should emit a switch"); + t.IsTrue(generated.find("case 0x1620u: goto label_1620;") != std::string::npos, + "configured table target 0x1620 should be emitted"); + t.IsTrue(generated.find("case 0x1630u: goto label_1630;") != std::string::npos, + "configured table target 0x1630 should be emitted"); + t.IsTrue(generated.find("case 0x1600u: goto label_1600;") == std::string::npos, + "configured table should avoid broad JR fallback labels"); + }); + tc.Run("JALR includes switch and fallback/guard pair", [](TestCase &t) { Function func; func.name = "jalr_switch_and_fallback"; @@ -767,6 +1035,71 @@ void register_code_generator_tests() "JALR should retain non-fallthrough guard"); }); + tc.Run("JALR fallback should not expose epilogue tail-jump labels", [](TestCase &t) { + Function func; + func.name = "jalr_epilogue_guard"; + func.start = 0x2000; + func.end = 0x2030; + func.isRecompiled = true; + func.isStub = false; + + Instruction prolog{}; + prolog.address = 0x2000; + prolog.opcode = OPCODE_ADDIU; + prolog.rs = 29; + prolog.rt = 29; + prolog.simmediate = static_cast(static_cast(-0x20)); + prolog.raw = 0; + + Instruction saveRa{}; + saveRa.address = 0x2004; + saveRa.opcode = OPCODE_SD; + saveRa.rs = 29; + saveRa.rt = 31; + saveRa.simmediate = 0x10; + saveRa.raw = 0; + + // Dynamic callback entry point. + Instruction jalr = makeJalr(0x2008, 2, 31); + Instruction jalrDelay = makeNop(0x200C); + + Instruction restoreRa{}; + restoreRa.address = 0x2010; + restoreRa.opcode = OPCODE_LD; + restoreRa.rs = 29; + restoreRa.rt = 31; + restoreRa.simmediate = 0x10; + restoreRa.raw = 0; + + // Tail jump sequence that must not be reachable from jalr fallback dispatch. 
+ Instruction tailJump{}; + tailJump.address = 0x2014; + tailJump.opcode = OPCODE_J; + tailJump.target = (0x3000u >> 2) & 0x3FFFFFFu; + tailJump.hasDelaySlot = true; + tailJump.raw = 0; + + Instruction tailDelay{}; + tailDelay.address = 0x2018; + tailDelay.opcode = OPCODE_ADDIU; + tailDelay.rs = 29; + tailDelay.rt = 29; + tailDelay.simmediate = 0x20; + tailDelay.raw = 0; + + CodeGenerator gen({}, {}); + std::string generated = gen.generateFunction( + func, + {prolog, saveRa, jalr, jalrDelay, restoreRa, tailJump, tailDelay}, + false); + printGeneratedCode("JALR fallback should not expose epilogue tail-jump labels", generated); + + t.IsTrue(generated.find("case 0x2014u: goto label_2014;") == std::string::npos, + "jalr fallback should not dispatch directly to epilogue tail-jump block"); + t.IsTrue(generated.find("case 0x2018u: goto label_2018;") == std::string::npos, + "jalr fallback should not dispatch directly to tail-jump delay slot"); + }); + tc.Run("resolveStubTarget allows leading underscore alias", [](TestCase &t) { t.Equals(PS2Recompiler::resolveStubTarget("_rand"), StubTarget::Stub, "_rand should resolve via rand stub alias"); diff --git a/ps2xTest/src/elf_analyzer_tests.cpp b/ps2xTest/src/elf_analyzer_tests.cpp index 38558fcf..7d2ddd9e 100644 --- a/ps2xTest/src/elf_analyzer_tests.cpp +++ b/ps2xTest/src/elf_analyzer_tests.cpp @@ -4,6 +4,7 @@ #include "ps2recomp/types.h" #include +#include #include using namespace ps2recomp; @@ -33,6 +34,16 @@ void register_elf_analyzer_tests() "_printf should be classified as library"); t.IsTrue(analyzer.isLibrarySymbolNameForHeuristics("sceCdRead"), "sce-prefixed PS2 API should be classified as library"); + t.IsTrue(analyzer.isLibrarySymbolNameForHeuristics("SetSyscall"), + "SetSyscall kernel wrapper should be classified as library/runtime"); + t.IsTrue(analyzer.isLibrarySymbolNameForHeuristics("SetTLBEntry"), + "SetTLBEntry kernel wrapper should be classified as library/runtime"); + 
t.IsTrue(analyzer.isLibrarySymbolNameForHeuristics("InitTLB"), + "InitTLB kernel wrapper should be classified as library/runtime"); + t.IsTrue(analyzer.isLibrarySymbolNameForHeuristics("AddIntcHandler2"), + "AddIntcHandler2 kernel wrapper should be classified as library/runtime"); + t.IsTrue(analyzer.isLibrarySymbolNameForHeuristics("SetGsCrt"), + "SetGsCrt kernel wrapper should be classified as library/runtime"); t.IsFalse(analyzer.isLibrarySymbolNameForHeuristics("bhEne13_Brain"), "named game function should not be classified as library"); @@ -75,6 +86,21 @@ void register_elf_analyzer_tests() t.IsFalse(ElfAnalyzer::isSystemSymbolNameForHeuristics("sub_00100C00"), "unreliable names should not be considered system by this classifier"); }); + tc.Run("system skip keeps forced entry names recompiled", [](TestCase &t) + { + std::unordered_set forcedNames{"_start", "_init"}; + t.IsFalse(ElfAnalyzer::shouldSkipSystemSymbolForHeuristics("_start", forcedNames), + "forced entry name _start should not be skipped"); + t.IsFalse(ElfAnalyzer::shouldSkipSystemSymbolForHeuristics("_init", forcedNames), + "forced entry name _init should not be skipped"); + + t.IsTrue(ElfAnalyzer::shouldSkipSystemSymbolForHeuristics("__main", forcedNames), + "system symbol not marked as forced should still be skipped"); + t.IsTrue(ElfAnalyzer::shouldSkipSystemSymbolForHeuristics("__divdi3", {}), + "compiler helper __divdi3 should be skippable as system/runtime"); + t.IsFalse(ElfAnalyzer::shouldSkipSystemSymbolForHeuristics("ps2___divdi3", {}), + "generated ps2_ wrapper names should not be treated as system"); }); + tc.Run("entry-point mapping handles exact inside and fallback", [](TestCase &t) { Function f1; diff --git a/ps2xTest/src/main.cpp b/ps2xTest/src/main.cpp index 9c654511..18194212 100644 --- a/ps2xTest/src/main.cpp +++ b/ps2xTest/src/main.cpp @@ -4,7 +4,14 @@ void register_code_generator_tests(); void register_r5900_decoder_tests(); void register_elf_analyzer_tests(); void 
register_ps2_runtime_io_tests(); +void register_ps2_runtime_kernel_tests(); +void register_ps2_runtime_interrupt_tests(); +void register_ps2_memory_tests(); +void register_ps2_gs_tests(); +void register_ps2_sif_rpc_tests(); +void register_ps2_sif_dma_tests(); void register_ps2_recompiler_tests(); +void register_ps2_runtime_expansion_tests(); int main() { @@ -12,6 +19,13 @@ int main() register_r5900_decoder_tests(); register_elf_analyzer_tests(); register_ps2_runtime_io_tests(); + register_ps2_runtime_kernel_tests(); + register_ps2_runtime_interrupt_tests(); + register_ps2_memory_tests(); + register_ps2_gs_tests(); + register_ps2_sif_rpc_tests(); + register_ps2_sif_dma_tests(); register_ps2_recompiler_tests(); + register_ps2_runtime_expansion_tests(); return MiniTest::Run(); } diff --git a/ps2xTest/src/ps2_gs_tests.cpp b/ps2xTest/src/ps2_gs_tests.cpp new file mode 100644 index 00000000..1b07ea82 --- /dev/null +++ b/ps2xTest/src/ps2_gs_tests.cpp @@ -0,0 +1,452 @@ +#include "MiniTest.h" +#include "ps2_memory.h" +#include "ps2_runtime.h" +#include "ps2_syscalls.h" +#include "ps2_gs_gpu.h" + +#include +#include +#include + +using namespace ps2_syscalls; + +namespace +{ + void setRegU32(R5900Context &ctx, int reg, uint32_t value) + { + ctx.r[reg] = _mm_set_epi64x(0, static_cast(value)); + } + + uint32_t getRegU32Test(const R5900Context &ctx, int reg) + { + return ::getRegU32(&ctx, reg); + } + + uint64_t getReturnU64(const R5900Context &ctx) + { + const uint64_t lo = static_cast(getRegU32Test(ctx, 2)); + const uint64_t hi = static_cast(getRegU32Test(ctx, 3)); + return lo | (hi << 32); + } + + uint64_t makeGifTag(uint16_t nloop, uint8_t flg, uint8_t nreg, bool eop = true) + { + uint64_t tag = static_cast(nloop & 0x7FFFu); + if (eop) + tag |= (1ull << 15); + tag |= (static_cast(flg & 0x3u) << 58); + tag |= (static_cast(nreg & 0xFu) << 60); + return tag; + } + + void appendU64(std::vector &dst, uint64_t value) + { + const size_t pos = dst.size(); + dst.resize(pos + 
sizeof(uint64_t)); + std::memcpy(dst.data() + pos, &value, sizeof(uint64_t)); + } +} + +void register_ps2_gs_tests() +{ + MiniTest::Case("PS2GS", [](TestCase &tc) + { + tc.Run("GS CSR/IMR support coherent 64-bit and 32-bit access", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + constexpr uint32_t kGsCsr = 0x12001000u; + constexpr uint32_t kGsImr = 0x12001010u; + + const uint64_t csrPattern = 0xA1B2C3D4E5F60718ull; + mem.write64(kGsCsr, csrPattern); + t.Equals(mem.read64(kGsCsr), csrPattern, "64-bit CSR read should match prior 64-bit write"); + t.Equals(mem.read32(kGsCsr), static_cast(csrPattern & 0xFFFFFFFFull), "CSR low dword read should match"); + t.Equals(mem.read32(kGsCsr + 4u), static_cast(csrPattern >> 32), "CSR high dword read should match"); + + mem.write32(kGsCsr, 0x11223344u); + t.Equals(mem.read64(kGsCsr), 0xA1B2C3D411223344ull, "32-bit low write should preserve CSR high dword"); + + mem.write32(kGsCsr + 4u, 0x55667788u); + t.Equals(mem.read64(kGsCsr), 0x5566778811223344ull, "32-bit high write should preserve CSR low dword"); + + const uint64_t imrPattern = 0x0123456789ABCDEFull; + mem.write64(kGsImr, imrPattern); + t.Equals(mem.read64(kGsImr), imrPattern, "IMR 64-bit read should match prior write"); + t.Equals(mem.read32(kGsImr), 0x89ABCDEFu, "IMR low dword should match"); + t.Equals(mem.read32(kGsImr + 4u), 0x01234567u, "IMR high dword should match"); + }); + + tc.Run("unknown GS privileged offsets are no-op and read as zero", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + constexpr uint32_t kKnownBusdir = 0x12001040u; + constexpr uint32_t kUnknown = 0x12001008u; // inside GS priv range, but not mapped by gsRegPtr. 
+ + mem.write64(kKnownBusdir, 0xCAFEBABE12345678ull); + const uint64_t before = mem.read64(kKnownBusdir); + mem.write32(kUnknown, 0xDEADBEEFu); + t.Equals(mem.read32(kUnknown), 0u, "unknown GS offset should read as zero"); + t.Equals(mem.read64(kKnownBusdir), before, "unknown GS writes should not corrupt mapped GS registers"); + }); + + tc.Run("GS writeIORegister increments GS write counter", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + constexpr uint32_t kGsPmode = 0x12000000u; + constexpr uint32_t kGsImr = 0x12001010u; + + const uint64_t countBefore = mem.gsWriteCount(); + t.IsTrue(mem.writeIORegister(kGsPmode, 0x11u), "writeIORegister PMODE should succeed"); + t.IsTrue(mem.writeIORegister(kGsImr, 0x22u), "writeIORegister IMR should succeed"); + t.Equals(mem.gsWriteCount(), countBefore + 2ull, "GS IO writes should increment GS write counter"); + + t.Equals(mem.readIORegister(kGsPmode), 0x11u, "writeIORegister PMODE value should be readable"); + t.Equals(mem.readIORegister(kGsImr), 0x22u, "writeIORegister IMR value should be readable"); + }); + + tc.Run("GsPutIMR and GsGetIMR roundtrip old and new values", [](TestCase &t) + { + PS2Runtime runtime; + t.IsTrue(runtime.memory().initialize(), "runtime memory initialize should succeed"); + runtime.memory().gs().imr = 0xAAAABBBBCCCCDDDDull; + + std::vector rdram(PS2_RAM_SIZE, 0u); + R5900Context ctx{}; + + setRegU32(ctx, 4, 0x11112222u); // new IMR low + setRegU32(ctx, 5, 0x33334444u); // new IMR high + GsPutIMR(rdram.data(), &ctx, &runtime); + + const uint64_t oldImr = getReturnU64(ctx); + t.Equals(oldImr, 0xAAAABBBBCCCCDDDDull, "GsPutIMR should return previous IMR"); + t.Equals(runtime.memory().gs().imr, 0x3333444411112222ull, "GsPutIMR should update GS IMR"); + + std::memset(&ctx, 0, sizeof(ctx)); + GsGetIMR(rdram.data(), &ctx, &runtime); + const uint64_t currentImr = getReturnU64(ctx); + t.Equals(currentImr, 0x3333444411112222ull, "GsGetIMR should 
return current GS IMR"); + }); + + tc.Run("GIF PACKED A+D writes DISPFB1 and DISPLAY1 privileged registers", [](TestCase &t) + { + std::vector vram(PS2_GS_VRAM_SIZE, 0u); + GSRegisters regs{}; + GS gs; + gs.init(vram.data(), static_cast(vram.size()), &regs); + + std::vector packet; + appendU64(packet, makeGifTag(2u, GIF_FMT_PACKED, 1u, true)); + appendU64(packet, 0x0Eull); // REGS[0] = A+D + + const uint64_t dispfb1 = 0x0123456789ABCDEFull; + const uint64_t display1 = 0x1111222233334444ull; + appendU64(packet, dispfb1); + appendU64(packet, 0x59ull); // DISPFB1 + appendU64(packet, display1); + appendU64(packet, 0x5Aull); // DISPLAY1 + + gs.processGIFPacket(packet.data(), static_cast(packet.size())); + + t.Equals(regs.dispfb1, dispfb1, "A+D should write GS DISPFB1"); + t.Equals(regs.display1, display1, "A+D should write GS DISPLAY1"); + }); + + tc.Run("GIF REGLIST with odd register count consumes 128-bit padding before next tag", [](TestCase &t) + { + std::vector vram(PS2_GS_VRAM_SIZE, 0u); + GS gs; + gs.init(vram.data(), static_cast(vram.size()), nullptr); + + const uint64_t bitblt = + (static_cast(0u) << 0) | + (static_cast(1u) << 16) | + (static_cast(0u) << 24) | + (static_cast(0u) << 32) | + (static_cast(1u) << 48) | + (static_cast(0u) << 56); + gs.writeRegister(GS_REG_BITBLTBUF, bitblt); + gs.writeRegister(GS_REG_TRXPOS, 0ull); + gs.writeRegister(GS_REG_TRXREG, (4ull << 0) | (1ull << 32)); + gs.writeRegister(GS_REG_TRXDIR, 0ull); + + std::vector packet; + appendU64(packet, makeGifTag(1u, GIF_FMT_REGLIST, 1u, false)); + appendU64(packet, 0x0ull); // REGS[0] = PRIM + appendU64(packet, 0x0000000000000006ull); // PRIM write + appendU64(packet, 0xDEADBEEFCAFEBABEull); // required REGLIST pad qword + + appendU64(packet, makeGifTag(1u, GIF_FMT_IMAGE, 0u, true)); + appendU64(packet, 0ull); + const uint8_t payload[16] = { + 0x31u, 0x32u, 0x33u, 0x34u, + 0x35u, 0x36u, 0x37u, 0x38u, + 0x39u, 0x3Au, 0x3Bu, 0x3Cu, + 0x3Du, 0x3Eu, 0x3Fu, 0x40u, + }; + packet.insert(packet.end(),
payload, payload + sizeof(payload)); + + gs.processGIFPacket(packet.data(), static_cast(packet.size())); + + bool imageOk = true; + for (uint32_t i = 0; i < 16u; ++i) + { + if (vram[i] != payload[i]) + { + imageOk = false; + break; + } + } + t.IsTrue(imageOk, "odd REGLIST payload should not corrupt alignment of the following IMAGE tag"); + }); + + tc.Run("GIF REGLIST NREG=0 is treated as sixteen descriptors", [](TestCase &t) + { + std::vector vram(PS2_GS_VRAM_SIZE, 0u); + GS gs; + gs.init(vram.data(), static_cast(vram.size()), nullptr); + + const uint64_t bitblt = + (static_cast(0u) << 0) | + (static_cast(1u) << 16) | + (static_cast(0u) << 24) | + (static_cast(0u) << 32) | + (static_cast(1u) << 48) | + (static_cast(0u) << 56); + gs.writeRegister(GS_REG_BITBLTBUF, bitblt); + gs.writeRegister(GS_REG_TRXPOS, 0ull); + gs.writeRegister(GS_REG_TRXREG, (4ull << 0) | (1ull << 32)); + gs.writeRegister(GS_REG_TRXDIR, 0ull); + + std::vector packet; + appendU64(packet, makeGifTag(1u, GIF_FMT_REGLIST, 0u, false)); // NREG=0 -> 16 regs + appendU64(packet, 0ull); // 16x PRIM descriptors + for (uint32_t i = 0; i < 16u; ++i) + { + appendU64(packet, static_cast(i)); + } + + appendU64(packet, makeGifTag(1u, GIF_FMT_IMAGE, 0u, true)); + appendU64(packet, 0ull); + const uint8_t payload[16] = { + 0x51u, 0x52u, 0x53u, 0x54u, + 0x55u, 0x56u, 0x57u, 0x58u, + 0x59u, 0x5Au, 0x5Bu, 0x5Cu, + 0x5Du, 0x5Eu, 0x5Fu, 0x60u, + }; + packet.insert(packet.end(), payload, payload + sizeof(payload)); + + gs.processGIFPacket(packet.data(), static_cast(packet.size())); + + bool imageOk = true; + for (uint32_t i = 0; i < 16u; ++i) + { + if (vram[i] != payload[i]) + { + imageOk = false; + break; + } + } + t.IsTrue(imageOk, "NREG=0 REGLIST should consume 16 data words and keep following tag aligned"); + }); + + tc.Run("GS SIGNAL and FINISH set CSR bits that clear by CSR write-one acknowledge", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + GS gs; 
+ gs.init(mem.getGSVRAM(), static_cast(PS2_GS_VRAM_SIZE), &mem.gs()); + + const uint64_t signalValue = (0xFFFFFFFFull << 32) | 0x11223344ull; + gs.writeRegister(GS_REG_SIGNAL, signalValue); + gs.writeRegister(GS_REG_FINISH, 0u); + + t.IsTrue((mem.gs().csr & 0x1ull) != 0ull, "SIGNAL should raise CSR.SIGNAL"); + t.IsTrue((mem.gs().csr & 0x2ull) != 0ull, "FINISH should raise CSR.FINISH"); + t.Equals(static_cast(mem.gs().siglblid & 0xFFFFFFFFull), 0x11223344u, "SIGNAL should update SIGLBLID low dword"); + + mem.write64(0x12001000u, 0x1ull); + t.IsTrue((mem.gs().csr & 0x1ull) == 0ull, "writing CSR bit0 should acknowledge SIGNAL"); + t.IsTrue((mem.gs().csr & 0x2ull) != 0ull, "acknowledging SIGNAL should not clear FINISH"); + + mem.write32(0x12001000u, 0x2u); + t.IsTrue((mem.gs().csr & 0x2ull) == 0ull, "writing CSR bit1 should acknowledge FINISH"); + }); + + tc.Run("GIF IMAGE packet writes host-to-local data into GS VRAM", [](TestCase &t) + { + std::vector vram(PS2_GS_VRAM_SIZE, 0u); + GS gs; + gs.init(vram.data(), static_cast(vram.size()), nullptr); + + // Setup for host->local transfer to DBP=0, DBW=1, PSMCT32, rect 2x2. 
+ const uint64_t bitblt = + (static_cast(0u) << 0) | // SBP + (static_cast(1u) << 16) | // SBW + (static_cast(0u) << 24) | // SPSM + (static_cast(0u) << 32) | // DBP + (static_cast(1u) << 48) | // DBW + (static_cast(0u) << 56); // DPSM (CT32) + gs.writeRegister(GS_REG_BITBLTBUF, bitblt); + gs.writeRegister(GS_REG_TRXPOS, 0ull); + gs.writeRegister(GS_REG_TRXREG, (2ull << 0) | (2ull << 32)); + gs.writeRegister(GS_REG_TRXDIR, 0ull); + + std::vector packet; + appendU64(packet, makeGifTag(1u, GIF_FMT_IMAGE, 0u, true)); + appendU64(packet, 0ull); + + const uint8_t payload[16] = { + 0x10u, 0x11u, 0x12u, 0x13u, + 0x20u, 0x21u, 0x22u, 0x23u, + 0x30u, 0x31u, 0x32u, 0x33u, + 0x40u, 0x41u, 0x42u, 0x43u, + }; + packet.insert(packet.end(), payload, payload + sizeof(payload)); + + gs.processGIFPacket(packet.data(), static_cast(packet.size())); + + bool same = true; + for (size_t i = 0; i < 8u; ++i) + { + if (vram[i] != payload[i] || vram[256u + i] != payload[8u + i]) + { + same = false; + break; + } + } + t.IsTrue(same, "GIF IMAGE transfer should write payload bytes into GS VRAM"); + }); + + tc.Run("GS local-to-host transfer supports partial incremental reads", [](TestCase &t) + { + std::vector vram(PS2_GS_VRAM_SIZE, 0u); + GS gs; + gs.init(vram.data(), static_cast(vram.size()), nullptr); + + for (uint32_t i = 0; i < 16u; ++i) + { + vram[i] = static_cast(0xA0u + i); + } + + const uint64_t bitblt = + (static_cast(0u) << 0) | // SBP + (static_cast(1u) << 16) | // SBW + (static_cast(0u) << 24) | // SPSM (CT32) + (static_cast(0u) << 32) | + (static_cast(1u) << 48) | + (static_cast(0u) << 56); + gs.writeRegister(GS_REG_BITBLTBUF, bitblt); + gs.writeRegister(GS_REG_TRXPOS, 0ull); + gs.writeRegister(GS_REG_TRXREG, (4ull << 0) | (1ull << 32)); // 4 pixels, 1 row -> 16 bytes + gs.writeRegister(GS_REG_TRXDIR, 1ull); + + uint8_t bufA[8] = {}; + uint8_t bufB[16] = {}; + + const uint32_t nA = gs.consumeLocalToHostBytes(bufA, 6u); + const uint32_t nB = gs.consumeLocalToHostBytes(bufB, 16u); + 
const uint32_t nC = gs.consumeLocalToHostBytes(bufB, 4u); + + t.Equals(nA, 6u, "first partial read should consume requested bytes"); + t.Equals(nB, 10u, "second read should consume the remaining bytes"); + t.Equals(nC, 0u, "buffer should be empty after all bytes are consumed"); + + bool bytesOk = true; + for (uint32_t i = 0; i < 6u; ++i) + { + if (bufA[i] != static_cast(0xA0u + i)) + bytesOk = false; + } + for (uint32_t i = 0; i < 10u; ++i) + { + if (bufB[i] != static_cast(0xA6u + i)) + bytesOk = false; + } + t.IsTrue(bytesOk, "partial reads should return local->host data in-order"); + }); + + tc.Run("GS CT24 host-local-host transfer preserves 24-bit RGB payload", [](TestCase &t) + { + std::vector vram(PS2_GS_VRAM_SIZE, 0u); + GS gs; + gs.init(vram.data(), static_cast(vram.size()), nullptr); + + const uint64_t bitblt = + (static_cast(0u) << 0) | // SBP + (static_cast(1u) << 16) | // SBW + (static_cast(1u) << 24) | // SPSM CT24 + (static_cast(0u) << 32) | // DBP + (static_cast(1u) << 48) | // DBW + (static_cast(1u) << 56); // DPSM CT24 + gs.writeRegister(GS_REG_BITBLTBUF, bitblt); + gs.writeRegister(GS_REG_TRXPOS, 0ull); + gs.writeRegister(GS_REG_TRXREG, (2ull << 0) | (1ull << 32)); // 2 pixels + gs.writeRegister(GS_REG_TRXDIR, 0ull); + + std::vector packet; + appendU64(packet, makeGifTag(1u, GIF_FMT_IMAGE, 0u, true)); + appendU64(packet, 0ull); + const uint8_t rgbData[16] = { + 0x11u, 0x22u, 0x33u, + 0x44u, 0x55u, 0x66u, + 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u + }; + packet.insert(packet.end(), rgbData, rgbData + sizeof(rgbData)); + gs.processGIFPacket(packet.data(), static_cast(packet.size())); + + // Read back from local to host in CT24. 
+ gs.writeRegister(GS_REG_TRXDIR, 1ull); + uint8_t out[16] = {}; + const uint32_t outBytes = gs.consumeLocalToHostBytes(out, sizeof(out)); + + t.Equals(outBytes, 6u, "CT24 local->host read should output 3 bytes per pixel"); + t.Equals(out[0], static_cast(0x11u), "pixel0 R should roundtrip"); + t.Equals(out[1], static_cast(0x22u), "pixel0 G should roundtrip"); + t.Equals(out[2], static_cast(0x33u), "pixel0 B should roundtrip"); + t.Equals(out[3], static_cast(0x44u), "pixel1 R should roundtrip"); + t.Equals(out[4], static_cast(0x55u), "pixel1 G should roundtrip"); + t.Equals(out[5], static_cast(0x66u), "pixel1 B should roundtrip"); + }); + + tc.Run("GS PSMT4 host-local-host keeps nibble packing stable", [](TestCase &t) + { + std::vector vram(PS2_GS_VRAM_SIZE, 0u); + GS gs; + gs.init(vram.data(), static_cast(vram.size()), nullptr); + + const uint64_t bitblt = + (static_cast(0u) << 0) | // SBP + (static_cast(1u) << 16) | // SBW + (static_cast(20u) << 24) | // SPSM PSMT4 + (static_cast(0u) << 32) | // DBP + (static_cast(1u) << 48) | // DBW + (static_cast(20u) << 56); // DPSM PSMT4 + gs.writeRegister(GS_REG_BITBLTBUF, bitblt); + gs.writeRegister(GS_REG_TRXPOS, 0ull); + gs.writeRegister(GS_REG_TRXREG, (4ull << 0) | (1ull << 32)); // 4 texels => 2 bytes + gs.writeRegister(GS_REG_TRXDIR, 0ull); + + std::vector packet; + appendU64(packet, makeGifTag(1u, GIF_FMT_IMAGE, 0u, true)); + appendU64(packet, 0ull); + const uint8_t nibbleData[16] = {0x21u, 0x43u}; + packet.insert(packet.end(), nibbleData, nibbleData + sizeof(nibbleData)); + gs.processGIFPacket(packet.data(), static_cast(packet.size())); + + gs.writeRegister(GS_REG_TRXDIR, 1ull); + uint8_t out[8] = {}; + const uint32_t outBytes = gs.consumeLocalToHostBytes(out, sizeof(out)); + + t.Equals(outBytes, 2u, "PSMT4 local->host should return packed nibble bytes"); + t.Equals(out[0], static_cast(0x21u), "packed nibble byte 0 should roundtrip"); + t.Equals(out[1], static_cast(0x43u), "packed nibble byte 1 should roundtrip"); + 
});
+    });
+}
diff --git a/ps2xTest/src/ps2_memory_tests.cpp b/ps2xTest/src/ps2_memory_tests.cpp
new file mode 100644
index 00000000..e275f048
--- /dev/null
+++ b/ps2xTest/src/ps2_memory_tests.cpp
@@ -0,0 +1,1314 @@
+#include "MiniTest.h"
+#include "ps2_memory.h"
+#include "ps2_gs_gpu.h"
+#include "ps2_vu1.h"
+#include "ps2_runtime_macros.h"
+
+#include
+#include
+#include
+#include
+
+namespace // File-local helpers that build raw command words, tags and byte packets for the tests in this file.
+{
+    uint32_t makeVifCmd(uint8_t opcode, uint8_t num, uint16_t imm) // Packs three fields into one 32-bit VIF command word.
+    {
+        return (static_cast(opcode) << 24) | // opcode is shifted up by 24 bits
+               (static_cast(num) << 16) | // num is shifted up by 16 bits
+               static_cast(imm); // imm stays in the low bits
+    }
+
+    void appendU32(std::vector &dst, uint32_t value) // Appends the raw bytes of value to the end of dst.
+    {
+        const size_t pos = dst.size(); // remember the old end before growing
+        dst.resize(pos + sizeof(uint32_t));
+        std::memcpy(dst.data() + pos, &value, sizeof(uint32_t)); // bytes are copied in host memory order, no swapping
+    }
+
+    void appendU64(std::vector &dst, uint64_t value) // Same as appendU32 but appends sizeof(uint64_t) bytes.
+    {
+        const size_t pos = dst.size(); // remember the old end before growing
+        dst.resize(pos + sizeof(uint64_t));
+        std::memcpy(dst.data() + pos, &value, sizeof(uint64_t)); // bytes are copied in host memory order, no swapping
+    }
+
+    uint64_t makeDmaTag(uint16_t qwc, uint8_t id, uint32_t addr, bool irq = false) // Builds the 64-bit value that writeDmaTag stores as a DMA tag.
+    {
+        return static_cast(qwc) | // qwc occupies the low bits
+               (static_cast(id & 0x7u) << 28) | // only the low 3 bits of id are kept, placed at bit 28
+               (irq ? (1ull << 31) : 0ull) | // irq sets bit 31 when true
+               (static_cast(addr & 0x7FFFFFFFu) << 32); // addr is masked to 31 bits and placed in the upper half
+    }
+
+    void writeDmaTag(uint8_t *rdram, uint32_t tagAddr, uint64_t tagLo) // Stores a tag at byte offset tagAddr inside rdram; no bounds check is done here.
+    {
+        std::memset(rdram + tagAddr, 0, 16); // clear a full 16-byte slot first
+        std::memcpy(rdram + tagAddr, &tagLo, sizeof(tagLo)); // then overwrite its leading bytes with tagLo
+    }
+
+    uint64_t makeGifTag(uint16_t nloop, uint8_t flg, uint8_t nreg, bool eop = true) // Builds the first 64 bits of a GIF tag from the given fields.
+    {
+        uint64_t tag = static_cast(nloop & 0x7FFFu); // nloop is masked to 15 bits (bits 0-14)
+        if (eop)
+            tag |= (1ull << 15); // eop sets bit 15
+        tag |= (static_cast(flg & 0x3u) << 58); // flg is masked to 2 bits and placed at bit 58
+        tag |= (static_cast(nreg & 0xFu) << 60); // nreg is masked to 4 bits and placed at bit 60
+        return tag;
+    }
+
+    uint32_t makeVuLowerSpecial(uint8_t funct, uint8_t is, uint8_t it = 0u, uint8_t id = 0u, uint8_t dest = 0u) // Packs a 32-bit word; presumably a VU lower-pipeline special instruction - confirm against the VU1 decoder.
+    {
+        return (0x40u << 25) | // fixed constant 0x40 placed at bit 25
+               (static_cast(dest & 0xFu) << 21) | // dest: 4 bits at bit 21
+               (static_cast(it & 0x1Fu) << 16) | // it: 5 bits at bit 16
+               (static_cast(is & 0x1Fu) << 11) | // is: 5 bits at bit 11
+               (static_cast(id & 0x1Fu) << 6) | // id: 5 bits at bit 6
+               static_cast(funct & 0x3Fu); // funct: low 6 bits
+    }
+}
+
+void register_ps2_memory_tests()
+{
+    MiniTest::Case("PS2Memory", [](TestCase &tc)
+    {
+        tc.Run("uncached aliases map to same RDRAM bytes", [](TestCase &t)
+        {
+            PS2Memory mem;
+            t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed");
+
+            mem.write32(0x00001000u, 0xDEADBEEFu);
+            t.Equals(mem.read32(0x00001000u), 0xDEADBEEFu, "base readback should match");
+            t.Equals(mem.read32(0x20001000u), 0xDEADBEEFu, "0x2000_0000 alias should map to RDRAM");
+
+            // 0x3010_0000 maps to physical 0x0010_0000 (AboutPS2 memory map).
+ mem.write32(0x00101000u, 0xDEADBEEFu); + t.Equals(mem.read32(0x30101000u), 0xDEADBEEFu, "0x3010_0000 accelerated alias should map to RDRAM"); + + mem.write32(0x20002000u, 0x13579BDFu); + t.Equals(mem.read32(0x00002000u), 0x13579BDFu, "writes through 0x2000 alias should land in base RDRAM"); + + mem.write32(0x30103000u, 0x2468ACE0u); + t.Equals(mem.read32(0x00103000u), 0x2468ACE0u, "writes through 0x3010 alias should land in base RDRAM"); + }); + + tc.Run("translateAddress handles kseg and uncached aliases", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + t.Equals(mem.translateAddress(0x80001234u), 0x00001234u, "KSEG0 should map directly to physical"); + t.Equals(mem.translateAddress(0xA0005678u), 0x00005678u, "KSEG1 should map directly to physical"); + t.Equals(mem.translateAddress(0x20001234u), 0x00001234u, "0x2000 uncached alias should map to RAM"); + t.Equals(mem.translateAddress(0x30105678u), 0x00105678u, "0x3010 accelerated alias should map to RAM"); + }); + + tc.Run("fast memory helpers wrap safely at RAM boundary", [](TestCase &t) + { + std::vector rdram(PS2_RAM_SIZE, 0u); + const uint32_t tail = PS2_RAM_SIZE - 4u; + + // Build a wrapped 64-bit pattern: [tail..tail+3] + [0..3] + rdram[tail + 0u] = 0xA1u; + rdram[tail + 1u] = 0xB2u; + rdram[tail + 2u] = 0xC3u; + rdram[tail + 3u] = 0xD4u; + rdram[0u] = 0x11u; + rdram[1u] = 0x22u; + rdram[2u] = 0x33u; + rdram[3u] = 0x44u; + + const uint64_t wrappedRead = Ps2FastRead64(rdram.data(), tail); + t.Equals(wrappedRead, 0x44332211D4C3B2A1ull, + "Ps2FastRead64 should wrap across the 32MB boundary"); + + Ps2FastWrite64(rdram.data(), tail, 0x8877665544332211ull); + t.Equals(static_cast(rdram[tail + 0u]), 0x11u, "write byte 0 should land at tail+0"); + t.Equals(static_cast(rdram[tail + 1u]), 0x22u, "write byte 1 should land at tail+1"); + t.Equals(static_cast(rdram[tail + 2u]), 0x33u, "write byte 2 should land at tail+2"); + t.Equals(static_cast(rdram[tail + 
3u]), 0x44u, "write byte 3 should land at tail+3"); + t.Equals(static_cast(rdram[0u]), 0x55u, "write byte 4 should wrap to address 0"); + t.Equals(static_cast(rdram[1u]), 0x66u, "write byte 5 should wrap to address 1"); + t.Equals(static_cast(rdram[2u]), 0x77u, "write byte 6 should wrap to address 2"); + t.Equals(static_cast(rdram[3u]), 0x88u, "write byte 7 should wrap to address 3"); + }); + + tc.Run("VIF MPG num zero uploads 256 instructions", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + std::vector packet; + packet.reserve(4u + 2048u); + appendU32(packet, makeVifCmd(0x4Au, 0u, 0u)); // MPG, num=0 -> 256 instructions (2048 bytes) + + for (uint32_t i = 0; i < 2048u; ++i) + { + packet.push_back(static_cast(i & 0xFFu)); + } + + std::memset(mem.getVU1Code(), 0, PS2_VU1_CODE_SIZE); + mem.processVIF1Data(packet.data(), static_cast(packet.size())); + + const uint8_t *vu1Code = mem.getVU1Code(); + bool matches = true; + for (uint32_t i = 0; i < 2048u; ++i) + { + if (vu1Code[i] != static_cast(i & 0xFFu)) + { + matches = false; + break; + } + } + t.IsTrue(matches, "MPG num=0 should copy 2048 bytes into VU1 code memory"); + }); + + tc.Run("VIF UNPACK num zero uploads 256 vectors", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + // UNPACK V4_32: opcode 0x6C (vn=3, vl=0), num=0 => 256 vectors, 16 bytes each. 
+ std::vector packet; + packet.reserve(4u + 4096u); + appendU32(packet, makeVifCmd(0x6Cu, 0u, 0u)); + for (uint32_t i = 0; i < 4096u; ++i) + { + packet.push_back(static_cast((i * 3u) & 0xFFu)); + } + + std::memset(mem.getVU1Data(), 0, PS2_VU1_DATA_SIZE); + mem.processVIF1Data(packet.data(), static_cast(packet.size())); + + const uint8_t *vu1Data = mem.getVU1Data(); + bool matches = true; + for (uint32_t i = 0; i < 4096u; ++i) + { + if (vu1Data[i] != static_cast((i * 3u) & 0xFFu)) + { + matches = false; + break; + } + } + t.IsTrue(matches, "UNPACK num=0 should copy 256 V4_32 vectors (4096 bytes)"); + }); + + tc.Run("VIF control commands update MARK MASK ROW and COL registers", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + std::vector packet; + appendU32(packet, makeVifCmd(0x07u, 0u, 0x1234u)); // MARK + + appendU32(packet, makeVifCmd(0x20u, 0u, 0u)); // STMASK + appendU32(packet, 0x89ABCDEFu); + + appendU32(packet, makeVifCmd(0x30u, 0u, 0u)); // STROW + appendU32(packet, 0x11111111u); + appendU32(packet, 0x22222222u); + appendU32(packet, 0x33333333u); + appendU32(packet, 0x44444444u); + + appendU32(packet, makeVifCmd(0x31u, 0u, 0u)); // STCOL + appendU32(packet, 0xAAAA0001u); + appendU32(packet, 0xAAAA0002u); + appendU32(packet, 0xAAAA0003u); + appendU32(packet, 0xAAAA0004u); + + mem.processVIF1Data(packet.data(), static_cast(packet.size())); + + t.Equals(mem.vif1_regs.mark, 0x1234u, "MARK should set VIF1 MARK register"); + t.Equals(mem.vif1_regs.mask, 0x89ABCDEFu, "STMASK should set VIF1 MASK register"); + + t.Equals(mem.vif1_regs.row[0], 0x11111111u, "STROW should set row[0]"); + t.Equals(mem.vif1_regs.row[1], 0x22222222u, "STROW should set row[1]"); + t.Equals(mem.vif1_regs.row[2], 0x33333333u, "STROW should set row[2]"); + t.Equals(mem.vif1_regs.row[3], 0x44444444u, "STROW should set row[3]"); + + t.Equals(mem.vif1_regs.col[0], 0xAAAA0001u, "STCOL should set col[0]"); + 
t.Equals(mem.vif1_regs.col[1], 0xAAAA0002u, "STCOL should set col[1]"); + t.Equals(mem.vif1_regs.col[2], 0xAAAA0003u, "STCOL should set col[2]"); + t.Equals(mem.vif1_regs.col[3], 0xAAAA0004u, "STCOL should set col[3]"); + }); + + tc.Run("VIF UNPACK V4-16 sign and zero extension follow immediate bit14", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + std::memset(mem.getVU1Data(), 0, PS2_VU1_DATA_SIZE); + + // UNPACK V4-16 (opcode 0x6D), num=1, addr=0. + // Payload components: x=0xFF80, y=0x0001, z=0x7FFF, w=0x8001. + const uint16_t comps[4] = {0xFF80u, 0x0001u, 0x7FFFu, 0x8001u}; + + std::vector signPacket; + appendU32(signPacket, makeVifCmd(0x6Du, 1u, 0x0000u)); // sign-extend + for (uint16_t c : comps) + { + const size_t pos = signPacket.size(); + signPacket.resize(pos + sizeof(uint16_t)); + std::memcpy(signPacket.data() + pos, &c, sizeof(uint16_t)); + } + mem.processVIF1Data(signPacket.data(), static_cast(signPacket.size())); + + const uint8_t *vu1 = mem.getVU1Data(); + uint32_t sx = 0, sy = 0, sz = 0, sw = 0; + std::memcpy(&sx, vu1 + 0, 4); + std::memcpy(&sy, vu1 + 4, 4); + std::memcpy(&sz, vu1 + 8, 4); + std::memcpy(&sw, vu1 + 12, 4); + t.Equals(sx, 0xFFFFFF80u, "sign-extend x"); + t.Equals(sy, 0x00000001u, "sign-extend y"); + t.Equals(sz, 0x00007FFFu, "sign-extend z"); + t.Equals(sw, 0xFFFF8001u, "sign-extend w"); + + // Same UNPACK with imm bit14 set => zero-extend. 
+ std::vector zeroPacket; + appendU32(zeroPacket, makeVifCmd(0x6Du, 1u, 0x4000u)); // zero-extend + for (uint16_t c : comps) + { + const size_t pos = zeroPacket.size(); + zeroPacket.resize(pos + sizeof(uint16_t)); + std::memcpy(zeroPacket.data() + pos, &c, sizeof(uint16_t)); + } + mem.processVIF1Data(zeroPacket.data(), static_cast(zeroPacket.size())); + + std::memcpy(&sx, vu1 + 0, 4); + std::memcpy(&sy, vu1 + 4, 4); + std::memcpy(&sz, vu1 + 8, 4); + std::memcpy(&sw, vu1 + 12, 4); + t.Equals(sx, 0x0000FF80u, "zero-extend x"); + t.Equals(sy, 0x00000001u, "zero-extend y"); + t.Equals(sz, 0x00007FFFu, "zero-extend z"); + t.Equals(sw, 0x00008001u, "zero-extend w"); + }); + + tc.Run("VIF UNPACK bit15 adds TOPS to destination address", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + std::memset(mem.getVU1Data(), 0, PS2_VU1_DATA_SIZE); + + mem.vif1_regs.tops = 4u; + + // UNPACK V4-32, num=1, addr=2, bit15 set => effective addr = 6. + std::vector packet; + appendU32(packet, makeVifCmd(0x6Cu, 1u, static_cast(0x8000u | 0x0002u))); + appendU32(packet, 0x11111111u); + appendU32(packet, 0x22222222u); + appendU32(packet, 0x33333333u); + appendU32(packet, 0x44444444u); + + mem.processVIF1Data(packet.data(), static_cast(packet.size())); + + const uint8_t *vu1 = mem.getVU1Data(); + + uint32_t untouched = 0xDEADBEEFu; + std::memcpy(&untouched, vu1 + (2u * 16u), 4); + t.Equals(untouched, 0u, "base addr without TOPS should remain untouched"); + + uint32_t x = 0, y = 0, z = 0, w = 0; + const uint32_t dest = 6u * 16u; + std::memcpy(&x, vu1 + dest + 0u, 4); + std::memcpy(&y, vu1 + dest + 4u, 4); + std::memcpy(&z, vu1 + dest + 8u, 4); + std::memcpy(&w, vu1 + dest + 12u, 4); + t.Equals(x, 0x11111111u, "TOPS-adjusted x"); + t.Equals(y, 0x22222222u, "TOPS-adjusted y"); + t.Equals(z, 0x33333333u, "TOPS-adjusted z"); + t.Equals(w, 0x44444444u, "TOPS-adjusted w"); + }); + + tc.Run("VIF STCYCL skip mode advances destination by CL when 
CL>=WL", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + std::memset(mem.getVU1Data(), 0, PS2_VU1_DATA_SIZE); + + std::vector packet; + appendU32(packet, makeVifCmd(0x01u, 0u, static_cast((1u << 8) | 3u))); // STCYCL: WL=1, CL=3 + appendU32(packet, makeVifCmd(0x6Cu, 2u, 0u)); // UNPACK V4-32, NUM=2, ADDR=0 + + appendU32(packet, 0x11111111u); + appendU32(packet, 0x22222222u); + appendU32(packet, 0x33333333u); + appendU32(packet, 0x44444444u); + + appendU32(packet, 0xAAAAAAAAu); + appendU32(packet, 0xBBBBBBBBu); + appendU32(packet, 0xCCCCCCCCu); + appendU32(packet, 0xDDDDDDDDu); + + mem.processVIF1Data(packet.data(), static_cast(packet.size())); + + const uint8_t *vu = mem.getVU1Data(); + + uint32_t v0x = 0, v1x = 0, v2x = 0, v3x = 0; + std::memcpy(&v0x, vu + 0u * 16u + 0u, 4); + std::memcpy(&v1x, vu + 1u * 16u + 0u, 4); + std::memcpy(&v2x, vu + 2u * 16u + 0u, 4); + std::memcpy(&v3x, vu + 3u * 16u + 0u, 4); + + t.Equals(v0x, 0x11111111u, "first vector should write at addr 0"); + t.Equals(v1x, 0u, "skip mode should leave addr 1 untouched when WL=1 CL=3"); + t.Equals(v2x, 0u, "skip mode should leave addr 2 untouched when WL=1 CL=3"); + t.Equals(v3x, 0xAAAAAAAAu, "second vector should write at addr CL (addr 3)"); + }); + + tc.Run("VIF masked UNPACK uses data row col and protect selectors", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + std::memset(mem.getVU1Data(), 0, PS2_VU1_DATA_SIZE); + + // Pre-fill destination W lane for write-protect verification. 
+ uint32_t preservedW = 0xDEADBEEFu; + std::memcpy(mem.getVU1Data() + 12u, &preservedW, 4u); + + std::vector packet; + appendU32(packet, makeVifCmd(0x20u, 0u, 0u)); // STMASK + appendU32(packet, 0x000000E4u); // m0=0(data), m1=1(row), m2=2(col), m3=3(protect) + + appendU32(packet, makeVifCmd(0x30u, 0u, 0u)); // STROW + appendU32(packet, 0xAAAAB001u); + appendU32(packet, 0xAAAAB002u); + appendU32(packet, 0xAAAAB003u); + appendU32(packet, 0xAAAAB004u); + + appendU32(packet, makeVifCmd(0x31u, 0u, 0u)); // STCOL + appendU32(packet, 0x11110001u); + appendU32(packet, 0x11110002u); + appendU32(packet, 0x11110003u); + appendU32(packet, 0x11110004u); + + appendU32(packet, makeVifCmd(0x7Cu, 1u, 0u)); // UNPACK V4-32 with CMD bit4 (mask enable) + appendU32(packet, 0x01020304u); + appendU32(packet, 0x11121314u); + appendU32(packet, 0x21222324u); + appendU32(packet, 0x31323334u); + + mem.processVIF1Data(packet.data(), static_cast(packet.size())); + + const uint8_t *vu = mem.getVU1Data(); + uint32_t x = 0, y = 0, z = 0, w = 0; + std::memcpy(&x, vu + 0u, 4u); + std::memcpy(&y, vu + 4u, 4u); + std::memcpy(&z, vu + 8u, 4u); + std::memcpy(&w, vu + 12u, 4u); + + t.Equals(x, 0x01020304u, "mask=0 should write decompressed data"); + t.Equals(y, 0xAAAAB002u, "mask=1 should write row register for Y field"); + t.Equals(z, 0x11110001u, "mask=2 should write C0 on first write cycle"); + t.Equals(w, preservedW, "mask=3 should write-protect destination field"); + }); + + tc.Run("VIF STMOD offset and difference modes apply to UNPACK data", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + std::memset(mem.getVU1Data(), 0, PS2_VU1_DATA_SIZE); + + std::vector packet; + appendU32(packet, makeVifCmd(0x30u, 0u, 0u)); // STROW + appendU32(packet, 10u); + appendU32(packet, 20u); + appendU32(packet, 30u); + appendU32(packet, 40u); + + appendU32(packet, makeVifCmd(0x05u, 0u, 1u)); // STMOD offset mode + appendU32(packet, makeVifCmd(0x6Cu, 1u, 0u)); 
// UNPACK V4-32 -> addr 0 + appendU32(packet, 1u); + appendU32(packet, 2u); + appendU32(packet, 3u); + appendU32(packet, 4u); + + appendU32(packet, makeVifCmd(0x30u, 0u, 0u)); // reset STROW for difference mode + appendU32(packet, 100u); + appendU32(packet, 100u); + appendU32(packet, 100u); + appendU32(packet, 100u); + + appendU32(packet, makeVifCmd(0x05u, 0u, 2u)); // STMOD difference mode + appendU32(packet, makeVifCmd(0x6Cu, 2u, 1u)); // UNPACK V4-32 -> addr 1 and 2 + appendU32(packet, 1u); + appendU32(packet, 1u); + appendU32(packet, 1u); + appendU32(packet, 1u); + appendU32(packet, 2u); + appendU32(packet, 2u); + appendU32(packet, 2u); + appendU32(packet, 2u); + + mem.processVIF1Data(packet.data(), static_cast(packet.size())); + + const uint8_t *vu = mem.getVU1Data(); + uint32_t x0 = 0, y0 = 0, z0 = 0, w0 = 0; + std::memcpy(&x0, vu + 0u * 16u + 0u, 4u); + std::memcpy(&y0, vu + 0u * 16u + 4u, 4u); + std::memcpy(&z0, vu + 0u * 16u + 8u, 4u); + std::memcpy(&w0, vu + 0u * 16u + 12u, 4u); + t.Equals(x0, 11u, "offset mode X"); + t.Equals(y0, 22u, "offset mode Y"); + t.Equals(z0, 33u, "offset mode Z"); + t.Equals(w0, 44u, "offset mode W"); + + uint32_t x1 = 0, x2 = 0; + std::memcpy(&x1, vu + 1u * 16u + 0u, 4u); + std::memcpy(&x2, vu + 2u * 16u + 0u, 4u); + t.Equals(x1, 101u, "difference mode first write should add initial row"); + t.Equals(x2, 103u, "difference mode second write should accumulate updated row"); + t.Equals(mem.vif1_regs.row[0], 103u, "difference mode should update row register"); + t.Equals(mem.vif1_regs.row[1], 103u, "difference mode should update row register for Y"); + t.Equals(mem.vif1_regs.row[2], 103u, "difference mode should update row register for Z"); + t.Equals(mem.vif1_regs.row[3], 103u, "difference mode should update row register for W"); + }); + + tc.Run("VIF fill write uses STMASK and STROW when WL>CL", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + 
std::memset(mem.getVU1Data(), 0, PS2_VU1_DATA_SIZE); + + std::vector packet; + appendU32(packet, makeVifCmd(0x01u, 0u, static_cast((3u << 8) | 1u))); // STCYCL: WL=3, CL=1 + + appendU32(packet, makeVifCmd(0x20u, 0u, 0u)); // STMASK + appendU32(packet, 0x55555555u); // all fields all cycles use row register + + appendU32(packet, makeVifCmd(0x30u, 0u, 0u)); // STROW + appendU32(packet, 0x11111111u); + appendU32(packet, 0x22222222u); + appendU32(packet, 0x33333333u); + appendU32(packet, 0x44444444u); + + appendU32(packet, makeVifCmd(0x7Cu, 3u, 0u)); // masked UNPACK V4-32, NUM=3 writes + // Only one input vector should be consumed for CL=1, WL=3. + appendU32(packet, 0xAAAABBBB); + appendU32(packet, 0xCCCCDDDD); + appendU32(packet, 0xEEEEFFFF); + appendU32(packet, 0x12345678); + + mem.processVIF1Data(packet.data(), static_cast(packet.size())); + + const uint8_t *vu = mem.getVU1Data(); + for (uint32_t i = 0; i < 3u; ++i) + { + uint32_t x = 0, y = 0, z = 0, w = 0; + std::memcpy(&x, vu + i * 16u + 0u, 4u); + std::memcpy(&y, vu + i * 16u + 4u, 4u); + std::memcpy(&z, vu + i * 16u + 8u, 4u); + std::memcpy(&w, vu + i * 16u + 12u, 4u); + t.Equals(x, 0x11111111u, "fill write X should use row[0]"); + t.Equals(y, 0x22222222u, "fill write Y should use row[1]"); + t.Equals(z, 0x33333333u, "fill write Z should use row[2]"); + t.Equals(w, 0x44444444u, "fill write W should use row[3]"); + } + }); + + tc.Run("VIF irq command sets STAT.INT and CODE until FBRST.STC clears it", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + const uint32_t irqMarkCmd = 0x80000000u | makeVifCmd(0x07u, 0x12u, 0x3456u); + mem.processVIF1Data(reinterpret_cast(&irqMarkCmd), sizeof(irqMarkCmd)); + + t.Equals(mem.vif1_regs.code, irqMarkCmd, "VIF CODE should capture the last processed command"); + t.IsTrue((mem.vif1_regs.stat & (1u << 11)) != 0u, "irq bit should raise VIF1 STAT.INT"); + t.Equals(mem.vif1_regs.mark, 0x3456u, "MARK command should still 
update MARK register"); + + t.IsTrue(mem.writeIORegister(0x10003C10u, 0x8u), "FBRST STC write should succeed"); + t.IsTrue((mem.vif1_regs.stat & (1u << 11)) == 0u, "FBRST.STC should clear VIF1 STAT.INT"); + }); + + tc.Run("VIF FBRST RST clears VIF1 command state", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + mem.vif1_regs.mark = 0x1234u; + mem.vif1_regs.cycle = 0x0102u; + mem.vif1_regs.mode = 2u; + mem.vif1_regs.num = 7u; + mem.vif1_regs.mask = 0x89ABCDEFu; + mem.vif1_regs.code = 0xCAFEBABEu; + mem.vif1_regs.stat = 0x3F00u; + + t.IsTrue(mem.writeIORegister(0x10003C10u, 0x1u), "FBRST RST write should succeed"); + + t.Equals(mem.vif1_regs.mark, 0u, "RST should clear MARK"); + t.Equals(mem.vif1_regs.cycle, 0u, "RST should clear CYCLE"); + t.Equals(mem.vif1_regs.mode, 0u, "RST should clear MODE"); + t.Equals(mem.vif1_regs.num, 0u, "RST should clear NUM"); + t.Equals(mem.vif1_regs.mask, 0u, "RST should clear MASK"); + t.Equals(mem.vif1_regs.code, 0u, "RST should clear CODE"); + t.Equals(mem.vif1_regs.stat, 0u, "RST should clear STAT"); + }); + + tc.Run("VIF double-buffer OFFSET BASE and MSCAL update TOPS and ITOPS", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + mem.vif1_regs.tops = 0x120u; + mem.vif1_regs.stat = (1u << 7); // DBF=1 before OFFSET + + std::vector> mscalCalls; + mem.setVu1MscalCallback([&](uint32_t startPC, uint32_t itop) + { + mscalCalls.emplace_back(startPC, itop); + }); + + const uint32_t offsetCmd = makeVifCmd(0x02u, 0u, 0x0022u); + mem.processVIF1Data(reinterpret_cast(&offsetCmd), sizeof(offsetCmd)); + t.Equals(mem.vif1_regs.ofst, 0x22u, "OFFSET should update OFST"); + t.Equals(mem.vif1_regs.base, 0x120u, "OFFSET should copy old TOPS into BASE"); + t.IsTrue((mem.vif1_regs.stat & (1u << 7)) == 0u, "OFFSET should clear DBF"); + t.Equals(mem.vif1_regs.tops, 0x120u, "DBF=0 should keep TOPS at BASE"); + + const uint32_t baseCmd = 
makeVifCmd(0x03u, 0u, 0x0030u); + mem.processVIF1Data(reinterpret_cast(&baseCmd), sizeof(baseCmd)); + t.Equals(mem.vif1_regs.base, 0x30u, "BASE should update BASE register"); + t.Equals(mem.vif1_regs.tops, 0x30u, "DBF=0 keeps TOPS equal to BASE"); + + const uint32_t itopCmd = makeVifCmd(0x04u, 0u, 0x0044u); + mem.processVIF1Data(reinterpret_cast(&itopCmd), sizeof(itopCmd)); + t.Equals(mem.vif1_regs.itop, 0x44u, "ITOP should update ITOP register"); + + const uint32_t mscalCmd = makeVifCmd(0x14u, 0u, 0x0003u); + mem.processVIF1Data(reinterpret_cast(&mscalCmd), sizeof(mscalCmd)); + t.Equals(mscalCalls.size(), static_cast(1u), "MSCAL should invoke callback once"); + t.Equals(mscalCalls[0].first, 0x18u, "MSCAL callback startPC should be IMMEDIATE*8"); + t.Equals(mscalCalls[0].second, 0x44u, "MSCAL callback should receive current ITOP"); + t.Equals(mem.vif1_regs.itops, 0x44u, "MSCAL should latch ITOPS from ITOP"); + t.IsTrue((mem.vif1_regs.stat & (1u << 7)) != 0u, "MSCAL should toggle DBF"); + t.Equals(mem.vif1_regs.tops, 0x52u, "DBF=1 should set TOPS to BASE+OFST"); + + const uint32_t mscntCmd = makeVifCmd(0x17u, 0u, 0u); + mem.processVIF1Data(reinterpret_cast(&mscntCmd), sizeof(mscntCmd)); + t.IsTrue((mem.vif1_regs.stat & (1u << 7)) == 0u, "MSCNT should toggle DBF again"); + t.Equals(mem.vif1_regs.tops, 0x30u, "DBF=0 should restore TOPS to BASE"); + }); + + tc.Run("VIF MSKPATH3 uses immediate bit15", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + const uint32_t setMask = makeVifCmd(0x06u, 0u, 0x8000u); + mem.processVIF1Data(reinterpret_cast(&setMask), sizeof(setMask)); + t.IsTrue(mem.isPath3Masked(), "MSKPATH3 with imm bit15 set should enable PATH3 mask"); + + const uint32_t clearMask = makeVifCmd(0x06u, 0u, 0x0000u); + mem.processVIF1Data(reinterpret_cast(&clearMask), sizeof(clearMask)); + t.IsFalse(mem.isPath3Masked(), "MSKPATH3 with imm bit15 clear should disable PATH3 mask"); + }); + + tc.Run("PATH3 
mask queues packets until unmask", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + std::vector> captured; + mem.setGifPacketCallback([&](const uint8_t *data, uint32_t sizeBytes) + { + captured.emplace_back(data, data + sizeBytes); + }); + + std::vector packetA(16u); + std::vector packetB(16u); + for (uint32_t i = 0; i < 16u; ++i) + { + packetA[i] = static_cast(0x10u + i); + packetB[i] = static_cast(0x40u + i); + } + + const uint32_t setMask = makeVifCmd(0x06u, 0u, 0x8000u); + mem.processVIF1Data(reinterpret_cast(&setMask), sizeof(setMask)); + t.IsTrue(mem.isPath3Masked(), "PATH3 mask should be enabled"); + + mem.submitGifPacket(GifPathId::Path3, packetA.data(), static_cast(packetA.size())); + mem.submitGifPacket(GifPathId::Path3, packetB.data(), static_cast(packetB.size())); + t.Equals(captured.size(), static_cast(0u), "masked PATH3 packets should be queued, not dropped/emitted"); + + const uint32_t clearMask = makeVifCmd(0x06u, 0u, 0x0000u); + mem.processVIF1Data(reinterpret_cast(&clearMask), sizeof(clearMask)); + + t.Equals(captured.size(), static_cast(2u), "unmask should flush queued PATH3 packets"); + bool firstOk = true; + bool secondOk = true; + for (uint32_t i = 0; i < 16u; ++i) + { + if (captured[0][i] != static_cast(0x10u + i)) + firstOk = false; + if (captured[1][i] != static_cast(0x40u + i)) + secondOk = false; + } + t.IsTrue(firstOk, "first queued PATH3 packet should flush in-order"); + t.IsTrue(secondOk, "second queued PATH3 packet should flush in-order"); + }); + + tc.Run("GIF arbiter prioritizes PATH1 then PATH2 then PATH3", [](TestCase &t) + { + std::vector order; + GifArbiter arbiter([&](const uint8_t *data, uint32_t sizeBytes) + { + if (data && sizeBytes > 0u) + order.push_back(data[0]); + }); + + const std::vector p1(16u, 0x11u); + const std::vector p2(16u, 0x22u); + const std::vector p3(16u, 0x33u); + + arbiter.submit(GifPathId::Path3, p3.data(), static_cast(p3.size())); + 
arbiter.submit(GifPathId::Path2, p2.data(), static_cast(p2.size())); + arbiter.submit(GifPathId::Path1, p1.data(), static_cast(p1.size())); + arbiter.drain(); + + t.Equals(order.size(), static_cast(3u), "all queued packets should be drained"); + t.Equals(order[0], static_cast(0x11u), "PATH1 should be drained first"); + t.Equals(order[1], static_cast(0x22u), "PATH2 should be drained second"); + t.Equals(order[2], static_cast(0x33u), "PATH3 should be drained third"); + }); + + tc.Run("VIF DIRECTHL stalls behind queued PATH3 IMAGE packets", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + std::vector firstBytes; + GifArbiter arbiter([&](const uint8_t *data, uint32_t sizeBytes) + { + if (data && sizeBytes > 0u) + firstBytes.push_back(data[0]); + }); + mem.setGifArbiter(&arbiter); + + std::vector path3Image; + appendU64(path3Image, makeGifTag(0x00AAu, 2u, 0u, true)); // IMAGE packet marker: first byte 0xAA + appendU64(path3Image, 0ull); + mem.submitGifPacket(GifPathId::Path3, path3Image.data(), static_cast(path3Image.size()), false); + + std::vector vifPacket; + appendU32(vifPacket, makeVifCmd(0x51u, 0u, 1u)); // DIRECTHL 1 QW + for (uint32_t i = 0; i < 16u; ++i) + { + vifPacket.push_back(static_cast(0xD2u + i)); + } + mem.processVIF1Data(vifPacket.data(), static_cast(vifPacket.size())); + + t.Equals(firstBytes.size(), static_cast(2u), "PATH3 and DIRECTHL packets should both drain"); + t.Equals(firstBytes[0], static_cast(0xAAu), "DIRECTHL should not preempt queued PATH3 IMAGE packet"); + t.Equals(firstBytes[1], static_cast(0xD2u), "DIRECTHL packet should drain after PATH3 IMAGE packet"); + }); + + tc.Run("GIF DMA mode0 copies RDRAM packet and clears channel", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + constexpr uint32_t kGifCh = 0x1000A000u; + constexpr uint32_t kSrc = 0x00022000u; + constexpr uint32_t kQwc = 2u; // 32 bytes + + uint8_t *rdram 
= mem.getRDRAM(); + for (uint32_t i = 0; i < kQwc * 16u; ++i) + { + rdram[kSrc + i] = static_cast((0x40u + i) & 0xFFu); + } + + std::vector> captured; + mem.setGifPacketCallback([&](const uint8_t *data, uint32_t sizeBytes) + { + captured.emplace_back(data, data + sizeBytes); + }); + + t.IsTrue(mem.writeIORegister(kGifCh + 0x10u, kSrc), "write MADR should succeed"); + t.IsTrue(mem.writeIORegister(kGifCh + 0x20u, kQwc), "write QWC should succeed"); + t.IsTrue(mem.writeIORegister(kGifCh + 0x00u, 0x100u), "write CHCR STR should succeed"); + + t.Equals(mem.dmaStartCount(), 1ull, "starting GIF DMA should increment dmaStartCount"); + + mem.processPendingTransfers(); + + t.Equals(captured.size(), static_cast(1u), "GIF DMA should emit one packet"); + t.Equals(captured[0].size(), static_cast(kQwc * 16u), "GIF packet size should match QWC"); + + bool contentOk = true; + for (uint32_t i = 0; i < kQwc * 16u; ++i) + { + if (captured[0][i] != static_cast((0x40u + i) & 0xFFu)) + { + contentOk = false; + break; + } + } + t.IsTrue(contentOk, "GIF DMA packet bytes should match source RDRAM"); + t.IsTrue(mem.hasSeenGifCopy(), "GIF DMA should mark seen GIF copy"); + t.Equals(mem.gifCopyCount(), 1ull, "GIF DMA should increment gifCopyCount"); + t.IsTrue((mem.readIORegister(kGifCh + 0x00u) & 0x100u) == 0u, "GIF CHCR STR bit should be cleared after drain"); + t.Equals(mem.readIORegister(kGifCh + 0x20u), 0u, "GIF QWC should be cleared after drain"); + }); + + tc.Run("GIF DMA can source from scratchpad", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + constexpr uint32_t kGifCh = 0x1000A000u; + constexpr uint32_t kSrcScratch = PS2_SCRATCHPAD_BASE + 0x80u; + constexpr uint32_t kQwc = 1u; // 16 bytes + + uint8_t *scratch = mem.getScratchpad(); + for (uint32_t i = 0; i < 16u; ++i) + { + scratch[0x80u + i] = static_cast((0xA0u + i) & 0xFFu); + } + + std::vector> captured; + mem.setGifPacketCallback([&](const uint8_t *data, uint32_t 
sizeBytes) + { + captured.emplace_back(data, data + sizeBytes); + }); + + t.IsTrue(mem.writeIORegister(kGifCh + 0x10u, kSrcScratch), "write MADR scratchpad should succeed"); + t.IsTrue(mem.writeIORegister(kGifCh + 0x20u, kQwc), "write QWC should succeed"); + t.IsTrue(mem.writeIORegister(kGifCh + 0x00u, 0x100u), "write CHCR STR should succeed"); + + mem.processPendingTransfers(); + + t.Equals(captured.size(), static_cast(1u), "scratchpad GIF DMA should emit one packet"); + t.Equals(captured[0].size(), static_cast(16u), "scratchpad GIF DMA packet should be 16 bytes"); + bool contentOk = true; + for (uint32_t i = 0; i < 16u; ++i) + { + if (captured[0][i] != static_cast((0xA0u + i) & 0xFFu)) + { + contentOk = false; + break; + } + } + t.IsTrue(contentOk, "scratchpad GIF DMA packet bytes should match scratchpad source"); + }); + + tc.Run("VIF1 DMA DIRECT forwards payload to GIF callback and clears channel", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + constexpr uint32_t kVif1Ch = 0x10009000u; + constexpr uint32_t kSrc = 0x00024000u; + constexpr uint32_t kQwc = 2u; // 32 bytes total transport + + uint8_t *rdram = mem.getRDRAM(); + std::memset(rdram + kSrc, 0, kQwc * 16u); + + // DIRECT 1 QW. 
+ const uint32_t cmd = makeVifCmd(0x50u, 0u, 1u); + std::memcpy(rdram + kSrc, &cmd, sizeof(cmd)); + for (uint32_t i = 0; i < 16u; ++i) + { + rdram[kSrc + 4u + i] = static_cast((0x11u + i) & 0xFFu); + } + + std::vector> captured; + mem.setGifPacketCallback([&](const uint8_t *data, uint32_t sizeBytes) + { + captured.emplace_back(data, data + sizeBytes); + }); + + t.IsTrue(mem.writeIORegister(kVif1Ch + 0x10u, kSrc), "write VIF1 MADR should succeed"); + t.IsTrue(mem.writeIORegister(kVif1Ch + 0x20u, kQwc), "write VIF1 QWC should succeed"); + t.IsTrue(mem.writeIORegister(kVif1Ch + 0x00u, 0x100u), "write VIF1 CHCR STR should succeed"); + + mem.processPendingTransfers(); + + t.Equals(captured.size(), static_cast(1u), "VIF1 DIRECT should emit one GIF packet"); + t.Equals(captured[0].size(), static_cast(16u), "VIF1 DIRECT packet should be 1 QW"); + bool contentOk = true; + for (uint32_t i = 0; i < 16u; ++i) + { + if (captured[0][i] != static_cast((0x11u + i) & 0xFFu)) + { + contentOk = false; + break; + } + } + t.IsTrue(contentOk, "VIF1 DIRECT packet bytes should match payload"); + t.IsTrue((mem.readIORegister(kVif1Ch + 0x00u) & 0x100u) == 0u, "VIF1 CHCR STR bit should be cleared after drain"); + t.Equals(mem.readIORegister(kVif1Ch + 0x20u), 0u, "VIF1 QWC should be cleared after drain"); + }); + + tc.Run("GIF DMA chain CALL sources payload from TADR+16", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + constexpr uint32_t kGifCh = 0x1000A000u; + constexpr uint32_t kTag0 = 0x00026000u; + constexpr uint32_t kTag1 = 0x00026100u; + + uint8_t *rdram = mem.getRDRAM(); + + // CALL qwc=1 addr=kTag1 + writeDmaTag(rdram, kTag0, makeDmaTag(1u, 5u, kTag1, false)); + // END qwc=1 + writeDmaTag(rdram, kTag1, makeDmaTag(1u, 7u, 0u, false)); + + for (uint32_t i = 0; i < 16u; ++i) + { + rdram[kTag0 + 16u + i] = static_cast(0x40u + i); // CALL payload + rdram[kTag1 + 16u + i] = static_cast(0x80u + i); // END payload + } + + 
std::vector> captured; + mem.setGifPacketCallback([&](const uint8_t *data, uint32_t sizeBytes) + { + captured.emplace_back(data, data + sizeBytes); + }); + + t.IsTrue(mem.writeIORegister(kGifCh + 0x30u, kTag0), "write TADR should succeed"); + // STR + CHAIN mode (MOD=1) + t.IsTrue(mem.writeIORegister(kGifCh + 0x00u, 0x104u), "write CHCR should succeed"); + + mem.processPendingTransfers(); + + t.Equals(captured.size(), static_cast(1u), "chain CALL should emit one packet"); + t.Equals(captured[0].size(), static_cast(32u), "CALL+END should emit two qwords"); + + bool firstQwOk = true; + bool secondQwOk = true; + for (uint32_t i = 0; i < 16u; ++i) + { + if (captured[0][i] != static_cast(0x40u + i)) + firstQwOk = false; + if (captured[0][16u + i] != static_cast(0x80u + i)) + secondQwOk = false; + } + t.IsTrue(firstQwOk, "CALL must transfer from TADR+16, not DMAtag ADDR"); + t.IsTrue(secondQwOk, "END payload should follow CALL payload"); + }); + + tc.Run("GIF DMA chain RET transfers payload and resumes after CALL", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + constexpr uint32_t kGifCh = 0x1000A000u; + constexpr uint32_t kTagCall = 0x00026200u; + constexpr uint32_t kTagRet = 0x00026300u; + constexpr uint32_t kTagEnd = 0x00026220u; + + uint8_t *rdram = mem.getRDRAM(); + + // CALL qwc=1 -> jumps to RET tag + writeDmaTag(rdram, kTagCall, makeDmaTag(1u, 5u, kTagRet, false)); + // RET qwc=1 -> should return to kTagEnd + writeDmaTag(rdram, kTagRet, makeDmaTag(1u, 6u, 0u, false)); + // END qwc=1 after CALL payload + writeDmaTag(rdram, kTagEnd, makeDmaTag(1u, 7u, 0u, false)); + + for (uint32_t i = 0; i < 16u; ++i) + { + rdram[kTagCall + 16u + i] = static_cast(0x11u + i); // CALL payload + rdram[kTagRet + 16u + i] = static_cast(0x22u + i); // RET payload + rdram[kTagEnd + 16u + i] = static_cast(0x33u + i); // END payload + } + + std::vector> captured; + mem.setGifPacketCallback([&](const uint8_t *data, uint32_t 
sizeBytes) + { + captured.emplace_back(data, data + sizeBytes); + }); + + t.IsTrue(mem.writeIORegister(kGifCh + 0x30u, kTagCall), "write TADR should succeed"); + t.IsTrue(mem.writeIORegister(kGifCh + 0x00u, 0x104u), "write CHCR should succeed"); + + mem.processPendingTransfers(); + + t.Equals(captured.size(), static_cast(1u), "CALL/RET chain should emit one packet"); + t.Equals(captured[0].size(), static_cast(48u), "CALL+RET+END should emit three qwords"); + + bool q0 = true; + bool q1 = true; + bool q2 = true; + for (uint32_t i = 0; i < 16u; ++i) + { + if (captured[0][i] != static_cast(0x11u + i)) + q0 = false; + if (captured[0][16u + i] != static_cast(0x22u + i)) + q1 = false; + if (captured[0][32u + i] != static_cast(0x33u + i)) + q2 = false; + } + t.IsTrue(q0, "CALL payload should be first"); + t.IsTrue(q1, "RET must still transfer its own payload"); + t.IsTrue(q2, "RET must resume after CALL payload and continue chain"); + }); + + tc.Run("GIF DMA chain IRQ stops only when TIE is set", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + constexpr uint32_t kGifCh = 0x1000A000u; + constexpr uint32_t kTag0 = 0x00026400u; + constexpr uint32_t kTag1 = 0x00026410u; + constexpr uint32_t kRefData = 0x00026500u; + + auto runChain = [&](uint32_t chcrValue, std::vector &packetOut) -> bool + { + uint8_t *rdram = mem.getRDRAM(); + writeDmaTag(rdram, kTag0, makeDmaTag(1u, 3u, kRefData, true)); // REF + IRQ + writeDmaTag(rdram, kTag1, makeDmaTag(1u, 7u, 0u, false)); // END + for (uint32_t i = 0; i < 16u; ++i) + { + rdram[kRefData + i] = static_cast(0x55u + i); + rdram[kTag1 + 16u + i] = static_cast(0x77u + i); + } + + std::vector> captured; + mem.setGifPacketCallback([&](const uint8_t *data, uint32_t sizeBytes) + { + captured.emplace_back(data, data + sizeBytes); + }); + + if (!mem.writeIORegister(kGifCh + 0x30u, kTag0)) + return false; + if (!mem.writeIORegister(kGifCh + 0x00u, chcrValue)) + return false; + 
mem.processPendingTransfers(); + if (captured.empty()) + return false; + packetOut = captured[0]; + return true; + }; + + std::vector packetNoTie; + t.IsTrue(runChain(0x104u, packetNoTie), "chain run without TIE should succeed"); + t.Equals(packetNoTie.size(), static_cast(32u), "IRQ tag should not stop chain when TIE is clear"); + + std::vector packetTie; + // STR + CHAIN + TIE(bit7) + t.IsTrue(runChain(0x184u, packetTie), "chain run with TIE should succeed"); + t.Equals(packetTie.size(), static_cast(16u), "IRQ tag should stop chain when TIE is set"); + }); + + tc.Run("DMAC D_STAT toggles masks and clears channel status on write-one", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + constexpr uint32_t kDStat = 0x1000E010u; + constexpr uint32_t kGifMaskBit = (1u << 18); // channel 2 mask + constexpr uint32_t kGifStatusBit = (1u << 2); // channel 2 status + constexpr uint32_t kSummaryBit = (1u << 31); + + t.IsTrue(mem.writeIORegister(kDStat, kGifMaskBit), "D_STAT mask toggle write should succeed"); + t.IsTrue((mem.readIORegister(kDStat) & kGifMaskBit) != 0u, "first mask write should enable GIF mask bit"); + t.IsTrue(mem.writeIORegister(kDStat, kGifMaskBit), "D_STAT mask toggle write should succeed"); + t.IsTrue((mem.readIORegister(kDStat) & kGifMaskBit) == 0u, "second mask write should disable GIF mask bit"); + + t.IsTrue(mem.writeIORegister(kDStat, kGifMaskBit), "re-enable GIF mask for summary test"); + + constexpr uint32_t kGifCh = 0x1000A000u; + constexpr uint32_t kSrc = 0x00027000u; + uint8_t *rdram = mem.getRDRAM(); + for (uint32_t i = 0; i < 16u; ++i) + { + rdram[kSrc + i] = static_cast(0x90u + i); + } + + t.IsTrue(mem.writeIORegister(kGifCh + 0x10u, kSrc), "write MADR should succeed"); + t.IsTrue(mem.writeIORegister(kGifCh + 0x20u, 1u), "write QWC should succeed"); + t.IsTrue(mem.writeIORegister(kGifCh + 0x00u, 0x100u), "write CHCR STR should succeed"); + + t.IsTrue((mem.readIORegister(kDStat) & 
kGifStatusBit) == 0u, "D_STAT status should not set before transfer drain"); + + mem.processPendingTransfers(); + + const uint32_t dstatAfter = mem.readIORegister(kDStat); + t.IsTrue((dstatAfter & kGifStatusBit) != 0u, "GIF transfer completion should set D_STAT channel status bit"); + t.IsTrue((dstatAfter & kSummaryBit) != 0u, "status&mask should raise D_STAT summary bit"); + + t.IsTrue(mem.writeIORegister(kDStat, kGifStatusBit), "D_STAT status clear write should succeed"); + const uint32_t dstatCleared = mem.readIORegister(kDStat); + t.IsTrue((dstatCleared & kGifStatusBit) == 0u, "write-one should clear GIF channel status bit"); + t.IsTrue((dstatCleared & kSummaryBit) == 0u, "summary bit should clear after status clear"); + }); + + tc.Run("DMAC D_CTRL DMAE gates GIF DMA start", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + constexpr uint32_t kDctrl = 0x1000E000u; + constexpr uint32_t kGifCh = 0x1000A000u; + constexpr uint32_t kSrc = 0x00027800u; + + uint8_t *rdram = mem.getRDRAM(); + for (uint32_t i = 0; i < 16u; ++i) + { + rdram[kSrc + i] = static_cast(0xE0u + i); + } + + std::vector> captured; + mem.setGifPacketCallback([&](const uint8_t *data, uint32_t sizeBytes) + { + captured.emplace_back(data, data + sizeBytes); + }); + + t.IsTrue(mem.writeIORegister(kDctrl, 0u), "clearing D_CTRL.DMAE should succeed"); + t.IsTrue(mem.writeIORegister(kGifCh + 0x10u, kSrc), "write MADR should succeed"); + t.IsTrue(mem.writeIORegister(kGifCh + 0x20u, 1u), "write QWC should succeed"); + t.IsTrue(mem.writeIORegister(kGifCh + 0x00u, 0x100u), "write CHCR STR should succeed"); + mem.processPendingTransfers(); + + t.Equals(captured.size(), static_cast(0u), "DMAE=0 should prevent GIF DMA transfer"); + t.Equals(mem.dmaStartCount(), 0ull, "DMAE=0 should not increment dmaStartCount"); + + t.IsTrue(mem.writeIORegister(kDctrl, 1u), "setting D_CTRL.DMAE should succeed"); + t.IsTrue(mem.writeIORegister(kGifCh + 0x00u, 0x100u), 
"restarting GIF DMA should succeed"); + mem.processPendingTransfers(); + + t.Equals(captured.size(), static_cast(1u), "DMAE=1 should allow GIF DMA transfer"); + if (!captured.empty()) + { + t.Equals(captured[0].size(), static_cast(16u), "GIF DMA transfer should emit one qword"); + } + }); + + tc.Run("VU1 XGKICK wraps packet payload across VU1 memory boundary", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + std::vector> captured; + mem.setGifPacketCallback([&](const uint8_t *data, uint32_t sizeBytes) + { + captured.emplace_back(data, data + sizeBytes); + }); + + std::vector vram(PS2_GS_VRAM_SIZE, 0u); + GS gs; + gs.init(vram.data(), static_cast(vram.size()), nullptr); + + uint8_t *vuCode = mem.getVU1Code(); + uint8_t *vuData = mem.getVU1Data(); + std::memset(vuCode, 0, PS2_VU1_CODE_SIZE); + std::memset(vuData, 0, PS2_VU1_DATA_SIZE); + + constexpr uint32_t kLastQw = (PS2_VU1_DATA_SIZE / 16u) - 1u; + const uint32_t tagOffset = kLastQw * 16u; + + const uint64_t imageTag = makeGifTag(1u, GIF_FMT_IMAGE, 0u, true); + std::memcpy(vuData + tagOffset, &imageTag, sizeof(imageTag)); + + for (uint32_t i = 0; i < 16u; ++i) + { + vuData[i] = static_cast(0xC0u + i); + } + + const uint32_t lower = makeVuLowerSpecial(0x3Du, 1u); + std::memcpy(vuCode + 0u, &lower, sizeof(lower)); + const uint32_t upper = 0u; + std::memcpy(vuCode + 4u, &upper, sizeof(upper)); + + VU1Interpreter vu1; + vu1.state().vi[1] = static_cast(kLastQw); + vu1.execute(vuCode, + PS2_VU1_CODE_SIZE, + vuData, + PS2_VU1_DATA_SIZE, + gs, + &mem, + 0u, + 0u, + 1u); + + t.Equals(captured.size(), static_cast(1u), "XGKICK should emit one wrapped GIF packet"); + if (!captured.empty()) + { + t.Equals(captured[0].size(), static_cast(32u), "wrapped packet should include tag plus one qword payload"); + bool payloadOk = true; + for (uint32_t i = 0; i < 16u; ++i) + { + if (captured[0].size() < 32u || captured[0][16u + i] != static_cast(0xC0u + i)) + { + payloadOk = 
false; + break; + } + } + t.IsTrue(payloadOk, "wrapped payload should be copied from start of VU1 memory"); + } + }); + + tc.Run("VIF1 DMA DIRECT image packet reaches GS through arbiter", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + GS gs; + gs.init(mem.getGSVRAM(), static_cast(PS2_GS_VRAM_SIZE), &mem.gs()); + GifArbiter arbiter([&](const uint8_t *data, uint32_t sizeBytes) + { + gs.processGIFPacket(data, sizeBytes); + }); + mem.setGifArbiter(&arbiter); + + const uint64_t bitblt = + (static_cast(0u) << 0) | + (static_cast(1u) << 16) | + (static_cast(0u) << 24) | + (static_cast(0u) << 32) | + (static_cast(1u) << 48) | + (static_cast(0u) << 56); + gs.writeRegister(GS_REG_BITBLTBUF, bitblt); + gs.writeRegister(GS_REG_TRXPOS, 0ull); + gs.writeRegister(GS_REG_TRXREG, (4ull << 0) | (1ull << 32)); + gs.writeRegister(GS_REG_TRXDIR, 0ull); + + constexpr uint32_t kVif1Ch = 0x10009000u; + constexpr uint32_t kSrc = 0x00027C00u; + constexpr uint32_t kQwc = 3u; + + uint8_t *rdram = mem.getRDRAM(); + std::memset(rdram + kSrc, 0, kQwc * 16u); + + const uint32_t directCmd = makeVifCmd(0x50u, 0u, 2u); // DIRECT 2 QW payload. 
+ std::memcpy(rdram + kSrc, &directCmd, sizeof(directCmd)); + + uint8_t *gifPayload = rdram + kSrc + 4u; + const uint64_t gifTag = makeGifTag(1u, GIF_FMT_IMAGE, 0u, true); + std::memcpy(gifPayload + 0u, &gifTag, sizeof(gifTag)); + const uint64_t tagHi = 0u; + std::memcpy(gifPayload + 8u, &tagHi, sizeof(tagHi)); + for (uint32_t i = 0; i < 16u; ++i) + { + gifPayload[16u + i] = static_cast(0x70u + i); + } + + t.IsTrue(mem.writeIORegister(kVif1Ch + 0x10u, kSrc), "write VIF1 MADR should succeed"); + t.IsTrue(mem.writeIORegister(kVif1Ch + 0x20u, kQwc), "write VIF1 QWC should succeed"); + t.IsTrue(mem.writeIORegister(kVif1Ch + 0x00u, 0x100u), "write VIF1 CHCR STR should succeed"); + + mem.processPendingTransfers(); + + const uint8_t *vramOut = mem.getGSVRAM(); + bool imageOk = true; + for (uint32_t i = 0; i < 16u; ++i) + { + if (vramOut[i] != static_cast(0x70u + i)) + { + imageOk = false; + break; + } + } + t.IsTrue(imageOk, "VIF1 DIRECT image should update GS VRAM through GIF path2"); + }); + + tc.Run("VIF MSCAL callback can execute XGKICK and update GS VRAM", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + GS gs; + gs.init(mem.getGSVRAM(), static_cast(PS2_GS_VRAM_SIZE), &mem.gs()); + GifArbiter arbiter([&](const uint8_t *data, uint32_t sizeBytes) + { + gs.processGIFPacket(data, sizeBytes); + }); + mem.setGifArbiter(&arbiter); + + const uint64_t bitblt = + (static_cast(0u) << 0) | + (static_cast(1u) << 16) | + (static_cast(0u) << 24) | + (static_cast(0u) << 32) | + (static_cast(1u) << 48) | + (static_cast(0u) << 56); + gs.writeRegister(GS_REG_BITBLTBUF, bitblt); + gs.writeRegister(GS_REG_TRXPOS, 0ull); + gs.writeRegister(GS_REG_TRXREG, (4ull << 0) | (1ull << 32)); + gs.writeRegister(GS_REG_TRXDIR, 0ull); + + uint8_t *vuCode = mem.getVU1Code(); + uint8_t *vuData = mem.getVU1Data(); + std::memset(vuCode, 0, PS2_VU1_CODE_SIZE); + std::memset(vuData, 0, PS2_VU1_DATA_SIZE); + + const uint32_t lower = 
makeVuLowerSpecial(0x3Du, 0u); + std::memcpy(vuCode + 0u, &lower, sizeof(lower)); + const uint32_t upper = 0u; + std::memcpy(vuCode + 4u, &upper, sizeof(upper)); + + const uint64_t gifTag = makeGifTag(1u, GIF_FMT_IMAGE, 0u, true); + std::memcpy(vuData + 0u, &gifTag, sizeof(gifTag)); + const uint64_t tagHi = 0u; + std::memcpy(vuData + 8u, &tagHi, sizeof(tagHi)); + for (uint32_t i = 0; i < 16u; ++i) + { + vuData[16u + i] = static_cast(0x90u + i); + } + + VU1Interpreter vu1; + mem.setVu1MscalCallback([&](uint32_t startPC, uint32_t itop) + { + vu1.execute(vuCode, + PS2_VU1_CODE_SIZE, + vuData, + PS2_VU1_DATA_SIZE, + gs, + &mem, + startPC, + itop, + 1u); + }); + + const uint32_t mscalCmd = makeVifCmd(0x14u, 0u, 0u); + mem.processVIF1Data(reinterpret_cast(&mscalCmd), sizeof(mscalCmd)); + + const uint8_t *vramOut = mem.getGSVRAM(); + bool imageOk = true; + for (uint32_t i = 0; i < 16u; ++i) + { + if (vramOut[i] != static_cast(0x90u + i)) + { + imageOk = false; + break; + } + } + t.IsTrue(imageOk, "MSCAL-triggered XGKICK should route PATH1 packet into GS VRAM"); + }); + + tc.Run("unaligned accesses throw", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + bool threwRead32 = false; + bool threwWrite64 = false; + try + { + (void)mem.read32(0x00000002u); + } + catch (const std::exception &) + { + threwRead32 = true; + } + + try + { + mem.write64(0x00000004u + 2u, 0x1122334455667788ull); + } + catch (const std::exception &) + { + threwWrite64 = true; + } + + t.IsTrue(threwRead32, "unaligned read32 should throw"); + t.IsTrue(threwWrite64, "unaligned write64 should throw"); + }); + }); +} diff --git a/ps2xTest/src/ps2_recompiler_tests.cpp b/ps2xTest/src/ps2_recompiler_tests.cpp index 29f2849d..575c647c 100644 --- a/ps2xTest/src/ps2_recompiler_tests.cpp +++ b/ps2xTest/src/ps2_recompiler_tests.cpp @@ -1,8 +1,14 @@ #include "MiniTest.h" #include "ps2recomp/ps2_recompiler.h" +#include "ps2recomp/config_manager.h" +#include 
"ps2recomp/elf_parser.h" #include "ps2recomp/instructions.h" #include "ps2recomp/types.h" +#include #include +#include +#include +#include #include #include @@ -29,6 +35,18 @@ static Instruction makeAbsJump(uint32_t address, uint32_t target, uint32_t opcod return inst; } +static Instruction makeJrRa(uint32_t address) +{ + Instruction inst{}; + inst.address = address; + inst.opcode = OPCODE_SPECIAL; + inst.function = SPECIAL_JR; + inst.rs = 31; + inst.hasDelaySlot = true; + inst.raw = 0x03E00008u; + return inst; +} + static Function makeFunction(const std::string &name, uint32_t start, uint32_t end) { Function fn{}; @@ -41,6 +59,65 @@ static Function makeFunction(const std::string &name, uint32_t start, uint32_t e return fn; } +static bool writeMinimalMipsElfWithCodeAndDataFunctionSymbols(const std::filesystem::path &elfPath) +{ + ELFIO::elfio writer; + writer.create(ELFIO::ELFCLASS32, ELFIO::ELFDATA2LSB); + writer.set_os_abi(ELFIO::ELFOSABI_NONE); + writer.set_type(ELFIO::ET_EXEC); + writer.set_machine(ELFIO::EM_MIPS); + writer.set_entry(0x00100000u); + + ELFIO::section *text = writer.sections.add(".text"); + text->set_type(ELFIO::SHT_PROGBITS); + text->set_flags(ELFIO::SHF_ALLOC | ELFIO::SHF_EXECINSTR); + text->set_addr_align(4); + text->set_address(0x00100000u); + const char textBytes[] = {0x08, 0x00, static_cast(0xE0), 0x03, 0x00, 0x00, 0x00, 0x00}; + text->set_data(textBytes, sizeof(textBytes)); + + ELFIO::section *data = writer.sections.add(".data"); + data->set_type(ELFIO::SHT_PROGBITS); + data->set_flags(ELFIO::SHF_ALLOC | ELFIO::SHF_WRITE); + data->set_addr_align(4); + data->set_address(0x00200000u); + const char dataBytes[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, static_cast(0x88)}; + data->set_data(dataBytes, sizeof(dataBytes)); + + ELFIO::section *strtab = writer.sections.add(".strtab"); + strtab->set_type(ELFIO::SHT_STRTAB); + strtab->set_addr_align(1); + + ELFIO::section *symtab = writer.sections.add(".symtab"); + 
symtab->set_type(ELFIO::SHT_SYMTAB); + symtab->set_info(1); + symtab->set_link(strtab->get_index()); + symtab->set_addr_align(4); + symtab->set_entry_size(writer.get_default_entry_size(ELFIO::SHT_SYMTAB)); + + ELFIO::symbol_section_accessor symbols(writer, symtab); + ELFIO::string_section_accessor strings(strtab); + symbols.add_symbol(strings, "", 0, 0, ELFIO::STB_LOCAL, ELFIO::STT_NOTYPE, 0, ELFIO::SHN_UNDEF); + symbols.add_symbol(strings, "code_func", text->get_address(), text->get_size(), + ELFIO::STB_GLOBAL, ELFIO::STT_FUNC, 0, text->get_index()); + symbols.add_symbol(strings, "data_func", data->get_address(), data->get_size(), + ELFIO::STB_GLOBAL, ELFIO::STT_FUNC, 0, data->get_index()); + + ELFIO::segment *textSegment = writer.segments.add(); + textSegment->set_type(ELFIO::PT_LOAD); + textSegment->set_flags(ELFIO::PF_R | ELFIO::PF_X); + textSegment->set_align(0x1000); + textSegment->add_section_index(text->get_index(), text->get_addr_align()); + + ELFIO::segment *dataSegment = writer.segments.add(); + dataSegment->set_type(ELFIO::PT_LOAD); + dataSegment->set_flags(ELFIO::PF_R | ELFIO::PF_W); + dataSegment->set_align(0x1000); + dataSegment->add_section_index(data->get_index(), data->get_addr_align()); + + return writer.save(elfPath.string()); +} + void register_ps2_recompiler_tests() { MiniTest::Case("PS2Recompiler", [](TestCase &tc) @@ -293,5 +370,148 @@ void register_ps2_recompiler_tests() const bool hasDataEntry = std::any_of(functions.begin(), functions.end(), [](const Function &fn) { return fn.start == 0x3004u; }); t.IsFalse(hasDataEntry, "target in data section must not produce entry wrapper"); + }); + + tc.Run("entry starting at jr ra is capped to return thunk", [](TestCase &t) { + std::vector
sections = { + {".text", 0x1000u, 0x2000u, 0u, true, false, false, true, nullptr} + }; + + std::vector functions = { + makeFunction("container", 0x1000u, 0x1200u), + makeFunction("caller", 0x1300u, 0x1310u) + }; + + std::unordered_map> decodedFunctions; + decodedFunctions[0x1000u] = { + makeNopLike(0x1000u), + makeNopLike(0x1004u), + makeNopLike(0x1008u), + makeJrRa(0x10A0u), + makeNopLike(0x10A4u), + makeNopLike(0x10A8u), + makeNopLike(0x10ACu) + }; + decodedFunctions[0x1300u] = { + makeAbsJump(0x1300u, 0x10A0u, OPCODE_J), + makeNopLike(0x1304u) + }; + + size_t discovered = PS2Recompiler::DiscoverAdditionalEntryPoints( + functions, decodedFunctions, sections); + t.Equals(discovered, static_cast(1), + "expected one additional entry from cross-function jump"); + + auto entryIt = std::find_if(functions.begin(), functions.end(), + [](const Function &fn) { return fn.start == 0x10A0u; }); + t.IsTrue(entryIt != functions.end(), "entry wrapper at 0x10A0 should exist"); + if (entryIt != functions.end()) + { + t.Equals(entryIt->end, 0x10A8u, + "jr ra entry should end after delay slot, not at container end"); + } + + auto decodedEntryIt = decodedFunctions.find(0x10A0u); + t.IsTrue(decodedEntryIt != decodedFunctions.end(), + "decoded entry slice for 0x10A0 should exist"); + if (decodedEntryIt != decodedFunctions.end()) + { + t.Equals(decodedEntryIt->second.size(), static_cast(2), + "jr ra entry slice should contain exactly jr+delay"); + if (!decodedEntryIt->second.empty()) + { + t.Equals(decodedEntryIt->second.front().address, 0x10A0u, + "entry slice should start at 0x10A0"); + } + } + }); + + tc.Run("config manager parses jump_tables table entries", [](TestCase &t) { + const auto uniqueSuffix = std::to_string( + static_cast(std::chrono::steady_clock::now().time_since_epoch().count())); + const std::filesystem::path configPath = + std::filesystem::temp_directory_path() / ("ps2recomp-jump-table-" + uniqueSuffix + ".toml"); + + std::ofstream configFile(configPath); + 
t.IsTrue(static_cast(configFile), "temp config file should be writable"); + if (!configFile) + { + return; + } + + configFile << "[general]\n"; + configFile << "input = \"dummy.elf\"\n"; + configFile << "output = \"out\"\n\n"; + configFile << "[jump_tables]\n"; + configFile << "[[jump_tables.table]]\n"; + configFile << "address = \"0x200000\"\n"; + configFile << "base_register = 9\n"; + configFile << "entries = [\n"; + configFile << " { index = 0, target = \"0x1620\" },\n"; + configFile << " { index = 1, target = \"0x1630\" },\n"; + configFile << "]\n"; + configFile.close(); + + ConfigManager manager(configPath.string()); + RecompilerConfig config = manager.loadConfig(); + + t.Equals(config.jumpTables.size(), static_cast(1), + "one configured jump table should be loaded"); + if (!config.jumpTables.empty()) + { + const JumpTable &table = config.jumpTables.front(); + t.Equals(table.address, 0x200000u, "table address should parse from hex string"); + t.Equals(table.baseRegister, 9u, "base register should parse"); + t.Equals(table.entries.size(), static_cast(2), + "two jump table entries should parse"); + if (table.entries.size() >= 2) + { + t.Equals(table.entries[0].index, 0u, "first entry index should parse"); + t.Equals(table.entries[0].target, 0x1620u, "first entry target should parse"); + t.Equals(table.entries[1].index, 1u, "second entry index should parse"); + t.Equals(table.entries[1].target, 0x1630u, "second entry target should parse"); + } + } + + std::error_code removeError; + std::filesystem::remove(configPath, removeError); + }); + + tc.Run("elf parser ignores STT_FUNC symbols in non-executable sections", [](TestCase &t) { + const auto uniqueSuffix = std::to_string( + static_cast(std::chrono::steady_clock::now().time_since_epoch().count())); + const std::filesystem::path elfPath = + std::filesystem::temp_directory_path() / ("ps2recomp-parser-" + uniqueSuffix + ".elf"); + + const bool writeOk = writeMinimalMipsElfWithCodeAndDataFunctionSymbols(elfPath); + 
t.IsTrue(writeOk, "temporary ELF should be generated"); + if (!writeOk) + { + return; + } + + ElfParser parser(elfPath.string()); + const bool parseOk = parser.parse(); + t.IsTrue(parseOk, "generated ELF should parse"); + if (!parseOk) + { + std::error_code removeError; + std::filesystem::remove(elfPath, removeError); + return; + } + + const auto functions = parser.extractFunctions(); + const bool hasCodeFunction = std::any_of(functions.begin(), functions.end(), + [](const Function &fn) + { return fn.start == 0x00100000u; }); + const bool hasDataFunction = std::any_of(functions.begin(), functions.end(), + [](const Function &fn) + { return fn.start == 0x00200000u; }); + + t.IsTrue(hasCodeFunction, "function in executable section should be retained"); + t.IsFalse(hasDataFunction, "STT_FUNC symbol in .data must be ignored"); + + std::error_code removeError; + std::filesystem::remove(elfPath, removeError); }); }); } diff --git a/ps2xTest/src/ps2_runtime_expansion_tests.cpp b/ps2xTest/src/ps2_runtime_expansion_tests.cpp new file mode 100644 index 00000000..7cdcf056 --- /dev/null +++ b/ps2xTest/src/ps2_runtime_expansion_tests.cpp @@ -0,0 +1,631 @@ +#include "MiniTest.h" +#include "ps2recomp/code_generator.h" +#include "ps2recomp/instructions.h" +#include "ps2recomp/r5900_decoder.h" +#include "ps2recomp/types.h" +#include "ps2_runtime.h" +#include "ps2_memory.h" +#include "ps2_syscalls.h" +#include "ps2_gs_gpu.h" +#include "ps2_runtime_macros.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace ps2recomp; +using namespace ps2_syscalls; + +namespace +{ + constexpr uint32_t COP0_CAUSE_BD = 0x80000000u; + constexpr uint32_t COP0_CAUSE_EXCCODE_MASK = 0x0000007Cu; + constexpr uint32_t COP0_STATUS_EXL = 0x00000002u; + constexpr uint32_t COP0_STATUS_BEV = 0x00400000u; + constexpr uint32_t EXCEPTION_VECTOR_GENERAL = 0x80000080u; + constexpr uint32_t EXCEPTION_VECTOR_BOOT = 0xBFC00200u; + + constexpr int KE_OK = 0; + + void 
setRegU32(R5900Context &ctx, int reg, uint32_t value) + { + ctx.r[reg] = _mm_set_epi64x(0, static_cast(value)); + } + + int32_t getRegS32(const R5900Context &ctx, int reg) + { + return static_cast(::getRegU32(&ctx, reg)); + } + + uint32_t makeVifCmd(uint8_t opcode, uint8_t num, uint16_t imm) + { + return (static_cast(opcode) << 24) | + (static_cast(num) << 16) | + static_cast(imm); + } + + bool hasSignedRdWrite(const std::string &generated, uint8_t rd) + { + if (rd == 0u) + { + return false; + } + + const std::string needle = "SET_GPR_S32(ctx, " + std::to_string(rd) + ","; + return generated.find(needle) != std::string::npos; + } + + template + bool waitUntil(Predicate pred, std::chrono::milliseconds timeout) + { + const auto deadline = std::chrono::steady_clock::now() + timeout; + while (std::chrono::steady_clock::now() < deadline) + { + if (pred()) + { + return true; + } + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + return pred(); + } + + uint32_t frameOffsetBytes(uint32_t x, uint32_t y, uint32_t fbw) + { + const uint32_t stride = fbw * 64u * 4u; // CT32 + return y * stride + x * 4u; + } + + void testRuntimeWorkerLoop(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) + { + if (!ctx || !runtime) + { + return; + } + + // Keep touching guest memory so teardown races are easier to catch. 
+ (void)Ps2FastRead64(rdram, static_cast(0x01FFFFF8u + (ctx->insn_count & 0x7u))); + ++ctx->insn_count; + + if (runtime->isStopRequested()) + { + ctx->pc = 0u; + return; + } + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } +} + +void register_ps2_runtime_expansion_tests() +{ + MiniTest::Case("PS2RuntimeExpansion", [](TestCase &tc) + { + tc.Run("differential decoder/codegen gpr-write contract for MULT and DIV families", [](TestCase &t) + { + R5900Decoder decoder; + CodeGenerator generator({}, {}); + + const struct + { + const char *name; + uint32_t raw; + } cases[] = { + {"MULT rd!=0", (OPCODE_SPECIAL << 26) | (4u << 21) | (5u << 16) | (3u << 11) | SPECIAL_MULT}, + {"MULT rd==0", (OPCODE_SPECIAL << 26) | (4u << 21) | (5u << 16) | (0u << 11) | SPECIAL_MULT}, + {"DIV rd!=0", (OPCODE_SPECIAL << 26) | (6u << 21) | (7u << 16) | (9u << 11) | SPECIAL_DIV}, + {"MMI MULT1 rd!=0", (OPCODE_MMI << 26) | (8u << 21) | (9u << 16) | (10u << 11) | MMI_MULT1}, + {"MMI DIV1 rd!=0", (OPCODE_MMI << 26) | (8u << 21) | (9u << 16) | (10u << 11) | MMI_DIV1}, + }; + + for (size_t i = 0; i < std::size(cases); ++i) + { + const Instruction inst = decoder.decodeInstruction(0x1000u + static_cast(i * 4u), cases[i].raw); + const std::string generated = generator.translateInstruction(inst); + const bool emittedRdWrite = hasSignedRdWrite(generated, inst.rd); + + t.Equals(emittedRdWrite, inst.modificationInfo.modifiesGPR, + std::string("decoder/codegen mismatch for ") + cases[i].name); + t.IsTrue(inst.modificationInfo.modifiesControl, + std::string("HI/LO control side-effect missing for ") + cases[i].name); + } + }); + + tc.Run("multiply-add matrix writes rd only when R5900 requires it", [](TestCase &t) + { + R5900Decoder decoder; + CodeGenerator generator({}, {}); + + const struct + { + const char *name; + uint32_t raw; + bool expectedRdWrite; + } cases[] = { + {"MULTU rd!=0", (OPCODE_SPECIAL << 26) | (2u << 21) | (3u << 16) | (11u << 11) | SPECIAL_MULTU, true}, + {"MMI MADD rd!=0", 
(OPCODE_MMI << 26) | (2u << 21) | (3u << 16) | (12u << 11) | MMI_MADD, true}, + {"MMI MADDU rd!=0", (OPCODE_MMI << 26) | (2u << 21) | (3u << 16) | (13u << 11) | MMI_MADDU, true}, + {"MMI MADD1 rd!=0", (OPCODE_MMI << 26) | (2u << 21) | (3u << 16) | (14u << 11) | MMI_MADD1, true}, + {"MMI MADDU1 rd!=0", (OPCODE_MMI << 26) | (2u << 21) | (3u << 16) | (15u << 11) | MMI_MADDU1, true}, + {"MMI DIVU1 rd!=0", (OPCODE_MMI << 26) | (2u << 21) | (3u << 16) | (16u << 11) | MMI_DIVU1, false}, + }; + + for (size_t i = 0; i < std::size(cases); ++i) + { + const Instruction inst = decoder.decodeInstruction(0x2000u + static_cast(i * 4u), cases[i].raw); + const std::string generated = generator.translateInstruction(inst); + const bool emittedRdWrite = hasSignedRdWrite(generated, inst.rd); + + t.Equals(inst.modificationInfo.modifiesGPR, cases[i].expectedRdWrite, + std::string("decoder rd-write metadata mismatch for ") + cases[i].name); + t.Equals(emittedRdWrite, cases[i].expectedRdWrite, + std::string("codegen rd-write mismatch for ") + cases[i].name); + } + }); + + tc.Run("SignalException marks EPC and BD for delay-slot exceptions", [](TestCase &t) + { + PS2Runtime runtime; + R5900Context ctx{}; + + ctx.pc = 0x2000u; + ctx.branch_pc = 0x1FFCu; + ctx.in_delay_slot = true; + ctx.cop0_status = 0u; + ctx.cop0_cause = 0u; + + runtime.SignalException(&ctx, EXCEPTION_ADDRESS_ERROR_LOAD); + + t.Equals(ctx.cop0_epc, 0x1FFCu, "delay-slot exception should capture branch_pc in EPC"); + t.IsTrue((ctx.cop0_cause & COP0_CAUSE_BD) != 0u, "delay-slot exception should set CAUSE.BD"); + t.Equals(ctx.cop0_cause & COP0_CAUSE_EXCCODE_MASK, + (static_cast(EXCEPTION_ADDRESS_ERROR_LOAD) << 2) & COP0_CAUSE_EXCCODE_MASK, + "CAUSE.EXCCODE should match exception"); + t.IsTrue((ctx.cop0_status & COP0_STATUS_EXL) != 0u, "exception should set STATUS.EXL"); + t.Equals(ctx.pc, EXCEPTION_VECTOR_GENERAL, "exception should jump to general vector when BEV=0"); + t.IsFalse(ctx.in_delay_slot, "exception delivery should 
clear delay-slot state"); + }); + + tc.Run("SignalException uses current pc without BD and honors BEV vector", [](TestCase &t) + { + PS2Runtime runtime; + R5900Context ctx{}; + + ctx.pc = 0x3000u; + ctx.in_delay_slot = false; + ctx.cop0_status = COP0_STATUS_BEV; + ctx.cop0_cause = COP0_CAUSE_BD; + + runtime.SignalException(&ctx, EXCEPTION_ADDRESS_ERROR_STORE); + + t.Equals(ctx.cop0_epc, 0x3000u, "non-delay exception should capture current pc in EPC"); + t.IsTrue((ctx.cop0_cause & COP0_CAUSE_BD) == 0u, "non-delay exception should clear CAUSE.BD"); + t.Equals(ctx.pc, EXCEPTION_VECTOR_BOOT, "BEV=1 should route exception to boot vector"); + }); + + tc.Run("handleSyscall rejects invocation in delay slot", [](TestCase &t) + { + PS2Runtime runtime; + std::vector rdram(PS2_RAM_SIZE, 0u); + R5900Context ctx{}; + ctx.in_delay_slot = true; + + bool threw = false; + try + { + runtime.handleSyscall(rdram.data(), &ctx, 0x3Cu); + } + catch (const std::runtime_error &) + { + threw = true; + } + + t.IsTrue(threw, "syscall from delay slot should throw to preserve block atomicity"); + }); + + tc.Run("VIF MSCAL and MSCNT toggle DBF and keep TOPS/ITOPS coherent", [](TestCase &t) + { + PS2Memory mem; + t.IsTrue(mem.initialize(), "PS2Memory initialize should succeed"); + + mem.vif1_regs.base = 4u; + mem.vif1_regs.ofst = 2u; + mem.vif1_regs.itop = 0x21u; + mem.vif1_regs.stat &= ~(1u << 7); // DBF = 0 + + uint32_t callbackPc = 0xFFFFFFFFu; + uint32_t callbackItop = 0xFFFFFFFFu; + uint32_t callbackCount = 0u; + mem.setVu1MscalCallback([&](uint32_t startPC, uint32_t itop) + { + callbackPc = startPC; + callbackItop = itop; + callbackCount++; + }); + + const uint32_t mscal = makeVifCmd(0x14u, 0u, 3u); // start PC = 3 * 8 + mem.processVIF1Data(reinterpret_cast(&mscal), sizeof(mscal)); + + t.Equals(callbackCount, 1u, "MSCAL should invoke VU1 callback exactly once"); + t.Equals(callbackPc, 24u, "MSCAL should pass startPC=imm*8"); + t.Equals(callbackItop, 0x21u, "MSCAL callback should receive 
current ITOP"); + t.Equals(mem.vif1_regs.itops, 0x21u, "MSCAL should latch ITOPS from ITOP"); + t.IsTrue((mem.vif1_regs.stat & (1u << 7)) != 0u, "MSCAL should toggle DBF on"); + t.Equals(mem.vif1_regs.tops, 6u, "DBF=1 should make TOPS=BASE+OFST"); + + const uint32_t mscnt = makeVifCmd(0x17u, 0u, 0u); + mem.processVIF1Data(reinterpret_cast(&mscnt), sizeof(mscnt)); + + t.Equals(callbackCount, 1u, "MSCNT should not invoke MSCAL callback"); + t.IsTrue((mem.vif1_regs.stat & (1u << 7)) == 0u, "MSCNT should toggle DBF back off"); + t.Equals(mem.vif1_regs.tops, 4u, "DBF=0 should make TOPS=BASE"); + t.Equals(mem.vif1_regs.itops, 0x21u, "MSCNT should refresh ITOPS from ITOP"); + }); + + tc.Run("GS sprite draw applies XYOFFSET and fully-outside scissor should not render", [](TestCase &t) + { + std::vector vram(PS2_GS_VRAM_SIZE, 0u); + GS gs; + gs.init(vram.data(), static_cast(vram.size()), nullptr); + + const uint64_t frame1 = + (0ull << 0) | // FBP + (1ull << 16) | // FBW + (0ull << 24) | // PSM CT32 + (0ull << 32); // FBMSK + gs.writeRegister(GS_REG_FRAME_1, frame1); + + // XYOFFSET=1,1 pixels (16.4 fixed point). + const uint64_t xyoffset = (16ull) | (16ull << 32); + gs.writeRegister(GS_REG_XYOFFSET_1, xyoffset); + + // Scissor initially includes pixel (1,1). + const uint64_t scissorInside = (0ull) | (3ull << 16) | (0ull << 32) | (3ull << 48); + gs.writeRegister(GS_REG_SCISSOR_1, scissorInside); + + gs.writeRegister(GS_REG_PRIM, static_cast(GS_PRIM_SPRITE)); + gs.writeRegister(GS_REG_RGBAQ, 0xFF3214C8ull); // RGBA=(200,20,50,255) + + // With XYOFFSET=(1,1), vertex at (2,2) draws to pixel (1,1). 
+ const uint64_t xyz = (32ull) | (32ull << 16) | (0ull << 32); + gs.writeRegister(GS_REG_XYZ2, xyz); + gs.writeRegister(GS_REG_XYZ2, xyz); + + const uint32_t insideOff = frameOffsetBytes(1u, 1u, 1u); + t.Equals(vram[insideOff + 0u], static_cast(200u), "inside draw should write R"); + t.Equals(vram[insideOff + 1u], static_cast(20u), "inside draw should write G"); + t.Equals(vram[insideOff + 2u], static_cast(50u), "inside draw should write B"); + t.Equals(vram[insideOff + 3u], static_cast(255u), "inside draw should write A"); + + std::memset(vram.data(), 0, 1024u); + + // Move scissor so target pixel is fully outside. + const uint64_t scissorOutside = (3ull) | (4ull << 16) | (3ull << 32) | (4ull << 48); + gs.writeRegister(GS_REG_SCISSOR_1, scissorOutside); + gs.writeRegister(GS_REG_XYZ2, xyz); + gs.writeRegister(GS_REG_XYZ2, xyz); + + bool anyWrite = false; + for (size_t i = 0; i < 1024u; ++i) + { + if (vram[i] != 0u) + { + anyWrite = true; + break; + } + } + t.IsFalse(anyWrite, "fully-outside sprite should not render any pixel"); + }); + + tc.Run("GS alpha blend uses ALPHA register FIX factor", [](TestCase &t) + { + std::vector vram(PS2_GS_VRAM_SIZE, 0u); + GS gs; + gs.init(vram.data(), static_cast(vram.size()), nullptr); + + const uint64_t frame1 = + (0ull << 0) | // FBP + (1ull << 16) | // FBW + (0ull << 24) | // PSM CT32 + (0ull << 32); // FBMSK + gs.writeRegister(GS_REG_FRAME_1, frame1); + gs.writeRegister(GS_REG_SCISSOR_1, (0ull) | (4ull << 16) | (0ull << 32) | (4ull << 48)); + gs.writeRegister(GS_REG_XYOFFSET_1, 0ull); + + const uint32_t pxOff = frameOffsetBytes(1u, 1u, 1u); + vram[pxOff + 0u] = 40u; + vram[pxOff + 1u] = 40u; + vram[pxOff + 2u] = 40u; + vram[pxOff + 3u] = 255u; + + // ABE on sprite prim. + gs.writeRegister(GS_REG_PRIM, static_cast(GS_PRIM_SPRITE) | (1ull << 6)); + + // ALPHA: (A-B)*FIX/128 + D + // A=Cs(0), B=Cd(1), C=FIX(2), D=Cd(1), FIX=64. 
+ const uint64_t alpha = (0ull << 0) | (1ull << 2) | (2ull << 4) | (1ull << 6) | (64ull << 32); + gs.writeRegister(GS_REG_ALPHA_1, alpha); + gs.writeRegister(GS_REG_RGBAQ, 0xFFC8C8C8ull); // src RGB = 200 + + const uint64_t xyz = (16ull) | (16ull << 16) | (0ull << 32); // pixel (1,1) + gs.writeRegister(GS_REG_XYZ2, xyz); + gs.writeRegister(GS_REG_XYZ2, xyz); + + // ((200 - 40) * 64 >> 7) + 40 = 120 + t.Equals(vram[pxOff + 0u], static_cast(120u), "alpha blend should update R with FIX factor"); + t.Equals(vram[pxOff + 1u], static_cast(120u), "alpha blend should update G with FIX factor"); + t.Equals(vram[pxOff + 2u], static_cast(120u), "alpha blend should update B with FIX factor"); + }); + + tc.Run("notifyRuntimeStop joins guest worker threads before teardown", [](TestCase &t) + { + notifyRuntimeStop(); + PS2Runtime runtime; + std::vector rdram(PS2_RAM_SIZE, 0u); + + constexpr uint32_t kEntry = 0x250000u; + constexpr uint32_t kThreadParamAddr = 0x2600u; + const uint32_t threadParam[7] = { + 0u, // attr + kEntry, // entry + 0x00100000u, // stack + 0x00000400u, // stack size + 0x00110000u, // gp + 8u, // priority + 0u // option + }; + + runtime.registerFunction(kEntry, &testRuntimeWorkerLoop); + std::memcpy(rdram.data() + kThreadParamAddr, threadParam, sizeof(threadParam)); + + R5900Context createCtx{}; + setRegU32(createCtx, 4, kThreadParamAddr); + CreateThread(rdram.data(), &createCtx, &runtime); + const int32_t tid = getRegS32(createCtx, 2); + t.IsTrue(tid > 0, "CreateThread should succeed for teardown-join test"); + + R5900Context startCtx{}; + setRegU32(startCtx, 4, static_cast(tid)); + setRegU32(startCtx, 5, 0u); + StartThread(rdram.data(), &startCtx, &runtime); + t.Equals(getRegS32(startCtx, 2), KE_OK, "StartThread should launch worker"); + + const bool started = waitUntil([&]() + { + return g_activeThreads.load(std::memory_order_relaxed) > 0; + }, std::chrono::milliseconds(500)); + t.IsTrue(started, "worker thread should become active"); + + 
runtime.requestStop(); + const bool drained = waitUntil([&]() + { + return g_activeThreads.load(std::memory_order_relaxed) == 0; + }, std::chrono::milliseconds(2000)); + t.IsTrue(drained, "requestStop should drain all guest worker threads"); + + notifyRuntimeStop(); + }); + + tc.Run("Semaphore poll/signal remains stable under host-thread contention", [](TestCase &t) + { + notifyRuntimeStop(); + PS2Runtime runtime; + std::vector rdram(PS2_RAM_SIZE, 0u); + + constexpr uint32_t kParamAddr = 0x2000u; + const uint32_t semaParam[6] = { + 0u, // count + 1u, // max_count + 1u, // init_count + 0u, // wait_threads + 0u, // attr + 0u // option + }; + std::memcpy(rdram.data() + kParamAddr, semaParam, sizeof(semaParam)); + + R5900Context createCtx{}; + setRegU32(createCtx, 4, kParamAddr); + CreateSema(rdram.data(), &createCtx, &runtime); + const int32_t sid = getRegS32(createCtx, 2); + t.IsTrue(sid > 0, "CreateSema should return a valid sid"); + + std::atomic pollOkCount{0}; + std::atomic signalOkCount{0}; + std::atomic pollerThrew{false}; + std::atomic signalerThrew{false}; + + std::thread poller([&]() + { + try + { + for (int i = 0; i < 64; ++i) + { + R5900Context pollCtx{}; + setRegU32(pollCtx, 4, static_cast(sid)); + PollSema(rdram.data(), &pollCtx, &runtime); + if (getRegS32(pollCtx, 2) == KE_OK) + { + pollOkCount.fetch_add(1, std::memory_order_relaxed); + } + } + } + catch (...) + { + pollerThrew.store(true, std::memory_order_release); + } + }); + + std::thread signaler([&]() + { + try + { + for (int i = 0; i < 64; ++i) + { + R5900Context signalCtx{}; + setRegU32(signalCtx, 4, static_cast(sid)); + SignalSema(rdram.data(), &signalCtx, &runtime); + if (getRegS32(signalCtx, 2) == KE_OK) + { + signalOkCount.fetch_add(1, std::memory_order_relaxed); + } + } + } + catch (...) 
+ { + signalerThrew.store(true, std::memory_order_release); + } + }); + + if (poller.joinable()) + { + poller.join(); + } + if (signaler.joinable()) + { + signaler.join(); + } + + t.IsFalse(pollerThrew.load(std::memory_order_acquire), + "PollSema worker thread should not throw"); + t.IsFalse(signalerThrew.load(std::memory_order_acquire), + "SignalSema worker thread should not throw"); + t.IsTrue(pollOkCount.load(std::memory_order_relaxed) > 0, + "contended PollSema should observe at least one successful acquire"); + t.IsTrue(signalOkCount.load(std::memory_order_relaxed) > 0, + "contended SignalSema should observe successful releases"); + + constexpr uint32_t kStatusAddr = 0x2100u; + R5900Context referCtx{}; + setRegU32(referCtx, 4, static_cast(sid)); + setRegU32(referCtx, 5, kStatusAddr); + ReferSemaStatus(rdram.data(), &referCtx, &runtime); + t.Equals(getRegS32(referCtx, 2), KE_OK, "ReferSemaStatus should succeed after contention"); + + int32_t finalCount = 0; + std::memcpy(&finalCount, rdram.data() + kStatusAddr + 0u, sizeof(finalCount)); + t.IsTrue(finalCount >= 0 && finalCount <= 1, "semaphore count should remain within [0, max_count]"); + + runtime.requestStop(); + notifyRuntimeStop(); + }); + + tc.Run("WaitEventFlag AND-mode is stable under concurrent setters", [](TestCase &t) + { + notifyRuntimeStop(); + PS2Runtime runtime; + std::vector rdram(PS2_RAM_SIZE, 0u); + + constexpr uint32_t kEventParamAddr = 0x2400u; + constexpr uint32_t kResBitsAddr = 0x2410u; + const uint32_t eventParam[3] = {0u, 0u, 0u}; + std::memcpy(rdram.data() + kEventParamAddr, eventParam, sizeof(eventParam)); + + R5900Context createCtx{}; + setRegU32(createCtx, 4, kEventParamAddr); + CreateEventFlag(rdram.data(), &createCtx, &runtime); + const int32_t eid = getRegS32(createCtx, 2); + t.IsTrue(eid > 0, "CreateEventFlag should return a valid id"); + + std::atomic waiterDone{false}; + std::atomic waiterRet{-9999}; + std::atomic waiterBits{0u}; + std::atomic waiterThrew{false}; + std::atomic 
setterAThrew{false}; + std::atomic setterBThrew{false}; + + std::thread waiter([&]() + { + try + { + R5900Context waitCtx{}; + setRegU32(waitCtx, 4, static_cast(eid)); + setRegU32(waitCtx, 5, 0x3u); // wait for bit0 and bit1 (AND mode) + setRegU32(waitCtx, 6, 0u); // AND, no clear + setRegU32(waitCtx, 7, kResBitsAddr); + WaitEventFlag(rdram.data(), &waitCtx, &runtime); + waiterRet.store(getRegS32(waitCtx, 2), std::memory_order_relaxed); + uint32_t bits = 0u; + std::memcpy(&bits, rdram.data() + kResBitsAddr, sizeof(bits)); + waiterBits.store(bits, std::memory_order_relaxed); + } + catch (...) + { + waiterThrew.store(true, std::memory_order_release); + } + waiterDone.store(true, std::memory_order_release); + }); + + std::thread setterA([&]() + { + try + { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + R5900Context setCtx{}; + setRegU32(setCtx, 4, static_cast(eid)); + setRegU32(setCtx, 5, 0x1u); + SetEventFlag(rdram.data(), &setCtx, &runtime); + } + catch (...) + { + setterAThrew.store(true, std::memory_order_release); + } + }); + + std::thread setterB([&]() + { + try + { + std::this_thread::sleep_for(std::chrono::milliseconds(15)); + R5900Context setCtx{}; + setRegU32(setCtx, 4, static_cast(eid)); + setRegU32(setCtx, 5, 0x2u); + SetEventFlag(rdram.data(), &setCtx, &runtime); + } + catch (...) 
+ { + setterBThrew.store(true, std::memory_order_release); + } + }); + + const bool woke = waitUntil([&]() + { + return waiterDone.load(std::memory_order_acquire); + }, std::chrono::milliseconds(500)); + + if (setterA.joinable()) + { + setterA.join(); + } + if (setterB.joinable()) + { + setterB.join(); + } + if (waiter.joinable()) + { + waiter.join(); + } + + t.IsFalse(waiterThrew.load(std::memory_order_acquire), + "WaitEventFlag waiter thread should not throw"); + t.IsFalse(setterAThrew.load(std::memory_order_acquire), + "SetEventFlag setterA thread should not throw"); + t.IsFalse(setterBThrew.load(std::memory_order_acquire), + "SetEventFlag setterB thread should not throw"); + t.IsTrue(woke, "WaitEventFlag AND waiter should wake after both bits are published"); + t.Equals(waiterRet.load(std::memory_order_relaxed), KE_OK, "WaitEventFlag should return KE_OK"); + t.IsTrue((waiterBits.load(std::memory_order_relaxed) & 0x3u) == 0x3u, + "WaitEventFlag result bits should include both concurrently-set bits"); + + R5900Context deleteCtx{}; + setRegU32(deleteCtx, 4, static_cast(eid)); + DeleteEventFlag(rdram.data(), &deleteCtx, &runtime); + runtime.requestStop(); + notifyRuntimeStop(); + }); + }); +} diff --git a/ps2xTest/src/ps2_runtime_interrupt_tests.cpp b/ps2xTest/src/ps2_runtime_interrupt_tests.cpp new file mode 100644 index 00000000..79d41933 --- /dev/null +++ b/ps2xTest/src/ps2_runtime_interrupt_tests.cpp @@ -0,0 +1,458 @@ +#include "MiniTest.h" +#include "ps2_runtime.h" +#include "ps2_syscalls.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace ps2_syscalls; + +namespace +{ + constexpr int KE_OK = 0; + constexpr int KE_EVF_COND = -421; + + constexpr uint32_t WEF_OR = 1u; + constexpr uint32_t WEF_CLEAR = 0x10u; + constexpr uint32_t WEF_CLEAR_ALL = 0x20u; + + struct Ps2EventFlagInfo + { + uint32_t attr; + uint32_t option; + uint32_t initBits; + uint32_t currBits; + int32_t numThreads; + int32_t reserved1; + int32_t reserved2; 
+ }; + + static_assert(sizeof(Ps2EventFlagInfo) == 28u, "Unexpected Ps2EventFlagInfo layout."); + + struct TestEnv + { + std::vector rdram; + PS2Runtime runtime; + + TestEnv() : rdram(PS2_RAM_SIZE, 0u) + { + } + }; + + std::atomic g_vblankStartHits{0u}; + std::atomic g_vblankEndHits{0u}; + std::atomic g_lastIntcArg{0u}; + + void setRegU32(R5900Context &ctx, int reg, uint32_t value) + { + ctx.r[reg] = _mm_set_epi64x(0, static_cast(value)); + } + + int32_t getRegS32(const R5900Context &ctx, int reg) + { + return static_cast(::getRegU32(&ctx, reg)); + } + + bool callSyscall(uint32_t syscallNumber, uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) + { + return dispatchNumericSyscall(syscallNumber, rdram, ctx, runtime); + } + + void writeGuestU32(uint8_t *rdram, uint32_t addr, uint32_t value) + { + std::memcpy(rdram + addr, &value, sizeof(value)); + } + + uint32_t readGuestU32(const uint8_t *rdram, uint32_t addr) + { + uint32_t value = 0; + std::memcpy(&value, rdram + addr, sizeof(value)); + return value; + } + + uint64_t readGuestU64(const uint8_t *rdram, uint32_t addr) + { + uint64_t value = 0; + std::memcpy(&value, rdram + addr, sizeof(value)); + return value; + } + + template + bool waitUntil(Predicate pred, std::chrono::milliseconds timeout) + { + const auto deadline = std::chrono::steady_clock::now() + timeout; + while (std::chrono::steady_clock::now() < deadline) + { + if (pred()) + { + return true; + } + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + return pred(); + } + + void cleanupRuntime(TestEnv &env) + { + env.runtime.requestStop(); + notifyRuntimeStop(); + } + + void testIntcHandler(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) + { + (void)rdram; + (void)runtime; + + const uint32_t cause = getRegU32(ctx, 4); + const uint32_t arg = getRegU32(ctx, 5); + g_lastIntcArg.store(arg, std::memory_order_relaxed); + + if (cause == 2u) + { + g_vblankStartHits.fetch_add(1u, std::memory_order_relaxed); + } + else if (cause == 3u) + { + 
g_vblankEndHits.fetch_add(1u, std::memory_order_relaxed); + } + + ctx->pc = 0u; + } +} + +void register_ps2_runtime_interrupt_tests() +{ + MiniTest::Case("PS2RuntimeInterrupt", [](TestCase &tc) + { + tc.Run("SetVSyncFlag updates guest flag and monotonic tick", [](TestCase &t) + { + notifyRuntimeStop(); + TestEnv env; + + constexpr uint32_t kFlagAddr = 0x1000u; + constexpr uint32_t kTickAddr = 0x1010u; + + writeGuestU32(env.rdram.data(), kFlagAddr, 0xDEADBEEFu); + writeGuestU32(env.rdram.data(), kTickAddr + 0u, 0xAAAAAAAAu); + writeGuestU32(env.rdram.data(), kTickAddr + 4u, 0xBBBBBBBBu); + + R5900Context ctx{}; + setRegU32(ctx, 4, kFlagAddr); + setRegU32(ctx, 5, kTickAddr); + t.IsTrue(callSyscall(0x73u, env.rdram.data(), &ctx, &env.runtime), "SetVSyncFlag syscall should dispatch"); + t.Equals(getRegS32(ctx, 2), KE_OK, "SetVSyncFlag should return KE_OK"); + t.Equals(readGuestU32(env.rdram.data(), kFlagAddr), 0u, "SetVSyncFlag should reset flag to zero"); + t.Equals(readGuestU64(env.rdram.data(), kTickAddr), 0ull, "SetVSyncFlag should reset tick counter to zero"); + + const bool firstTickSeen = waitUntil([&]() { + return readGuestU64(env.rdram.data(), kTickAddr) > 0u; + }, std::chrono::milliseconds(300)); + t.IsTrue(firstTickSeen, "VSync worker should update tick value"); + + const uint64_t firstTick = readGuestU64(env.rdram.data(), kTickAddr); + t.IsTrue(firstTick > 0u, "First observed VSync tick should be positive"); + t.Equals(readGuestU32(env.rdram.data(), kFlagAddr), 1u, "VSync worker should set flag to one"); + + const bool secondTickSeen = waitUntil([&]() { + return readGuestU64(env.rdram.data(), kTickAddr) > firstTick; + }, std::chrono::milliseconds(300)); + t.IsTrue(secondTickSeen, "VSync tick should continue to advance"); + t.IsTrue(readGuestU64(env.rdram.data(), kTickAddr) > firstTick, "tick should be monotonic"); + + cleanupRuntime(env); + }); + + tc.Run("INTC VBLANK handlers respect EnableIntc and DisableIntc masks", [](TestCase &t) + { + 
notifyRuntimeStop(); + TestEnv env; + + g_vblankStartHits.store(0u, std::memory_order_relaxed); + g_vblankEndHits.store(0u, std::memory_order_relaxed); + g_lastIntcArg.store(0u, std::memory_order_relaxed); + + constexpr uint32_t kFlagAddr = 0x1100u; + constexpr uint32_t kTickAddr = 0x1110u; + constexpr uint32_t kHandlerAddr = 0x00ABC100u; + + env.runtime.registerFunction(kHandlerAddr, &testIntcHandler); + + R5900Context addStart{}; + setRegU32(addStart, 4, 2u); // VBLANK start + setRegU32(addStart, 5, kHandlerAddr); + setRegU32(addStart, 6, 0u); + setRegU32(addStart, 7, 0xCAFE0002u); + setRegU32(addStart, 28, 0x12340000u); + setRegU32(addStart, 29, 0x001FFFE0u); + t.IsTrue(callSyscall(0x10u, env.rdram.data(), &addStart, &env.runtime), "AddIntcHandler syscall should dispatch"); + t.IsTrue(getRegS32(addStart, 2) > 0, "AddIntcHandler for cause 2 should return handler id"); + + R5900Context addEnd{}; + setRegU32(addEnd, 4, 3u); // VBLANK end + setRegU32(addEnd, 5, kHandlerAddr); + setRegU32(addEnd, 6, 0u); + setRegU32(addEnd, 7, 0xCAFE0003u); + setRegU32(addEnd, 28, 0x12340000u); + setRegU32(addEnd, 29, 0x001FFFE0u); + t.IsTrue(callSyscall(0x10u, env.rdram.data(), &addEnd, &env.runtime), "AddIntcHandler syscall should dispatch"); + t.IsTrue(getRegS32(addEnd, 2) > 0, "AddIntcHandler for cause 3 should return handler id"); + + R5900Context vsyncCtx{}; + setRegU32(vsyncCtx, 4, kFlagAddr); + setRegU32(vsyncCtx, 5, kTickAddr); + t.IsTrue(callSyscall(0x73u, env.rdram.data(), &vsyncCtx, &env.runtime), "SetVSyncFlag syscall should dispatch"); + t.Equals(getRegS32(vsyncCtx, 2), KE_OK, "SetVSyncFlag should succeed"); + + const bool startSeen = waitUntil([&]() { + return g_vblankStartHits.load(std::memory_order_relaxed) > 0u; + }, std::chrono::milliseconds(400)); + const bool endSeen = waitUntil([&]() { + return g_vblankEndHits.load(std::memory_order_relaxed) > 0u; + }, std::chrono::milliseconds(400)); + + t.IsTrue(startSeen, "VBLANK start handler should fire while cause 2 is 
enabled"); + t.IsTrue(endSeen, "VBLANK end handler should fire while cause 3 is enabled"); + + R5900Context disableStart{}; + setRegU32(disableStart, 4, 2u); + t.IsTrue(callSyscall(0x15u, env.rdram.data(), &disableStart, &env.runtime), "DisableIntc syscall should dispatch"); + t.Equals(getRegS32(disableStart, 2), KE_OK, "DisableIntc should return KE_OK"); + + std::this_thread::sleep_for(std::chrono::milliseconds(40)); + const uint32_t startAfterDisable = g_vblankStartHits.load(std::memory_order_relaxed); + const uint32_t endAfterDisable = g_vblankEndHits.load(std::memory_order_relaxed); + + std::this_thread::sleep_for(std::chrono::milliseconds(80)); + const uint32_t startLater = g_vblankStartHits.load(std::memory_order_relaxed); + const uint32_t endLater = g_vblankEndHits.load(std::memory_order_relaxed); + + t.Equals(startLater, startAfterDisable, "cause 2 handler count should stop increasing while cause 2 is disabled"); + t.IsTrue(endLater > endAfterDisable, "cause 3 handler should keep firing while still enabled"); + + R5900Context enableStart{}; + setRegU32(enableStart, 4, 2u); + t.IsTrue(callSyscall(0x14u, env.rdram.data(), &enableStart, &env.runtime), "EnableIntc syscall should dispatch"); + t.Equals(getRegS32(enableStart, 2), KE_OK, "EnableIntc should return KE_OK"); + + const bool startResumed = waitUntil([&]() { + return g_vblankStartHits.load(std::memory_order_relaxed) > startLater; + }, std::chrono::milliseconds(300)); + t.IsTrue(startResumed, "cause 2 handler should resume after re-enable"); + + const uint32_t lastArg = g_lastIntcArg.load(std::memory_order_relaxed); + t.IsTrue(lastArg == 0xCAFE0002u || lastArg == 0xCAFE0003u, + "handler should receive configured argument value"); + + cleanupRuntime(env); + }); + + tc.Run("WaitEventFlag blocks and wakes when SetEventFlag publishes bits", [](TestCase &t) + { + notifyRuntimeStop(); + TestEnv env; + + constexpr uint32_t kParamAddr = 0x1200u; + constexpr uint32_t kResBitsAddr = 0x1300u; + + const uint32_t 
eventParam[3] = { + 0u, // attr + 0u, // option + 0u // init bits + }; + std::memcpy(env.rdram.data() + kParamAddr, eventParam, sizeof(eventParam)); + + R5900Context createCtx{}; + setRegU32(createCtx, 4, kParamAddr); + CreateEventFlag(env.rdram.data(), &createCtx, &env.runtime); + const int32_t eid = getRegS32(createCtx, 2); + t.IsTrue(eid > 0, "CreateEventFlag should return a valid id"); + + writeGuestU32(env.rdram.data(), kResBitsAddr, 0u); + + std::atomic waiterDone{false}; + std::atomic waiterThrew{false}; + std::atomic waiterRet{0x7FFFFFFF}; + std::atomic waiterResBits{0u}; + + std::thread waiter([&]() + { + try + { + R5900Context waitCtx{}; + setRegU32(waitCtx, 4, static_cast(eid)); + setRegU32(waitCtx, 5, 0x4u); // wait bits + setRegU32(waitCtx, 6, WEF_OR); // OR mode + setRegU32(waitCtx, 7, kResBitsAddr); + WaitEventFlag(env.rdram.data(), &waitCtx, &env.runtime); + waiterRet.store(getRegS32(waitCtx, 2), std::memory_order_relaxed); + waiterResBits.store(readGuestU32(env.rdram.data(), kResBitsAddr), std::memory_order_relaxed); + } + catch (...) + { + waiterThrew.store(true, std::memory_order_release); + } + + waiterDone.store(true, std::memory_order_release); + }); + + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + t.IsFalse(waiterDone.load(std::memory_order_acquire), "WaitEventFlag should block before matching bits are set"); + + R5900Context signalCtx{}; + setRegU32(signalCtx, 4, static_cast(eid)); + setRegU32(signalCtx, 5, 0x4u); + SetEventFlag(env.rdram.data(), &signalCtx, &env.runtime); + t.Equals(getRegS32(signalCtx, 2), KE_OK, "SetEventFlag should succeed"); + + const bool woke = waitUntil([&]() { + return waiterDone.load(std::memory_order_acquire); + }, std::chrono::milliseconds(300)); + if (!woke) + { + // Force unblock for deterministic test cleanup. 
+ R5900Context deleteCtx{}; + setRegU32(deleteCtx, 4, static_cast(eid)); + DeleteEventFlag(env.rdram.data(), &deleteCtx, &env.runtime); + } + + if (waiter.joinable()) + { + waiter.join(); + } + + t.IsFalse(waiterThrew.load(std::memory_order_acquire), + "WaitEventFlag waiter thread should not throw"); + t.IsTrue(woke, "WaitEventFlag should wake after SetEventFlag publishes matching bits"); + t.Equals(waiterRet.load(std::memory_order_relaxed), KE_OK, "waiter should return KE_OK"); + t.IsTrue((waiterResBits.load(std::memory_order_relaxed) & 0x4u) != 0u, + "waiter result bits should include published bit"); + + R5900Context deleteCtx{}; + setRegU32(deleteCtx, 4, static_cast(eid)); + DeleteEventFlag(env.rdram.data(), &deleteCtx, &env.runtime); + + cleanupRuntime(env); + }); + + tc.Run("PollEventFlag WEF_CLEAR clears only matched bits", [](TestCase &t) + { + notifyRuntimeStop(); + TestEnv env; + + constexpr uint32_t kParamAddr = 0x1400u; + constexpr uint32_t kResBitsAddr = 0x1410u; + constexpr uint32_t kStatusAddr = 0x1420u; + + const uint32_t eventParam[3] = { + 0u, // attr + 0u, // option + 0x7u // init bits: 0b111 + }; + std::memcpy(env.rdram.data() + kParamAddr, eventParam, sizeof(eventParam)); + + R5900Context createCtx{}; + setRegU32(createCtx, 4, kParamAddr); + CreateEventFlag(env.rdram.data(), &createCtx, &env.runtime); + const int32_t eid = getRegS32(createCtx, 2); + t.IsTrue(eid > 0, "CreateEventFlag should return a valid id"); + + R5900Context pollCtx{}; + setRegU32(pollCtx, 4, static_cast(eid)); + setRegU32(pollCtx, 5, 0x1u); + setRegU32(pollCtx, 6, WEF_OR | WEF_CLEAR); + setRegU32(pollCtx, 7, kResBitsAddr); + PollEventFlag(env.rdram.data(), &pollCtx, &env.runtime); + t.Equals(getRegS32(pollCtx, 2), KE_OK, "PollEventFlag should succeed when condition is met"); + t.Equals(readGuestU32(env.rdram.data(), kResBitsAddr), 0x7u, "PollEventFlag should report bits before clear"); + + R5900Context referCtx{}; + setRegU32(referCtx, 4, static_cast(eid)); + 
setRegU32(referCtx, 5, kStatusAddr); + ReferEventFlagStatus(env.rdram.data(), &referCtx, &env.runtime); + t.Equals(getRegS32(referCtx, 2), KE_OK, "ReferEventFlagStatus should succeed"); + + Ps2EventFlagInfo info{}; + std::memcpy(&info, env.rdram.data() + kStatusAddr, sizeof(info)); + t.Equals(info.currBits, 0x6u, "WEF_CLEAR should clear only requested bits, not all bits"); + + R5900Context pollMissCtx{}; + setRegU32(pollMissCtx, 4, static_cast(eid)); + setRegU32(pollMissCtx, 5, 0x1u); + setRegU32(pollMissCtx, 6, WEF_OR); + setRegU32(pollMissCtx, 7, 0u); + PollEventFlag(env.rdram.data(), &pollMissCtx, &env.runtime); + t.Equals(getRegS32(pollMissCtx, 2), KE_EVF_COND, + "after clearing bit 0, polling for bit 0 should fail condition"); + + R5900Context deleteCtx{}; + setRegU32(deleteCtx, 4, static_cast(eid)); + DeleteEventFlag(env.rdram.data(), &deleteCtx, &env.runtime); + t.Equals(getRegS32(deleteCtx, 2), KE_OK, "DeleteEventFlag should succeed"); + + cleanupRuntime(env); + }); + + tc.Run("WaitVSyncTick returns when runtime stop is requested", [](TestCase &t) + { + notifyRuntimeStop(); + TestEnv env; + + std::atomic waiterDone{false}; + std::atomic waiterThrew{false}; + std::thread waiter([&]() + { + try + { + WaitVSyncTick(env.rdram.data(), &env.runtime); + } + catch (...) + { + waiterThrew.store(true, std::memory_order_release); + } + waiterDone.store(true, std::memory_order_release); + }); + + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + env.runtime.requestStop(); + + bool wokeOnStop = waitUntil([&]() { + return waiterDone.load(std::memory_order_acquire); + }, std::chrono::milliseconds(80)); + + if (!wokeOnStop) + { + // Fallback wake-up for deterministic cleanup: one extra tick on fresh runtime. 
+ TestEnv wakeEnv; + R5900Context setCtx{}; + constexpr uint32_t kWakeFlagAddr = 0x1500u; + constexpr uint32_t kWakeTickAddr = 0x1510u; + setRegU32(setCtx, 4, kWakeFlagAddr); + setRegU32(setCtx, 5, kWakeTickAddr); + (void)callSyscall(0x73u, wakeEnv.rdram.data(), &setCtx, &wakeEnv.runtime); + (void)waitUntil([&]() { + return readGuestU64(wakeEnv.rdram.data(), kWakeTickAddr) > 0u; + }, std::chrono::milliseconds(300)); + wakeEnv.runtime.requestStop(); + wokeOnStop = waitUntil([&]() { + return waiterDone.load(std::memory_order_acquire); + }, std::chrono::milliseconds(80)); + } + + if (waiter.joinable()) + { + waiter.join(); + } + + t.IsFalse(waiterThrew.load(std::memory_order_acquire), + "WaitVSyncTick waiter thread should not throw"); + t.IsTrue(wokeOnStop, "WaitVSyncTick waiter should unblock when runtime is stopping"); + + cleanupRuntime(env); + }); + }); +} diff --git a/ps2xTest/src/ps2_runtime_io_tests.cpp b/ps2xTest/src/ps2_runtime_io_tests.cpp index 77c20391..ecadc544 100644 --- a/ps2xTest/src/ps2_runtime_io_tests.cpp +++ b/ps2xTest/src/ps2_runtime_io_tests.cpp @@ -1,6 +1,7 @@ #include "MiniTest.h" #include "ps2_runtime.h" #include "ps2_syscalls.h" +#include "ps2_stubs.h" #include #include @@ -274,5 +275,26 @@ void register_ps2_runtime_io_tests() t.IsFalse(std::filesystem::exists(test.paths.cdRoot / "ISOLATED"), "mc0: directory should NOT exist under cdRoot"); }); + + tc.Run("sceIoctl cmd1 updates wait flag state", [](TestCase &t) + { + TestContext test; + + constexpr uint32_t statusAddr = GUEST_BUFFER_AREA_START + 0x1800; + const uint32_t busy = 1u; + std::memcpy(test.rdram.data() + statusAddr, &busy, sizeof(busy)); + + setRegU32(test.ctx, 4, 3u); // fd + setRegU32(test.ctx, 5, 1u); // cmd + setRegU32(test.ctx, 6, statusAddr); // arg + + ps2_stubs::sceIoctl(test.rdram.data(), &test.ctx, nullptr); + + t.Equals(getRegS32(&test.ctx, 2), 0, "sceIoctl cmd1 should return success"); + + uint32_t state = 0xFFFFFFFFu; + std::memcpy(&state, test.rdram.data() + 
statusAddr, sizeof(state)); + t.Equals(state, 0u, "sceIoctl cmd1 should clear wait state from busy to ready"); + }); }); } diff --git a/ps2xTest/src/ps2_runtime_kernel_tests.cpp b/ps2xTest/src/ps2_runtime_kernel_tests.cpp new file mode 100644 index 00000000..e05e6e1c --- /dev/null +++ b/ps2xTest/src/ps2_runtime_kernel_tests.cpp @@ -0,0 +1,390 @@ +#include "MiniTest.h" +#include "ps2_runtime.h" +#include "ps2_syscalls.h" + +#include +#include +#include +#include + +using namespace ps2_syscalls; + +namespace +{ + constexpr uint32_t K_PARAM_ADDR = 0x1000u; + constexpr uint32_t K_STATUS_ADDR = 0x1400u; + + constexpr int KE_OK = 0; + constexpr int KE_ERROR = -1; + constexpr int KE_ILLEGAL_THID = -406; + constexpr int KE_UNKNOWN_THID = -407; + constexpr int KE_UNKNOWN_SEMID = -408; + constexpr int KE_DORMANT = -413; + constexpr int KE_SEMA_ZERO = -419; + constexpr int KE_SEMA_OVF = -420; + + constexpr int THS_DORMANT = 0x10; + + struct EeThreadStatus + { + int32_t status; + uint32_t func; + uint32_t stack; + int32_t stack_size; + uint32_t gp_reg; + int32_t initial_priority; + int32_t current_priority; + uint32_t attr; + uint32_t option; + uint32_t waitType; + uint32_t waitId; + uint32_t wakeupCount; + }; + + struct EeSemaStatus + { + int32_t count; + int32_t max_count; + int32_t init_count; + int32_t wait_threads; + uint32_t attr; + uint32_t option; + }; + + static_assert(sizeof(EeThreadStatus) == 0x30u, "Unexpected ee_thread_status_t size."); + static_assert(sizeof(EeSemaStatus) == 0x18u, "Unexpected ee_sema_t size."); + + void setRegU32(R5900Context &ctx, int reg, uint32_t value) + { + ctx.r[reg] = _mm_set_epi64x(0, static_cast(value)); + } + + int32_t getRegS32(const R5900Context &ctx, int reg) + { + return static_cast(::getRegU32(&ctx, reg)); + } + + void writeGuestU32(uint8_t *rdram, uint32_t addr, uint32_t value) + { + std::memcpy(rdram + addr, &value, sizeof(value)); + } + + void writeGuestWords(uint8_t *rdram, uint32_t addr, const uint32_t *words, size_t count) + 
{ + for (size_t i = 0; i < count; ++i) + { + writeGuestU32(rdram, addr + static_cast(i * sizeof(uint32_t)), words[i]); + } + } + + bool callSyscall(uint32_t syscallNumber, uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) + { + return dispatchNumericSyscall(syscallNumber, rdram, ctx, runtime); + } + + struct TestEnv + { + std::vector rdram; + R5900Context ctx{}; + PS2Runtime runtime; + + TestEnv() : rdram(PS2_RAM_SIZE, 0) + { + std::memset(&ctx, 0, sizeof(ctx)); + } + }; +} + +void register_ps2_runtime_kernel_tests() +{ + MiniTest::Case("PS2RuntimeKernel", [](TestCase &tc) + { + tc.Run("thread create/refer/delete follows EE status layout", [](TestCase &t) + { + TestEnv env; + + const uint32_t threadParam[7] = { + 0x00000002u, // attr + 0x00200000u, // entry + 0x00300000u, // stack + 0x00000800u, // stack size + 0x00120000u, // gp + 5u, // initial priority + 0xABCD0001u // option + }; + + writeGuestWords(env.rdram.data(), K_PARAM_ADDR, threadParam, std::size(threadParam)); + setRegU32(env.ctx, 4, K_PARAM_ADDR); + CreateThread(env.rdram.data(), &env.ctx, &env.runtime); + + const int32_t tid = getRegS32(env.ctx, 2); + t.IsTrue(tid >= 2, "CreateThread should return a valid non-main thread id"); + + setRegU32(env.ctx, 4, static_cast(tid)); + setRegU32(env.ctx, 5, K_STATUS_ADDR); + ReferThreadStatus(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "ReferThreadStatus should succeed for created thread"); + + EeThreadStatus status{}; + std::memcpy(&status, env.rdram.data() + K_STATUS_ADDR, sizeof(status)); + t.Equals(status.status, THS_DORMANT, "new thread should be dormant before StartThread"); + t.Equals(status.func, threadParam[1], "status.func should match entry"); + t.Equals(status.stack, threadParam[2], "status.stack should match configured stack"); + t.Equals(status.stack_size, static_cast(threadParam[3]), "status.stack_size should match thread param"); + t.Equals(status.gp_reg, threadParam[4], "status.gp_reg should match 
configured gp"); + t.Equals(status.initial_priority, 5, "status.initial_priority should match thread param"); + t.Equals(status.current_priority, 5, "status.current_priority should start at initial priority"); + t.Equals(status.attr, threadParam[0], "status.attr should match thread param"); + t.Equals(status.option, threadParam[6], "status.option should match thread param"); + + setRegU32(env.ctx, 4, static_cast(tid)); + DeleteThread(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "DeleteThread should succeed for dormant thread"); + + setRegU32(env.ctx, 4, static_cast(tid)); + setRegU32(env.ctx, 5, K_STATUS_ADDR); + ReferThreadStatus(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_UNKNOWN_THID, "deleted thread id should no longer be referable"); + }); + + tc.Run("start thread validates target and entry registration", [](TestCase &t) + { + TestEnv env; + + const uint32_t threadParam[7] = { + 0u, + 0x00250000u, // entry not registered in runtime + 0x00300000u, + 0x00000400u, + 0x00110000u, + 8u, + 0u + }; + + writeGuestWords(env.rdram.data(), K_PARAM_ADDR, threadParam, std::size(threadParam)); + setRegU32(env.ctx, 4, K_PARAM_ADDR); + CreateThread(env.rdram.data(), &env.ctx, &env.runtime); + const int32_t tid = getRegS32(env.ctx, 2); + t.IsTrue(tid >= 2, "CreateThread should return an id before StartThread check"); + + setRegU32(env.ctx, 4, static_cast(tid)); + setRegU32(env.ctx, 5, 0x12345678u); + StartThread(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_ERROR, "StartThread should fail when entry is not registered"); + + setRegU32(env.ctx, 4, static_cast(tid)); + setRegU32(env.ctx, 5, K_STATUS_ADDR); + ReferThreadStatus(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "ReferThreadStatus should still succeed after failed StartThread"); + + EeThreadStatus status{}; + std::memcpy(&status, env.rdram.data() + K_STATUS_ADDR, 
sizeof(status)); + t.Equals(status.status, THS_DORMANT, "thread should remain dormant when StartThread fails early"); + + setRegU32(env.ctx, 4, static_cast(tid)); + DeleteThread(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "DeleteThread should clean up failed-start thread"); + }); + + tc.Run("thread id and wakeup guard rails match kernel-style errors", [](TestCase &t) + { + TestEnv env; + + GetThreadId(env.rdram.data(), &env.ctx, &env.runtime); + const int32_t selfTid = getRegS32(env.ctx, 2); + t.IsTrue(selfTid > 0, "GetThreadId should return a positive thread id"); + + setRegU32(env.ctx, 4, 0u); + WakeupThread(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_ILLEGAL_THID, "WakeupThread(TH_SELF/0) should be illegal"); + + setRegU32(env.ctx, 4, static_cast(selfTid)); + WakeupThread(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_ILLEGAL_THID, "WakeupThread(self) should be illegal"); + + setRegU32(env.ctx, 4, 0u); + iCancelWakeupThread(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_ILLEGAL_THID, "iCancelWakeupThread(0) should be illegal"); + + setRegU32(env.ctx, 4, 0u); + CancelWakeupThread(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "CancelWakeupThread(TH_SELF) should return previous count (0)"); + }); + + tc.Run("semaphore EE layout covers poll, signal overflow, and status", [](TestCase &t) + { + TestEnv env; + + const uint32_t semaParam[6] = { + 0u, // count (unused by runtime decode) + 2u, // max_count + 1u, // init_count + 0u, // wait_threads + 0x11u, // attr + 0x00202020u // option + }; + + writeGuestWords(env.rdram.data(), K_PARAM_ADDR, semaParam, std::size(semaParam)); + setRegU32(env.ctx, 4, K_PARAM_ADDR); + CreateSema(env.rdram.data(), &env.ctx, &env.runtime); + const int32_t sid = getRegS32(env.ctx, 2); + t.IsTrue(sid > 0, "CreateSema should return positive semaphore id"); + + 
setRegU32(env.ctx, 4, static_cast(sid)); + setRegU32(env.ctx, 5, K_STATUS_ADDR); + ReferSemaStatus(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "ReferSemaStatus should succeed for valid semaphore"); + + EeSemaStatus semaStatus{}; + std::memcpy(&semaStatus, env.rdram.data() + K_STATUS_ADDR, sizeof(semaStatus)); + t.Equals(semaStatus.count, 1, "initial semaphore count should match init_count"); + t.Equals(semaStatus.max_count, 2, "max_count should match CreateSema params"); + t.Equals(semaStatus.init_count, 1, "init_count should be preserved"); + t.Equals(semaStatus.attr, semaParam[4], "attr should be preserved"); + t.Equals(semaStatus.option, semaParam[5], "option should be preserved"); + + setRegU32(env.ctx, 4, static_cast(sid)); + PollSema(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "PollSema should consume one available token"); + + setRegU32(env.ctx, 4, static_cast(sid)); + PollSema(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_SEMA_ZERO, "PollSema should fail when count is zero"); + + setRegU32(env.ctx, 4, static_cast(sid)); + SignalSema(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SignalSema should increment count when below max"); + + setRegU32(env.ctx, 4, static_cast(sid)); + SignalSema(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SignalSema should allow increment up to max"); + + setRegU32(env.ctx, 4, static_cast(sid)); + SignalSema(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_SEMA_OVF, "SignalSema should report overflow at max_count"); + + setRegU32(env.ctx, 4, static_cast(sid)); + DeleteSema(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "DeleteSema should succeed for existing semaphore"); + + setRegU32(env.ctx, 4, static_cast(sid)); + PollSema(env.rdram.data(), &env.ctx, &env.runtime); + 
t.Equals(getRegS32(env.ctx, 2), KE_UNKNOWN_SEMID, "deleted semaphore id should be rejected"); + }); + + tc.Run("semaphore legacy layout decode remains supported", [](TestCase &t) + { + TestEnv env; + + const uint32_t legacyParam[6] = { + 0x7u, // attr + 0x1234u, // legacy option / ee max_count + 3u, // init + 4u, // max + 0u, // ee attr (ignored if legacy selected) + 0x1FFFFFFFu // ee option (invalid guest pointer to bias decode toward legacy) + }; + writeGuestWords(env.rdram.data(), K_PARAM_ADDR, legacyParam, std::size(legacyParam)); + + setRegU32(env.ctx, 4, K_PARAM_ADDR); + CreateSema(env.rdram.data(), &env.ctx, &env.runtime); + const int32_t sid = getRegS32(env.ctx, 2); + t.IsTrue(sid > 0, "CreateSema should still accept legacy-style parameter blocks"); + + setRegU32(env.ctx, 4, static_cast(sid)); + setRegU32(env.ctx, 5, K_STATUS_ADDR); + ReferSemaStatus(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "ReferSemaStatus should succeed for legacy-decoded semaphore"); + + EeSemaStatus semaStatus{}; + std::memcpy(&semaStatus, env.rdram.data() + K_STATUS_ADDR, sizeof(semaStatus)); + t.Equals(semaStatus.count, 3, "legacy init_count should map to runtime count"); + t.Equals(semaStatus.max_count, 4, "legacy max_count should map to runtime max"); + t.Equals(semaStatus.attr, 0x7u, "legacy attr should be preserved"); + t.Equals(semaStatus.option, 0x1234u, "legacy option should be preserved"); + + setRegU32(env.ctx, 4, static_cast(sid)); + DeleteSema(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "DeleteSema should clean up legacy-decoded semaphore"); + }); + + tc.Run("setup heap and allocator primitives track end-of-heap", [](TestCase &t) + { + TestEnv env; + + setRegU32(env.ctx, 4, 0x00180010u); + setRegU32(env.ctx, 5, 0x00001000u); + t.IsTrue(callSyscall(0x3Du, env.rdram.data(), &env.ctx, &env.runtime), "SetupHeap syscall should dispatch"); + const uint32_t heapBase = static_cast(getRegS32(env.ctx, 
2)); + t.Equals(heapBase, 0x00180010u, "SetupHeap should return configured base"); + + t.IsTrue(callSyscall(0x3Eu, env.rdram.data(), &env.ctx, &env.runtime), "EndOfHeap syscall should dispatch"); + const uint32_t heapEndBefore = static_cast(getRegS32(env.ctx, 2)); + t.Equals(heapEndBefore, heapBase, "EndOfHeap should start at heap base before allocation"); + + const uint32_t alignedAlloc = env.runtime.guestMalloc(0x20u, 64u); + t.IsTrue(alignedAlloc != 0u, "guestMalloc should allocate inside configured heap"); + t.Equals(alignedAlloc & 0x3Fu, 0u, "guestMalloc should honor 64-byte alignment"); + + t.IsTrue(callSyscall(0x3Eu, env.rdram.data(), &env.ctx, &env.runtime), "EndOfHeap syscall should dispatch"); + const uint32_t heapEndAfter = static_cast(getRegS32(env.ctx, 2)); + t.IsTrue(heapEndAfter >= alignedAlloc + 0x20u, "EndOfHeap should advance after allocation"); + + env.runtime.guestFree(alignedAlloc); + + const uint32_t a = env.runtime.guestMalloc(0x100u, 16u); + const uint32_t b = env.runtime.guestMalloc(0x100u, 16u); + t.IsTrue(a != 0u && b != 0u, "guestMalloc should provide two adjacent blocks in this heap window"); + env.runtime.guestFree(b); + + const uint32_t grown = env.runtime.guestRealloc(a, 0x180u, 16u); + t.Equals(grown, a, "guestRealloc should grow in place when adjacent free space is available"); + + env.runtime.guestFree(grown); + const uint32_t reused = env.runtime.guestMalloc(0x80u, 16u); + t.Equals(reused, heapBase, "guestFree should make the head block reusable"); + }); + + tc.Run("setup heap and thread invalid ids use documented kernel errors", [](TestCase &t) + { + TestEnv env; + + setRegU32(env.ctx, 4, 0u); + CreateThread(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_ERROR, "CreateThread with null param should fail"); + + setRegU32(env.ctx, 4, 0u); + DeleteThread(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_ILLEGAL_THID, "DeleteThread(0) should be KE_ILLEGAL_THID"); + + 
setRegU32(env.ctx, 4, 0x7FFFu); + StartThread(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_UNKNOWN_THID, "StartThread should reject unknown thread ids"); + + setRegU32(env.ctx, 4, 0x7FFFu); + WakeupThread(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_UNKNOWN_THID, "WakeupThread should reject unknown thread ids"); + + setRegU32(env.ctx, 4, 0x7FFFu); + PollSema(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_UNKNOWN_SEMID, "PollSema should reject unknown semaphore ids"); + + setRegU32(env.ctx, 4, 0xFFFFFFFFu); + t.IsTrue(callSyscall(0x3Du, env.rdram.data(), &env.ctx, &env.runtime), "SetupHeap syscall should dispatch"); + const uint32_t clampedBase = static_cast(getRegS32(env.ctx, 2)); + t.IsTrue(clampedBase < PS2_RAM_SIZE, "SetupHeap should normalize out-of-range base into guest RAM"); + + t.IsTrue(callSyscall(0x3Eu, env.rdram.data(), &env.ctx, &env.runtime), "EndOfHeap syscall should dispatch"); + const uint32_t heapEnd = static_cast(getRegS32(env.ctx, 2)); + t.IsTrue(heapEnd >= clampedBase, "EndOfHeap should be at or above normalized heap base"); + + setRegU32(env.ctx, 4, 1u); + setRegU32(env.ctx, 5, 0u); + setRegU32(env.ctx, 6, 0u); + setRegU32(env.ctx, 29, 0x0010FFF0u); + t.IsTrue(callSyscall(0x3Cu, env.rdram.data(), &env.ctx, &env.runtime), "SetupThread syscall should dispatch"); + const uint32_t setupSp = static_cast(getRegS32(env.ctx, 2)); + t.Equals(setupSp & 0xFu, 0u, "SetupThread should always return a 16-byte aligned stack pointer"); + }); + }); +} diff --git a/ps2xTest/src/ps2_sif_dma_tests.cpp b/ps2xTest/src/ps2_sif_dma_tests.cpp new file mode 100644 index 00000000..9cd103c1 --- /dev/null +++ b/ps2xTest/src/ps2_sif_dma_tests.cpp @@ -0,0 +1,232 @@ +#include "MiniTest.h" +#include "ps2_runtime.h" +#include "ps2_stubs.h" + +#include +#include +#include +#include + +namespace +{ + struct TestEnv + { + std::vector rdram; + R5900Context ctx{}; + PS2Runtime 
runtime; + + TestEnv() : rdram(PS2_RAM_SIZE, 0u) + { + std::memset(&ctx, 0, sizeof(ctx)); + } + }; + + #pragma pack(push, 1) + struct Ps2SifDmaTransfer + { + uint32_t src; + uint32_t dest; + int32_t size; + int32_t attr; + }; + + struct SifRpcHeader + { + uint32_t pkt_addr; + uint32_t rpc_id; + int32_t sema_id; + uint32_t mode; + }; + + struct SifRpcReceiveData + { + SifRpcHeader hdr; + uint32_t src; + uint32_t dest; + int32_t size; + }; + #pragma pack(pop) + + static_assert(sizeof(Ps2SifDmaTransfer) == 16u, "Unexpected Ps2SifDmaTransfer size."); + static_assert(sizeof(SifRpcReceiveData) == 28u, "Unexpected SifRpcReceiveData size."); + + void setRegU32(R5900Context &ctx, int reg, uint32_t value) + { + ctx.r[reg] = _mm_set_epi64x(0, static_cast(value)); + } + + int32_t getRegS32(const R5900Context &ctx, int reg) + { + return static_cast(::getRegU32(&ctx, reg)); + } + + void writeGuestU32(uint8_t *rdram, uint32_t addr, uint32_t value) + { + std::memcpy(rdram + addr, &value, sizeof(value)); + } + + uint32_t readGuestU32(const uint8_t *rdram, uint32_t addr) + { + uint32_t value = 0; + std::memcpy(&value, rdram + addr, sizeof(value)); + return value; + } +} + +void register_ps2_sif_dma_tests() +{ + MiniTest::Case("PS2SifDma", [](TestCase &tc) + { + tc.Run("sceSifSetDma copies payload and sceSifDmaStat reports complete", [](TestCase &t) + { + TestEnv env; + + constexpr uint32_t kDescAddr = 0x00020000u; + constexpr uint32_t kSrcAddr = 0x00020100u; + constexpr uint32_t kDstAddr = 0x00020200u; + + std::array payload{}; + for (size_t i = 0; i < payload.size(); ++i) + { + payload[i] = static_cast(0x30u + i); + } + std::memcpy(env.rdram.data() + kSrcAddr, payload.data(), payload.size()); + std::memset(env.rdram.data() + kDstAddr, 0, payload.size()); + + const Ps2SifDmaTransfer desc{ + kSrcAddr, + kDstAddr, + static_cast(payload.size()), + 0}; + std::memcpy(env.rdram.data() + kDescAddr, &desc, sizeof(desc)); + + setRegU32(env.ctx, 4, kDescAddr); + setRegU32(env.ctx, 5, 1u); + 
ps2_stubs::sceSifSetDma(env.rdram.data(), &env.ctx, &env.runtime); + const int32_t dmaId = getRegS32(env.ctx, 2); + t.IsTrue(dmaId > 0, "sceSifSetDma should return a positive transfer id on success"); + + t.IsTrue(std::memcmp(env.rdram.data() + kDstAddr, payload.data(), payload.size()) == 0, + "sceSifSetDma should copy transfer payload to destination"); + + setRegU32(env.ctx, 4, static_cast(dmaId)); + ps2_stubs::sceSifDmaStat(env.rdram.data(), &env.ctx, &env.runtime); + t.IsTrue(getRegS32(env.ctx, 2) < 0, "sceSifDmaStat should be negative when transfer is complete"); + }); + + tc.Run("sceSifSetDma rejects invalid descriptors without partial writes", [](TestCase &t) + { + TestEnv env; + + constexpr uint32_t kDescAddr = 0x00021000u; + constexpr uint32_t kSrcA = 0x00021100u; + constexpr uint32_t kDstA = 0x00021200u; + constexpr uint32_t kSrcB = 0x00021300u; + constexpr uint32_t kInvalidDstB = 0xE0000100u; // unsupported guest segment + + std::array payloadA{}; + for (size_t i = 0; i < payloadA.size(); ++i) + { + payloadA[i] = static_cast(0x70u + i); + } + std::array payloadB{}; + for (size_t i = 0; i < payloadB.size(); ++i) + { + payloadB[i] = static_cast(0x90u + i); + } + + std::memcpy(env.rdram.data() + kSrcA, payloadA.data(), payloadA.size()); + std::memcpy(env.rdram.data() + kSrcB, payloadB.data(), payloadB.size()); + std::memset(env.rdram.data() + kDstA, 0x5Au, payloadA.size()); + + const Ps2SifDmaTransfer descs[2] = { + {kSrcA, kDstA, static_cast(payloadA.size()), 0}, + {kSrcB, kInvalidDstB, static_cast(payloadB.size()), 0}}; + std::memcpy(env.rdram.data() + kDescAddr, descs, sizeof(descs)); + + setRegU32(env.ctx, 4, kDescAddr); + setRegU32(env.ctx, 5, 2u); + ps2_stubs::sceSifSetDma(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), 0, "sceSifSetDma should fail when any descriptor is invalid"); + + const std::array expectedUnchanged{ + 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A}; + t.IsTrue(std::memcmp(env.rdram.data() + kDstA, 
expectedUnchanged.data(), expectedUnchanged.size()) == 0, + "failed multi-descriptor sceSifSetDma should not partially write earlier descriptors"); + }); + + tc.Run("sceSifSetDma enforces descriptor count limit", [](TestCase &t) + { + TestEnv env; + constexpr uint32_t kDescAddr = 0x00022000u; + + setRegU32(env.ctx, 4, kDescAddr); + setRegU32(env.ctx, 5, 33u); + ps2_stubs::sceSifSetDma(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), 0, "sceSifSetDma should reject count > 32"); + }); + + tc.Run("sceSifGetOtherData copies payload and writes receive metadata", [](TestCase &t) + { + TestEnv env; + + constexpr uint32_t kRdAddr = 0x00023000u; + constexpr uint32_t kSrcAddr = 0x00023100u; + constexpr uint32_t kDstAddr = 0x00023200u; + constexpr uint32_t kSize = 20u; + + std::array payload{}; + for (size_t i = 0; i < payload.size(); ++i) + { + payload[i] = static_cast((i * 7u) & 0xFFu); + } + std::memcpy(env.rdram.data() + kSrcAddr, payload.data(), payload.size()); + std::memset(env.rdram.data() + kDstAddr, 0, payload.size()); + std::memset(env.rdram.data() + kRdAddr, 0, sizeof(SifRpcReceiveData)); + + setRegU32(env.ctx, 4, kRdAddr); + setRegU32(env.ctx, 5, kSrcAddr); + setRegU32(env.ctx, 6, kDstAddr); + setRegU32(env.ctx, 7, kSize); + ps2_stubs::sceSifGetOtherData(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), 0, "sceSifGetOtherData should succeed for valid transfer"); + + t.IsTrue(std::memcmp(env.rdram.data() + kDstAddr, payload.data(), payload.size()) == 0, + "sceSifGetOtherData should copy payload"); + + const SifRpcReceiveData rd = *reinterpret_cast(env.rdram.data() + kRdAddr); + t.Equals(rd.src, kSrcAddr, "receive metadata src should be populated"); + t.Equals(rd.dest, kDstAddr, "receive metadata dest should be populated"); + t.Equals(static_cast(rd.size), kSize, "receive metadata size should be populated"); + }); + + tc.Run("sceSifGetOtherData rejects unsupported guest segments", [](TestCase &t) + { + TestEnv 
env; + + constexpr uint32_t kRdAddr = 0x00024000u; + constexpr uint32_t kDstAddr = 0x00024100u; + constexpr uint32_t kInvalidSrcAddr = 0xE0000200u; + constexpr uint32_t kSize = 16u; + + std::memset(env.rdram.data() + kDstAddr, 0xA5, kSize); + writeGuestU32(env.rdram.data(), kRdAddr + 0x10u, 0x11111111u); + writeGuestU32(env.rdram.data(), kRdAddr + 0x14u, 0x22222222u); + writeGuestU32(env.rdram.data(), kRdAddr + 0x18u, 0x33333333u); + + setRegU32(env.ctx, 4, kRdAddr); + setRegU32(env.ctx, 5, kInvalidSrcAddr); + setRegU32(env.ctx, 6, kDstAddr); + setRegU32(env.ctx, 7, kSize); + ps2_stubs::sceSifGetOtherData(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), -1, "sceSifGetOtherData should fail for unsupported source segment"); + + std::array expected{}; + expected.fill(0xA5u); + t.IsTrue(std::memcmp(env.rdram.data() + kDstAddr, expected.data(), expected.size()) == 0, + "failed sceSifGetOtherData should not modify destination"); + t.Equals(readGuestU32(env.rdram.data(), kRdAddr + 0x10u), 0x11111111u, + "failed sceSifGetOtherData should not overwrite rd metadata"); + }); + }); +} diff --git a/ps2xTest/src/ps2_sif_rpc_tests.cpp b/ps2xTest/src/ps2_sif_rpc_tests.cpp new file mode 100644 index 00000000..e067b005 --- /dev/null +++ b/ps2xTest/src/ps2_sif_rpc_tests.cpp @@ -0,0 +1,509 @@ +#include "MiniTest.h" +#include "ps2_runtime.h" +#include "ps2_syscalls.h" + +#include +#include +#include +#include + +using namespace ps2_syscalls; + +namespace +{ + constexpr int KE_OK = 0; + constexpr int KE_SEMA_ZERO = -419; + + constexpr uint32_t K_SIF_RPC_MODE_NOWAIT = 0x01u; + constexpr uint32_t K_STACK_ADDR = 0x00100000u; + + #pragma pack(push, 1) + struct SifRpcHeader + { + uint32_t pkt_addr; + uint32_t rpc_id; + int32_t sema_id; + uint32_t mode; + }; + + struct SifRpcClientData + { + SifRpcHeader hdr; + uint32_t command; + uint32_t buf; + uint32_t cbuf; + uint32_t end_function; + uint32_t end_param; + uint32_t server; + }; + + struct SifRpcServerData + { + 
int32_t sid; + uint32_t func; + uint32_t buf; + int32_t size; + uint32_t cfunc; + uint32_t cbuf; + int32_t size2; + uint32_t client; + uint32_t pkt_addr; + int32_t rpc_number; + uint32_t recvbuf; + int32_t rsize; + int32_t rmode; + int32_t rid; + uint32_t link; + uint32_t next; + uint32_t base; + }; + + struct SifRpcDataQueue + { + int32_t thread_id; + int32_t active; + uint32_t link; + uint32_t start; + uint32_t end; + uint32_t next; + }; + #pragma pack(pop) + + static_assert(sizeof(SifRpcHeader) == 0x10u, "Unexpected SifRpcHeader size."); + static_assert(sizeof(SifRpcClientData) == 0x28u, "Unexpected SifRpcClientData size."); + static_assert(sizeof(SifRpcServerData) == 0x44u, "Unexpected SifRpcServerData size."); + static_assert(sizeof(SifRpcDataQueue) == 0x18u, "Unexpected SifRpcDataQueue size."); + + struct TestEnv + { + std::vector rdram; + R5900Context ctx{}; + PS2Runtime runtime; + + TestEnv() : rdram(PS2_RAM_SIZE, 0) + { + std::memset(&ctx, 0, sizeof(ctx)); + } + }; + + void setRegU32(R5900Context &ctx, int reg, uint32_t value) + { + ctx.r[reg] = _mm_set_epi64x(0, static_cast(value)); + } + + int32_t getRegS32(const R5900Context &ctx, int reg) + { + return static_cast(::getRegU32(&ctx, reg)); + } + + uint32_t getRegU32Result(const R5900Context &ctx, int reg) + { + return ::getRegU32(&ctx, reg); + } + + void writeGuestU32(uint8_t *rdram, uint32_t addr, uint32_t value) + { + std::memcpy(rdram + addr, &value, sizeof(value)); + } + + template + void writeGuestStruct(uint8_t *rdram, uint32_t addr, const T &value) + { + std::memcpy(rdram + addr, &value, sizeof(value)); + } + + template + T readGuestStruct(const uint8_t *rdram, uint32_t addr) + { + T value{}; + std::memcpy(&value, rdram + addr, sizeof(value)); + return value; + } +} + +void register_ps2_sif_rpc_tests() +{ + MiniTest::Case("PS2SifRpc", [](TestCase &tc) + { + tc.Run("register bind call updates descriptors and payload", [](TestCase &t) + { + TestEnv env; + + constexpr uint32_t kQdAddr = 0x00022000u; 
+ constexpr uint32_t kSdAddr = 0x00022100u; + constexpr uint32_t kClientAddr = 0x00022200u; + constexpr uint32_t kServerBufAddr = 0x00022300u; + constexpr uint32_t kClientCbufAddr = 0x00022400u; + constexpr uint32_t kSendAddr = 0x00022500u; + constexpr uint32_t kRecvAddr = 0x00022600u; + constexpr uint32_t kSid = 0x20000111u; + + SifInitRpc(env.rdram.data(), &env.ctx, &env.runtime); + + setRegU32(env.ctx, 4, kQdAddr); + setRegU32(env.ctx, 5, 0x33u); + SifSetRpcQueue(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifSetRpcQueue should succeed"); + + setRegU32(env.ctx, 29, K_STACK_ADDR); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x10u, 0x9000u); // cfunc + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x14u, kClientCbufAddr); // cbuf + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x18u, kQdAddr); // qd + + setRegU32(env.ctx, 4, kSdAddr); + setRegU32(env.ctx, 5, kSid); + setRegU32(env.ctx, 6, 0u); // no server callback + setRegU32(env.ctx, 7, kServerBufAddr); + SifRegisterRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifRegisterRpc should succeed"); + + const SifRpcDataQueue qdAfterRegister = readGuestStruct(env.rdram.data(), kQdAddr); + const SifRpcServerData sdAfterRegister = readGuestStruct(env.rdram.data(), kSdAddr); + t.Equals(qdAfterRegister.link, kSdAddr, "queue link should point at registered server"); + t.Equals(static_cast(sdAfterRegister.sid), kSid, "server sid should match registered sid"); + t.Equals(sdAfterRegister.buf, kServerBufAddr, "server buf should match register arg"); + t.Equals(sdAfterRegister.cbuf, kClientCbufAddr, "server cbuf should match stack arg"); + t.Equals(sdAfterRegister.base, kQdAddr, "server base should point to queue"); + + setRegU32(env.ctx, 4, kClientAddr); + setRegU32(env.ctx, 5, kSid); + setRegU32(env.ctx, 6, 0u); + SifBindRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifBindRpc should succeed"); 
+ + const SifRpcClientData clientAfterBind = readGuestStruct(env.rdram.data(), kClientAddr); + t.Equals(clientAfterBind.server, kSdAddr, "client should bind to registered server"); + t.Equals(clientAfterBind.buf, kServerBufAddr, "client buf should mirror server buf"); + t.Equals(clientAfterBind.cbuf, kClientCbufAddr, "client cbuf should mirror server cbuf"); + + std::array payload{}; + for (size_t i = 0; i < payload.size(); ++i) + { + payload[i] = static_cast(0x50u + i); + } + std::memcpy(env.rdram.data() + kSendAddr, payload.data(), payload.size()); + std::memset(env.rdram.data() + kServerBufAddr, 0, payload.size()); + std::memset(env.rdram.data() + kRecvAddr, 0, payload.size()); + + setRegU32(env.ctx, 4, kClientAddr); + setRegU32(env.ctx, 5, 0x55u); + setRegU32(env.ctx, 6, 0u); + setRegU32(env.ctx, 7, kSendAddr); + setRegU32(env.ctx, 8, static_cast(payload.size())); + setRegU32(env.ctx, 9, kRecvAddr); + setRegU32(env.ctx, 10, static_cast(payload.size())); + setRegU32(env.ctx, 11, 0u); + setRegU32(env.ctx, 29, K_STACK_ADDR); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x00u, 0u); // endParam + + SifCallRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifCallRpc should succeed"); + + const SifRpcServerData sdAfterCall = readGuestStruct(env.rdram.data(), kSdAddr); + t.Equals(sdAfterCall.client, kClientAddr, "server should record caller client pointer"); + t.Equals(static_cast(sdAfterCall.rpc_number), 0x55u, "server rpc_number should match request"); + t.Equals(static_cast(sdAfterCall.size), static_cast(payload.size()), "server size should match sendSize"); + t.Equals(sdAfterCall.recvbuf, kRecvAddr, "server recvbuf should match request recv pointer"); + t.Equals(static_cast(sdAfterCall.rsize), static_cast(payload.size()), "server rsize should match recvSize"); + t.Equals(static_cast(sdAfterCall.rmode), 1u, "blocking call should set rmode to 1"); + + t.IsTrue(std::memcmp(env.rdram.data() + kServerBufAddr, payload.data(), 
payload.size()) == 0, + "send payload should be copied into server buffer"); + t.IsTrue(std::memcmp(env.rdram.data() + kRecvAddr, payload.data(), payload.size()) == 0, + "unhandled RPC should copy payload into recv buffer"); + + setRegU32(env.ctx, 4, kClientAddr); + SifCheckStatRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), 0, "SifCheckStatRpc should report not busy after synchronous completion"); + }); + + tc.Run("bind before register creates placeholder then remaps", [](TestCase &t) + { + TestEnv env; + + constexpr uint32_t kQdAddr = 0x00024000u; + constexpr uint32_t kSdAddr = 0x00024100u; + constexpr uint32_t kClientAddr = 0x00024200u; + constexpr uint32_t kServerBufAddr = 0x00024300u; + constexpr uint32_t kServerCbufAddr = 0x00024400u; + constexpr uint32_t kSid = 0x20000122u; + + SifInitRpc(env.rdram.data(), &env.ctx, &env.runtime); + + setRegU32(env.ctx, 4, kClientAddr); + setRegU32(env.ctx, 5, kSid); + setRegU32(env.ctx, 6, 0u); + SifBindRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "initial bind without registered server should still succeed"); + + const SifRpcClientData clientBeforeRegister = readGuestStruct(env.rdram.data(), kClientAddr); + t.IsTrue(clientBeforeRegister.server != 0u, "bind should allocate placeholder server when sid is missing"); + t.IsTrue(clientBeforeRegister.server >= 0x01F10000u && clientBeforeRegister.server < 0x01F20000u, + "placeholder server should come from rpc server pool"); + t.Equals(clientBeforeRegister.buf, 0u, "placeholder server starts with empty buf"); + t.Equals(clientBeforeRegister.cbuf, 0u, "placeholder server starts with empty cbuf"); + + setRegU32(env.ctx, 4, kQdAddr); + setRegU32(env.ctx, 5, 0x44u); + SifSetRpcQueue(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifSetRpcQueue should succeed"); + + setRegU32(env.ctx, 29, K_STACK_ADDR); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x10u, 0u); + 
writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x14u, kServerCbufAddr); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x18u, kQdAddr); + + setRegU32(env.ctx, 4, kSdAddr); + setRegU32(env.ctx, 5, kSid); + setRegU32(env.ctx, 6, 0u); + setRegU32(env.ctx, 7, kServerBufAddr); + SifRegisterRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifRegisterRpc should succeed"); + + const SifRpcClientData clientAfterRegister = readGuestStruct(env.rdram.data(), kClientAddr); + t.Equals(clientAfterRegister.server, kSdAddr, "register should remap pre-bound clients to concrete server descriptor"); + t.Equals(clientAfterRegister.buf, kServerBufAddr, "register should update client buf from server descriptor"); + t.Equals(clientAfterRegister.cbuf, kServerCbufAddr, "register should update client cbuf from server descriptor"); + t.IsTrue(clientAfterRegister.server != clientBeforeRegister.server, "client server pointer should switch from placeholder to real server"); + + setRegU32(env.ctx, 4, kSdAddr); + setRegU32(env.ctx, 5, kQdAddr); + SifRemoveRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegU32Result(env.ctx, 2), kSdAddr, "SifRemoveRpc should return removed server pointer"); + + const SifRpcDataQueue qdAfterRemove = readGuestStruct(env.rdram.data(), kQdAddr); + const SifRpcServerData sdAfterRemove = readGuestStruct(env.rdram.data(), kSdAddr); + t.Equals(qdAfterRemove.link, 0u, "queue link should detach removed server"); + t.Equals(sdAfterRemove.link, 0u, "removed server link should be cleared"); + }); + + tc.Run("SifSetRpcQueue remove roundtrip is stable", [](TestCase &t) + { + TestEnv env; + + constexpr uint32_t kQdAddr = 0x00026000u; + + SifInitRpc(env.rdram.data(), &env.ctx, &env.runtime); + + setRegU32(env.ctx, 4, kQdAddr); + setRegU32(env.ctx, 5, 0x55u); + SifSetRpcQueue(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifSetRpcQueue should succeed"); + + const SifRpcDataQueue qd = 
readGuestStruct(env.rdram.data(), kQdAddr); + t.Equals(static_cast(qd.thread_id), 0x55u, "queue thread id should match argument"); + + setRegU32(env.ctx, 4, kQdAddr); + SifRemoveRpcQueue(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegU32Result(env.ctx, 2), kQdAddr, "SifRemoveRpcQueue should return removed queue pointer"); + + setRegU32(env.ctx, 4, kQdAddr); + SifRemoveRpcQueue(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegU32Result(env.ctx, 2), 0u, "removing the same queue twice should return 0"); + }); + + tc.Run("sid1 nowait RPC 0x12/0x13 returns expected pointers and signals sema", [](TestCase &t) + { + TestEnv env; + + constexpr uint32_t kClientAddr = 0x00028000u; + constexpr uint32_t kSemaParamAddr = 0x00028100u; + constexpr uint32_t kRecvAddr = 0x00028200u; + constexpr uint32_t kSid = 1u; + + SifInitRpc(env.rdram.data(), &env.ctx, &env.runtime); + + const uint32_t semaParam[6] = { + 0u, // count (unused by runtime decode) + 1u, // max_count + 0u, // init_count + 0u, // wait_threads + 0u, // attr + 0u // option + }; + std::memcpy(env.rdram.data() + kSemaParamAddr, semaParam, sizeof(semaParam)); + + setRegU32(env.ctx, 4, kSemaParamAddr); + CreateSema(env.rdram.data(), &env.ctx, &env.runtime); + const int32_t semaId = getRegS32(env.ctx, 2); + t.IsTrue(semaId > 0, "CreateSema should return a positive semaphore id"); + + setRegU32(env.ctx, 4, kClientAddr); + setRegU32(env.ctx, 5, kSid); + setRegU32(env.ctx, 6, 0u); + SifBindRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifBindRpc should succeed for sid 1"); + + SifRpcClientData client = readGuestStruct(env.rdram.data(), kClientAddr); + client.hdr.sema_id = semaId; + writeGuestStruct(env.rdram.data(), kClientAddr, client); + + setRegU32(env.ctx, 4, static_cast(semaId)); + PollSema(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_SEMA_ZERO, "semaphore should start at zero before nowait rpc"); + + 
std::memset(env.rdram.data() + kRecvAddr, 0, 16u); + setRegU32(env.ctx, 4, kClientAddr); + setRegU32(env.ctx, 5, 0x12u); + setRegU32(env.ctx, 6, K_SIF_RPC_MODE_NOWAIT); + setRegU32(env.ctx, 7, 0u); + setRegU32(env.ctx, 8, 0u); + setRegU32(env.ctx, 9, kRecvAddr); + setRegU32(env.ctx, 10, 16u); + setRegU32(env.ctx, 11, 0u); + setRegU32(env.ctx, 29, K_STACK_ADDR); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x00u, 0u); + SifCallRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifCallRpc(0x12) should succeed"); + t.Equals(readGuestStruct(env.rdram.data(), kRecvAddr), 0x00012000u, "rpc 0x12 should return SND_STATUS pointer"); + + setRegU32(env.ctx, 4, static_cast(semaId)); + PollSema(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "nowait rpc should signal completion sema"); + + std::memset(env.rdram.data() + kRecvAddr, 0, 16u); + setRegU32(env.ctx, 4, kClientAddr); + setRegU32(env.ctx, 5, 0x13u); + setRegU32(env.ctx, 6, K_SIF_RPC_MODE_NOWAIT); + setRegU32(env.ctx, 7, 0u); + setRegU32(env.ctx, 8, 0u); + setRegU32(env.ctx, 9, kRecvAddr); + setRegU32(env.ctx, 10, 16u); + setRegU32(env.ctx, 11, 0u); + SifCallRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifCallRpc(0x13) should succeed"); + t.Equals(readGuestStruct(env.rdram.data(), kRecvAddr), 0x00012100u, "rpc 0x13 should return address-table pointer"); + + setRegU32(env.ctx, 4, static_cast(semaId)); + PollSema(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "each nowait rpc should signal completion sema"); + }); + + tc.Run("SifCallRpc falls back to stack ABI when register pack is implausible", [](TestCase &t) + { + TestEnv env; + + constexpr uint32_t kQdAddr = 0x0002A000u; + constexpr uint32_t kSdAddr = 0x0002A100u; + constexpr uint32_t kClientAddr = 0x0002A200u; + constexpr uint32_t kServerBufAddr = 0x0002A300u; + constexpr uint32_t kSendAddr = 0x0002A400u; + 
constexpr uint32_t kRecvAddr = 0x0002A500u; + constexpr uint32_t kSid = 0x20000133u; + + SifInitRpc(env.rdram.data(), &env.ctx, &env.runtime); + + setRegU32(env.ctx, 4, kQdAddr); + setRegU32(env.ctx, 5, 0x66u); + SifSetRpcQueue(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifSetRpcQueue should succeed"); + + setRegU32(env.ctx, 29, K_STACK_ADDR); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x10u, 0u); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x14u, 0u); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x18u, kQdAddr); + + setRegU32(env.ctx, 4, kSdAddr); + setRegU32(env.ctx, 5, kSid); + setRegU32(env.ctx, 6, 0u); + setRegU32(env.ctx, 7, kServerBufAddr); + SifRegisterRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifRegisterRpc should succeed"); + + setRegU32(env.ctx, 4, kClientAddr); + setRegU32(env.ctx, 5, kSid); + setRegU32(env.ctx, 6, 0u); + SifBindRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifBindRpc should succeed"); + + std::array payload{}; + for (size_t i = 0; i < payload.size(); ++i) + { + payload[i] = static_cast(0xA0u + i); + } + std::memcpy(env.rdram.data() + kSendAddr, payload.data(), payload.size()); + std::memset(env.rdram.data() + kRecvAddr, 0, payload.size()); + + setRegU32(env.ctx, 29, K_STACK_ADDR); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x10u, static_cast(payload.size())); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x14u, kRecvAddr); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x18u, static_cast(payload.size())); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x1Cu, 0u); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x20u, 0u); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x00u, 0u); + + setRegU32(env.ctx, 4, kClientAddr); + setRegU32(env.ctx, 5, 0x99u); + setRegU32(env.ctx, 6, 0u); + setRegU32(env.ctx, 7, kSendAddr); + setRegU32(env.ctx, 8, 0x03000000u); // implausible size (> 
0x02000000 threshold) + setRegU32(env.ctx, 9, 0x00000004u); // implausible guest pointer + setRegU32(env.ctx, 10, 0x03000001u); + setRegU32(env.ctx, 11, 0u); + + SifCallRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifCallRpc should succeed with stack ABI fallback"); + + const SifRpcServerData sdAfterCall = readGuestStruct(env.rdram.data(), kSdAddr); + t.Equals(static_cast(sdAfterCall.size), static_cast(payload.size()), + "stack ABI sendSize should be selected when register ABI is implausible"); + t.Equals(sdAfterCall.recvbuf, kRecvAddr, "stack ABI recvBuf should be selected"); + t.Equals(static_cast(sdAfterCall.rsize), static_cast(payload.size()), + "stack ABI recvSize should be selected"); + + t.IsTrue(std::memcmp(env.rdram.data() + kRecvAddr, payload.data(), payload.size()) == 0, + "recv payload should match stack-selected transfer size"); + }); + + tc.Run("SifCallRpc prefers stack ABI for DTX URPC when both packs look plausible", [](TestCase &t) + { + TestEnv env; + + constexpr uint32_t kClientAddr = 0x0002B000u; + constexpr uint32_t kDtxSid = 0x7D000000u; + constexpr uint32_t kSendAddr = 0x0002B100u; + constexpr uint32_t kRecvStackAddr = 0x0002B200u; + constexpr uint32_t kRecvRegAddr = 0x0002B300u; + + SifInitRpc(env.rdram.data(), &env.ctx, &env.runtime); + + setRegU32(env.ctx, 4, kClientAddr); + setRegU32(env.ctx, 5, kDtxSid); + setRegU32(env.ctx, 6, 0u); + SifBindRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifBindRpc should succeed for DTX sid"); + + writeGuestU32(env.rdram.data(), kSendAddr + 0x00u, 1u); // mode + writeGuestU32(env.rdram.data(), kSendAddr + 0x04u, 0x1E21440u); // wk addr + writeGuestU32(env.rdram.data(), kSendAddr + 0x08u, 0x100u); // wk size + writeGuestU32(env.rdram.data(), kRecvStackAddr, 0u); + writeGuestU32(env.rdram.data(), kRecvRegAddr, 0u); + + setRegU32(env.ctx, 29, K_STACK_ADDR); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x10u, 12u); + 
writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x14u, kRecvStackAddr); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x18u, 4u); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x1Cu, 0u); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x20u, 0u); + writeGuestU32(env.rdram.data(), K_STACK_ADDR + 0x00u, 0u); + + setRegU32(env.ctx, 4, kClientAddr); + setRegU32(env.ctx, 5, 0x422u); // DTX URPC command 34 (SJUNI create) + setRegU32(env.ctx, 6, 0u); + setRegU32(env.ctx, 7, kSendAddr); + // Plausible but intentionally wrong register-side packed args. + setRegU32(env.ctx, 8, 4u); + setRegU32(env.ctx, 9, kRecvRegAddr); + setRegU32(env.ctx, 10, 12u); + setRegU32(env.ctx, 11, 0u); + + SifCallRpc(env.rdram.data(), &env.ctx, &env.runtime); + t.Equals(getRegS32(env.ctx, 2), KE_OK, "SifCallRpc should succeed for DTX URPC"); + + const uint32_t stackHandle = readGuestStruct(env.rdram.data(), kRecvStackAddr); + const uint32_t regHandle = readGuestStruct(env.rdram.data(), kRecvRegAddr); + t.IsTrue(stackHandle != 0u, "DTX handle should be written to stack-selected recv buffer"); + t.Equals(regHandle, 0u, "register recv buffer should remain untouched when stack ABI is preferred"); + }); + }); +} diff --git a/ps2xTest/src/r5900_decoder_tests.cpp b/ps2xTest/src/r5900_decoder_tests.cpp index 1942a6cc..d158db3e 100644 --- a/ps2xTest/src/r5900_decoder_tests.cpp +++ b/ps2xTest/src/r5900_decoder_tests.cpp @@ -117,6 +117,31 @@ void register_r5900_decoder_tests() t.IsTrue(inst.modificationInfo.modifiesGPR, "jalr with rd!=0 should mark GPR modification"); }); + tc.Run("R5900 MULT marks rd modification when rd is non-zero", [](TestCase &t) { + uint32_t address = 0x5800; + uint32_t rawWithRd = (OPCODE_SPECIAL << 26) | (4 << 21) | (5 << 16) | (3 << 11) | SPECIAL_MULT; + uint32_t rawRdZero = (OPCODE_SPECIAL << 26) | (4 << 21) | (5 << 16) | (0 << 11) | SPECIAL_MULT; + + R5900Decoder decoder; + Instruction withRd = decoder.decodeInstruction(address, rawWithRd); + Instruction rdZero = 
decoder.decodeInstruction(address + 4, rawRdZero); + + t.IsTrue(withRd.modificationInfo.modifiesControl, "MULT should modify HI/LO"); + t.IsTrue(withRd.modificationInfo.modifiesGPR, "MULT should mark rd modification when rd!=0"); + t.IsFalse(rdZero.modificationInfo.modifiesGPR, "MULT should not mark rd modification when rd==0"); + }); + + tc.Run("R5900 MMI MULT1 marks rd modification when rd is non-zero", [](TestCase &t) { + uint32_t address = 0x5900; + uint32_t raw = (OPCODE_MMI << 26) | (6 << 21) | (7 << 16) | (8 << 11) | MMI_MULT1; + + R5900Decoder decoder; + Instruction inst = decoder.decodeInstruction(address, raw); + + t.IsTrue(inst.modificationInfo.modifiesControl, "MULT1 should modify HI1/LO1"); + t.IsTrue(inst.modificationInfo.modifiesGPR, "MULT1 should mark rd modification when rd!=0"); + }); + tc.Run("MMI instruction sets MMI flags", [](TestCase &t) { uint32_t address = 0x6000; // Use opcode 0x1C (MMI), rs=1, rt=2, rd=3, sa=MMI0_PADDW (0)