From 99df1f4b712ddaf6d0247fca8d51c19a16868165 Mon Sep 17 00:00:00 2001 From: Aslan Hud Date: Tue, 24 Feb 2026 21:19:20 +0100 Subject: [PATCH 1/3] GS stubs (ps2_stubs_gs.inl): - Replace direct VRAM writes with GIF packet path - sceGsExecLoadImage: build GIF packet, set MADR/QWC/CHCR - sceGsExecStoreImage: GIF packet, processPendingTransfers, consumeLocalToHostBytes - sceGsPutDispEnv/sceGsPutDrawEnv: program GIF DMA Path3 (5/9 QWs) - sceGsResetGraph: GIF packet + writeIORegister for pmode/smode2/dispfb/display/bgcolor - sceGsSetDefDrawEnv: GIF packet layout, sceGszbufaddr for zbuf - sceGsSyncPath: processPendingTransfers, poll DMA channels - sceGsSyncV/sceGsSyncVCallback: return 0 - sceGszbufaddr: zbuf calculation (width/height blocks, gparam) Helpers (ps2_stubs_helpers.inl): - GsDispEnvMem: 5 fields (pmode, smode2, dispfb, display, bgcolor) for GIF layout - writeGsDispEnv: read-modify-write pattern - toDmaPhys: SPR (scratchpad) handling for DMA MADR bit 31 - submitDmaSend: chain mode (chcr=0x185) for sceDmaSend/sceDmaSendI/sceDmaSendM Misc stubs (ps2_stubs_misc.inl): - Add sceeFontInit, sceeFontLoadFont, sceeFontPrintfAt, sceeFontPrintfAt2 - Add sceeFontClose, sceeFontSetColour, sceeFontSetMode, sceeFontSetFont, sceeFontSetScale - Font stubs use GIF packets for CLUT and texture upload Call list (ps2_call_list.h): - Add sceeFont* entries between sceGszbufaddr and sceIoctl --- ps2xRuntime/CMakeLists.txt | 2 + ps2xRuntime/include/ps2_call_list.h | 9 + ps2xRuntime/include/ps2_gif_arbiter.h | 42 + ps2xRuntime/include/ps2_gs_common.h | 57 + ps2xRuntime/include/ps2_gs_gpu.h | 283 +++- ps2xRuntime/include/ps2_gs_psmt4.h | 59 + ps2xRuntime/include/ps2_gs_rasterizer.h | 24 + ps2xRuntime/include/ps2_memory.h | 65 +- ps2xRuntime/include/ps2_runtime.h | 13 + ps2xRuntime/include/ps2_vu1.h | 62 + ps2xRuntime/src/lib/ps2_gif_arbiter.cpp | 44 + ps2xRuntime/src/lib/ps2_gs_gpu.cpp | 929 ++++++++++-- ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp | 1335 +++++------------ ps2xRuntime/src/lib/ps2_memory.cpp | 421 ++++-- ps2xRuntime/src/lib/ps2_runtime.cpp | 129 +- ps2xRuntime/src/lib/ps2_vif1_interpreter.cpp | 170 +-- ps2xRuntime/src/lib/ps2_vu1.cpp | 1030 +++++++++++++ .../lib/stubs/helpers/ps2_stubs_helpers.inl | 42 +- ps2xRuntime/src/lib/stubs/ps2_stubs_gs.inl | 451 ++++-- ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl | 292 ++++ 20 files changed, 3916 insertions(+), 1543 deletions(-) create mode 100644 ps2xRuntime/include/ps2_gif_arbiter.h create mode 100644 ps2xRuntime/include/ps2_gs_common.h create mode 100644 ps2xRuntime/include/ps2_gs_psmt4.h create mode 100644 ps2xRuntime/include/ps2_gs_rasterizer.h create mode 100644 ps2xRuntime/include/ps2_vu1.h create mode 100644 ps2xRuntime/src/lib/ps2_gif_arbiter.cpp create mode 100644 ps2xRuntime/src/lib/ps2_vu1.cpp diff --git a/ps2xRuntime/CMakeLists.txt b/ps2xRuntime/CMakeLists.txt index 7780b3e6..d6b481a8 100644 --- a/ps2xRuntime/CMakeLists.txt +++ b/ps2xRuntime/CMakeLists.txt @@ -20,6 +20,7 @@ FetchContent_MakeAvailable(raylib) add_library(ps2_runtime STATIC src/lib/game_overrides.cpp + src/lib/ps2_gif_arbiter.cpp src/lib/ps2_audio.cpp src/lib/ps2_audio_vag.cpp src/lib/ps2_gs_gpu.cpp @@ -32,6 +33,7 @@ add_library(ps2_runtime STATIC src/lib/ps2_stubs.cpp src/lib/ps2_syscalls.cpp src/lib/ps2_vif1_interpreter.cpp + src/lib/ps2_vu1.cpp ) file(GLOB RUNNER_SRC_FILES CONFIGURE_DEPENDS diff --git a/ps2xRuntime/include/ps2_call_list.h b/ps2xRuntime/include/ps2_call_list.h index f57b18fe..18ae4415 100644 --- a/ps2xRuntime/include/ps2_call_list.h +++ b/ps2xRuntime/include/ps2_call_list.h @@ -336,6 +336,15 @@ X(sceGsSyncV) \ X(sceGsSyncVCallback) \ X(sceGszbufaddr) \ + X(sceeFontInit) \ + X(sceeFontLoadFont) \ + X(sceeFontPrintfAt) \ + X(sceeFontPrintfAt2) \ + X(sceeFontClose) \ + X(sceeFontSetColour) \ + X(sceeFontSetMode) \ + X(sceeFontSetFont) \ + X(sceeFontSetScale) \ X(sceIoctl) \ X(sceIpuInit) \ X(sceIpuRestartDMA) \ diff --git a/ps2xRuntime/include/ps2_gif_arbiter.h b/ps2xRuntime/include/ps2_gif_arbiter.h new file mode 100644 index 00000000..6fc4a523 --- /dev/null +++ b/ps2xRuntime/include/ps2_gif_arbiter.h @@ -0,0 +1,42 @@ +#ifndef PS2_GIF_ARBITER_H +#define PS2_GIF_ARBITER_H + +#include +#include +#include + +enum class GifPathId : uint8_t +{ + Path1 = 1, + Path2 = 2, + Path3 = 3, +}; + +struct GifArbiterPacket +{ + GifPathId pathId; + std::vector data; +}; + +class GifArbiter +{ +public: + using ProcessPacketFn = std::function; + + GifArbiter() = default; + explicit GifArbiter(ProcessPacketFn processFn); + + void setProcessPacketFn(ProcessPacketFn fn) { m_processFn = std::move(fn); } + + void submit(GifPathId pathId, const uint8_t *data, uint32_t sizeBytes); + + void drain(); + +private: + ProcessPacketFn m_processFn; + std::vector m_queue; + + static uint8_t pathPriority(GifPathId id); +}; + +#endif diff --git a/ps2xRuntime/include/ps2_gs_common.h b/ps2xRuntime/include/ps2_gs_common.h new file mode 100644 index 00000000..5cc8b45a --- /dev/null +++ b/ps2xRuntime/include/ps2_gs_common.h @@ -0,0 +1,57 @@ +#ifndef PS2_GS_COMMON_H +#define PS2_GS_COMMON_H + +#include "ps2_gs_gpu.h" +#include + +namespace GSInternal +{ +static inline uint32_t bitsPerPixel(uint8_t psm) +{ + switch (psm) + { + case GS_PSM_CT32: + case GS_PSM_Z32: + return 32; + case GS_PSM_CT24: + case GS_PSM_Z24: + return 32; + case GS_PSM_CT16: + case GS_PSM_CT16S: + case GS_PSM_Z16: + case GS_PSM_Z16S: + return 16; + case GS_PSM_T8: + case GS_PSM_T8H: + return 8; + case GS_PSM_T4: + case GS_PSM_T4HL: + case GS_PSM_T4HH: + return 4; + default: + return 32; + } +} + +static inline uint32_t fbStride(uint32_t fbw, uint8_t psm) +{ + uint32_t pixelsPerRow = fbw * 64u; + return pixelsPerRow * (bitsPerPixel(psm) / 8u); +} + +static inline int clampInt(int v, int lo, int hi) +{ + if (v < lo) return lo; + if (v > hi) return hi; + return v; +} + +static inline uint8_t clampU8(int v) +{ + if (v < 0) return 0; + if (v > 255) return 255; + return static_cast(v); +} +} + +#endif diff --git a/ps2xRuntime/include/ps2_gs_gpu.h b/ps2xRuntime/include/ps2_gs_gpu.h index a1455ed3..16235657 100644 --- a/ps2xRuntime/include/ps2_gs_gpu.h +++ b/ps2xRuntime/include/ps2_gs_gpu.h @@ -1,62 +1,271 @@ #ifndef PS2_GS_GPU_H #define PS2_GS_GPU_H +#include "ps2_gs_rasterizer.h" #include -#include +#include #include -#include +#include + +enum GSPrimType : uint8_t +{ + GS_PRIM_POINT = 0, + GS_PRIM_LINE = 1, + GS_PRIM_LINESTRIP = 2, + GS_PRIM_TRIANGLE = 3, + GS_PRIM_TRISTRIP = 4, + GS_PRIM_TRIFAN = 5, + GS_PRIM_SPRITE = 6, +}; + +enum GSPsm : uint8_t +{ + GS_PSM_CT32 = 0, + GS_PSM_CT24 = 1, + GS_PSM_CT16 = 2, + GS_PSM_CT16S = 10, + GS_PSM_T8 = 19, + GS_PSM_T4 = 20, + GS_PSM_T8H = 27, + GS_PSM_T4HL = 36, + GS_PSM_T4HH = 44, + GS_PSM_Z32 = 48, + GS_PSM_Z24 = 49, + GS_PSM_Z16 = 50, + GS_PSM_Z16S = 58, +}; + +enum GSGifFormat : uint8_t +{ + GIF_FMT_PACKED = 0, + GIF_FMT_REGLIST = 1, + GIF_FMT_IMAGE = 2, + GIF_FMT_DISABLED = 3, +}; + +enum GSRegId : uint8_t +{ + GS_REG_PRIM = 0x00, + GS_REG_RGBAQ = 0x01, + GS_REG_ST = 0x02, + GS_REG_UV = 0x03, + GS_REG_XYZF2 = 0x04, + GS_REG_XYZ2 = 0x05, + GS_REG_TEX0_1 = 0x06, + GS_REG_TEX0_2 = 0x07, + GS_REG_CLAMP_1 = 0x08, + GS_REG_CLAMP_2 = 0x09, + GS_REG_FOG = 0x0A, + GS_REG_XYZF3 = 0x0C, + GS_REG_XYZ3 = 0x0D, + GS_REG_AD = 0x0F, -enum GsGpuPrimType : uint8_t + GS_REG_TEX1_1 = 0x14, + GS_REG_TEX1_2 = 0x15, + GS_REG_TEX2_1 = 0x16, + GS_REG_TEX2_2 = 0x17, + GS_REG_XYOFFSET_1 = 0x18, + GS_REG_XYOFFSET_2 = 0x19, + GS_REG_PRMODECONT = 0x1A, + GS_REG_PRMODE = 0x1B, + GS_REG_TEXCLUT = 0x1C, + GS_REG_SCANMSK = 0x22, + GS_REG_MIPTBP1_1 = 0x34, + GS_REG_MIPTBP1_2 = 0x35, + GS_REG_MIPTBP2_1 = 0x36, + GS_REG_MIPTBP2_2 = 0x37, + GS_REG_TEXA = 0x3B, + GS_REG_FOGCOL = 0x3D, + GS_REG_TEXFLUSH = 0x3F, + GS_REG_SCISSOR_1 = 0x40, + GS_REG_SCISSOR_2 = 0x41, + GS_REG_ALPHA_1 = 0x42, + GS_REG_ALPHA_2 = 0x43, + GS_REG_DIMX = 0x44, + GS_REG_DTHE = 0x45, + GS_REG_COLCLAMP = 0x46, + GS_REG_TEST_1 = 0x47, + GS_REG_TEST_2 = 0x48, + GS_REG_PABE = 0x49, + GS_REG_FBA_1 = 0x4A, + GS_REG_FBA_2 = 0x4B, + GS_REG_FRAME_1 = 0x4C, + GS_REG_FRAME_2 = 0x4D, + GS_REG_ZBUF_1 = 0x4E, + GS_REG_ZBUF_2 = 0x4F, + GS_REG_BITBLTBUF = 0x50, + GS_REG_TRXPOS = 0x51, + GS_REG_TRXREG = 0x52, + GS_REG_TRXDIR = 0x53, + GS_REG_HWREG = 0x54, + GS_REG_SIGNAL = 0x60, + GS_REG_FINISH = 0x61, + GS_REG_LABEL = 0x62, +}; + +struct GSVertex { - GS_GPU_POINT = 0, - GS_GPU_LINE = 1, - GS_GPU_TRIANGLE = 2, - GS_GPU_QUAD = 3, + float x, y, z; + uint8_t r, g, b, a; + float q; + float s, t; + uint16_t u, v; + uint8_t fog; }; -struct GsGpuVertex +struct GSFrameReg { - float x, y, z; // screen-space position (after PS2 12.4 fixed → float) - uint8_t r, g, b, a; // vertex color - float u, v; // texture coords (for future use) + uint32_t fbp; + uint32_t fbw; + uint8_t psm; + uint32_t fbmsk; }; -struct GsGpuPrimitive +struct GSScissorReg { - GsGpuPrimType type; - uint8_t vertexCount; // 1 (point), 2 (line), 3 (tri), 4 (quad) - GsGpuVertex verts[4]; + uint16_t x0, x1, y0, y1; }; -class GsGpuFrameData +struct GSTex0Reg { + uint32_t tbp0; + uint8_t tbw; + uint8_t psm; + uint8_t tw; + uint8_t th; + uint8_t tcc; + uint8_t tfx; + uint32_t cbp; + uint8_t cpsm; + uint8_t csm; + uint8_t csa; + uint8_t cld; +}; + +struct GSXYOffsetReg +{ + uint16_t ofx; + uint16_t ofy; +}; + +struct GSContext +{ + GSFrameReg frame; + GSScissorReg scissor; + GSTex0Reg tex0; + GSXYOffsetReg xyoffset; + uint64_t zbuf; + uint64_t tex1; + uint64_t clamp; + uint64_t alpha; + uint64_t test; + uint64_t fba; +}; + +struct GSPrimReg +{ + GSPrimType type; + bool iip; + bool tme; + bool fge; + bool abe; + bool aa1; + bool fst; + bool ctxt; + bool fix; +}; + +struct GSBitBltBuf +{ + uint32_t sbp; + uint8_t sbw; + uint8_t spsm; + uint32_t dbp; + uint8_t dbw; + uint8_t dpsm; +}; + +struct GSTrxPos +{ + uint16_t ssax, ssay; + uint16_t dsax, dsay; + uint8_t dir; +}; + +struct GSTrxReg +{ + uint16_t rrw, rrh; +}; + +class GSRasterizer; + +class GS +{ + friend class GSRasterizer; + public: - GsGpuFrameData(); + GS(); + ~GS() = default; + + void init(uint8_t *vram, uint32_t vramSize, struct GSRegisters *privRegs = nullptr); + void reset(); - void pushPrimitive(const GsGpuPrimitive &prim); + void processGIFPacket(const uint8_t *data, uint32_t sizeBytes); + void writeRegister(uint8_t regAddr, uint64_t value); - const std::vector &swapAndGetFront(); + const uint8_t *lockDisplaySnapshot(uint32_t &outSize); + void unlockDisplaySnapshot(); + uint32_t getLastDisplayBaseBytes() const; - bool hasGpuPrimitives() const; + uint32_t consumeLocalToHostBytes(uint8_t *dst, uint32_t maxBytes); - void setScreenSize(uint32_t w, uint32_t h) - { - m_screenW = w; - m_screenH = h; - } - uint32_t screenWidth() const { return m_screenW; } - uint32_t screenHeight() const { return m_screenH; } + void refreshDisplaySnapshot(); private: - std::vector m_buffers[2]; - int m_backIdx = 0; // index into m_buffers for the current write target - mutable std::mutex m_mutex; - std::atomic m_hasData{false}; - uint32_t m_screenW = 640; - uint32_t m_screenH = 448; -}; + void snapshotVRAM(); + void writeRegisterPacked(uint8_t regDesc, uint64_t lo, uint64_t hi); + void vertexKick(bool drawing); + + void processImageData(const uint8_t *data, uint32_t sizeBytes); + void performLocalToHostToBuffer(); + + GSContext &activeContext(); + + uint8_t *m_vram = nullptr; + uint32_t m_vramSize = 0; + struct GSRegisters *m_privRegs = nullptr; + + GSContext m_ctx[2]; + GSPrimReg m_prim{}; + + uint8_t m_curR = 0x80, m_curG = 0x80, m_curB = 0x80, m_curA = 0x80; + float m_curQ = 1.0f; + float m_curS = 0.0f, m_curT = 0.0f; + uint16_t m_curU = 0, m_curV = 0; + uint8_t m_curFog = 0; -GsGpuFrameData &gsGpuGetFrameData(); -bool gsGpuRenderFrame(); + bool m_prmodecont = true; + + GSBitBltBuf m_bitbltbuf{}; + GSTrxPos m_trxpos{}; + GSTrxReg m_trxreg{}; + uint32_t m_trxdir = 3; + uint32_t m_hwregX = 0; + uint32_t m_hwregY = 0; + + static constexpr int kMaxVerts = 6; + GSVertex m_vtxQueue[kMaxVerts]; + int m_vtxCount = 0; + int m_vtxIndex = 0; + + std::vector m_displaySnapshot; + std::mutex m_snapshotMutex; + uint32_t m_lastDisplayBaseBytes = 0; + + std::vector m_localToHostBuffer; + size_t m_localToHostReadPos = 0; + + GSRasterizer m_rasterizer; +}; -#endif // PS2_GS_GPU_H +#endif diff --git a/ps2xRuntime/include/ps2_gs_psmt4.h b/ps2xRuntime/include/ps2_gs_psmt4.h new file mode 100644 index 00000000..e0ccd4ff --- /dev/null +++ b/ps2xRuntime/include/ps2_gs_psmt4.h @@ -0,0 +1,59 @@ +#ifndef PS2_GS_PSMT4_H +#define PS2_GS_PSMT4_H + +#include + +namespace GSPSMT4 +{ + +static const uint8_t blockTable4[8][4] = { + { 0, 2, 8, 10 }, + { 1, 3, 9, 11 }, + { 4, 6, 12, 14 }, + { 5, 7, 13, 15 }, + { 16, 18, 24, 26 }, + { 17, 19, 25, 27 }, + { 20, 22, 28, 30 }, + { 21, 23, 29, 31 }, +}; + +static const uint16_t columnTable4[16][32] = { + { 0, 8, 32, 40, 64, 72, 96, 104, 2, 10, 34, 42, 66, 74, 98, 106, 4, 12, 36, 44, 68, 76, 100, 108, 6, 14, 38, 46, 70, 78, 102, 110 }, + { 16, 24, 48, 56, 80, 88, 112, 120, 18, 26, 50, 58, 82, 90, 114, 122, 20, 28, 52, 60, 84, 92, 116, 124, 22, 30, 54, 62, 86, 94, 118, 126 }, + { 65, 73, 97, 105, 1, 9, 33, 41, 67, 75, 99, 107, 3, 11, 35, 43, 69, 77, 101, 109, 5, 13, 37, 45, 71, 79, 103, 111, 7, 15, 39, 47 }, + { 81, 89, 113, 121, 17, 25, 49, 57, 83, 91, 115, 123, 19, 27, 51, 59, 85, 93, 117, 125, 21, 29, 53, 61, 87, 95, 119, 127, 23, 31, 55, 63 }, + { 192, 200, 224, 232, 128, 136, 160, 168, 194, 202, 226, 234, 130, 138, 162, 170, 196, 204, 228, 236, 132, 140, 164, 172, 198, 206, 230, 238, 134, 142, 166, 174 }, + { 208, 216, 240, 248, 144, 152, 176, 184, 210, 218, 242, 250, 146, 154, 178, 186, 212, 220, 244, 252, 148, 156, 180, 188, 214, 222, 246, 254, 150, 158, 182, 190 }, + { 129, 137, 161, 169, 193, 201, 225, 233, 131, 139, 163, 171, 195, 203, 227, 235, 133, 141, 165, 173, 197, 205, 229, 237, 135, 143, 167, 175, 199, 207, 231, 239 }, + { 145, 153, 177, 185, 209, 217, 241, 249, 147, 155, 179, 187, 211, 219, 243, 251, 149, 157, 181, 189, 213, 221, 245, 253, 151, 159, 183, 191, 215, 223, 247, 255 }, + { 256, 264, 288, 296, 320, 328, 352, 360, 258, 266, 290, 298, 322, 330, 354, 362, 260, 268, 292, 300, 324, 332, 356, 364, 262, 270, 294, 302, 326, 334, 358, 366 }, + { 272, 280, 304, 312, 336, 344, 368, 376, 274, 282, 306, 314, 338, 346, 370, 378, 276, 284, 308, 316, 340, 348, 372, 380, 278, 286, 310, 318, 342, 350, 374, 382 }, + { 321, 329, 353, 361, 257, 265, 289, 297, 323, 331, 355, 363, 259, 267, 291, 299, 325, 333, 357, 365, 261, 269, 293, 301, 327, 335, 359, 367, 263, 271, 295, 303 }, + { 337, 345, 369, 377, 273, 281, 305, 313, 339, 347, 371, 379, 275, 283, 307, 315, 341, 349, 373, 381, 277, 285, 309, 317, 343, 351, 375, 383, 279, 287, 311, 319 }, + { 448, 456, 480, 488, 384, 392, 416, 424, 450, 458, 482, 490, 386, 394, 418, 426, 452, 460, 484, 492, 388, 396, 420, 428, 454, 462, 486, 494, 390, 398, 422, 430 }, + { 464, 472, 496, 504, 400, 408, 432, 440, 466, 474, 498, 506, 402, 410, 434, 442, 468, 476, 500, 508, 404, 412, 436, 444, 470, 478, 502, 510, 406, 414, 438, 446 }, + { 385, 393, 417, 425, 449, 457, 481, 489, 387, 395, 419, 427, 451, 459, 483, 491, 389, 397, 421, 429, 453, 461, 485, 493, 391, 399, 423, 431, 455, 463, 487, 495 }, + { 401, 409, 433, 441, 465, 473, 497, 505, 403, 411, 435, 443, 467, 475, 499, 507, 405, 413, 437, 445, 469, 477, 501, 509, 407, 415, 439, 447, 471, 479, 503, 511 }, +}; + +inline uint32_t blockIdPSMT4(uint32_t block, uint32_t width, uint32_t x, uint32_t y) +{ + return block + ((y >> 2) & ~0x1Fu) * (width >> 7) + ((x >> 2) & ~0x1Fu) + + blockTable4[(y >> 4) & 7][(x >> 5) & 3]; +} + +inline uint32_t addrPSMT4(uint32_t block, uint32_t width, uint32_t x, uint32_t y) +{ + uint32_t page = (block >> 5) + (y >> 7) * (width >> 1) + (x >> 7); + uint32_t blk = block & 0x1Fu; + uint32_t yy = y & 0x7Fu; + uint32_t xx = x & 0x7Fu; + uint32_t blockId = blk + blockTable4[(yy >> 4) & 7][(xx >> 5) & 3]; + uint32_t column = columnTable4[yy & 15u][xx & 31u]; + uint32_t offset = (blockId << 9) + column; + return (page << 14) + offset; +} + +} + +#endif diff --git a/ps2xRuntime/include/ps2_gs_rasterizer.h b/ps2xRuntime/include/ps2_gs_rasterizer.h new file mode 100644 index 00000000..53846106 --- /dev/null +++ b/ps2xRuntime/include/ps2_gs_rasterizer.h @@ -0,0 +1,24 @@ +#ifndef PS2_GS_RASTERIZER_H +#define PS2_GS_RASTERIZER_H + +#include + +class GS; + +class GSRasterizer +{ +public: + void drawPrimitive(GS *gs); + void writePixel(GS *gs, int x, int y, uint8_t r, uint8_t g, uint8_t b, uint8_t a); + uint32_t sampleTexture(GS *gs, float s, float t, uint16_t u, uint16_t v); + uint32_t readTexelPSMCT32(GS *gs, uint32_t tbp0, uint32_t tbw, int texU, int texV); + uint32_t readTexelPSMT4(GS *gs, uint32_t tbp0, uint32_t tbw, int texU, int texV); + uint32_t lookupCLUT(GS *gs, uint8_t index, uint32_t cbp, uint8_t cpsm, uint8_t csa); + +private: + void drawSprite(GS *gs); + void drawTriangle(GS *gs); + void drawLine(GS *gs); +}; + +#endif diff --git a/ps2xRuntime/include/ps2_memory.h b/ps2xRuntime/include/ps2_memory.h index d349320e..85642fef 100644 --- a/ps2xRuntime/include/ps2_memory.h +++ b/ps2xRuntime/include/ps2_memory.h @@ -3,10 +3,13 @@ #include #include +#include #include #include #include #include + +#include "ps2_gif_arbiter.h" #if defined(_MSC_VER) #include #elif defined(USE_SSE2NEON) @@ -267,34 +270,29 @@ class PS2Memory bool writeIORegister(uint32_t address, uint32_t value); uint32_t readIORegister(uint32_t address); - // Software GS/VIF path used by GIF and VIF1 DMA channels. + using GifPacketCallback = std::function; + void setGifPacketCallback(GifPacketCallback cb) { m_gifPacketCallback = std::move(cb); } + void setGifArbiter(GifArbiter *arbiter) { m_gifArbiter = arbiter; } + + using Vu1MscalCallback = std::function; + void setVu1MscalCallback(Vu1MscalCallback cb) { m_vu1MscalCallback = std::move(cb); } + + uint8_t *getVU1Code() { return m_vu1Code; } + const uint8_t *getVU1Code() const { return m_vu1Code; } + uint8_t *getVU1Data() { return m_vu1Data; } + const uint8_t *getVU1Data() const { return m_vu1Data; } + + bool isPath3Masked() const { return m_path3Masked; } + + void submitGifPacket(GifPathId pathId, const uint8_t *data, uint32_t sizeBytes, bool drainImmediately = true); void processGIFPacket(uint32_t srcPhysAddr, uint32_t qwCount); + void processGIFPacket(const uint8_t *data, uint32_t sizeBytes); void processVIF1Data(uint32_t srcPhysAddr, uint32_t sizeBytes); + void processVIF1Data(const uint8_t *data, uint32_t sizeBytes); + void processPendingTransfers(); - // Poll DMA registers from rdram shadow (workaround for KSEG1 fast-path bypass) int pollDmaRegisters(); - struct GSDrawContext - { - uint64_t bitbltbuf = 0; - uint64_t trxpos = 0; - uint64_t trxreg = 0; - uint64_t trxdir = 0; - bool xferActive = false; - uint32_t xferDestX = 0; - uint32_t xferDestY = 0; - uint32_t xferWidth = 0; - uint32_t xferHeight = 0; - uint32_t xferDBP = 0; - uint32_t xferDBW = 0; - uint32_t xferDPSM = 0; - uint32_t xferPixelsWritten = 0; - uint32_t gifTagsProcessed = 0; - uint32_t adWrites = 0; - uint32_t imageTransfers = 0; - uint32_t primitivesDrawn = 0; - }; - // Track code modifications for self-modifying code void registerCodeRegion(uint32_t start, uint32_t end); bool isCodeModified(uint32_t address, uint32_t size); @@ -305,8 +303,6 @@ class PS2Memory const GSRegisters &gs() const { return gs_regs; } uint8_t *getGSVRAM() { return m_gsVRAM; } const uint8_t *getGSVRAM() const { return m_gsVRAM; } - GSDrawContext &gsDrawCtx() { return m_gsDrawCtx; } - const GSDrawContext &gsDrawCtx() const { return m_gsDrawCtx; } bool hasSeenGifCopy() const { return m_seenGifCopy; } // Main RAM (32MB) uint8_t *m_rdram; @@ -327,7 +323,6 @@ class PS2Memory // Registers GSRegisters gs_regs; - GSDrawContext m_gsDrawCtx; uint8_t *m_gsVRAM; VIFRegisters vif0_regs; VIFRegisters vif1_regs; @@ -344,6 +339,24 @@ class PS2Memory std::vector m_tlbEntries; + GifPacketCallback m_gifPacketCallback; + GifArbiter *m_gifArbiter = nullptr; + Vu1MscalCallback m_vu1MscalCallback; + + uint8_t *m_vu1Code = nullptr; + uint8_t *m_vu1Data = nullptr; + bool m_path3Masked = false; + + struct PendingTransfer + { + bool fromScratchpad = false; + uint32_t srcAddr = 0; + uint32_t qwc = 0; + std::vector chainData; + }; + std::vector m_pendingGifTransfers; + std::vector m_pendingVif1Transfers; + struct CodeRegion { uint32_t start; diff --git a/ps2xRuntime/include/ps2_runtime.h b/ps2xRuntime/include/ps2_runtime.h index 264318b3..27340dd6 100644 --- a/ps2xRuntime/include/ps2_runtime.h +++ b/ps2xRuntime/include/ps2_runtime.h @@ -21,8 +21,11 @@ #include #include +#include "ps2_gif_arbiter.h" #include "ps2_memory.h" +#include "ps2_gs_gpu.h" #include "ps2_iop.h" +#include "ps2_vu1.h" #include "ps2_audio.h" #include "ps2_pad.h" @@ -460,6 +463,13 @@ class PS2Runtime inline PS2Memory &memory() { return m_memory; } inline const PS2Memory &memory() const { return m_memory; } + inline GS &gs() { return m_gs; } + inline const GS &gs() const { return m_gs; } + inline GifArbiter &gifArbiter() { return m_gifArbiter; } + inline const GifArbiter &gifArbiter() const { return m_gifArbiter; } + inline VU1Interpreter &vu1() { return m_vu1; } + inline const VU1Interpreter &vu1() const { return m_vu1; } + inline IOP &iop() { return m_iop; } inline const IOP &iop() const { return m_iop; } inline PS2AudioBackend &audioBackend() { return m_audioBackend; } @@ -491,9 +501,12 @@ class PS2Runtime private: PS2Memory m_memory; + GifArbiter m_gifArbiter; + GS m_gs; IOP m_iop; PS2AudioBackend m_audioBackend; PSPadBackend m_padBackend; + VU1Interpreter m_vu1; R5900Context m_cpuContext; mutable std::mutex m_guestHeapMutex; std::vector m_guestHeapBlocks; diff --git a/ps2xRuntime/include/ps2_vu1.h b/ps2xRuntime/include/ps2_vu1.h new file mode 100644 index 00000000..1e2ad59b --- /dev/null +++ b/ps2xRuntime/include/ps2_vu1.h @@ -0,0 +1,62 @@ +#ifndef PS2_VU1_H +#define PS2_VU1_H + +#include + +class GS; +class PS2Memory; + +struct VU1State +{ + float vf[32][4]; + int32_t vi[16]; + float acc[4]; + float q; + float p; + float i; + uint32_t pc; + uint32_t mac; + uint32_t clip; + uint32_t status; + bool ebit; + uint32_t itop; + uint32_t xitop; +}; + +class VU1Interpreter +{ +public: + VU1Interpreter(); + + void reset(); + + void execute(uint8_t *vuCode, uint32_t codeSize, + uint8_t *vuData, uint32_t dataSize, + GS &gs, PS2Memory *memory = nullptr, + uint32_t startPC = 0, uint32_t itop = 0, + uint32_t maxCycles = 65536); + + void resume(uint8_t *vuCode, uint32_t codeSize, + uint8_t *vuData, uint32_t dataSize, + GS &gs, PS2Memory *memory = nullptr, + uint32_t itop = 0, uint32_t maxCycles = 65536); + + VU1State &state() { return m_state; } + const VU1State &state() const { return m_state; } + +private: + VU1State m_state; + + void run(uint8_t *vuCode, uint32_t codeSize, + uint8_t *vuData, uint32_t dataSize, + GS &gs, PS2Memory *memory, uint32_t maxCycles); + + void execUpper(uint32_t instr); + void execLower(uint32_t instr, uint8_t *vuData, uint32_t dataSize, GS &gs, PS2Memory *memory, uint32_t upperInstr); + + void applyDest(float *dst, const float *result, uint8_t dest); + void applyDestAcc(const float *result, uint8_t dest); + float broadcast(const float *vf, uint8_t bc); +}; + +#endif diff --git a/ps2xRuntime/src/lib/ps2_gif_arbiter.cpp b/ps2xRuntime/src/lib/ps2_gif_arbiter.cpp new file mode 100644 index 00000000..3a137b19 --- /dev/null +++ b/ps2xRuntime/src/lib/ps2_gif_arbiter.cpp @@ -0,0 +1,44 @@ +#include "ps2_gif_arbiter.h" +#include +#include + +GifArbiter::GifArbiter(ProcessPacketFn processFn) + : m_processFn(std::move(processFn)) +{ +} + +void GifArbiter::submit(GifPathId pathId, const uint8_t *data, uint32_t sizeBytes) +{ + if (!data || sizeBytes < 16 || !m_processFn) + return; + + GifArbiterPacket pkt; + pkt.pathId = pathId; + pkt.data.resize(sizeBytes); + std::memcpy(pkt.data.data(), data, sizeBytes); + m_queue.push_back(std::move(pkt)); +} + +void GifArbiter::drain() +{ + if (!m_processFn) + return; + + std::stable_sort(m_queue.begin(), m_queue.end(), + [](const GifArbiterPacket &a, const GifArbiterPacket &b) { + return pathPriority(a.pathId) < pathPriority(b.pathId); + }); + + for (size_t i = 0; i < m_queue.size(); ++i) + { + auto &pkt = m_queue[i]; + if (!pkt.data.empty()) + m_processFn(pkt.data.data(), static_cast(pkt.data.size())); + } + m_queue.clear(); +} + +uint8_t GifArbiter::pathPriority(GifPathId id) +{ + return static_cast(id); +} diff --git a/ps2xRuntime/src/lib/ps2_gs_gpu.cpp b/ps2xRuntime/src/lib/ps2_gs_gpu.cpp index faf9a9d3..105f4ed7 100644 --- a/ps2xRuntime/src/lib/ps2_gs_gpu.cpp +++ b/ps2xRuntime/src/lib/ps2_gs_gpu.cpp @@ -1,141 +1,894 @@ #include "ps2_gs_gpu.h" -#include "raylib.h" -#include "rlgl.h" +#include "ps2_gs_common.h" +#include "ps2_gs_psmt4.h" +#include "ps2_memory.h" +#include +#include +#include +#include -GsGpuFrameData::GsGpuFrameData() +namespace { - m_buffers[0].reserve(8192); - m_buffers[1].reserve(8192); +static inline uint64_t loadLE64(const uint8_t *p) +{ + uint64_t v; + std::memcpy(&v, p, 8); + return v; +} } -void GsGpuFrameData::pushPrimitive(const GsGpuPrimitive &prim) +using namespace GSInternal; + +GS::GS() { - std::lock_guard lock(m_mutex); - m_buffers[m_backIdx].push_back(prim); - m_hasData.store(true, std::memory_order_relaxed); + reset(); } -const std::vector &GsGpuFrameData::swapAndGetFront() +void GS::init(uint8_t *vram, uint32_t vramSize, GSRegisters *privRegs) { - std::lock_guard lock(m_mutex); - int frontIdx = m_backIdx; - m_backIdx = 1 - m_backIdx; - m_buffers[m_backIdx].clear(); - m_hasData.store(false, std::memory_order_relaxed); - return m_buffers[frontIdx]; + m_vram = vram; + m_vramSize = vramSize; + m_privRegs = privRegs; + reset(); } -bool GsGpuFrameData::hasGpuPrimitives() const +void GS::reset() { - return m_hasData.load(std::memory_order_relaxed); + std::memset(m_ctx, 0, sizeof(m_ctx)); + m_prim = {}; + m_curR = 0x80; m_curG = 0x80; m_curB = 0x80; m_curA = 0x80; + m_curQ = 1.0f; + m_curS = 0.0f; m_curT = 0.0f; + m_curU = 0; m_curV = 0; + m_curFog = 0; + m_prmodecont = true; + m_bitbltbuf = {}; + m_trxpos = {}; + m_trxreg = {}; + m_trxdir = 3; + m_hwregX = 0; + m_hwregY = 0; + m_vtxCount = 0; + m_vtxIndex = 0; + m_localToHostBuffer.clear(); + m_localToHostReadPos = 0; + + for (int i = 0; i < 2; ++i) + { + m_ctx[i].frame.fbw = 10; + m_ctx[i].scissor = {0, 639, 0, 447}; + m_ctx[i].xyoffset = {0, 0}; + } } -GsGpuFrameData &gsGpuGetFrameData() +GSContext &GS::activeContext() { - static GsGpuFrameData instance; - return instance; + return m_ctx[m_prim.ctxt ? 1 : 0]; } -bool gsGpuRenderFrame() +void GS::snapshotVRAM() { - GsGpuFrameData &fd = gsGpuGetFrameData(); - const std::vector &prims = fd.swapAndGetFront(); + if (!m_vram || m_vramSize == 0) return; + std::lock_guard lock(m_snapshotMutex); + m_displaySnapshot.resize(m_vramSize); + std::memcpy(m_displaySnapshot.data(), m_vram, m_vramSize); +} - if (prims.empty()) +const uint8_t *GS::lockDisplaySnapshot(uint32_t &outSize) +{ + m_snapshotMutex.lock(); + if (m_displaySnapshot.empty()) { - return false; + outSize = 0; + return nullptr; } + outSize = static_cast(m_displaySnapshot.size()); + return m_displaySnapshot.data(); +} - const float screenW = static_cast(fd.screenWidth()); - const float screenH = static_cast(fd.screenHeight()); +void GS::unlockDisplaySnapshot() +{ + m_snapshotMutex.unlock(); +} - // Set up 2D orthographic projection matching PS2 screen coords - rlMatrixMode(RL_PROJECTION); - rlPushMatrix(); - rlLoadIdentity(); - rlOrtho(0.0, static_cast(screenW), - static_cast(screenH), 0.0, - -1.0, 1.0); +uint32_t GS::getLastDisplayBaseBytes() const +{ + return m_lastDisplayBaseBytes; +} - rlMatrixMode(RL_MODELVIEW); - rlPushMatrix(); - rlLoadIdentity(); +void GS::refreshDisplaySnapshot() +{ + snapshotVRAM(); +} - // Disable depth test for 2D rendering (PS2 GS handles Z separately) - rlDisableDepthTest(); +void GS::processGIFPacket(const uint8_t *data, uint32_t sizeBytes) +{ + if (!data || sizeBytes < 16 || !m_vram) + return; - // Disable backface culling — PS2 games rely on both winding orders - rlDisableBackfaceCulling(); + if (sizeBytes >= 16) + { + const uint64_t tagLo = loadLE64(data); + const uint8_t flg = static_cast((tagLo >> 58) & 0x3); + if (flg == GIF_FMT_PACKED) + { + m_hwregX = 0; + m_hwregY = 0; + } + } - // Render each primitive - for (const GsGpuPrimitive &prim : prims) + uint32_t offset = 0; + while (offset + 16 <= sizeBytes) { - switch (prim.type) + uint64_t tagLo = loadLE64(data + offset); + uint64_t tagHi = loadLE64(data + offset + 8); + offset += 16; + + m_curQ = 1.0f; + + uint32_t nloop = static_cast(tagLo & 0x7FFF); + uint8_t flg = static_cast((tagLo >> 58) & 0x3); + uint32_t nreg = static_cast((tagLo >> 60) & 0xF); + if (nreg == 0) nreg = 16; + + bool pre = ((tagLo >> 46) & 1) != 0; + if (pre) { - case GS_GPU_TRIANGLE: + writeRegister(GS_REG_PRIM, (tagLo >> 47) & 0x7FF); + } + + uint8_t regs[16]; + for (uint32_t i = 0; i < nreg; ++i) + regs[i] = static_cast((tagHi >> (i * 4)) & 0xF); + + if (flg == GIF_FMT_PACKED) + { + for (uint32_t loop = 0; loop < nloop; ++loop) + { + for (uint32_t r = 0; r < nreg; ++r) + { + if (offset + 16 > sizeBytes) + return; + uint64_t lo = loadLE64(data + offset); + uint64_t hi = loadLE64(data + offset + 8); + offset += 16; + writeRegisterPacked(regs[r], lo, hi); + } + } + } + else if (flg == GIF_FMT_REGLIST) { - rlBegin(RL_TRIANGLES); - for (int i = 0; i < 3; ++i) + for (uint32_t loop = 0; loop < nloop; ++loop) { - const GsGpuVertex &v = prim.verts[i]; - rlColor4ub(v.r, v.g, v.b, v.a); - rlVertex3f(v.x, v.y, v.z); + for (uint32_t r = 0; r < nreg; ++r) + { + if (offset + 8 > sizeBytes) + return; + writeRegister(regs[r], loadLE64(data + offset)); + offset += 8; + } } - rlEnd(); - break; + if ((nloop * nreg) & 1) + offset += 8; + } + else if (flg == GIF_FMT_IMAGE) + { + uint32_t imageBytes = nloop * 16; + if (offset + imageBytes > sizeBytes) + imageBytes = sizeBytes - offset; + processImageData(data + offset, imageBytes); + offset += imageBytes; } + } +} + +void GS::writeRegisterPacked(uint8_t regDesc, uint64_t lo, uint64_t hi) +{ + switch (regDesc) + { + case 0x00: + writeRegister(GS_REG_PRIM, lo & 0x7FF); + break; + case 0x01: + m_curR = static_cast(lo & 0xFF); + m_curG = static_cast((lo >> 32) & 0xFF); + m_curB = static_cast(hi & 0xFF); + m_curA = static_cast((hi >> 32) & 0xFF); + break; + case 0x02: + { + uint32_t sBits = static_cast(lo & 0xFFFFFFFF); + uint32_t tBits = static_cast((lo >> 32) & 0xFFFFFFFF); + uint32_t qBits = static_cast(hi & 0xFFFFFFFF); + std::memcpy(&m_curS, &sBits, 4); + std::memcpy(&m_curT, &tBits, 4); + std::memcpy(&m_curQ, &qBits, 4); + if (m_curQ == 0.0f) m_curQ = 1.0f; + break; + } + case 0x03: + m_curU = static_cast(lo & 0xFFFFu); + m_curV = static_cast((lo >> 32) & 0xFFFFu); + break; + case 0x04: + { + uint16_t x = static_cast(lo & 0xFFFF); + uint16_t y = static_cast((lo >> 32) & 0xFFFF); + uint32_t z = static_cast((hi >> 4) & 0xFFFFFF); + uint8_t f = static_cast((hi >> 36) & 0xFF); + bool adk = ((hi >> 47) & 1) != 0; + GSVertex &vtx = m_vtxQueue[m_vtxCount % kMaxVerts]; + vtx.x = static_cast(x) / 16.0f; + vtx.y = static_cast(y) / 16.0f; + vtx.z = static_cast(z); + vtx.r = m_curR; vtx.g = m_curG; vtx.b = m_curB; vtx.a = m_curA; + vtx.q = m_curQ; vtx.s = m_curS; vtx.t = m_curT; + vtx.u = m_curU; vtx.v = m_curV; vtx.fog = f; + vertexKick(!adk); + break; + } + case 0x05: + { + uint16_t x = static_cast(lo & 0xFFFF); + uint16_t y = static_cast((lo >> 32) & 0xFFFF); + uint32_t z = static_cast(hi & 0xFFFFFFFF); + bool adk = ((hi >> 47) & 1) != 0; + GSVertex &vtx = m_vtxQueue[m_vtxCount % kMaxVerts]; + vtx.x = static_cast(x) / 16.0f; + vtx.y = static_cast(y) / 16.0f; + vtx.z = static_cast(z); + vtx.r = m_curR; vtx.g = m_curG; vtx.b = m_curB; vtx.a = m_curA; + vtx.q = m_curQ; vtx.s = m_curS; vtx.t = m_curT; + vtx.u = m_curU; vtx.v = m_curV; vtx.fog = m_curFog; + vertexKick(!adk); + break; + } + case 0x0A: + m_curFog = static_cast((hi >> 36) & 0xFF); + break; + case 0x0C: + { + GSVertex &vtx = m_vtxQueue[m_vtxCount % kMaxVerts]; + vtx.x = static_cast(lo & 0xFFFF) / 16.0f; + vtx.y = static_cast((lo >> 32) & 0xFFFF) / 16.0f; + vtx.z = static_cast((hi >> 4) & 0xFFFFFF); + vtx.r = m_curR; vtx.g = m_curG; vtx.b = m_curB; vtx.a = m_curA; + vtx.q = m_curQ; vtx.s = m_curS; vtx.t = m_curT; + vtx.u = m_curU; vtx.v = m_curV; + vtx.fog = static_cast((hi >> 36) & 0xFF); + vertexKick(false); + break; + } + case 0x0D: + { + GSVertex &vtx = m_vtxQueue[m_vtxCount % kMaxVerts]; + vtx.x = static_cast(lo & 0xFFFF) / 16.0f; + vtx.y = static_cast((lo >> 32) & 0xFFFF) / 16.0f; + vtx.z = static_cast(hi & 0xFFFFFFFF); + vtx.r = m_curR; vtx.g = m_curG; vtx.b = m_curB; vtx.a = m_curA; + vtx.q = m_curQ; vtx.s = m_curS; vtx.t = m_curT; + vtx.u = m_curU; vtx.v = m_curV; vtx.fog = m_curFog; + vertexKick(false); + break; + } + case 0x0E: + { + uint8_t addr = static_cast(hi & 0xFF); + writeRegister(addr, lo); + break; + } + case 0x0F: + break; + default: + writeRegister(regDesc, lo); + break; + } +} + +void GS::writeRegister(uint8_t regAddr, uint64_t value) +{ + switch (regAddr) + { + case GS_REG_PRIM: + { + m_prim.type = static_cast(value & 0x7); + m_prim.iip = ((value >> 3) & 1) != 0; + m_prim.tme = ((value >> 4) & 1) != 0; + m_prim.fge = ((value >> 5) & 1) != 0; + m_prim.abe = ((value >> 6) & 1) != 0; + m_prim.aa1 = ((value >> 7) & 1) != 0; + m_prim.fst = ((value >> 8) & 1) != 0; + m_prim.ctxt = ((value >> 9) & 1) != 0; + m_prim.fix = ((value >> 10) & 1) != 0; + m_vtxCount = 0; + m_vtxIndex = 0; + break; + } + case GS_REG_RGBAQ: + { + m_curR = static_cast(value & 0xFF); + m_curG = static_cast((value >> 8) & 0xFF); + m_curB = static_cast((value >> 16) & 0xFF); + m_curA = static_cast((value >> 24) & 0xFF); + uint32_t qBits = static_cast((value >> 32) & 0xFFFFFFFF); + std::memcpy(&m_curQ, &qBits, 4); + if (m_curQ == 0.0f) m_curQ = 1.0f; + break; + } + case GS_REG_ST: + { + uint32_t sBits = static_cast(value & 0xFFFFFFFF); + uint32_t tBits = static_cast((value >> 32) & 0xFFFFFFFF); + std::memcpy(&m_curS, &sBits, 4); + std::memcpy(&m_curT, &tBits, 4); + break; + } + case GS_REG_UV: + { + m_curU = static_cast(value & 0xFFFFu); + m_curV = static_cast((value >> 16) & 0xFFFFu); + break; + } + case GS_REG_XYZF2: + case GS_REG_XYZF3: + { + GSVertex &vtx = m_vtxQueue[m_vtxCount % kMaxVerts]; + vtx.x = static_cast(value & 0xFFFF) / 16.0f; + vtx.y = static_cast((value >> 16) & 0xFFFF) / 16.0f; + vtx.z = static_cast((value >> 32) & 0xFFFFFF); + vtx.fog = static_cast((value >> 56) & 0xFF); + vtx.r = m_curR; vtx.g = m_curG; vtx.b = m_curB; vtx.a = m_curA; + vtx.q = m_curQ; vtx.s = m_curS; vtx.t = m_curT; + vtx.u = m_curU; vtx.v = m_curV; + vertexKick(regAddr == GS_REG_XYZF2); + break; + } + case GS_REG_XYZ2: + case GS_REG_XYZ3: + { + GSVertex &vtx = m_vtxQueue[m_vtxCount % kMaxVerts]; + vtx.x = static_cast(value & 0xFFFF) / 16.0f; + vtx.y = static_cast((value >> 16) & 0xFFFF) / 16.0f; + vtx.z = static_cast((value >> 32) & 0xFFFFFFFF); + vtx.r = m_curR; vtx.g = m_curG; vtx.b = m_curB; vtx.a = m_curA; + vtx.q = m_curQ; vtx.s = m_curS; vtx.t = m_curT; + vtx.u = m_curU; vtx.v = m_curV; vtx.fog = m_curFog; + vertexKick(regAddr == GS_REG_XYZ2); + break; + } + case GS_REG_TEX0_1: + case GS_REG_TEX0_2: + { + int ci = (regAddr == GS_REG_TEX0_2) ? 1 : 0; + auto &t = m_ctx[ci].tex0; + t.tbp0 = static_cast(value & 0x3FFF); + t.tbw = static_cast((value >> 14) & 0x3F); + t.psm = static_cast((value >> 20) & 0x3F); + t.tw = static_cast((value >> 26) & 0xF); + t.th = static_cast((value >> 30) & 0xF); + t.tcc = static_cast((value >> 34) & 0x1); + t.tfx = static_cast((value >> 35) & 0x3); + t.cbp = static_cast((value >> 37) & 0x3FFF); + t.cpsm = static_cast((value >> 51) & 0xF); + t.csm = static_cast((value >> 55) & 0x1); + t.csa = static_cast((value >> 56) & 0x1F); + t.cld = static_cast((value >> 61) & 0x7); + break; + } + case GS_REG_CLAMP_1: + case GS_REG_CLAMP_2: + { + int ci = (regAddr == GS_REG_CLAMP_2) ? 1 : 0; + m_ctx[ci].clamp = value; + break; + } + case GS_REG_FOG: + m_curFog = static_cast((value >> 56) & 0xFF); + break; + case GS_REG_TEX1_1: + case GS_REG_TEX1_2: + { + int ci = (regAddr == GS_REG_TEX1_2) ? 1 : 0; + m_ctx[ci].tex1 = value; + break; + } + case GS_REG_TEX2_1: + case GS_REG_TEX2_2: + break; + case GS_REG_XYOFFSET_1: + case GS_REG_XYOFFSET_2: + { + int ci = (regAddr == GS_REG_XYOFFSET_2) ? 1 : 0; + m_ctx[ci].xyoffset.ofx = static_cast(value & 0xFFFF); + m_ctx[ci].xyoffset.ofy = static_cast((value >> 32) & 0xFFFF); + break; + } + case GS_REG_PRMODECONT: + m_prmodecont = (value & 1) != 0; + break; + case GS_REG_PRMODE: + if (!m_prmodecont) + { + m_prim.iip = ((value >> 3) & 1) != 0; + m_prim.tme = ((value >> 4) & 1) != 0; + m_prim.fge = ((value >> 5) & 1) != 0; + m_prim.abe = ((value >> 6) & 1) != 0; + m_prim.aa1 = ((value >> 7) & 1) != 0; + m_prim.fst = ((value >> 8) & 1) != 0; + m_prim.ctxt = ((value >> 9) & 1) != 0; + m_prim.fix = ((value >> 10) & 1) != 0; + } + break; + case GS_REG_SCISSOR_1: + case GS_REG_SCISSOR_2: + { + int ci = (regAddr == GS_REG_SCISSOR_2) ? 1 : 0; + m_ctx[ci].scissor.x0 = static_cast(value & 0x7FF); + m_ctx[ci].scissor.x1 = static_cast((value >> 16) & 0x7FF); + m_ctx[ci].scissor.y0 = static_cast((value >> 32) & 0x7FF); + m_ctx[ci].scissor.y1 = static_cast((value >> 48) & 0x7FF); + break; + } + case GS_REG_ALPHA_1: + case GS_REG_ALPHA_2: + { + int ci = (regAddr == GS_REG_ALPHA_2) ? 1 : 0; + m_ctx[ci].alpha = value; + break; + } + case GS_REG_TEST_1: + case GS_REG_TEST_2: + { + int ci = (regAddr == GS_REG_TEST_2) ? 1 : 0; + m_ctx[ci].test = value; + break; + } + case GS_REG_FRAME_1: + case GS_REG_FRAME_2: + { + int ci = (regAddr == GS_REG_FRAME_2) ? 1 : 0; + m_ctx[ci].frame.fbp = static_cast(value & 0x1FF); + m_ctx[ci].frame.fbw = static_cast((value >> 16) & 0x3F); + m_ctx[ci].frame.psm = static_cast((value >> 24) & 0x3F); + m_ctx[ci].frame.fbmsk = static_cast((value >> 32) & 0xFFFFFFFF); + break; + } + case GS_REG_ZBUF_1: + case GS_REG_ZBUF_2: + { + int ci = (regAddr == GS_REG_ZBUF_2) ? 1 : 0; + m_ctx[ci].zbuf = value; + break; + } + case GS_REG_FBA_1: + case GS_REG_FBA_2: + { + int ci = (regAddr == GS_REG_FBA_2) ? 1 : 0; + m_ctx[ci].fba = value; + break; + } + case GS_REG_BITBLTBUF: + { + m_bitbltbuf.sbp = static_cast(value & 0x3FFF); + m_bitbltbuf.sbw = static_cast((value >> 16) & 0x3F); + m_bitbltbuf.spsm = static_cast((value >> 24) & 0x3F); + m_bitbltbuf.dbp = static_cast((value >> 32) & 0x3FFF); + m_bitbltbuf.dbw = static_cast((value >> 48) & 0x3F); + m_bitbltbuf.dpsm = static_cast((value >> 56) & 0x3F); + break; + } + case GS_REG_TRXPOS: + { + m_trxpos.ssax = static_cast(value & 0x7FF); + m_trxpos.ssay = static_cast((value >> 16) & 0x7FF); + m_trxpos.dsax = static_cast((value >> 32) & 0x7FF); + m_trxpos.dsay = static_cast((value >> 48) & 0x7FF); + m_trxpos.dir = static_cast((value >> 59) & 0x3); + break; + } + case GS_REG_TRXREG: + { + m_trxreg.rrw = static_cast(value & 0xFFF); + m_trxreg.rrh = static_cast((value >> 32) & 0xFFF); + break; + } + case GS_REG_TRXDIR: + { + m_trxdir = static_cast(value & 0x3); + m_hwregX = 0; + m_hwregY = 0; - case GS_GPU_QUAD: + if (m_trxdir == 2 && m_vram) { - // QUAD: v0=top-left, v1=top-right, v2=bottom-left, v3=bottom-right - // Raylib RL_QUADS expects: v0, v1, v2, v3 in order - rlBegin(RL_QUADS); - for (int i = 0; i < 4; ++i) + uint32_t sbp = m_bitbltbuf.sbp; + uint8_t sbw = m_bitbltbuf.sbw; + uint8_t spsm = m_bitbltbuf.spsm; + uint32_t dbp = m_bitbltbuf.dbp; + uint8_t dbw = m_bitbltbuf.dbw; + uint8_t dpsm = m_bitbltbuf.dpsm; + + if (sbw == 0) sbw = 1; + if (dbw == 0) dbw = 1; + + uint32_t srcBase = sbp * 256u; + uint32_t dstBase = dbp * 256u; + uint32_t srcBpp = bitsPerPixel(spsm) / 8u; + uint32_t dstBpp = bitsPerPixel(dpsm) / 8u; + if (srcBpp == 0) srcBpp = 4; + if (dstBpp == 0) dstBpp = 4; + uint32_t srcStride = static_cast(sbw) * 64u * srcBpp; + uint32_t dstStride = static_cast(dbw) * 64u * dstBpp; + uint32_t rrw = m_trxreg.rrw; + uint32_t rrh = m_trxreg.rrh; + uint32_t ssax = m_trxpos.ssax; + uint32_t ssay = m_trxpos.ssay; + uint32_t dsax = m_trxpos.dsax; + uint32_t dsay = m_trxpos.dsay; + uint32_t copyBpp = (srcBpp < dstBpp) ? srcBpp : dstBpp; + uint32_t rowBytes = rrw * copyBpp; + + if (dstBase > srcBase) + { + for (int row = static_cast(rrh) - 1; row >= 0; --row) + { + uint32_t srcOff = srcBase + (ssay + row) * srcStride + ssax * srcBpp; + uint32_t dstOff = dstBase + (dsay + row) * dstStride + dsax * dstBpp; + if (srcOff + rowBytes <= m_vramSize && dstOff + rowBytes <= m_vramSize) + std::memmove(m_vram + dstOff, m_vram + srcOff, rowBytes); + } + } + else { - const GsGpuVertex &v = prim.verts[i]; - rlColor4ub(v.r, v.g, v.b, v.a); - rlVertex3f(v.x, v.y, v.z); + for (uint32_t row = 0; row < rrh; ++row) + { + uint32_t srcOff = srcBase + (ssay + row) * srcStride + ssax * srcBpp; + uint32_t dstOff = dstBase + (dsay + row) * dstStride + dsax * dstBpp; + if (srcOff + rowBytes <= m_vramSize && dstOff + rowBytes <= m_vramSize) + std::memmove(m_vram + dstOff, m_vram + srcOff, rowBytes); + } } - rlEnd(); - break; + + if (sbp == 0u && (dbp == 0u || dbp == 0x20u) && rrw >= 640u && rrh >= 512u) { + m_lastDisplayBaseBytes = (dbp == 0x20u) ? 8192u : 0u; + snapshotVRAM(); + } + } + else if (m_trxdir == 1 && m_vram) + { + performLocalToHostToBuffer(); + } + break; + } + case GS_REG_HWREG: + { + uint8_t buf[8]; + std::memcpy(buf, &value, 8); + processImageData(buf, 8); + break; + } + case GS_REG_TEXFLUSH: + case GS_REG_TEXCLUT: + case GS_REG_SCANMSK: + case GS_REG_FOGCOL: + case GS_REG_DIMX: + case GS_REG_DTHE: + case GS_REG_COLCLAMP: + case GS_REG_PABE: + case GS_REG_MIPTBP1_1: + case GS_REG_MIPTBP1_2: + case GS_REG_MIPTBP2_1: + case GS_REG_MIPTBP2_2: + case GS_REG_TEXA: + break; + case GS_REG_SIGNAL: + { + if (m_privRegs) + { + uint32_t id = static_cast(value & 0xFFFFFFFF); + uint32_t mask = static_cast(value >> 32); + uint32_t lo = static_cast(m_privRegs->siglblid & 0xFFFFFFFF); + lo = (lo & ~mask) | (id & mask); + m_privRegs->siglblid = (m_privRegs->siglblid & 0xFFFFFFFF00000000ULL) | lo; + m_privRegs->csr |= 0x1; + } + break; + } + case GS_REG_FINISH: + { + if (m_privRegs) + m_privRegs->csr |= 0x2; + break; + } + case GS_REG_LABEL: + { + if (m_privRegs) + { + uint32_t id = static_cast(value & 0xFFFFFFFF); + uint32_t mask = static_cast(value >> 32); + uint32_t hi = static_cast(m_privRegs->siglblid >> 32); + hi = (hi & ~mask) | (id & mask); + m_privRegs->siglblid = (static_cast(hi) << 32) | (m_privRegs->siglblid & 0xFFFFFFFF); } + break; + } + default: + break; + } +} + +void GS::vertexKick(bool drawing) +{ + ++m_vtxCount; + ++m_vtxIndex; + + if (!drawing) + return; + + int needed = 0; + switch (m_prim.type) + { + case GS_PRIM_POINT: needed = 1; break; + case GS_PRIM_LINE: needed = 2; break; + case GS_PRIM_LINESTRIP: needed = 2; break; + case GS_PRIM_TRIANGLE: needed = 3; break; + case GS_PRIM_TRISTRIP: needed = 3; break; + case GS_PRIM_TRIFAN: needed = 3; break; + case GS_PRIM_SPRITE: needed = 2; break; + default: return; + } + + if (m_vtxCount < needed) + return; - case GS_GPU_LINE: + m_rasterizer.drawPrimitive(this); + + switch (m_prim.type) + { + case GS_PRIM_LINE: + case GS_PRIM_TRIANGLE: + case GS_PRIM_SPRITE: + case GS_PRIM_POINT: + m_vtxCount = 0; + break; + case GS_PRIM_LINESTRIP: + m_vtxQueue[0] = m_vtxQueue[1]; + m_vtxCount = 1; + break; + case GS_PRIM_TRISTRIP: + m_vtxQueue[0] = m_vtxQueue[1]; + m_vtxQueue[1] = m_vtxQueue[2]; + m_vtxCount = 2; + break; + case GS_PRIM_TRIFAN: + m_vtxQueue[1] = m_vtxQueue[2]; + m_vtxCount = 2; + break; + default: + m_vtxCount = 0; + break; + } +} + +void GS::processImageData(const uint8_t *data, uint32_t sizeBytes) +{ + if (m_trxdir != 0 || !m_vram) + return; + + uint32_t dbp = m_bitbltbuf.dbp; + uint8_t dbw = m_bitbltbuf.dbw; + uint8_t dpsm = m_bitbltbuf.dpsm; + + if (dbw == 0) dbw = 1; + uint32_t base = dbp * 256u; + uint32_t bpp = bitsPerPixel(dpsm); + uint32_t stridePixels = static_cast(dbw) * 64u; + + uint32_t rrw = m_trxreg.rrw; + uint32_t rrh = m_trxreg.rrh; + uint32_t dsax = m_trxpos.dsax; + uint32_t dsay = m_trxpos.dsay; + + if (bpp == 4) + { + uint32_t rowBytes = (rrw + 1u) / 2u; + if (rowBytes == 0) rowBytes = 1; + uint32_t widthBlocks = (dbw != 0) ? static_cast(dbw) : 1u; + for (uint32_t y = 0; y < rrh && (y * rowBytes) < sizeBytes; ++y) { - rlBegin(RL_LINES); - for (int i = 0; i < 2; ++i) + uint32_t srcRowOff = y * rowBytes; + for (uint32_t x = 0; x < rrw && (srcRowOff + (x / 2u)) < sizeBytes; ++x) { - const GsGpuVertex &v = prim.verts[i]; - rlColor4ub(v.r, v.g, v.b, v.a); - rlVertex3f(v.x, v.y, v.z); + uint32_t srcByte = data[srcRowOff + (x / 2u)]; + uint32_t nibble = (x & 1u) ? ((srcByte >> 4) & 0xFu) : (srcByte & 0xFu); + + uint32_t vx = dsax + x; + uint32_t vy = dsay + y; + uint32_t nibbleAddr = GSPSMT4::addrPSMT4(dbp, widthBlocks, vx, vy); + uint32_t byteOff = nibbleAddr >> 1; + + if (byteOff < m_vramSize) + { + int shift = static_cast((nibbleAddr & 1u) << 2); + uint8_t &b = m_vram[byteOff]; + b = static_cast((b & (0xF0u >> shift)) | ((nibble & 0x0Fu) << shift)); + } } - rlEnd(); - break; } + m_hwregX = 0; + m_hwregY = rrh; + } + else if (dpsm == GS_PSM_CT24 || dpsm == GS_PSM_Z24) + { + uint32_t storageBpp = 4; + uint32_t transferBpp = 3; + uint32_t storageStride = stridePixels * storageBpp; - case GS_GPU_POINT: + uint32_t offset = 0; + while (offset < sizeBytes && m_hwregY < rrh) { - const GsGpuVertex &v = prim.verts[0]; - rlBegin(RL_TRIANGLES); - rlColor4ub(v.r, v.g, v.b, v.a); - rlVertex3f(v.x - 0.5f, v.y - 0.5f, v.z); - rlVertex3f(v.x + 0.5f, v.y - 0.5f, v.z); - rlVertex3f(v.x, v.y + 0.5f, v.z); - rlEnd(); - break; + uint32_t dstY = dsay + m_hwregY; + uint32_t pixelsLeft = rrw - m_hwregX; + uint32_t srcBytesLeft = pixelsLeft * transferBpp; + uint32_t bytesAvail = sizeBytes - offset; + uint32_t pixelsToCopy = pixelsLeft; + if (srcBytesLeft > bytesAvail) + pixelsToCopy = bytesAvail / transferBpp; + + if (pixelsToCopy == 0) + break; + + uint32_t dstOff = base + dstY * storageStride + (dsax + m_hwregX) * storageBpp; + if (dstOff + pixelsToCopy * storageBpp <= m_vramSize && pixelsToCopy > 0) + { + for (uint32_t p = 0; p < pixelsToCopy; ++p) + { + m_vram[dstOff + p * 4 + 0] = data[offset + p * 3 + 0]; + m_vram[dstOff + p * 4 + 1] = data[offset + p * 3 + 1]; + m_vram[dstOff + p * 4 + 2] = data[offset + p * 3 + 2]; + m_vram[dstOff + p * 4 + 3] = 0x80; + } + } + + offset += pixelsToCopy * transferBpp; + m_hwregX += pixelsToCopy; + if (m_hwregX >= rrw) + { + m_hwregX = 0; + ++m_hwregY; + } } + } + else + { + uint32_t bytesPerPixel = bpp / 8u; + if (bytesPerPixel == 0) bytesPerPixel = 4; + uint32_t strideBytes = stridePixels * bytesPerPixel; + uint32_t rowBytes = rrw * bytesPerPixel; + + uint32_t offset = 0; + while (offset < sizeBytes && m_hwregY < rrh) + { + uint32_t dstY = dsay + m_hwregY; + uint32_t pixelsLeft = rrw - m_hwregX; + uint32_t bytesLeft = pixelsLeft * bytesPerPixel; + uint32_t bytesAvail = sizeBytes - offset; + if (bytesLeft > bytesAvail) + bytesLeft = (bytesAvail / bytesPerPixel) * bytesPerPixel; + + uint32_t dstOff = base + dstY * strideBytes + (dsax + m_hwregX) * bytesPerPixel; + if (dstOff + bytesLeft <= m_vramSize && bytesLeft > 0) + std::memcpy(m_vram + dstOff, data + offset, bytesLeft); + + uint32_t pixelsCopied = bytesLeft / bytesPerPixel; + offset += bytesLeft; + m_hwregX += pixelsCopied; + if (m_hwregX >= rrw) + { + m_hwregX = 0; + ++m_hwregY; + } } } +} - rlDrawRenderBatchActive(); +void GS::performLocalToHostToBuffer() +{ + m_localToHostBuffer.clear(); + m_localToHostReadPos = 0; + if (!m_vram) + return; - rlEnableBackfaceCulling(); - rlEnableDepthTest(); + uint32_t sbp = m_bitbltbuf.sbp; + uint8_t sbw = m_bitbltbuf.sbw; + uint8_t spsm = m_bitbltbuf.spsm; - rlMatrixMode(RL_MODELVIEW); - rlPopMatrix(); - rlMatrixMode(RL_PROJECTION); - rlPopMatrix(); + if (sbw == 0) sbw = 1; + uint32_t base = sbp * 256u; + uint32_t bpp = bitsPerPixel(spsm); + uint32_t stridePixels = static_cast(sbw) * 64u; - return true; + uint32_t rrw = m_trxreg.rrw; + uint32_t rrh = m_trxreg.rrh; + uint32_t ssax = m_trxpos.ssax; + uint32_t ssay = m_trxpos.ssay; + + if (bpp == 4) + { + uint32_t rowBytes = (rrw + 1u) / 2u; + if (rowBytes == 0) rowBytes = 1; + m_localToHostBuffer.reserve(rowBytes * rrh); + uint32_t widthBlocks = static_cast(sbw); + for (uint32_t y = 0; y < rrh; ++y) + { + for (uint32_t x = 0; x < rrw; ++x) + { + uint32_t vx = ssax + x; + uint32_t vy = ssay + y; + uint32_t nibbleAddr = GSPSMT4::addrPSMT4(sbp, widthBlocks, vx, vy); + uint32_t byteOff = nibbleAddr >> 1; + uint8_t nibble = 0; + if (byteOff < m_vramSize) + { + int shift = static_cast((nibbleAddr & 1u) << 2); + nibble = static_cast((m_vram[byteOff] >> shift) & 0x0Fu); + } + if (x & 1u) + m_localToHostBuffer.back() = static_cast((m_localToHostBuffer.back() & 0x0Fu) | (nibble << 4)); + else + m_localToHostBuffer.push_back(nibble); + } + } + } + else if (spsm == GS_PSM_CT24 || spsm == GS_PSM_Z24) + { + uint32_t storageBpp = 4; + uint32_t transferBpp = 3; + uint32_t storageStride = stridePixels * storageBpp; + m_localToHostBuffer.reserve(rrw * rrh * transferBpp); + + for (uint32_t y = 0; y < rrh; ++y) + { + for (uint32_t x = 0; x < rrw; ++x) + { + uint32_t srcOff = base + (ssay + y) * storageStride + (ssax + x) * storageBpp; + if (srcOff + 4 <= m_vramSize) + { + m_localToHostBuffer.push_back(m_vram[srcOff + 0]); + m_localToHostBuffer.push_back(m_vram[srcOff + 1]); + m_localToHostBuffer.push_back(m_vram[srcOff + 2]); + } + } + } + } + else + { + uint32_t bytesPerPixel = bpp / 8u; + if (bytesPerPixel == 0) bytesPerPixel = 4; + uint32_t strideBytes = stridePixels * bytesPerPixel; + uint32_t rowBytes = rrw * bytesPerPixel; + m_localToHostBuffer.reserve(rowBytes * rrh); + + for (uint32_t y = 0; y < rrh; ++y) + { + uint32_t srcOff = base + (ssay + y) * strideBytes + ssax * bytesPerPixel; + if (srcOff + rowBytes <= m_vramSize) + { + for (uint32_t i = 0; i < rowBytes; ++i) + m_localToHostBuffer.push_back(m_vram[srcOff + i]); + } + } + } +} + +uint32_t GS::consumeLocalToHostBytes(uint8_t *dst, uint32_t maxBytes) +{ + if (!dst || maxBytes == 0) + return 0; + size_t avail = m_localToHostBuffer.size() - m_localToHostReadPos; + if (avail == 0) + return 0; + size_t toCopy = (avail < maxBytes) ? avail : static_cast(maxBytes); + std::memcpy(dst, m_localToHostBuffer.data() + m_localToHostReadPos, toCopy); + m_localToHostReadPos += toCopy; + return static_cast(toCopy); } diff --git a/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp b/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp index 4f1b59a7..2e07bda7 100644 --- a/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp +++ b/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp @@ -1,1039 +1,486 @@ -// Based on Blackline Interactive implementation -#include "ps2_memory.h" +#include "ps2_gs_rasterizer.h" #include "ps2_gs_gpu.h" -#include +#include "ps2_gs_common.h" +#include "ps2_gs_psmt4.h" #include -#include -#include -#include - -enum GSReg : uint8_t -{ - GS_PRIM = 0x00, - GS_RGBAQ = 0x01, - GS_ST = 0x02, - GS_UV = 0x03, - GS_XYZF2 = 0x04, - GS_XYZ2 = 0x05, - GS_TEX0_1 = 0x06, - GS_TEX0_2 = 0x07, - GS_CLAMP_1 = 0x08, - GS_CLAMP_2 = 0x09, - GS_FOG = 0x0A, - GS_XYZF3 = 0x0C, - GS_XYZ3 = 0x0D, - GS_TEX1_1 = 0x14, - GS_TEX1_2 = 0x15, - GS_TEX2_1 = 0x16, - GS_TEX2_2 = 0x17, - GS_XYOFFSET_1 = 0x18, - GS_XYOFFSET_2 = 0x19, - GS_PRMODECONT = 0x1A, - GS_PRMODE = 0x1B, - GS_TEXCLUT = 0x1C, - GS_SCANMSK = 0x22, - GS_MIPTBP1_1 = 0x34, - GS_MIPTBP1_2 = 0x35, - GS_MIPTBP2_1 = 0x36, - GS_MIPTBP2_2 = 0x37, - GS_TEXA = 0x3B, - GS_FOGCOL = 0x3D, - GS_TEXFLUSH = 0x3F, - GS_SCISSOR_1 = 0x40, - GS_SCISSOR_2 = 0x41, - GS_ALPHA_1 = 0x42, - GS_ALPHA_2 = 0x43, - GS_DIMX = 0x44, - GS_DTHE = 0x45, - GS_COLCLAMP = 0x46, - GS_TEST_1 = 0x47, - GS_TEST_2 = 0x48, - GS_PABE = 0x49, - GS_FBA_1 = 0x4A, - GS_FBA_2 = 0x4B, - GS_FRAME_1 = 0x4C, - GS_FRAME_2 = 0x4D, - GS_ZBUF_1 = 0x4E, - GS_ZBUF_2 = 0x4F, - GS_BITBLTBUF = 0x50, - GS_TRXPOS = 0x51, - GS_TRXREG = 0x52, - GS_TRXDIR = 0x53, - GS_HWREG = 0x54, - GS_SIGNAL = 0x60, - GS_FINISH = 0x61, - GS_LABEL = 0x62, -}; - -namespace -{ +#include +#include - struct GSVertex - { - float x, y, z; // screen coords (after 12.4 fixed -> float) - uint8_t r, g, b, a; // vertex color - float s, t, q; // texture coords - uint16_t u, v; // UV coords (14.0 fixed) - }; +using namespace GSInternal; - struct GSInternalRegs +void GSRasterizer::drawPrimitive(GS *gs) +{ + switch (gs->m_prim.type) { - // Current primitive - uint32_t prim = 0; - // Color - uint8_t r = 128, g = 128, b = 128, a = 128; - float q = 1.0f; - // Texture coords - float s = 0, t = 0; - uint16_t u = 0, v = 0; - // Frame buffer - uint32_t fbp = 0; // frame buffer base pointer (in pages) - uint32_t fbw = 10; // frame buffer width (64-pixel units) - uint32_t psm = 0; // pixel storage mode - uint32_t fbmsk = 0; // frame buffer write mask - // Scissor - uint32_t scax0 = 0, scax1 = 639, scay0 = 0, scay1 = 447; - // XY offset (12.4 fixed point) - uint32_t ofx = 0, ofy = 0; - // Texture - uint64_t tex0 = 0; - uint32_t tbp0 = 0; // texture base pointer - uint32_t tbw = 0; // texture buffer width - uint32_t tpsm = 0; // texture pixel storage mode - uint32_t tw = 0, th = 0; // texture width/height (log2) - // Alpha - uint64_t alpha = 0; - // Test - uint64_t test = 0; - // BITBLTBUF / TRXPOS / TRXREG / TRXDIR for image transfers - uint64_t bitbltbuf = 0; - uint64_t trxpos = 0; - uint64_t trxreg = 0; - uint64_t trxdir = 0; - // Image transfer state - bool xferActive = false; - uint32_t xferX = 0, xferY = 0; - uint32_t xferW = 0, xferH = 0; - uint32_t xferDBP = 0, xferDBW = 0, xferDPSM = 0; - uint32_t xferDstX = 0, xferDstY = 0; - uint32_t xferPixelsWritten = 0; - // Vertex queue for primitive assembly - GSVertex vtxQueue[3]; - int vtxCount = 0; - int vtxKick = 0; // vertices needed for current prim - // Stats - uint32_t gifTagsProcessed = 0; - uint32_t adWrites = 0; - uint32_t imageQWs = 0; - uint32_t primsDrawn = 0; - // PRMODECONT / PRMODE - uint32_t prmodecont = 1; // 1 = use PRIM bits, 0 = use PRMODE bits - uint32_t prmode = 0; - }; - - static GSInternalRegs g_gsRegs; - static std::mutex g_gsRegsMutex; // protects g_gsRegs from game/render thread races - static int g_gsLogCount = 0; - - // PSMCT24 bit accumulator for HWREG transfers. - // Each 64-bit write delivers 64 bits; each pixel is 24 bits. - // We accumulate leftover bits across writes. - static uint64_t g_psmct24_accBits = 0; - static int g_psmct24_accCount = 0; // bits currently in accumulator - - // PSMCT32: 4 bytes per pixel, standard layout per block - // For FRAME register: FBP is in words/2048 => base = fbp * 2048 * 4 bytes - inline uint32_t gsVramOffset32_Frame(uint32_t fbp, uint32_t fbw, uint32_t x, uint32_t y) + case GS_PRIM_SPRITE: + drawSprite(gs); + break; + case GS_PRIM_TRIANGLE: + case GS_PRIM_TRISTRIP: + case GS_PRIM_TRIFAN: + drawTriangle(gs); + break; + case GS_PRIM_LINE: + case GS_PRIM_LINESTRIP: + drawLine(gs); + break; + case GS_PRIM_POINT: { - const uint32_t baseBytes = fbp * 2048u * 4u; - const uint32_t stride = (fbw ? fbw : 10u) * 64u * 4u; // bytes per row - return baseBytes + y * stride + x * 4u; + const GSVertex &v = gs->m_vtxQueue[0]; + const auto &ctx = gs->activeContext(); + int px = static_cast(v.x) - (ctx.xyoffset.ofx >> 4); + int py = static_cast(v.y) - (ctx.xyoffset.ofy >> 4); + writePixel(gs, px, py, v.r, v.g, v.b, v.a); + break; } - - // For BITBLTBUF/TEX0: BP is in words/64 => base = bp * 64 * 4 bytes - inline uint32_t gsVramOffset32_BP64(uint32_t bp, uint32_t bw, uint32_t x, uint32_t y) - { - const uint32_t baseBytes = bp * 64u * 4u; - const uint32_t stride = (bw ? bw : 1u) * 64u * 4u; - return baseBytes + y * stride + x * 4u; + default: + break; } +} - inline void writePixel32(uint8_t *vram, uint32_t bp, uint32_t bw, - uint32_t x, uint32_t y, uint8_t r, uint8_t g, uint8_t b, uint8_t a, - uint32_t vramSize, bool isBP64 = false) - { - uint32_t off = isBP64 ? gsVramOffset32_BP64(bp, bw, x, y) - : gsVramOffset32_Frame(bp, bw, x, y); - if (off + 3 < vramSize) - { - vram[off + 0] = r; - vram[off + 1] = g; - vram[off + 2] = b; - vram[off + 3] = a; - } - } +void GSRasterizer::writePixel(GS *gs, int x, int y, uint8_t r, uint8_t g, uint8_t b, uint8_t a) +{ + const auto &ctx = gs->activeContext(); + if (x < ctx.scissor.x0 || x > ctx.scissor.x1 || + y < ctx.scissor.y0 || y > ctx.scissor.y1) + return; - inline void readPixel32(const uint8_t *vram, uint32_t bp, uint32_t bw, - uint32_t x, uint32_t y, uint8_t &r, uint8_t &g, uint8_t &b, uint8_t &a, - uint32_t vramSize, bool isBP64 = false) - { - uint32_t off = isBP64 ? gsVramOffset32_BP64(bp, bw, x, y) - : gsVramOffset32_Frame(bp, bw, x, y); - if (off + 3 < vramSize) - { - r = vram[off + 0]; - g = vram[off + 1]; - b = vram[off + 2]; - a = vram[off + 3]; - } - else - { - r = g = b = a = 0; - } - } + uint32_t fbBase = ctx.frame.fbp * 8192u; + uint32_t stride = fbStride(ctx.frame.fbw, ctx.frame.psm); + if (stride == 0) return; + + uint32_t off = fbBase + static_cast(y) * stride + static_cast(x) * 4u; + if (off + 4 > gs->m_vramSize) + return; - inline GsGpuVertex toGpuVertex(const GSVertex &v) + if (gs->m_prim.abe) { - GsGpuVertex gv{}; - gv.x = v.x; - gv.y = v.y; - gv.z = v.z; - gv.r = v.r; - gv.g = v.g; - gv.b = v.b; - gv.a = v.a; - gv.u = v.s; // TODO: proper tex coord mapping - gv.v = v.t; - return gv; + uint32_t existing; + std::memcpy(&existing, gs->m_vram + off, 4); + uint8_t dr = existing & 0xFF; + uint8_t dg = (existing >> 8) & 0xFF; + uint8_t db = (existing >> 16) & 0xFF; + uint8_t da = (existing >> 24) & 0xFF; + + uint64_t alphaReg = ctx.alpha; + uint8_t asel = alphaReg & 3; + uint8_t bsel = (alphaReg >> 2) & 3; + uint8_t csel = (alphaReg >> 4) & 3; + uint8_t dsel = (alphaReg >> 6) & 3; + uint8_t fix = static_cast((alphaReg >> 32) & 0xFF); + + auto pickRGB = [&](uint8_t sel, int cs, int cd) -> int { + if (sel == 0) return cs; + if (sel == 1) return cd; + return 0; + }; + int cAlpha = (csel == 0) ? a : (csel == 1) ? da : fix; + + r = clampU8(((pickRGB(asel, r, dr) - pickRGB(bsel, r, dr)) * cAlpha >> 7) + pickRGB(dsel, r, dr)); + g = clampU8(((pickRGB(asel, g, dg) - pickRGB(bsel, g, dg)) * cAlpha >> 7) + pickRGB(dsel, g, dg)); + b = clampU8(((pickRGB(asel, b, db) - pickRGB(bsel, b, db)) * cAlpha >> 7) + pickRGB(dsel, b, db)); } - // Emit a QUAD (axis-aligned rectangle from SPRITE primitive) - void drawSprite(uint8_t * /*vram*/, uint32_t /*vramSize*/) + uint32_t pixel = static_cast(r) + | (static_cast(g) << 8) + | (static_cast(b) << 16) + | (static_cast(a) << 24); + + uint32_t mask = ctx.frame.fbmsk; + if (mask != 0) { - auto &gs = g_gsRegs; - if (gs.vtxCount < 2) - return; - - GSVertex &v0 = gs.vtxQueue[0]; - GSVertex &v1 = gs.vtxQueue[1]; - - float x0 = std::max(v0.x, (float)gs.scax0); - float y0 = std::max(v0.y, (float)gs.scay0); - float x1 = std::min(v1.x, (float)(gs.scax1 + 1)); - float y1 = std::min(v1.y, (float)(gs.scay1 + 1)); - - if (x0 >= x1 || y0 >= y1) - return; - - // Use the second vertex color (PS2 SPRITE convention) - uint8_t r = v1.r, g = v1.g, b = v1.b, a = v1.a; - float z = v1.z; - - GsGpuPrimitive prim{}; - prim.type = GS_GPU_QUAD; - prim.vertexCount = 4; - // v0=Top-left, v1=Top-right, v2=Bottom-right, v3=Bottom-left - prim.verts[0] = {x0, y0, z, r, g, b, a, 0.0f, 0.0f}; - prim.verts[1] = {x1, y0, z, r, g, b, a, 1.0f, 0.0f}; - prim.verts[2] = {x1, y1, z, r, g, b, a, 1.0f, 1.0f}; - prim.verts[3] = {x0, y1, z, r, g, b, a, 0.0f, 1.0f}; - - gsGpuGetFrameData().pushPrimitive(prim); - gs.primsDrawn++; + uint32_t existing; + std::memcpy(&existing, gs->m_vram + off, 4); + pixel = (pixel & ~mask) | (existing & mask); } - void drawTriangle(uint8_t * /*vram*/, uint32_t /*vramSize*/) - { - auto &gs = g_gsRegs; - if (gs.vtxCount < 3) - return; + std::memcpy(gs->m_vram + off, &pixel, 4); +} - bool gouraud; - { - // Determine effective primitive attributes (PRMODECONT) - uint32_t effectivePrim = (gs.prmodecont == 1) ? gs.prim : ((gs.prim & 0x7) | (gs.prmode & ~0x7)); - gouraud = (effectivePrim >> 3) & 1; // IIP bit - } +uint32_t GSRasterizer::readTexelPSMCT32(GS *gs, uint32_t tbp0, uint32_t tbw, int texU, int texV) +{ + if (tbw == 0) tbw = 1; + uint32_t base = tbp0 * 256u; + uint32_t stride = tbw * 64u * 4u; + uint32_t off = base + static_cast(texV) * stride + static_cast(texU) * 4u; + if (off + 4 > gs->m_vramSize) + return 0xFFFF00FFu; + uint32_t texel; + std::memcpy(&texel, gs->m_vram + off, 4); + return texel; +} - GsGpuPrimitive prim{}; - prim.type = GS_GPU_TRIANGLE; - prim.vertexCount = 3; +uint32_t GSRasterizer::readTexelPSMT4(GS *gs, uint32_t tbp0, uint32_t tbw, int texU, int texV) +{ + if (tbw == 0) tbw = 1; + uint32_t nibbleAddr = GSPSMT4::addrPSMT4(tbp0, tbw, static_cast(texU), static_cast(texV)); + uint32_t byteOff = nibbleAddr >> 1; + if (byteOff >= gs->m_vramSize) + return 0; + uint8_t packed = gs->m_vram[byteOff]; + uint32_t shift = (nibbleAddr & 1u) << 2; + uint32_t idx = (packed >> shift) & 0xFu; + return idx; +} - if (gouraud) - { - // Per-vertex colors - prim.verts[0] = toGpuVertex(gs.vtxQueue[0]); - prim.verts[1] = toGpuVertex(gs.vtxQueue[1]); - prim.verts[2] = toGpuVertex(gs.vtxQueue[2]); - } - else - { - // Flat shading: use last vertex color for all - prim.verts[0] = toGpuVertex(gs.vtxQueue[0]); - prim.verts[1] = toGpuVertex(gs.vtxQueue[1]); - prim.verts[2] = toGpuVertex(gs.vtxQueue[2]); - // Override colors to match PS2 flat shading (last vertex) - uint8_t r = gs.vtxQueue[2].r, g = gs.vtxQueue[2].g; - uint8_t b = gs.vtxQueue[2].b, a = gs.vtxQueue[2].a; - prim.verts[0].r = r; - prim.verts[0].g = g; - prim.verts[0].b = b; - prim.verts[0].a = a; - prim.verts[1].r = r; - prim.verts[1].g = g; - prim.verts[1].b = b; - prim.verts[1].a = a; - } +uint32_t GSRasterizer::lookupCLUT(GS *gs, uint8_t index, uint32_t cbp, uint8_t cpsm, uint8_t csa) +{ + uint32_t clutBase = cbp * 256u; - gsGpuGetFrameData().pushPrimitive(prim); - gs.primsDrawn++; + if (cpsm == GS_PSM_CT32 || cpsm == GS_PSM_CT24) + { + uint32_t off = clutBase + (static_cast(csa) * 16u + index) * 4u; + if (off + 4 > gs->m_vramSize) + return 0xFFFF00FFu; + uint32_t color; + std::memcpy(&color, gs->m_vram + off, 4); + return color; } - void drawLine(uint8_t * /*vram*/, uint32_t /*vramSize*/) + if (cpsm == GS_PSM_CT16 || cpsm == GS_PSM_CT16S) { - auto &gs = g_gsRegs; - if (gs.vtxCount < 2) - return; + uint32_t off = clutBase + (static_cast(csa) * 16u + index) * 2u; + if (off + 2 > gs->m_vramSize) + return 0xFFFF00FFu; + uint16_t c16; + std::memcpy(&c16, gs->m_vram + off, 2); + uint32_t r = ((c16 >> 0) & 0x1Fu) << 3; + uint32_t g = ((c16 >> 5) & 0x1Fu) << 3; + uint32_t b = ((c16 >> 10) & 0x1Fu) << 3; + uint32_t a = (c16 & 0x8000u) ? 0x80u : 0u; + return r | (g << 8) | (b << 16) | (a << 24); + } - // Determine effective primitive attributes (PRMODECONT) - uint32_t effectivePrim = (gs.prmodecont == 1) ? gs.prim : ((gs.prim & 0x7) | (gs.prmode & ~0x7)); - bool gouraud = (effectivePrim >> 3) & 1; // IIP bit + return 0xFFFF00FFu; +} - GsGpuPrimitive prim{}; - prim.type = GS_GPU_LINE; - prim.vertexCount = 2; - prim.verts[0] = toGpuVertex(gs.vtxQueue[0]); - prim.verts[1] = toGpuVertex(gs.vtxQueue[1]); +uint32_t GSRasterizer::sampleTexture(GS *gs, float s, float t, uint16_t u, uint16_t v) +{ + const auto &ctx = gs->activeContext(); + const auto &tex = ctx.tex0; - if (!gouraud) - { - // Flat shading: use last vertex color - prim.verts[0].r = prim.verts[1].r; - prim.verts[0].g = prim.verts[1].g; - prim.verts[0].b = prim.verts[1].b; - prim.verts[0].a = prim.verts[1].a; - } + int texW = 1 << tex.tw; + int texH = 1 << tex.th; + if (texW == 0) texW = 1; + if (texH == 0) texH = 1; - gsGpuGetFrameData().pushPrimitive(prim); - gs.primsDrawn++; + int texU, texV; + if (gs->m_prim.fst) + { + texU = static_cast(u >> 4); + texV = static_cast(v >> 4); } - - // Submit vertex (XYZ2/XYZ3/XYZF2) - void submitVertex(uint8_t *vram, uint32_t vramSize, bool drawing) + else { - auto &gs = g_gsRegs; - int primType = gs.prim & 0x7; - - switch (primType) - { - case 0: // POINT - if (gs.vtxCount >= 1 && drawing) - { - GsGpuPrimitive prim{}; - prim.type = GS_GPU_POINT; - prim.vertexCount = 1; - prim.verts[0] = toGpuVertex(gs.vtxQueue[0]); - gsGpuGetFrameData().pushPrimitive(prim); - gs.primsDrawn++; - gs.vtxCount = 0; - } - break; - case 1: // LINE - if (gs.vtxCount >= 2 && drawing) - { - drawLine(vram, vramSize); - gs.vtxCount = 0; - } - break; - case 2: // LINE_STRIP - if (gs.vtxCount >= 2 && drawing) - { - drawLine(vram, vramSize); - gs.vtxQueue[0] = gs.vtxQueue[1]; - gs.vtxCount = 1; - } - break; - case 3: // TRIANGLE - if (gs.vtxCount >= 3 && drawing) - { - drawTriangle(vram, vramSize); - gs.vtxCount = 0; - } - break; - case 4: // TRIANGLE_STRIP - if (gs.vtxCount >= 3 && drawing) - { - drawTriangle(vram, vramSize); - gs.vtxQueue[0] = gs.vtxQueue[1]; - gs.vtxQueue[1] = gs.vtxQueue[2]; - gs.vtxCount = 2; - } - break; - case 5: // TRIANGLE_FAN - if (gs.vtxCount >= 3 && drawing) - { - drawTriangle(vram, vramSize); - gs.vtxQueue[1] = gs.vtxQueue[2]; - gs.vtxCount = 2; - } - break; - case 6: // SPRITE - if (gs.vtxCount >= 2 && drawing) - { - drawSprite(vram, vramSize); - gs.vtxCount = 0; - } - break; - default: - gs.vtxCount = 0; - break; - } + float invQ = (gs->m_curQ != 0.0f) ? (1.0f / gs->m_curQ) : 1.0f; + texU = static_cast(s * invQ * static_cast(texW)); + texV = static_cast(t * invQ * static_cast(texH)); } - void handleADWrite(uint64_t data, uint8_t reg, uint8_t *vram, uint32_t vramSize, - PS2Memory::GSDrawContext &drawCtx) + texU = clampInt(texU, 0, texW - 1); + texV = clampInt(texV, 0, texH - 1); + + if (tex.psm == GS_PSM_CT32 || tex.psm == GS_PSM_CT24) + return readTexelPSMCT32(gs, tex.tbp0, tex.tbw, texU, texV); + + if (tex.psm == GS_PSM_T4) { - auto &gs = g_gsRegs; - gs.adWrites++; + uint32_t idx = readTexelPSMT4(gs, tex.tbp0, tex.tbw, texU, texV); + return lookupCLUT(gs, static_cast(idx), tex.cbp, tex.cpsm, tex.csa); + } - switch (reg) - { - case GS_PRIM: - gs.prim = (uint32_t)(data & 0x7FF); - gs.vtxCount = 0; // reset vertex queue on new prim - break; + if (tex.psm == GS_PSM_T8) + { + if (tex.tbw == 0) return 0xFFFF00FFu; + uint32_t base = tex.tbp0 * 256u; + uint32_t stride = static_cast(tex.tbw) * 64u; + uint32_t off = base + static_cast(texV) * stride + static_cast(texU); + if (off >= gs->m_vramSize) return 0xFFFF00FFu; + uint8_t idx = gs->m_vram[off]; + return lookupCLUT(gs, idx, tex.cbp, tex.cpsm, tex.csa); + } - case GS_RGBAQ: - gs.r = data & 0xFF; - gs.g = (data >> 8) & 0xFF; - gs.b = (data >> 16) & 0xFF; - gs.a = (data >> 24) & 0xFF; - // Q is in bits 32-63 as float - { - uint32_t qBits = (uint32_t)(data >> 32); - memcpy(&gs.q, &qBits, 4); - if (!std::isfinite(gs.q) || gs.q == 0.0f) - gs.q = 1.0f; - } - break; + return 0xFFFF00FFu; +} - case GS_ST: - { - uint32_t sBits = (uint32_t)(data & 0xFFFFFFFF); - uint32_t tBits = (uint32_t)(data >> 32); - memcpy(&gs.s, &sBits, 4); - memcpy(&gs.t, &tBits, 4); - } - break; +void GSRasterizer::drawSprite(GS *gs) +{ + const GSVertex &v0 = gs->m_vtxQueue[0]; + const GSVertex &v1 = gs->m_vtxQueue[1]; + const auto &ctx = gs->activeContext(); - case GS_UV: - gs.u = (uint16_t)(data & 0x3FFF); - gs.v = (uint16_t)((data >> 16) & 0x3FFF); - break; + int ofx = ctx.xyoffset.ofx >> 4; + int ofy = ctx.xyoffset.ofy >> 4; - case GS_XYZ2: - case GS_XYZ3: - { - // XYZ2 triggers drawing kick, XYZ3 does not - bool drawing = (reg == GS_XYZ2); + int x0 = static_cast(v0.x) - ofx; + int y0 = static_cast(v0.y) - ofy; + int x1 = static_cast(v1.x) - ofx; + int y1 = static_cast(v1.y) - ofy; - // X,Y are 16-bit 12.4 fixed point - uint32_t xFix = (uint32_t)(data & 0xFFFF); - uint32_t yFix = (uint32_t)((data >> 16) & 0xFFFF); - uint32_t z = (uint32_t)(data >> 32); + if (x0 > x1) std::swap(x0, x1); + if (y0 > y1) std::swap(y0, y1); - float x = (float)((int32_t)xFix - (drawing ? (int32_t)(gs.ofx & 0xFFFF) : 0)) / 16.0f; - float y = (float)((int32_t)yFix - (drawing ? (int32_t)(gs.ofy & 0xFFFF) : 0)) / 16.0f; - float zf = (float)z / 4294967295.0f; // normalize 32-bit Z to [0,1] + x0 = clampInt(x0, ctx.scissor.x0, ctx.scissor.x1); + y0 = clampInt(y0, ctx.scissor.y0, ctx.scissor.y1); + x1 = clampInt(x1, ctx.scissor.x0, ctx.scissor.x1); + y1 = clampInt(y1, ctx.scissor.y0, ctx.scissor.y1); - if (gs.vtxCount < 3) - { - GSVertex &v = gs.vtxQueue[gs.vtxCount]; - v.x = x; - v.y = y; - v.z = zf; - v.r = gs.r; - v.g = gs.g; - v.b = gs.b; - v.a = gs.a; - v.s = gs.s; - v.t = gs.t; - v.q = gs.q; - v.u = gs.u; - v.v = gs.v; - gs.vtxCount++; - } + uint8_t r = v1.r, g = v1.g, b = v1.b, a = v1.a; - submitVertex(vram, vramSize, drawing); - break; + if (gs->m_prim.tme) + { + const auto &tex = ctx.tex0; + int texW = 1 << tex.tw; + int texH = 1 << tex.th; + if (texW == 0) texW = 1; + if (texH == 0) texH = 1; + + float u0f, v0f, u1f, v1f; + if (gs->m_prim.fst) + { + u0f = static_cast(v0.u >> 4); + v0f = static_cast(v0.v >> 4); + u1f = static_cast(v1.u >> 4); + v1f = static_cast(v1.v >> 4); } - - case GS_XYZF2: - case GS_XYZF3: + else { - bool drawing = (reg == GS_XYZF2); - uint32_t xFix = (uint32_t)(data & 0xFFFF); - uint32_t yFix = (uint32_t)((data >> 16) & 0xFFFF); - // Z is bits 32-55, F is bits 56-63 - uint32_t z = (uint32_t)((data >> 32) & 0xFFFFFF); - - float x = (float)((int32_t)xFix - (drawing ? (int32_t)(gs.ofx & 0xFFFF) : 0)) / 16.0f; - float y = (float)((int32_t)yFix - (drawing ? (int32_t)(gs.ofy & 0xFFFF) : 0)) / 16.0f; - float zf = (float)z / 16777215.0f; // normalize 24-bit Z to [0,1] - - if (gs.vtxCount < 3) - { - GSVertex &v = gs.vtxQueue[gs.vtxCount]; - v.x = x; - v.y = y; - v.z = zf; - v.r = gs.r; - v.g = gs.g; - v.b = gs.b; - v.a = gs.a; - v.s = gs.s; - v.t = gs.t; - v.q = gs.q; - v.u = gs.u; - v.v = gs.v; - gs.vtxCount++; - } - - submitVertex(vram, vramSize, drawing); - break; + u0f = v0.s * static_cast(texW); + v0f = v0.t * static_cast(texH); + u1f = v1.s * static_cast(texW); + v1f = v1.t * static_cast(texH); } - case GS_FRAME_1: - case GS_FRAME_2: // Context 2 — alias to Context 1 - gs.fbp = (uint32_t)(data & 0x1FF); - gs.fbw = (uint32_t)((data >> 16) & 0x3F); - gs.psm = (uint32_t)((data >> 24) & 0x3F); - gs.fbmsk = (uint32_t)(data >> 32); - break; - - case GS_SCISSOR_1: - case GS_SCISSOR_2: // Context 2 — alias to Context 1 - gs.scax0 = (uint32_t)(data & 0x7FF); - gs.scax1 = (uint32_t)((data >> 16) & 0x7FF); - gs.scay0 = (uint32_t)((data >> 32) & 0x7FF); - gs.scay1 = (uint32_t)((data >> 48) & 0x7FF); - break; - - case GS_XYOFFSET_1: - case GS_XYOFFSET_2: // Context 2 — alias to Context 1 - gs.ofx = (uint32_t)(data & 0xFFFF); - gs.ofy = (uint32_t)((data >> 32) & 0xFFFF); - break; - - case GS_TEX0_1: - case GS_TEX0_2: // Context 2 — alias to Context 1 - gs.tex0 = data; - gs.tbp0 = (uint32_t)(data & 0x3FFF); - gs.tbw = (uint32_t)((data >> 14) & 0x3F); - gs.tpsm = (uint32_t)((data >> 20) & 0x3F); - gs.tw = (uint32_t)((data >> 26) & 0xF); - gs.th = (uint32_t)((data >> 30) & 0xF); - break; - - case GS_ALPHA_1: - case GS_ALPHA_2: - gs.alpha = data; - break; - - case GS_TEST_1: - case GS_TEST_2: - gs.test = data; - break; - - case GS_PRMODECONT: - gs.prmodecont = (uint32_t)(data & 1); - break; + float spriteW = static_cast(x1 - x0); + float spriteH = static_cast(y1 - y0); + if (spriteW < 1.0f) spriteW = 1.0f; + if (spriteH < 1.0f) spriteH = 1.0f; - case GS_PRMODE: - gs.prmode = (uint32_t)(data & 0x7F8); // bits 3-10 - break; - - case GS_BITBLTBUF: - gs.bitbltbuf = data; - drawCtx.bitbltbuf = data; - break; - - case GS_TRXPOS: - gs.trxpos = data; - drawCtx.trxpos = data; - break; - - case GS_TRXREG: - gs.trxreg = data; - drawCtx.trxreg = data; - break; - - case GS_TRXDIR: + for (int y = y0; y <= y1; ++y) { - gs.trxdir = data; - drawCtx.trxdir = data; - uint32_t dir = data & 3; - if (dir == 0) + float ty = (static_cast(y - y0) + 0.5f) / spriteH; + float texVf = v0f + (v1f - v0f) * ty; + int tv = clampInt(static_cast(texVf), 0, texH - 1); + + for (int x = x0; x <= x1; ++x) { - // Host -> Local (texture upload to GS VRAM) - gs.xferActive = true; - gs.xferDBP = (uint32_t)((gs.bitbltbuf >> 32) & 0x3FFF); - gs.xferDBW = (uint32_t)((gs.bitbltbuf >> 46) & 0x3F); - gs.xferDPSM = (uint32_t)((gs.bitbltbuf >> 56) & 0x3F); - gs.xferDstX = (uint32_t)((gs.trxpos >> 32) & 0x7FF); - gs.xferDstY = (uint32_t)((gs.trxpos >> 48) & 0x7FF); - gs.xferW = (uint32_t)(gs.trxreg & 0xFFF); - gs.xferH = (uint32_t)((gs.trxreg >> 32) & 0xFFF); - gs.xferPixelsWritten = 0; - gs.xferX = 0; - gs.xferY = 0; - // Reset PSMCT24 bit accumulator for new transfer. - g_psmct24_accBits = 0; - g_psmct24_accCount = 0; - - drawCtx.xferActive = true; - drawCtx.xferDBP = gs.xferDBP; - drawCtx.xferDBW = gs.xferDBW; - drawCtx.xferDPSM = gs.xferDPSM; - drawCtx.xferDestX = gs.xferDstX; - drawCtx.xferDestY = gs.xferDstY; - drawCtx.xferWidth = gs.xferW; - drawCtx.xferHeight = gs.xferH; - - if (g_gsLogCount < 20) + float tx = (static_cast(x - x0) + 0.5f) / spriteW; + float texUf = u0f + (u1f - u0f) * tx; + int tu = clampInt(static_cast(texUf), 0, texW - 1); + + uint32_t texel; + if (tex.psm == GS_PSM_CT32 || tex.psm == GS_PSM_CT24) + texel = readTexelPSMCT32(gs, tex.tbp0, tex.tbw, tu, tv); + else if (tex.psm == GS_PSM_T4) { - std::cerr << "[GS] IMAGE xfer start: DBP=" << gs.xferDBP - << " DBW=" << gs.xferDBW << " DPSM=" << gs.xferDPSM - << " dst=(" << gs.xferDstX << "," << gs.xferDstY << ")" - << " size=" << gs.xferW << "x" << gs.xferH << std::endl; - g_gsLogCount++; + uint32_t idx = readTexelPSMT4(gs, tex.tbp0, tex.tbw, tu, tv); + texel = lookupCLUT(gs, static_cast(idx), tex.cbp, tex.cpsm, tex.csa); } - drawCtx.imageTransfers++; - } - else if (dir == 1) - { - // Local -> Host (readback) - not needed for rendering - gs.xferActive = false; - } - else if (dir == 2) - { - // Local -> Local (VRAM copy) - uint32_t sbp = (uint32_t)(gs.bitbltbuf & 0x3FFF); - uint32_t sbw = (uint32_t)((gs.bitbltbuf >> 14) & 0x3F); - uint32_t spsm = (uint32_t)((gs.bitbltbuf >> 24) & 0x3F); - uint32_t dbp = (uint32_t)((gs.bitbltbuf >> 32) & 0x3FFF); - uint32_t dbw = (uint32_t)((gs.bitbltbuf >> 46) & 0x3F); - uint32_t dpsm = (uint32_t)((gs.bitbltbuf >> 56) & 0x3F); - uint32_t sx = (uint32_t)(gs.trxpos & 0x7FF); - uint32_t sy = (uint32_t)((gs.trxpos >> 16) & 0x7FF); - uint32_t dx = (uint32_t)((gs.trxpos >> 32) & 0x7FF); - uint32_t dy = (uint32_t)((gs.trxpos >> 48) & 0x7FF); - uint32_t w = (uint32_t)(gs.trxreg & 0xFFF); - uint32_t h = (uint32_t)((gs.trxreg >> 32) & 0xFFF); - - if (spsm == 0 && dpsm == 0 && w > 0 && h > 0) + else if (tex.psm == GS_PSM_T8) { - // PSMCT32 copy — sbp/dbp are BP64 units (from BITBLTBUF) - for (uint32_t row = 0; row < h; row++) - { - for (uint32_t col = 0; col < w; col++) - { - uint8_t r, g, b, a; - readPixel32(vram, sbp, sbw, sx + col, sy + row, r, g, b, a, vramSize, true); - writePixel32(vram, dbp, dbw, dx + col, dy + row, r, g, b, a, vramSize, true); - } - } + uint32_t base = tex.tbp0 * 256u; + uint32_t tbw = tex.tbw ? tex.tbw : 1u; + uint32_t stride = tbw * 64u; + uint32_t off = base + static_cast(tv) * stride + static_cast(tu); + uint8_t idx = (off < gs->m_vramSize) ? gs->m_vram[off] : 0; + texel = lookupCLUT(gs, idx, tex.cbp, tex.cpsm, tex.csa); } - gs.xferActive = false; - } - break; - } + else + texel = 0xFFFF00FFu; - case GS_HWREG: - { - // Image data for Host -> Local transfer - if (!gs.xferActive) - break; + uint8_t tr = static_cast(texel & 0xFF); + uint8_t tg = static_cast((texel >> 8) & 0xFF); + uint8_t tb = static_cast((texel >> 16) & 0xFF); + uint8_t ta = static_cast((texel >> 24) & 0xFF); - // Each HWREG write delivers 64 bits of pixel data - // For PSMCT32: 2 pixels per write - if (gs.xferDPSM == 0) - { - // PSMCT32: 2 pixels per 64-bit write - for (int p = 0; p < 2 && gs.xferY < gs.xferH; p++) - { - uint32_t pixel = (p == 0) ? (uint32_t)(data & 0xFFFFFFFF) : (uint32_t)(data >> 32); - uint32_t dx = gs.xferDstX + gs.xferX; - uint32_t dy = gs.xferDstY + gs.xferY; - writePixel32(vram, gs.xferDBP, gs.xferDBW, dx, dy, - pixel & 0xFF, (pixel >> 8) & 0xFF, - (pixel >> 16) & 0xFF, (pixel >> 24) & 0xFF, - vramSize, true); // BP64 units for BITBLTBUF - gs.xferX++; - if (gs.xferX >= gs.xferW) - { - gs.xferX = 0; - gs.xferY++; - } - gs.xferPixelsWritten++; - } - } - else if (gs.xferDPSM == 0x13) - { - // PSMT8: 8 pixels per 64-bit write - for (int p = 0; p < 8 && gs.xferY < gs.xferH; p++) - { - uint8_t idx = (uint8_t)(data >> (p * 8)); - // For indexed textures, store index as grayscale - uint32_t dx = gs.xferDstX + gs.xferX; - uint32_t dy = gs.xferDstY + gs.xferY; - writePixel32(vram, gs.xferDBP, gs.xferDBW, dx, dy, - idx, idx, idx, 255, vramSize, true); - gs.xferX++; - if (gs.xferX >= gs.xferW) - { - gs.xferX = 0; - gs.xferY++; - } - gs.xferPixelsWritten++; - } - } - else if (gs.xferDPSM == 0x14) - { - // PSMT4: 16 pixels per 64-bit write - for (int p = 0; p < 16 && gs.xferY < gs.xferH; p++) + uint8_t fr, fg, fb, fa; + if (tex.tfx == 0) { - uint8_t idx = (uint8_t)((data >> (p * 4)) & 0xF); - uint32_t dx = gs.xferDstX + gs.xferX; - uint32_t dy = gs.xferDstY + gs.xferY; - writePixel32(vram, gs.xferDBP, gs.xferDBW, dx, dy, - idx * 17, idx * 17, idx * 17, 255, vramSize, true); - gs.xferX++; - if (gs.xferX >= gs.xferW) - { - gs.xferX = 0; - gs.xferY++; - } - gs.xferPixelsWritten++; + fr = clampU8((tr * r) >> 7); + fg = clampU8((tg * g) >> 7); + fb = clampU8((tb * b) >> 7); + fa = ta; } - } - else if (gs.xferDPSM == 0x01) - { - // PSMCT24: 24-bit pixels. Use a bit accumulator to handle - // the non-aligned boundary (64 bits / 24 bits = 2.67 pixels). - g_psmct24_accBits |= (data << g_psmct24_accCount); - g_psmct24_accCount += 64; - - while (g_psmct24_accCount >= 24 && gs.xferY < gs.xferH) + else if (tex.tfx == 1) { - uint32_t pixel = (uint32_t)(g_psmct24_accBits & 0xFFFFFF); - g_psmct24_accBits >>= 24; - g_psmct24_accCount -= 24; - - uint32_t dx = gs.xferDstX + gs.xferX; - uint32_t dy = gs.xferDstY + gs.xferY; - writePixel32(vram, gs.xferDBP, gs.xferDBW, dx, dy, - pixel & 0xFF, (pixel >> 8) & 0xFF, - (pixel >> 16) & 0xFF, 0x80, - vramSize, true); - gs.xferX++; - if (gs.xferX >= gs.xferW) - { - gs.xferX = 0; - gs.xferY++; - } - gs.xferPixelsWritten++; + fr = tr; fg = tg; fb = tb; fa = ta; } - } - else if (gs.xferDPSM == 0x30 || gs.xferDPSM == 0x31) - { - // PSMZ32/PSMZ24: z-buffer - skip for now - } - else - { - // PSMCT16/PSMCT16S: 4 pixels per 64-bit write - for (int p = 0; p < 4 && gs.xferY < gs.xferH; p++) + else { - uint16_t pixel16 = (uint16_t)((data >> (p * 16)) & 0xFFFF); - uint8_t r = (pixel16 & 0x1F) << 3; - uint8_t g = ((pixel16 >> 5) & 0x1F) << 3; - uint8_t b = ((pixel16 >> 10) & 0x1F) << 3; - uint8_t a = (pixel16 & 0x8000) ? 128 : 0; - uint32_t dx = gs.xferDstX + gs.xferX; - uint32_t dy = gs.xferDstY + gs.xferY; - writePixel32(vram, gs.xferDBP, gs.xferDBW, dx, dy, r, g, b, a, vramSize, true); - gs.xferX++; - if (gs.xferX >= gs.xferW) - { - gs.xferX = 0; - gs.xferY++; - } - gs.xferPixelsWritten++; + fr = clampU8((tr * r) >> 7); + fg = clampU8((tg * g) >> 7); + fb = clampU8((tb * b) >> 7); + fa = ta; } - } - if (gs.xferY >= gs.xferH) - { - gs.xferActive = false; - drawCtx.xferActive = false; - drawCtx.xferPixelsWritten = gs.xferPixelsWritten; + writePixel(gs, x, y, fr, fg, fb, fa); } - break; - } - - case GS_TEXFLUSH: - // Texture cache flush - no-op for software renderer - break; - - default: - // Ignore unknown registers, maybe we should log? - break; } } - + else + { + for (int y = y0; y <= y1; ++y) + for (int x = x0; x <= x1; ++x) + writePixel(gs, x, y, r, g, b, a); + } } -void PS2Memory::processGIFPacket(uint32_t srcAddr, uint32_t qwCount) +void GSRasterizer::drawTriangle(GS *gs) { - if (!m_rdram || !m_gsVRAM || qwCount == 0) - return; - - if (srcAddr >= PS2_RAM_SIZE) + const GSVertex &v0 = gs->m_vtxQueue[0]; + const GSVertex &v1 = gs->m_vtxQueue[1]; + const GSVertex &v2 = gs->m_vtxQueue[2]; + const auto &ctx = gs->activeContext(); + + int ofx = ctx.xyoffset.ofx >> 4; + int ofy = ctx.xyoffset.ofy >> 4; + + float fx0 = v0.x - static_cast(ofx); + float fy0 = v0.y - static_cast(ofy); + float fx1 = v1.x - static_cast(ofx); + float fy1 = v1.y - static_cast(ofy); + float fx2 = v2.x - static_cast(ofx); + float fy2 = v2.y - static_cast(ofy); + + int minX = static_cast(std::floor(std::min({fx0, fx1, fx2}))); + int maxX = static_cast(std::ceil(std::max({fx0, fx1, fx2}))); + int minY = static_cast(std::floor(std::min({fy0, fy1, fy2}))); + int maxY = static_cast(std::ceil(std::max({fy0, fy1, fy2}))); + + minX = clampInt(minX, ctx.scissor.x0, ctx.scissor.x1); + maxX = clampInt(maxX, ctx.scissor.x0, ctx.scissor.x1); + minY = clampInt(minY, ctx.scissor.y0, ctx.scissor.y1); + maxY = clampInt(maxY, ctx.scissor.y0, ctx.scissor.y1); + + float denom = (fy1 - fy2) * (fx0 - fx2) + (fx2 - fx1) * (fy0 - fy2); + if (std::fabs(denom) < 0.001f) return; - uint32_t pos = srcAddr; - const uint64_t requestedEnd = static_cast(srcAddr) + static_cast(qwCount) * 16ull; - uint32_t endAddr = requestedEnd > static_cast(PS2_RAM_SIZE) - ? PS2_RAM_SIZE - : static_cast(requestedEnd); + float invDenom = 1.0f / denom; - while (pos + 16 <= endAddr) + for (int y = minY; y <= maxY; ++y) { - // Read GIF tag (128 bits = 16 bytes) - uint64_t lo, hi; - memcpy(&lo, m_rdram + pos, 8); - memcpy(&hi, m_rdram + pos + 8, 8); - pos += 16; - - uint32_t nloop = (uint32_t)(lo & 0x7FFF); - bool eop = (lo >> 15) & 1; - // bool pre = (lo >> 46) & 1; // not used currently - uint32_t prim = (uint32_t)((lo >> 47) & 0x7FF); - uint32_t flg = (uint32_t)((lo >> 58) & 0x3); - uint32_t nreg = (uint32_t)((lo >> 60) & 0xF); - if (nreg == 0) - nreg = 16; - - // PRE bit: if set, write PRIM register - bool pre = (lo >> 46) & 1; - if (pre && flg != 2) - { // not IMAGE mode - g_gsRegs.prim = prim; - g_gsRegs.vtxCount = 0; - } + float py = static_cast(y) + 0.5f; + for (int x = minX; x <= maxX; ++x) + { + float px = static_cast(x) + 0.5f; - g_gsRegs.gifTagsProcessed++; - m_gsDrawCtx.gifTagsProcessed = g_gsRegs.gifTagsProcessed; + float w0 = ((fy1 - fy2) * (px - fx2) + (fx2 - fx1) * (py - fy2)) * invDenom; + float w1 = ((fy2 - fy0) * (px - fx2) + (fx0 - fx2) * (py - fy2)) * invDenom; + float w2 = 1.0f - w0 - w1; - // GS Q register resets to 1.0f when a GIFtag is read (ps2tek spec) - g_gsRegs.q = 1.0f; + if (w0 < 0.0f || w1 < 0.0f || w2 < 0.0f) + continue; - // If NLOOP==0, no processing — just check EOP - if (nloop == 0) - { - if (eop) - break; - continue; - } - - if (flg == 0) - { - // PACKED mode - uint64_t regs = hi; - for (uint32_t loop = 0; loop < nloop && pos + 16 <= endAddr; loop++) + uint8_t r, g, b, a; + if (gs->m_prim.iip) { - for (uint32_t r = 0; r < nreg && pos + 16 <= endAddr; r++) - { - uint8_t regId = (uint8_t)((regs >> (r * 4)) & 0xF); - - uint64_t dataLo, dataHi; - memcpy(&dataLo, m_rdram + pos, 8); - memcpy(&dataHi, m_rdram + pos + 8, 8); - pos += 16; - - // In PACKED mode, most regs use dataLo, except A+D which uses both - if (regId == 0x0E) - { - // A+D: dataLo = value, dataHi low byte = register address - uint8_t gsReg = (uint8_t)(dataHi & 0xFF); - handleADWrite(dataLo, gsReg, m_gsVRAM, PS2_GS_VRAM_SIZE, m_gsDrawCtx); - } - else if (regId == 0x0F) - { - // NOP - } - else - { - // Direct register write in PACKED format - // Convert PACKED register data to A+D equivalent - switch (regId) - { - case 0x00: // PRIM - handleADWrite(dataLo & 0x7FF, GS_PRIM, m_gsVRAM, PS2_GS_VRAM_SIZE, m_gsDrawCtx); - break; - case 0x01: // RGBA (PACKED writes RGBAQ, Q unchanged) - { - // PACKED RGBA: R=lo[7:0], G=lo[39:32], B=hi[7:0], A=hi[39:32] - g_gsRegs.r = (uint8_t)(dataLo & 0xFF); - g_gsRegs.g = (uint8_t)((dataLo >> 32) & 0xFF); - g_gsRegs.b = (uint8_t)(dataHi & 0xFF); - g_gsRegs.a = (uint8_t)((dataHi >> 32) & 0xFF); - // Do NOT touch g_gsRegs.q — PACKED RGBA leaves Q unchanged - break; - } - case 0x02: // ST - { - // PACKED ST: lo[31:0]=S, lo[63:32]=T, hi[31:0]=Q - uint32_t sVal = (uint32_t)(dataLo & 0xFFFFFFFF); - uint32_t tVal = (uint32_t)(dataLo >> 32); - uint32_t qVal = (uint32_t)(dataHi & 0xFFFFFFFF); - memcpy(&g_gsRegs.s, &sVal, 4); - memcpy(&g_gsRegs.t, &tVal, 4); - memcpy(&g_gsRegs.q, &qVal, 4); - if (!std::isfinite(g_gsRegs.q) || g_gsRegs.q == 0.0f) - g_gsRegs.q = 1.0f; - break; - } - case 0x03: // UV (PACKED: U=lo[13:0], V=lo[45:32]) - { - uint16_t u = (uint16_t)(dataLo & 0x3FFF); - uint16_t v = (uint16_t)((dataLo >> 32) & 0x3FFF); - // Repack to A+D UV layout: U=bits[13:0], V=bits[29:16] - uint64_t uv = (uint64_t)u | ((uint64_t)v << 16); - handleADWrite(uv, GS_UV, m_gsVRAM, PS2_GS_VRAM_SIZE, m_gsDrawCtx); - break; - } - case 0x04: // XYZF2/XYZF3 - { - // PACKED XYZF: X=lo[15:0], Y=lo[47:32] - // Z=hi[27:4] (24 bits), F=hi[43:36] (8 bits) - // ADC=hi[47] (bit 111 of 128-bit QW) - uint32_t x = (uint32_t)(dataLo & 0xFFFF); - uint32_t y = (uint32_t)((dataLo >> 32) & 0xFFFF); - uint32_t z = (uint32_t)((dataHi >> 4) & 0x00FFFFFF); // 24-bit Z - uint32_t f = (uint32_t)((dataHi >> 36) & 0xFF); // fog coeff - bool adc = ((dataHi >> 47) & 1) != 0; // bit 111 - - uint8_t gsReg = adc ? GS_XYZF3 : GS_XYZF2; - uint64_t xyzf = (uint64_t)x | ((uint64_t)y << 16) | ((uint64_t)z << 32) | ((uint64_t)f << 56); - handleADWrite(xyzf, gsReg, m_gsVRAM, PS2_GS_VRAM_SIZE, m_gsDrawCtx); - break; - } - case 0x05: // XYZ2/XYZ3 - { - uint32_t x = (uint32_t)(dataLo & 0xFFFF); - uint32_t y = (uint32_t)((dataLo >> 32) & 0xFFFF); - uint32_t z = (uint32_t)(dataHi & 0xFFFFFFFF); - bool adc = ((dataHi >> 47) & 1) != 0; // bit 111 - uint8_t gsReg = adc ? GS_XYZ3 : GS_XYZ2; - uint64_t xyz = (uint64_t)x | ((uint64_t)y << 16) | ((uint64_t)z << 32); - handleADWrite(xyz, gsReg, m_gsVRAM, PS2_GS_VRAM_SIZE, m_gsDrawCtx); - break; - } - case 0x0A: // FOG - // Ignore fog for now - break; - default: - // Other packed regs - pass through as A+D - handleADWrite(dataLo, regId, m_gsVRAM, PS2_GS_VRAM_SIZE, m_gsDrawCtx); - break; - } - } - } + r = clampU8(static_cast(v0.r * w0 + v1.r * w1 + v2.r * w2)); + g = clampU8(static_cast(v0.g * w0 + v1.g * w1 + v2.g * w2)); + b = clampU8(static_cast(v0.b * w0 + v1.b * w1 + v2.b * w2)); + a = clampU8(static_cast(v0.a * w0 + v1.a * w1 + v2.a * w2)); } - } - else if (flg == 1) - { - // REGLIST mode: stream is DWs; each QW contains 2 DWs - // A+D is NOT available in REGLIST (only regs 0x0..0xD) - uint64_t regs = hi; - - uint64_t totalDw = (uint64_t)nloop * (uint64_t)nreg; - uint64_t dwIndex = 0; - - while (dwIndex < totalDw && pos + 16 <= endAddr) + else { - uint64_t qwLo, qwHi; - memcpy(&qwLo, m_rdram + pos, 8); - memcpy(&qwHi, m_rdram + pos + 8, 8); - pos += 16; + r = v2.r; g = v2.g; b = v2.b; a = v2.a; + } - // DW0 - if (dwIndex < totalDw) + if (gs->m_prim.tme) + { + float is, it; + uint16_t iu, iv; + if (gs->m_prim.fst) { - uint32_t r = (uint32_t)(dwIndex % nreg); - uint8_t regId = (uint8_t)((regs >> (r * 4)) & 0xF); - if (regId != 0x0E && regId != 0x0F) - { - handleADWrite(qwLo, regId, m_gsVRAM, PS2_GS_VRAM_SIZE, m_gsDrawCtx); - } - dwIndex++; + iu = static_cast(v0.u * w0 + v1.u * w1 + v2.u * w2); + iv = static_cast(v0.v * w0 + v1.v * w1 + v2.v * w2); + is = 0; it = 0; } - - // DW1 - if (dwIndex < totalDw) + else { - uint32_t r = (uint32_t)(dwIndex % nreg); - uint8_t regId = (uint8_t)((regs >> (r * 4)) & 0xF); - if (regId != 0x0E && regId != 0x0F) - { - handleADWrite(qwHi, regId, m_gsVRAM, PS2_GS_VRAM_SIZE, m_gsDrawCtx); - } - dwIndex++; + is = v0.s * w0 + v1.s * w1 + v2.s * w2; + it = v0.t * w0 + v1.t * w1 + v2.t * w2; + iu = 0; iv = 0; + } + uint32_t texel = sampleTexture(gs, is, it, iu, iv); + uint8_t tr = static_cast(texel & 0xFF); + uint8_t tg = static_cast((texel >> 8) & 0xFF); + uint8_t tb = static_cast((texel >> 16) & 0xFF); + uint8_t ta = static_cast((texel >> 24) & 0xFF); + + const auto &tex = ctx.tex0; + if (tex.tfx == 0) + { + r = clampU8((tr * r) >> 7); + g = clampU8((tg * g) >> 7); + b = clampU8((tb * b) >> 7); + a = ta; + } + else if (tex.tfx == 1) + { + r = tr; g = tg; b = tb; a = ta; + } + else + { + r = clampU8((tr * r) >> 7); + g = clampU8((tg * g) >> 7); + b = clampU8((tb * b) >> 7); + a = ta; } } + + writePixel(gs, x, y, r, g, b, a); } - else if (flg == 2) - { - // IMAGE mode: raw pixel data for Host->Local transfer - uint32_t imageBytes = nloop * 16; - if (g_gsRegs.xferActive && m_gsVRAM) - { - for (uint32_t i = 0; i < nloop && pos + 16 <= endAddr; i++) - { - uint64_t lo2, hi2; - memcpy(&lo2, m_rdram + pos, 8); - memcpy(&hi2, m_rdram + pos + 8, 8); + } +} - // Process as two HWREG writes (each 64 bits) - handleADWrite(lo2, GS_HWREG, m_gsVRAM, PS2_GS_VRAM_SIZE, m_gsDrawCtx); - handleADWrite(hi2, GS_HWREG, m_gsVRAM, PS2_GS_VRAM_SIZE, m_gsDrawCtx); +void GSRasterizer::drawLine(GS *gs) +{ + const GSVertex &v0 = gs->m_vtxQueue[0]; + const GSVertex &v1 = gs->m_vtxQueue[1]; + const auto &ctx = gs->activeContext(); - pos += 16; - g_gsRegs.imageQWs++; - } - } - else - { - pos += imageBytes; - if (pos > endAddr) - pos = endAddr; - } + int ofx = ctx.xyoffset.ofx >> 4; + int ofy = ctx.xyoffset.ofy >> 4; + + int x0 = static_cast(v0.x) - ofx; + int y0 = static_cast(v0.y) - ofy; + int x1 = static_cast(v1.x) - ofx; + int y1 = static_cast(v1.y) - ofy; + + int dx = std::abs(x1 - x0); + int dy = -std::abs(y1 - y0); + int sx = (x0 < x1) ? 1 : -1; + int sy = (y0 < y1) ? 1 : -1; + int err = dx + dy; + + int totalSteps = std::max(std::abs(x1 - x0), std::abs(y1 - y0)); + if (totalSteps == 0) totalSteps = 1; + int step = 0; + + for (;;) + { + float t = static_cast(step) / static_cast(totalSteps); + uint8_t r, g, b, a; + if (gs->m_prim.iip) + { + r = clampU8(static_cast(v0.r + (v1.r - v0.r) * t)); + g = clampU8(static_cast(v0.g + (v1.g - v0.g) * t)); + b = clampU8(static_cast(v0.b + (v1.b - v0.b) * t)); + a = clampU8(static_cast(v0.a + (v1.a - v0.a) * t)); } else { - // flg == 3: disabled/reserved - break; + r = v1.r; g = v1.g; b = v1.b; a = v1.a; } - if (eop) - break; - } + writePixel(gs, x0, y0, r, g, b, a); - m_gsDrawCtx.gifTagsProcessed = g_gsRegs.gifTagsProcessed; - m_gsDrawCtx.adWrites = g_gsRegs.adWrites; - m_gsDrawCtx.primitivesDrawn = g_gsRegs.primsDrawn; + if (x0 == x1 && y0 == y1) + break; - m_gsWriteCount.fetch_add(1, std::memory_order_relaxed); - m_seenGifCopy = true; + int e2 = 2 * err; + if (e2 >= dy) { err += dy; x0 += sx; } + if (e2 <= dx) { err += dx; y0 += sy; } + ++step; + } } diff --git a/ps2xRuntime/src/lib/ps2_memory.cpp b/ps2xRuntime/src/lib/ps2_memory.cpp index 3e424ce1..7215eaf8 100644 --- a/ps2xRuntime/src/lib/ps2_memory.cpp +++ b/ps2xRuntime/src/lib/ps2_memory.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace { @@ -121,6 +122,17 @@ PS2Memory::~PS2Memory() m_gsVRAM = nullptr; } + if (m_vu1Code) + { + delete[] m_vu1Code; + m_vu1Code = nullptr; + } + if (m_vu1Data) + { + delete[] m_vu1Data; + m_vu1Data = nullptr; + } + if (iop_ram) { delete[] iop_ram; @@ -136,11 +148,15 @@ bool PS2Memory::initialize(size_t ramSize) delete[] m_scratchpad; delete[] iop_ram; delete[] m_gsVRAM; + delete[] m_vu1Code; + delete[] m_vu1Data; m_rdram = nullptr; m_scratchpad = nullptr; ps2SetScratchpadHostPtr(nullptr); iop_ram = nullptr; m_gsVRAM = nullptr; + m_vu1Code = nullptr; + m_vu1Data = nullptr; }; cleanup(); @@ -176,12 +192,16 @@ bool PS2Memory::initialize(size_t ramSize) // Initialize GS registers memset(&gs_regs, 0, sizeof(gs_regs)); - m_gsDrawCtx = GSDrawContext{}; // Allocate GS VRAM (4MB) m_gsVRAM = new uint8_t[PS2_GS_VRAM_SIZE]; std::memset(m_gsVRAM, 0, PS2_GS_VRAM_SIZE); + m_vu1Code = new uint8_t[PS2_VU1_CODE_SIZE]; + m_vu1Data = new uint8_t[PS2_VU1_DATA_SIZE]; + std::memset(m_vu1Code, 0, PS2_VU1_CODE_SIZE); + std::memset(m_vu1Data, 0, PS2_VU1_DATA_SIZE); + // Initialize VIF registers memset(&vif0_regs, 0, sizeof(vif0_regs)); memset(&vif1_regs, 0, sizeof(vif1_regs)); @@ -656,28 +676,6 @@ bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) m_ioRegisters[address] = value; - { - static int io_total_log = 0; - if (io_total_log < 100) - { - std::cerr << "[IO_WRITE] addr=0x" << std::hex << address << " val=0x" << value << std::dec << std::endl; - ++io_total_log; - } - } - - if (address >= 0x10008000 && address < 0x1000F000) - { - static int dma_io_log = 0; - if (dma_io_log < 200) - { - uint32_t ch = (address >> 8) & 0xFF; - uint32_t off = address & 0xFF; - std::cerr << "[DMA_IO] ch=0x" << std::hex << (address & 0xFFFFFF00) - << " off=0x" << off << " val=0x" << value << std::dec << std::endl; - ++dma_io_log; - } - } - if (address >= 0x10008000 && address < 0x1000F000) { if ((address & 0xFF) == 0x00 && (value & 0x100)) @@ -689,128 +687,171 @@ bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) if ((channelBase == 0x1000A000 || channelBase == 0x10009000) && m_gsVRAM) { - auto dispatchTransfer = [&](uint32_t srcAddr, uint32_t qwCount) + auto enqueueTransfer = [&](uint32_t srcAddr, uint32_t qwCount) { if (qwCount == 0) - { - return; - } - - uint32_t srcPhys = 0; - try - { - srcPhys = translateAddress(srcAddr); - } - catch (const std::exception &) - { return; - } - - if (srcPhys >= PS2_RAM_SIZE) - { - return; - } - + const bool scratch = isScratchpad(srcAddr); + PendingTransfer pt; + pt.fromScratchpad = scratch; + pt.srcAddr = srcAddr; + pt.qwc = qwCount; if (channelBase == 0x1000A000) - { - processGIFPacket(srcPhys, qwCount); - return; - } - - const uint64_t bytes64 = static_cast(qwCount) * 16ull; - uint32_t bytes = bytes64 > static_cast(PS2_RAM_SIZE) - ? PS2_RAM_SIZE - : static_cast(bytes64); - if (srcPhys + bytes > PS2_RAM_SIZE) - { - bytes = PS2_RAM_SIZE - srcPhys; - } - processVIF1Data(srcPhys, bytes); + m_pendingGifTransfers.push_back(pt); + else if (channelBase == 0x10009000 && !scratch) + m_pendingVif1Transfers.push_back(pt); }; - auto walkChain = [&](uint32_t startTadr) + uint32_t chcr = value; + uint32_t mode = (chcr >> 2) & 0x3; + + if (mode == 0 && qwc > 0) + { + enqueueTransfer(madr, qwc); + } + else if (mode == 1) { - uint32_t curTadr = startTadr; - constexpr int kMaxTags = 4096; - for (int i = 0; i < kMaxTags; ++i) + uint32_t tagAddr = m_ioRegisters[channelBase + 0x30]; + const int kMaxChainTags = 4096; + std::vector chainBuf; + + auto appendData = [&](uint32_t srcAddr, uint32_t qwCount) + { + const uint64_t bytes64 = static_cast(qwCount) * 16ull; + uint32_t bytes = (bytes64 > 0xFFFFFFFFull) ? 0xFFFFFFFFu : static_cast(bytes64); + const bool scratch = isScratchpad(srcAddr); + uint32_t src = 0; + try + { + src = translateAddress(srcAddr); + } + catch (...) + { + return; + } + const uint8_t *base2; + uint32_t maxSz2; + if (scratch) + { + base2 = m_scratchpad; + maxSz2 = PS2_SCRATCHPAD_SIZE; + } + else + { + base2 = m_rdram; + maxSz2 = PS2_RAM_SIZE; + } + if (src >= maxSz2) + return; + if (src + bytes > maxSz2) + bytes = maxSz2 - src; + if (bytes == 0) + return; + chainBuf.insert(chainBuf.end(), base2 + src, base2 + src + bytes); + }; + + std::vector retStack; + retStack.reserve(8); + int tagsProcessed = 0; + + while (tagsProcessed < kMaxChainTags) { + const bool tagInSPR = isScratchpad(tagAddr); uint32_t physTag = 0; try { - physTag = translateAddress(curTadr); + physTag = translateAddress(tagAddr); } - catch (const std::exception &) + catch (...) { break; } - - if (physTag + 16 > PS2_RAM_SIZE) + const uint8_t *tagBase; + uint32_t tagMax; + if (tagInSPR) { - break; + tagBase = m_scratchpad; + tagMax = PS2_SCRATCHPAD_SIZE; } + else + { + tagBase = m_rdram; + tagMax = PS2_RAM_SIZE; + } + if (physTag + 16 > tagMax) + break; - const uint64_t tag = loadScalar(m_rdram, physTag, PS2_RAM_SIZE, "dma chain tag", curTadr); - const uint16_t tagQwc = static_cast(tag & 0xFFFFu); - const uint32_t id = static_cast((tag >> 28) & 0x7u); - const uint32_t addr = static_cast((tag >> 32) & 0x7FFFFFF0u); - const bool irq = ((tag >> 31) & 0x1u) != 0; - - uint32_t dataAddr = 0; - uint32_t nextTag = 0; - bool endChain = false; + const uint8_t *tp = tagBase + physTag; + uint64_t tag = loadScalar(tp, 0, 16, "dma chain tag", tagAddr); + uint16_t tagQwc = static_cast(tag & 0xFFFF); + uint32_t id = static_cast((tag >> 28) & 0x7); + uint32_t addr = static_cast((tag >> 32) & 0x7FFFFFFF); + ++tagsProcessed; switch (id) { - case 0: // REFE - dataAddr = addr; - endChain = true; + case 0: + if (tagQwc > 0) + appendData(addr, tagQwc); + goto chain_done; + case 1: + if (tagQwc > 0) + appendData(tagAddr + 16, tagQwc); + tagAddr = tagAddr + 16 + tagQwc * 16; break; - case 1: // CNT - dataAddr = curTadr + 16u; - nextTag = curTadr + 16u + static_cast(tagQwc) * 16u; + case 2: + if (tagQwc > 0) + appendData(tagAddr + 16, tagQwc); + tagAddr = addr; break; - case 2: // NEXT - dataAddr = curTadr + 16u; - nextTag = addr; + case 3: + case 4: + if (tagQwc > 0) + appendData(addr, tagQwc); + tagAddr = tagAddr + 16; break; - case 3: // REF - case 4: // REFS - dataAddr = addr; - nextTag = curTadr + 16u; + case 5: + if (tagQwc > 0) + appendData(addr, tagQwc); + if (retStack.size() < 16) + retStack.push_back(tagAddr + 16); + tagAddr = addr; break; - case 7: // END - dataAddr = curTadr + 16u; - endChain = true; + case 6: + if (!retStack.empty()) + { + tagAddr = retStack.back(); + retStack.pop_back(); + } + else + goto chain_done; break; + case 7: + if (tagQwc > 0) + appendData(tagAddr + 16, tagQwc); + goto chain_done; default: - endChain = true; - break; - } - - if (tagQwc > 0 && dataAddr != 0) - { - dispatchTransfer(dataAddr, tagQwc); - } - - if (endChain || irq) - { - break; + goto chain_done; } - curTadr = nextTag; } - }; - - if (qwc > 0) - { - dispatchTransfer(madr, qwc); + chain_done: + if (!chainBuf.empty()) + { + PendingTransfer pt; + pt.fromScratchpad = false; + pt.srcAddr = 0; + pt.qwc = 0; + pt.chainData = std::move(chainBuf); + if (channelBase == 0x1000A000) + m_pendingGifTransfers.push_back(std::move(pt)); + else if (channelBase == 0x10009000) + m_pendingVif1Transfers.push_back(std::move(pt)); + } } - else + else if (qwc > 0) { - const uint32_t tadr = m_ioRegisters[channelBase + 0x30]; - walkChain(tadr); + enqueueTransfer(madr, qwc); } - - m_ioRegisters[address] &= ~0x100; } } return true; @@ -845,22 +886,148 @@ bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) return false; } -// ============================================================================ -// pollDmaRegisters: Workaround for KSEG1 fast-path bypass -// When libsles.a is compiled with old headers, isSpecialAddress() doesn't -// recognize KSEG0/KSEG1 addresses (0x8xxx/0xBxxx). Game writes to e.g. -// 0xB000A000 (GIF DMA CHCR via KSEG1) go through Ps2FastWrite32 which -// stores to rdram[addr & 0x01FFFFFF] = rdram[0x1000A000], bypassing -// writeIORegister entirely. This function polls those shadow locations -// and triggers DMA processing when CHCR.STR (bit 8) is set. -// -// NOTE: DISABLED — sho_runner writes DMA regs via physical addresses which -// go through writeIORegister correctly. This function was reading garbage -// from rdram shadow (ELF code area) and triggering bogus DMA transfers. -// ============================================================================ +void PS2Memory::processPendingTransfers() +{ + const bool hadGif = !m_pendingGifTransfers.empty(); + for (size_t idx = 0; idx < m_pendingGifTransfers.size(); ++idx) + { + auto &p = m_pendingGifTransfers[idx]; + if (!p.chainData.empty()) + { + m_seenGifCopy = true; + m_gifCopyCount.fetch_add(1, std::memory_order_relaxed); + submitGifPacket(GifPathId::Path3, p.chainData.data(), static_cast(p.chainData.size()), false); + } + else if (p.qwc > 0) + { + const uint64_t bytes64 = static_cast(p.qwc) * 16ull; + uint32_t sizeBytes = (bytes64 > 0xFFFFFFFFull) ? 0xFFFFFFFFu : static_cast(bytes64); + uint32_t srcPhys = 0; + try + { + srcPhys = translateAddress(p.srcAddr); + } + catch (const std::exception &) + { + continue; + } + if (p.fromScratchpad) + { + if (srcPhys + sizeBytes <= PS2_SCRATCHPAD_SIZE && sizeBytes >= 16) + { + m_seenGifCopy = true; + m_gifCopyCount.fetch_add(1, std::memory_order_relaxed); + submitGifPacket(GifPathId::Path3, m_scratchpad + srcPhys, sizeBytes, false); + } + } + else if (srcPhys < PS2_RAM_SIZE) + { + if (static_cast(srcPhys) + sizeBytes > PS2_RAM_SIZE) + sizeBytes = PS2_RAM_SIZE - srcPhys; + if (sizeBytes >= 16) + { + m_seenGifCopy = true; + m_gifCopyCount.fetch_add(1, std::memory_order_relaxed); + submitGifPacket(GifPathId::Path3, m_rdram + srcPhys, sizeBytes, false); + } + } + } + } + m_pendingGifTransfers.clear(); + + const bool hadVif1 = !m_pendingVif1Transfers.empty(); + for (auto &p : m_pendingVif1Transfers) + { + if (!p.chainData.empty()) + { + processVIF1Data(p.chainData.data(), static_cast(p.chainData.size())); + } + else if (p.qwc > 0 && !p.fromScratchpad) + { + uint32_t srcPhys = 0; + try + { + srcPhys = translateAddress(p.srcAddr); + } + catch (const std::exception &) + { + continue; + } + if (srcPhys < PS2_RAM_SIZE) + { + const uint64_t bytes64 = static_cast(p.qwc) * 16ull; + uint32_t sizeBytes = (bytes64 > 0xFFFFFFFFull) ? 0xFFFFFFFFu : static_cast(bytes64); + if (srcPhys + sizeBytes > PS2_RAM_SIZE) + sizeBytes = PS2_RAM_SIZE - srcPhys; + if (sizeBytes > 0) + processVIF1Data(srcPhys, sizeBytes); + } + } + } + m_pendingVif1Transfers.clear(); + + if (m_gifArbiter) + m_gifArbiter->drain(); + + static constexpr uint32_t GIF_CHANNEL = 0x1000A000; + static constexpr uint32_t VIF1_CHANNEL = 0x10009000; + if (hadGif) + { + m_ioRegisters[GIF_CHANNEL + 0x00] &= ~0x100u; + m_ioRegisters[GIF_CHANNEL + 0x20] = 0; + } + if (hadVif1) + { + m_ioRegisters[VIF1_CHANNEL + 0x00] &= ~0x100u; + m_ioRegisters[VIF1_CHANNEL + 0x20] = 0; + } +} + +void PS2Memory::submitGifPacket(GifPathId pathId, const uint8_t *data, uint32_t sizeBytes, bool drainImmediately) +{ + if (!data || sizeBytes < 16) + return; + if (pathId == GifPathId::Path3 && m_path3Masked) + return; + if (m_gifArbiter) + { + m_gifArbiter->submit(pathId, data, sizeBytes); + if (drainImmediately) + m_gifArbiter->drain(); + } + else if (m_gifPacketCallback) + { + m_gifPacketCallback(data, sizeBytes); + } +} + +void PS2Memory::processGIFPacket(uint32_t srcPhysAddr, uint32_t qwCount) +{ + if (!m_rdram || qwCount == 0) + return; + const uint64_t bytes64 = static_cast(qwCount) * 16ull; + uint32_t sizeBytes = (bytes64 > 0xFFFFFFFFull) ? 0xFFFFFFFFu : static_cast(bytes64); + if (srcPhysAddr >= PS2_RAM_SIZE) + return; + if (static_cast(srcPhysAddr) + static_cast(sizeBytes) > static_cast(PS2_RAM_SIZE)) + sizeBytes = PS2_RAM_SIZE - srcPhysAddr; + if (sizeBytes < 16) + return; + m_seenGifCopy = true; + m_gifCopyCount.fetch_add(1, std::memory_order_relaxed); + submitGifPacket(GifPathId::Path3, m_rdram + srcPhysAddr, sizeBytes); +} + +void PS2Memory::processGIFPacket(const uint8_t *data, uint32_t sizeBytes) +{ + if (m_gifArbiter) + submitGifPacket(GifPathId::Path3, data, sizeBytes); + else if (m_gifPacketCallback && data && sizeBytes >= 16) + m_gifPacketCallback(data, sizeBytes); +} + int PS2Memory::pollDmaRegisters() { - // Disabled — DMA writes go through writeIORegister, not KSEG1 shadow return 0; } diff --git a/ps2xRuntime/src/lib/ps2_runtime.cpp b/ps2xRuntime/src/lib/ps2_runtime.cpp index b0377780..262e3c50 100644 --- a/ps2xRuntime/src/lib/ps2_runtime.cpp +++ b/ps2xRuntime/src/lib/ps2_runtime.cpp @@ -186,71 +186,107 @@ namespace static void UploadFrame(Texture2D &tex, PS2Runtime *rt) { - // Try to use GS dispfb/display registers to locate the visible buffer. const GSRegisters &gs = rt->memory().gs(); - // DISPFBUF1 fields: FBP bits 0-8, FBW bits 9-14, PSM bits 15-19. uint32_t dispfb = static_cast(gs.dispfb1 & 0xFFFFFFFFULL); uint32_t fbp = dispfb & 0x1FF; uint32_t fbw = (dispfb >> 9) & 0x3F; uint32_t psm = (dispfb >> 15) & 0x1F; - // DISPLAY1 fields used here: DX[11:0], DY[22:12], MAGH[25:23], MAGV[27:26], DW[43:32], DH[54:44]. uint64_t display64 = gs.display1; - uint32_t magh = static_cast((display64 >> 23) & 0x7); // magnification H (0-7) uint32_t dw = static_cast((display64 >> 32) & 0xFFF); uint32_t dh = static_cast((display64 >> 44) & 0x7FF); - // DW is in VCK units: actual pixel width = (DW + 1) / (MAGH + 1). - uint32_t maghDiv = magh + 1; - uint32_t width = (dw + 1) / maghDiv; + uint32_t width = (dw + 1); uint32_t height = (dh + 1); - if (dw == 0) + if (width < 64 || height < 64) + { width = FB_WIDTH; - if (dh == 0) height = FB_HEIGHT; + } if (width > FB_WIDTH) width = FB_WIDTH; if (height > FB_HEIGHT) height = FB_HEIGHT; - // Only handle PSMCT32 (0). - if (psm != 0) - { - // I can`t stand a random RAM glitch screen so lets use some magenta to calm down - Image blank = GenImageColor(FB_WIDTH, FB_HEIGHT, MAGENTA); - UpdateTexture(tex, blank.data); - UnloadImage(blank); - return; - } - - uint32_t baseBytes = fbp * 2048; + uint32_t baseBytes = fbp * 8192u; const uint32_t bytesPerPixel = (psm == 2u || psm == 0x0Au) ? 2u : 4u; uint32_t strideBytes = (fbw ? fbw : (FB_WIDTH / 64)) * 64 * bytesPerPixel; - static std::vector scratch(FB_WIDTH * FB_HEIGHT * 4, 0); - std::memset(scratch.data(), 0, scratch.size()); + std::vector scratch(FB_WIDTH * FB_HEIGHT * 4, 0); uint8_t *rdram = rt->memory().getRDRAM(); uint8_t *gsvram = rt->memory().getGSVRAM(); - for (uint32_t y = 0; y < height; ++y) + + uint32_t snapSize = 0; + const uint8_t *snapVram = rt->gs().lockDisplaySnapshot(snapSize); + const uint8_t *vramSrc = (snapVram && snapSize > 0) ? snapVram : gsvram; + + if (snapVram) { - uint32_t srcOff = baseBytes + y * strideBytes; - uint32_t dstOff = y * FB_WIDTH * 4; - uint32_t copyW = width * 4; - uint32_t srcIdx = srcOff; - if (srcIdx + copyW <= PS2_GS_VRAM_SIZE && gsvram) + baseBytes = rt->gs().getLastDisplayBaseBytes(); + } + + if (psm == 0u) + { + for (uint32_t y = 0; y < height; ++y) { - std::memcpy(&scratch[dstOff], gsvram + srcIdx, copyW); + uint32_t srcOff = baseBytes + y * strideBytes; + uint32_t dstOff = y * FB_WIDTH * 4; + uint32_t copyW = width * 4; + uint32_t srcIdx = srcOff; + if (srcIdx + copyW <= PS2_GS_VRAM_SIZE && vramSrc) + std::memcpy(&scratch[dstOff], vramSrc + srcIdx, copyW); + else + { + uint32_t rdramIdx = srcOff & PS2_RAM_MASK; + if (rdramIdx + copyW > PS2_RAM_SIZE) + copyW = PS2_RAM_SIZE - rdramIdx; + std::memcpy(&scratch[dstOff], rdram + rdramIdx, copyW); + } + uint8_t *row = scratch.data() + dstOff; + for (uint32_t x = 0; x < width; ++x) + row[x * 4 + 3] = 255u; } - else + } + else if (psm == 2u) + { + const uint32_t srcLineBytes = width * 2u; + for (uint32_t y = 0; y < height; ++y) { - uint32_t rdramIdx = srcOff & PS2_RAM_MASK; - if (rdramIdx + copyW > PS2_RAM_SIZE) - copyW = PS2_RAM_SIZE - rdramIdx; - std::memcpy(&scratch[dstOff], rdram + rdramIdx, copyW); + uint32_t srcOff = baseBytes + y * strideBytes; + uint32_t dstOff = y * FB_WIDTH * 4; + const uint8_t *src = nullptr; + if (srcOff + srcLineBytes <= PS2_GS_VRAM_SIZE && vramSrc) + src = vramSrc + srcOff; + else if ((srcOff & PS2_RAM_MASK) + srcLineBytes <= PS2_RAM_SIZE) + src = rdram + (srcOff & PS2_RAM_MASK); + if (!src) + continue; + uint8_t *dst = scratch.data() + dstOff; + for (uint32_t x = 0; x < width; ++x) + { + uint16_t p = *reinterpret_cast(src + x * 2); + uint32_t r = (p >> 10) & 31u; + uint32_t g = (p >> 5) & 31u; + uint32_t b = p & 31u; + dst[x * 4 + 0] = static_cast((r << 3) | (r >> 2)); + dst[x * 4 + 1] = static_cast((g << 3) | (g >> 2)); + dst[x * 4 + 2] = static_cast((b << 3) | (b >> 2)); + dst[x * 4 + 3] = 255u; + } } } + else + { + rt->gs().unlockDisplaySnapshot(); + Image blank = GenImageColor(FB_WIDTH, FB_HEIGHT, MAGENTA); + UpdateTexture(tex, blank.data); + UnloadImage(blank); + return; + } + + rt->gs().unlockDisplaySnapshot(); UpdateTexture(tex, scratch.data()); } @@ -296,6 +332,16 @@ bool PS2Runtime::initialize(const char *title) return false; } + m_gs.init(m_memory.getGSVRAM(), static_cast(PS2_GS_VRAM_SIZE), &m_memory.gs()); + m_gs.reset(); + m_gifArbiter.setProcessPacketFn([this](const uint8_t *data, uint32_t size) { m_gs.processGIFPacket(data, size); }); + m_memory.setGifArbiter(&m_gifArbiter); + m_memory.setVu1MscalCallback([this](uint32_t startPC, uint32_t itop) { + m_vu1.execute(m_memory.getVU1Code(), PS2_VU1_CODE_SIZE, + m_memory.getVU1Data(), PS2_VU1_DATA_SIZE, + m_gs, &m_memory, startPC, itop, 65536); + }); + m_iop.init(m_memory.getRDRAM()); m_iop.reset(); @@ -305,6 +351,8 @@ bool PS2Runtime::initialize(const char *title) m_audioBackend.setAudioReady(IsAudioDeviceReady()); SetTargetFPS(60); + m_vu1.reset(); + return true; } @@ -1451,18 +1499,11 @@ void PS2Runtime::run() lastVif = curVif; } } + UploadFrame(frameTex, this); + BeginDrawing(); ClearBackground(BLACK); - - bool gpuRendered = gsGpuRenderFrame(); - - if (!gpuRendered) - { - // lets draw for now as debug but we wont need this in future - UploadFrame(frameTex, this); - DrawTexture(frameTex, 0, 0, WHITE); - } - + DrawTexture(frameTex, 0, 0, WHITE); EndDrawing(); if (WindowShouldClose()) diff --git a/ps2xRuntime/src/lib/ps2_vif1_interpreter.cpp b/ps2xRuntime/src/lib/ps2_vif1_interpreter.cpp index 9d8fe9b5..28a0b4cb 100644 --- a/ps2xRuntime/src/lib/ps2_vif1_interpreter.cpp +++ b/ps2xRuntime/src/lib/ps2_vif1_interpreter.cpp @@ -25,17 +25,8 @@ enum VIFCmd : uint8_t VIF_MPG = 0x4A, VIF_DIRECT = 0x50, VIF_DIRECTHL = 0x51, - // UNPACK range: 0x60-0x6F (V4-32..V4-5) }; -namespace -{ - static int g_vifLogCount = 0; - static uint32_t g_vifDirectCount = 0; - static uint32_t g_vifUnpackCount = 0; - static uint32_t g_vifTotalCmds = 0; -} // namespace - void PS2Memory::processVIF1Data(uint32_t srcPhys, uint32_t sizeBytes) { if (!m_rdram || !m_gsVRAM || sizeBytes == 0u) @@ -47,82 +38,81 @@ void PS2Memory::processVIF1Data(uint32_t srcPhys, uint32_t sizeBytes) if (requestedEnd > static_cast(PS2_RAM_SIZE)) sizeBytes = PS2_RAM_SIZE - srcPhys; - const uint8_t *data = m_rdram + srcPhys; - uint32_t pos = 0; // byte offset + processVIF1Data(m_rdram + srcPhys, sizeBytes); +} + +void PS2Memory::processVIF1Data(const uint8_t *data, uint32_t sizeBytes) +{ + if (!data || !m_gsVRAM || sizeBytes == 0u) + return; + + uint32_t pos = 0; while (pos + 4 <= sizeBytes) { - // Read VIF command word (32 bits) uint32_t cmd; memcpy(&cmd, data + pos, 4); pos += 4; - uint8_t opcode = (cmd >> 24) & 0x7F; // bits 30:24 - // bool irq = (cmd >> 31) & 1; // bit 31: interrupt - uint16_t imm = cmd & 0xFFFF; // bits 15:0 (IMMEDIATE) - uint8_t num = (cmd >> 16) & 0xFF; // bits 23:16 (NUM) - - g_vifTotalCmds++; + uint8_t opcode = (cmd >> 24) & 0x7F; + uint16_t imm = cmd & 0xFFFF; + uint8_t num = (cmd >> 16) & 0xFF; if (opcode == VIF_NOP) { - // No operation continue; } else if (opcode == VIF_STCYCL) { - // Set write cycle: CL in bits 7:0, WL in bits 15:8 - // Used with UNPACK - store for later + vif1_regs.cycle = imm; continue; } else if (opcode == VIF_OFFSET) { - // Set double-buffer offset + vif1_regs.ofst = imm & 0x3FFu; continue; } else if (opcode == VIF_BASE) { - // Set double-buffer base + vif1_regs.base = imm & 0x3FFu; continue; } else if (opcode == VIF_ITOP) { - // Set ITOP register + vif1_regs.itop = imm & 0x3FFu; continue; } else if (opcode == VIF_STMOD) { - // Set decompression mode + vif1_regs.mode = imm & 3u; continue; } else if (opcode == VIF_MSKPATH3) { - // Mask/unmask GIF PATH3 + m_path3Masked = (imm & 1u) != 0; continue; } else if (opcode == VIF_MARK) { - // Set MARK register continue; } else if (opcode == VIF_FLUSHE || opcode == VIF_FLUSH || opcode == VIF_FLUSHA) { - // Wait for pipeline flush - no-op in software continue; } else if (opcode == VIF_MSCAL || opcode == VIF_MSCALF) { - // Start VU1 microprogram at address IMM - skip (no VU1 emu) + uint32_t startPC = (uint32_t)imm * 8u; + if (m_vu1MscalCallback) + m_vu1MscalCallback(startPC, vif1_regs.itop); continue; } else if (opcode == VIF_MSCNT) { - // Continue VU1 execution - skip continue; } else if (opcode == VIF_STMASK) { - // Next QW contains write mask - skip 4 bytes pos += 4; if (pos > sizeBytes) break; @@ -130,7 +120,6 @@ void PS2Memory::processVIF1Data(uint32_t srcPhys, uint32_t sizeBytes) } else if (opcode == VIF_STROW) { - // Next 4 words (16 bytes) = fill row registers pos += 16; if (pos > sizeBytes) break; @@ -138,7 +127,6 @@ void PS2Memory::processVIF1Data(uint32_t srcPhys, uint32_t sizeBytes) } else if (opcode == VIF_STCOL) { - // Next 4 words (16 bytes) = fill column registers pos += 16; if (pos > sizeBytes) break; @@ -146,10 +134,17 @@ void PS2Memory::processVIF1Data(uint32_t srcPhys, uint32_t sizeBytes) } else if (opcode == VIF_MPG) { - // Upload microprogram to VU1: NUM*8 bytes of data follow - uint32_t mpgBytes = (uint32_t)num * 8; - // Align to QW + uint32_t destAddr = (uint32_t)imm * 8u; + uint32_t mpgBytes = (uint32_t)num * 8u; mpgBytes = (mpgBytes + 15) & ~15u; + if (m_vu1Code && destAddr < PS2_VU1_CODE_SIZE && mpgBytes > 0) + { + uint32_t copyBytes = mpgBytes; + if (destAddr + copyBytes > PS2_VU1_CODE_SIZE) + copyBytes = PS2_VU1_CODE_SIZE - destAddr; + if (pos + copyBytes <= sizeBytes) + std::memcpy(m_vu1Code + destAddr, data + pos, copyBytes); + } pos += mpgBytes; if (pos > sizeBytes) break; @@ -157,24 +152,17 @@ void PS2Memory::processVIF1Data(uint32_t srcPhys, uint32_t sizeBytes) } else if (opcode == VIF_DIRECT || opcode == VIF_DIRECTHL) { - // IMM = number of 128-bit quadwords of GIF data following uint32_t qwCount = imm; if (qwCount == 0) - qwCount = 65536; // 0 means 65536 + qwCount = 65536; const uint32_t availableQw = (sizeBytes - pos) / 16u; const bool truncated = qwCount > availableQw; if (qwCount > availableQw) - { qwCount = availableQw; - } if (qwCount > 0) { - // The GIF data starts at current position in the source buffer - // processGIFPacket expects a physical RAM address - uint32_t gifPhysAddr = srcPhys + pos; - processGIFPacket(gifPhysAddr, qwCount); - g_vifDirectCount++; + submitGifPacket(GifPathId::Path2, data + pos, qwCount * 16); } pos += qwCount * 16; @@ -187,54 +175,48 @@ void PS2Memory::processVIF1Data(uint32_t srcPhys, uint32_t sizeBytes) } else if ((opcode & 0x60) == 0x60) { - // UNPACK commands (0x60-0x7F) - // Format: VN in bits 25:24, VL in bits 27:26 - // NUM = number of vectors, IMM = VU addr - // Skip the data payload - uint8_t vn = (opcode >> 2) & 0x3; // 0=S, 1=V2, 2=V3, 3=V4 - uint8_t vl = opcode & 0x3; // 0=32, 1=16, 2=8, 3=5 - - // Calculate component count and size + uint8_t vn = (opcode >> 2) & 0x3; + uint8_t vl = opcode & 0x3; int components = vn + 1; - int bitsPerComponent; + int bitsPerComponent = 32; switch (vl) { - case 0: - bitsPerComponent = 32; - break; - case 1: - bitsPerComponent = 16; - break; - case 2: - bitsPerComponent = 8; - break; - case 3: - bitsPerComponent = 16; - break; // V4-5 is special (4x16 packed) - default: - bitsPerComponent = 32; - break; - } - - // Total bits per vector - int bitsPerVector; - if (vl == 3 && vn == 3) - { - // V4-5: 4 components × 4-bit nibbles = 16 bits per vector. - bitsPerVector = 16; + case 0: bitsPerComponent = 32; break; + case 1: bitsPerComponent = 16; break; + case 2: bitsPerComponent = 8; break; + case 3: bitsPerComponent = (vn == 3) ? 4 : 16; break; + default: break; } - else - { - bitsPerVector = components * bitsPerComponent; - } - + int bitsPerVector = (vl == 3 && vn == 3) ? 16 : (components * bitsPerComponent); uint32_t bytesPerVector = (bitsPerVector + 7) / 8; uint32_t totalBytes = (uint32_t)num * bytesPerVector; - // Align to 32-bit word boundary totalBytes = (totalBytes + 3) & ~3u; + uint32_t vuAddr = (uint32_t)imm & 0x3FFu; + if (m_vu1Data && totalBytes > 0 && pos + totalBytes <= sizeBytes) + { + if (bytesPerVector == 16 && vuAddr * 16u < PS2_VU1_DATA_SIZE) + { + for (uint32_t i = 0; i < num; ++i) + { + uint32_t destOff = ((vuAddr + i) & 0x3FFu) * 16u; + if (destOff + 16 <= PS2_VU1_DATA_SIZE) + std::memcpy(m_vu1Data + destOff, data + pos + i * 16, 16); + } + } + else + { + uint32_t destOff = vuAddr * 16u; + if (destOff < PS2_VU1_DATA_SIZE) + { + uint32_t copyBytes = totalBytes; + if (destOff + copyBytes > PS2_VU1_DATA_SIZE) + copyBytes = PS2_VU1_DATA_SIZE - destOff; + std::memcpy(m_vu1Data + destOff, data + pos, copyBytes); + } + } + } pos += totalBytes; - g_vifUnpackCount++; if (pos > sizeBytes) break; @@ -242,27 +224,7 @@ void PS2Memory::processVIF1Data(uint32_t srcPhys, uint32_t sizeBytes) } else { - // Unknown VIF command - try to continue - if (g_vifLogCount < 10) - { - std::cerr << "[VIF1] Unknown opcode 0x" << std::hex << (int)opcode - << " at offset 0x" << (pos - 4) << std::dec << std::endl; - g_vifLogCount++; - } continue; } } - - static uint32_t s_logInterval = 0; - if (++s_logInterval >= 100) - { - if (g_vifLogCount < 50) - { - std::cerr << "[VIF1] stats: total_cmds=" << g_vifTotalCmds - << " direct=" << g_vifDirectCount - << " unpack=" << g_vifUnpackCount << std::endl; - g_vifLogCount++; - } - s_logInterval = 0; - } } diff --git a/ps2xRuntime/src/lib/ps2_vu1.cpp b/ps2xRuntime/src/lib/ps2_vu1.cpp new file mode 100644 index 00000000..d89d98f7 --- /dev/null +++ b/ps2xRuntime/src/lib/ps2_vu1.cpp @@ -0,0 +1,1030 @@ +#include "ps2_vu1.h" +#include "ps2_gs_gpu.h" +#include "ps2_gif_arbiter.h" +#include "ps2_memory.h" +#include +#include +#include +#include + +// Instruction field extraction helpers +static inline uint8_t DEST(uint32_t i) { return (uint8_t)((i >> 21) & 0xF); } +static inline uint8_t FT(uint32_t i) { return (uint8_t)((i >> 16) & 0x1F); } +static inline uint8_t FS(uint32_t i) { return (uint8_t)((i >> 11) & 0x1F); } +static inline uint8_t FD(uint32_t i) { return (uint8_t)((i >> 6) & 0x1F); } +static inline uint8_t BC(uint32_t i) { return (uint8_t)(i & 0x3); } + +// Lower instruction field helpers +static inline uint8_t LIT(uint32_t i) { return (uint8_t)((i >> 16) & 0x1F); } +static inline uint8_t LIS(uint32_t i) { return (uint8_t)((i >> 11) & 0x1F); } +static inline uint8_t LID(uint32_t i) { return (uint8_t)((i >> 6) & 0x1F); } +static inline int16_t IMM11(uint32_t i){ return (int16_t)(int32_t)((int32_t)(i << 21) >> 21); } +static inline int16_t IMM15(uint32_t i){ + uint32_t lo11 = i & 0x7FF; + uint32_t hi4 = (i >> 21) & 0xF; + uint32_t raw = (hi4 << 11) | lo11; + return (int16_t)(int32_t)((int32_t)(raw << 17) >> 17); +} + +VU1Interpreter::VU1Interpreter() +{ + reset(); +} + +void VU1Interpreter::reset() +{ + std::memset(&m_state, 0, sizeof(m_state)); + m_state.vf[0][3] = 1.0f; // VF0.w = 1.0 + m_state.q = 1.0f; +} + +float VU1Interpreter::broadcast(const float *vf, uint8_t bc) +{ + return vf[bc & 3]; +} + +void VU1Interpreter::applyDest(float *dst, const float *result, uint8_t dest) +{ + if (dest & 0x8) dst[0] = result[0]; // x + if (dest & 0x4) dst[1] = result[1]; // y + if (dest & 0x2) dst[2] = result[2]; // z + if (dest & 0x1) dst[3] = result[3]; // w +} + +void VU1Interpreter::applyDestAcc(const float *result, uint8_t dest) +{ + applyDest(m_state.acc, result, dest); +} + +void VU1Interpreter::execute(uint8_t *vuCode, uint32_t codeSize, + uint8_t *vuData, uint32_t dataSize, + GS &gs, PS2Memory *memory, + uint32_t startPC, uint32_t itop, + uint32_t maxCycles) +{ + m_state.pc = startPC; + m_state.ebit = false; + m_state.itop = itop; + m_state.vf[0][0] = 0.0f; + m_state.vf[0][1] = 0.0f; + m_state.vf[0][2] = 0.0f; + m_state.vf[0][3] = 1.0f; + run(vuCode, codeSize, vuData, dataSize, gs, memory, maxCycles); +} + +void VU1Interpreter::resume(uint8_t *vuCode, uint32_t codeSize, + uint8_t *vuData, uint32_t dataSize, + GS &gs, PS2Memory *memory, + uint32_t itop, uint32_t maxCycles) +{ + m_state.ebit = false; + m_state.itop = itop; + run(vuCode, codeSize, vuData, dataSize, gs, memory, maxCycles); +} + +void VU1Interpreter::run(uint8_t *vuCode, uint32_t codeSize, + uint8_t *vuData, uint32_t dataSize, + GS &gs, PS2Memory *memory, uint32_t maxCycles) +{ + for (uint32_t cycle = 0; cycle < maxCycles; ++cycle) + { + if (m_state.pc + 8 > codeSize) + break; + + uint32_t lower, upper; + std::memcpy(&lower, vuCode + m_state.pc, 4); + std::memcpy(&upper, vuCode + m_state.pc + 4, 4); + + bool eBit = (upper >> 30) & 1; + bool mBit = (upper >> 31) & 1; + (void)mBit; + + // LOI: if bit 31 of lower is set, the upper word is an immediate float loaded into I + bool loi = (lower >> 31) & 1; + if (loi) + { + std::memcpy(&m_state.i, &upper, 4); + } + else + { + execUpper(upper); + } + execLower(lower & 0x7FFFFFFF, vuData, dataSize, gs, memory, upper); + + // Enforce VF0 invariant + m_state.vf[0][0] = 0.0f; + m_state.vf[0][1] = 0.0f; + m_state.vf[0][2] = 0.0f; + m_state.vf[0][3] = 1.0f; + // Enforce VI0 invariant + m_state.vi[0] = 0; + + uint32_t nextPC = m_state.pc + 8; + if (nextPC >= codeSize) nextPC = 0; + m_state.pc = nextPC; + + if (m_state.ebit) + break; + + if (eBit) + m_state.ebit = true; + } +} + +// ============================================================================ +// Upper instructions (FMAC pipeline) +// ============================================================================ +void VU1Interpreter::execUpper(uint32_t instr) +{ + uint8_t dest = DEST(instr); + uint8_t ft = FT(instr); + uint8_t fs = FS(instr); + uint8_t fd = FD(instr); + uint8_t op = instr & 0x3F; + + float *vd = m_state.vf[fd]; + const float *vs = m_state.vf[fs]; + const float *vt = m_state.vf[ft]; + float result[4]; + + // Upper opcode decoding (bits 5:0 of upper word) + switch (op) + { + case 0x00: case 0x01: case 0x02: case 0x03: // ADDbc + { + float bc = broadcast(vt, op & 3); + for (int c = 0; c < 4; c++) result[c] = vs[c] + bc; + applyDest(vd, result, dest); + return; + } + case 0x04: case 0x05: case 0x06: case 0x07: // SUBbc + { + float bc = broadcast(vt, op & 3); + for (int c = 0; c < 4; c++) result[c] = vs[c] - bc; + applyDest(vd, result, dest); + return; + } + case 0x08: case 0x09: case 0x0A: case 0x0B: // MADDbc + { + float bc = broadcast(vt, op & 3); + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] + vs[c] * bc; + applyDest(vd, result, dest); + return; + } + case 0x0C: case 0x0D: case 0x0E: case 0x0F: // MSUBbc + { + float bc = broadcast(vt, op & 3); + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] - vs[c] * bc; + applyDest(vd, result, dest); + return; + } + case 0x10: case 0x11: case 0x12: case 0x13: // MAXbc + { + float bc = broadcast(vt, op & 3); + for (int c = 0; c < 4; c++) result[c] = (vs[c] > bc) ? vs[c] : bc; + applyDest(vd, result, dest); + return; + } + case 0x14: case 0x15: case 0x16: case 0x17: // MINIbc + { + float bc = broadcast(vt, op & 3); + for (int c = 0; c < 4; c++) result[c] = (vs[c] < bc) ? vs[c] : bc; + applyDest(vd, result, dest); + return; + } + case 0x18: case 0x19: case 0x1A: case 0x1B: // MULbc + { + float bc = broadcast(vt, op & 3); + for (int c = 0; c < 4; c++) result[c] = vs[c] * bc; + applyDest(vd, result, dest); + return; + } + case 0x1C: // MULq + for (int c = 0; c < 4; c++) result[c] = vs[c] * m_state.q; + applyDest(vd, result, dest); + return; + case 0x1D: // MAXi + for (int c = 0; c < 4; c++) result[c] = (vs[c] > m_state.i) ? vs[c] : m_state.i; + applyDest(vd, result, dest); + return; + case 0x1E: // MULi + for (int c = 0; c < 4; c++) result[c] = vs[c] * m_state.i; + applyDest(vd, result, dest); + return; + case 0x1F: // MINIi + for (int c = 0; c < 4; c++) result[c] = (vs[c] < m_state.i) ? vs[c] : m_state.i; + applyDest(vd, result, dest); + return; + case 0x20: // ADDq + for (int c = 0; c < 4; c++) result[c] = vs[c] + m_state.q; + applyDest(vd, result, dest); + return; + case 0x21: // MADDq + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] + vs[c] * m_state.q; + applyDest(vd, result, dest); + return; + case 0x22: // ADDi + for (int c = 0; c < 4; c++) result[c] = vs[c] + m_state.i; + applyDest(vd, result, dest); + return; + case 0x23: // MADDi + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] + vs[c] * m_state.i; + applyDest(vd, result, dest); + return; + case 0x24: // SUBq + for (int c = 0; c < 4; c++) result[c] = vs[c] - m_state.q; + applyDest(vd, result, dest); + return; + case 0x25: // MSUBq + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] - vs[c] * m_state.q; + applyDest(vd, result, dest); + return; + case 0x26: // SUBi + for (int c = 0; c < 4; c++) result[c] = vs[c] - m_state.i; + applyDest(vd, result, dest); + return; + case 0x27: // MSUBi + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] - vs[c] * m_state.i; + applyDest(vd, result, dest); + return; + case 0x28: // ADD + for (int c = 0; c < 4; c++) result[c] = vs[c] + vt[c]; + applyDest(vd, result, dest); + return; + case 0x29: // MADD + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] + vs[c] * vt[c]; + applyDest(vd, result, dest); + return; + case 0x2A: // MUL + for (int c = 0; c < 4; c++) result[c] = vs[c] * vt[c]; + applyDest(vd, result, dest); + return; + case 0x2B: // MAX + for (int c = 0; c < 4; c++) result[c] = (vs[c] > vt[c]) ? vs[c] : vt[c]; + applyDest(vd, result, dest); + return; + case 0x2C: // SUB + for (int c = 0; c < 4; c++) result[c] = vs[c] - vt[c]; + applyDest(vd, result, dest); + return; + case 0x2D: // MSUB + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] - vs[c] * vt[c]; + applyDest(vd, result, dest); + return; + case 0x2E: // OPMSUB + result[0] = m_state.acc[0] - vs[1] * vt[2]; + result[1] = m_state.acc[1] - vs[2] * vt[0]; + result[2] = m_state.acc[2] - vs[0] * vt[1]; + result[3] = 0.0f; + applyDest(vd, result, dest); + return; + case 0x2F: // MINI + for (int c = 0; c < 4; c++) result[c] = (vs[c] < vt[c]) ? vs[c] : vt[c]; + applyDest(vd, result, dest); + return; + + // Special1 group (0x3C..0x3F with secondary field) + case 0x3C: case 0x3D: case 0x3E: case 0x3F: + { + uint8_t special = (instr >> 6) & 0x1F; + uint8_t sop = (instr & 0x3) | ((instr >> 4) & 0x3C); + (void)sop; + + switch (instr & 0x3F) + { + case 0x3C: // Special1 (ADDAx..ADDAw, SUBAx..SUBAw, MADDAx..MADDAw, MSUBAx..MSUBAw, etc.) + { + uint8_t funct = (instr >> 6) & 0x1F; + uint8_t bc2 = (instr >> 0) & 0x3; + (void)bc2; + switch (funct) + { + case 0x00: case 0x01: case 0x02: case 0x03: // ADDAbc + { + float bc = broadcast(vt, funct & 3); + for (int c = 0; c < 4; c++) result[c] = vs[c] + bc; + applyDestAcc(result, dest); + return; + } + case 0x04: case 0x05: case 0x06: case 0x07: // SUBAbc + { + float bc = broadcast(vt, funct & 3); + for (int c = 0; c < 4; c++) result[c] = vs[c] - bc; + applyDestAcc(result, dest); + return; + } + case 0x08: case 0x09: case 0x0A: case 0x0B: // MADDAbc + { + float bc = broadcast(vt, funct & 3); + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] + vs[c] * bc; + applyDestAcc(result, dest); + return; + } + case 0x0C: case 0x0D: case 0x0E: case 0x0F: // MSUBAbc + { + float bc = broadcast(vt, funct & 3); + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] - vs[c] * bc; + applyDestAcc(result, dest); + return; + } + case 0x10: // ITOF0 + for (int c = 0; c < 4; c++) { int32_t iv; std::memcpy(&iv, &vs[c], 4); result[c] = (float)iv; } + applyDest(vd, result, dest); + return; + case 0x11: // ITOF4 + for (int c = 0; c < 4; c++) { int32_t iv; std::memcpy(&iv, &vs[c], 4); result[c] = (float)iv / 16.0f; } + applyDest(vd, result, dest); + return; + case 0x12: // ITOF12 + for (int c = 0; c < 4; c++) { int32_t iv; std::memcpy(&iv, &vs[c], 4); result[c] = (float)iv / 4096.0f; } + applyDest(vd, result, dest); + return; + case 0x13: // ITOF15 + for (int c = 0; c < 4; c++) { int32_t iv; std::memcpy(&iv, &vs[c], 4); result[c] = (float)iv / 32768.0f; } + applyDest(vd, result, dest); + return; + case 0x14: // FTOI0 + for (int c = 0; c < 4; c++) { int32_t iv = (int32_t)vs[c]; std::memcpy(&result[c], &iv, 4); } + applyDest(vd, result, dest); + return; + case 0x15: // FTOI4 + for (int c = 0; c < 4; c++) { int32_t iv = (int32_t)(vs[c] * 16.0f); std::memcpy(&result[c], &iv, 4); } + applyDest(vd, result, dest); + return; + case 0x16: // FTOI12 + for (int c = 0; c < 4; c++) { int32_t iv = (int32_t)(vs[c] * 4096.0f); std::memcpy(&result[c], &iv, 4); } + applyDest(vd, result, dest); + return; + case 0x17: // FTOI15 + for (int c = 0; c < 4; c++) { int32_t iv = (int32_t)(vs[c] * 32768.0f); std::memcpy(&result[c], &iv, 4); } + applyDest(vd, result, dest); + return; + case 0x18: case 0x19: case 0x1A: case 0x1B: // MULAbc + { + float bc = broadcast(vt, funct & 3); + for (int c = 0; c < 4; c++) result[c] = vs[c] * bc; + applyDestAcc(result, dest); + return; + } + case 0x1C: // MULAq + for (int c = 0; c < 4; c++) result[c] = vs[c] * m_state.q; + applyDestAcc(result, dest); + return; + case 0x1D: // ABS + for (int c = 0; c < 4; c++) result[c] = std::fabs(vs[c]); + applyDest(vd, result, dest); + return; + case 0x1E: // MULAi + for (int c = 0; c < 4; c++) result[c] = vs[c] * m_state.i; + applyDestAcc(result, dest); + return; + case 0x1F: // CLIP + { + float w = std::fabs(vt[3]); + uint32_t flags = 0; + if (vs[0] > +w) flags |= 0x01; + if (vs[0] < -w) flags |= 0x02; + if (vs[1] > +w) flags |= 0x04; + if (vs[1] < -w) flags |= 0x08; + if (vs[2] > +w) flags |= 0x10; + if (vs[2] < -w) flags |= 0x20; + m_state.clip = (m_state.clip << 6) | flags; + return; + } + default: + return; + } + } + case 0x3D: // Special2 (ADDAq, MADDAq, ADDAi, MADDAi, ADDA, MADDA, MULA, OPMULA, ...) + { + uint8_t funct = (instr >> 6) & 0x1F; + switch (funct) + { + case 0x00: // ADDAq + for (int c = 0; c < 4; c++) result[c] = vs[c] + m_state.q; + applyDestAcc(result, dest); + return; + case 0x01: // MADDAq + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] + vs[c] * m_state.q; + applyDestAcc(result, dest); + return; + case 0x02: // ADDAi + for (int c = 0; c < 4; c++) result[c] = vs[c] + m_state.i; + applyDestAcc(result, dest); + return; + case 0x03: // MADDAi + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] + vs[c] * m_state.i; + applyDestAcc(result, dest); + return; + case 0x04: // SUBAq + for (int c = 0; c < 4; c++) result[c] = vs[c] - m_state.q; + applyDestAcc(result, dest); + return; + case 0x05: // MSUBAq + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] - vs[c] * m_state.q; + applyDestAcc(result, dest); + return; + case 0x06: // SUBAi + for (int c = 0; c < 4; c++) result[c] = vs[c] - m_state.i; + applyDestAcc(result, dest); + return; + case 0x07: // MSUBAi + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] - vs[c] * m_state.i; + applyDestAcc(result, dest); + return; + case 0x08: // ADDA + for (int c = 0; c < 4; c++) result[c] = vs[c] + vt[c]; + applyDestAcc(result, dest); + return; + case 0x09: // MADDA + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] + vs[c] * vt[c]; + applyDestAcc(result, dest); + return; + case 0x0A: // MULA + for (int c = 0; c < 4; c++) result[c] = vs[c] * vt[c]; + applyDestAcc(result, dest); + return; + case 0x0C: // SUBA + for (int c = 0; c < 4; c++) result[c] = vs[c] - vt[c]; + applyDestAcc(result, dest); + return; + case 0x0D: // MSUBA + for (int c = 0; c < 4; c++) result[c] = m_state.acc[c] - vs[c] * vt[c]; + applyDestAcc(result, dest); + return; + case 0x0E: // OPMULA + result[0] = vs[1] * vt[2]; + result[1] = vs[2] * vt[0]; + result[2] = vs[0] * vt[1]; + result[3] = 0.0f; + applyDestAcc(result, dest); + return; + case 0x0F: // NOP + return; + default: + return; + } + } + case 0x3E: // Special (more upper ops, rarely used) + return; + case 0x3F: // Special (upper NOP typically) + return; + } + return; + } + + case 0x30: case 0x31: case 0x32: case 0x33: // iadd-like upper? No, these are valid upper ops + default: + // NOP / unimplemented upper + return; + } +} + +// ============================================================================ +// Lower instructions +// ============================================================================ +void VU1Interpreter::execLower(uint32_t instr, uint8_t *vuData, uint32_t dataSize, GS &gs, PS2Memory *memory, uint32_t upperInstr) +{ + (void)upperInstr; + if (instr == 0x00000000 || instr == 0x8000033C) // NOP + return; + + uint8_t opHi = (instr >> 25) & 0x7F; + + // The lower instruction encoding uses bits 31:25 for the primary opcode + switch (opHi) + { + case 0x00: // LQ (Load Quadword from VU data memory) + { + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + uint8_t dest = (instr >> 21) & 0xF; + int16_t imm = IMM11(instr); + uint32_t addr = ((uint32_t)(int32_t)(m_state.vi[is] + imm)) * 16u; + addr &= (dataSize - 1); + if (addr + 16 <= dataSize) + { + float tmp[4]; + std::memcpy(tmp, vuData + addr, 16); + applyDest(m_state.vf[it], tmp, dest); + } + return; + } + case 0x01: // SQ (Store Quadword to VU data memory) + { + uint8_t is = LIS(instr); + uint8_t it = LIT(instr); + uint8_t dest = (instr >> 21) & 0xF; + int16_t imm = IMM11(instr); + uint32_t addr = ((uint32_t)(int32_t)(m_state.vi[it] + imm)) * 16u; + addr &= (dataSize - 1); + if (addr + 16 <= dataSize) + { + float tmp[4]; + std::memcpy(tmp, vuData + addr, 16); + if (dest & 0x8) tmp[0] = m_state.vf[is][0]; + if (dest & 0x4) tmp[1] = m_state.vf[is][1]; + if (dest & 0x2) tmp[2] = m_state.vf[is][2]; + if (dest & 0x1) tmp[3] = m_state.vf[is][3]; + std::memcpy(vuData + addr, tmp, 16); + } + return; + } + case 0x04: // ILW (Integer Load Word from VU data memory) + { + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + uint8_t dest = (instr >> 21) & 0xF; + int16_t imm = IMM11(instr); + uint32_t addr = ((uint32_t)(int32_t)(m_state.vi[is] + imm)) * 16u; + addr &= (dataSize - 1); + if (addr + 16 <= dataSize) + { + int comp = 0; + if (dest & 0x8) comp = 0; + else if (dest & 0x4) comp = 1; + else if (dest & 0x2) comp = 2; + else comp = 3; + uint32_t v; + std::memcpy(&v, vuData + addr + comp * 4, 4); + if (it != 0) m_state.vi[it] = (int32_t)(int16_t)(v & 0xFFFF); + } + return; + } + case 0x05: // ISW (Integer Store Word to VU data memory) + { + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + uint8_t dest = (instr >> 21) & 0xF; + int16_t imm = IMM11(instr); + uint32_t addr = ((uint32_t)(int32_t)(m_state.vi[is] + imm)) * 16u; + addr &= (dataSize - 1); + if (addr + 16 <= dataSize) + { + uint32_t val = (uint32_t)(uint16_t)(m_state.vi[it] & 0xFFFF); + if (dest & 0x8) std::memcpy(vuData + addr + 0, &val, 4); + if (dest & 0x4) std::memcpy(vuData + addr + 4, &val, 4); + if (dest & 0x2) std::memcpy(vuData + addr + 8, &val, 4); + if (dest & 0x1) std::memcpy(vuData + addr + 12, &val, 4); + } + return; + } + case 0x08: // IADDIU + { + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + int16_t imm = (int16_t)(instr & 0x7FF) | ((instr >> 10) & 0x7800); + if (it != 0) + m_state.vi[it] = (int16_t)(m_state.vi[is] + imm); + return; + } + case 0x09: // ISUBIU + { + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + int16_t imm = (int16_t)(instr & 0x7FF) | ((instr >> 10) & 0x7800); + if (it != 0) + m_state.vi[it] = (int16_t)(m_state.vi[is] - imm); + return; + } + case 0x10: // FCEQ + { + uint32_t imm24 = instr & 0xFFFFFF; + if (1 != 0) m_state.vi[1] = ((m_state.clip & 0xFFFFFF) == imm24) ? 1 : 0; + return; + } + case 0x11: // FCSET + { + m_state.clip = instr & 0xFFFFFF; + return; + } + case 0x12: // FCAND + { + uint32_t imm24 = instr & 0xFFFFFF; + if (1 != 0) m_state.vi[1] = ((m_state.clip & imm24) != 0) ? 1 : 0; + return; + } + case 0x13: // FCOR + { + uint32_t imm24 = instr & 0xFFFFFF; + if (1 != 0) m_state.vi[1] = ((m_state.clip | imm24) == 0xFFFFFF) ? 1 : 0; + return; + } + case 0x14: // FSEQ + { + uint16_t imm12 = instr & 0xFFF; + if (1 != 0) m_state.vi[1] = ((m_state.status & 0xFFF) == imm12) ? 1 : 0; + return; + } + case 0x15: // FSSET + { + m_state.status = (instr >> 6) & 0xFC0; + return; + } + case 0x16: // FSAND + { + uint16_t imm12 = instr & 0xFFF; + if (1 != 0) m_state.vi[1] = (int32_t)(m_state.status & imm12); + return; + } + case 0x17: // FSOR + { + uint16_t imm12 = instr & 0xFFF; + if (1 != 0) m_state.vi[1] = ((m_state.status | imm12) == 0xFFF) ? 1 : 0; + return; + } + case 0x18: // FMAND + { + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + if (it != 0) m_state.vi[it] = (int32_t)(m_state.mac & (uint32_t)(uint16_t)m_state.vi[is]); + return; + } + case 0x1A: // FMEQ + { + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + if (it != 0) m_state.vi[it] = ((m_state.mac & 0xFFFF) == (uint32_t)(uint16_t)m_state.vi[is]) ? 1 : 0; + return; + } + case 0x1C: // FMOR + { + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + if (it != 0) m_state.vi[it] = (int32_t)(m_state.mac | (uint32_t)(uint16_t)m_state.vi[is]); + return; + } + case 0x20: // B (unconditional branch) + { + int16_t imm = IMM11(instr); + uint32_t target = (m_state.pc + 8 + imm * 8) & 0x3FFF; + // Simplified branch delay: set PC so next iteration lands on target + m_state.pc = target - 8; + return; + } + case 0x21: // BAL (Branch and link) + { + uint8_t it = LIT(instr); + int16_t imm = IMM11(instr); + uint32_t target = (m_state.pc + 8 + imm * 8) & 0x3FFF; + if (it != 0) m_state.vi[it] = (int32_t)((m_state.pc + 16) / 8); + m_state.pc = target - 8; + return; + } + case 0x24: // JR + { + uint8_t is = LIS(instr); + uint32_t target = ((uint32_t)(uint16_t)m_state.vi[is] * 8u) & 0x3FFF; + m_state.pc = target - 8; + return; + } + case 0x25: // JALR + { + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + uint32_t target = ((uint32_t)(uint16_t)m_state.vi[is] * 8u) & 0x3FFF; + if (it != 0) m_state.vi[it] = (int32_t)((m_state.pc + 16) / 8); + m_state.pc = target - 8; + return; + } + case 0x28: // IBEQ + { + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + int16_t imm = IMM11(instr); + if ((int16_t)m_state.vi[is] == (int16_t)m_state.vi[it]) + { + uint32_t target = (m_state.pc + 8 + imm * 8) & 0x3FFF; + m_state.pc = target - 8; + } + return; + } + case 0x29: // IBNE + { + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + int16_t imm = IMM11(instr); + if ((int16_t)m_state.vi[is] != (int16_t)m_state.vi[it]) + { + uint32_t target = (m_state.pc + 8 + imm * 8) & 0x3FFF; + m_state.pc = target - 8; + } + return; + } + case 0x2C: // IBLTZ + { + uint8_t is = LIS(instr); + int16_t imm = IMM11(instr); + if ((int16_t)m_state.vi[is] < 0) + { + uint32_t target = (m_state.pc + 8 + imm * 8) & 0x3FFF; + m_state.pc = target - 8; + } + return; + } + case 0x2D: // IBGTZ + { + uint8_t is = LIS(instr); + int16_t imm = IMM11(instr); + if ((int16_t)m_state.vi[is] > 0) + { + uint32_t target = (m_state.pc + 8 + imm * 8) & 0x3FFF; + m_state.pc = target - 8; + } + return; + } + case 0x2E: // IBLEZ + { + uint8_t is = LIS(instr); + int16_t imm = IMM11(instr); + if ((int16_t)m_state.vi[is] <= 0) + { + uint32_t target = (m_state.pc + 8 + imm * 8) & 0x3FFF; + m_state.pc = target - 8; + } + return; + } + case 0x2F: // IBGEZ + { + uint8_t is = LIS(instr); + int16_t imm = IMM11(instr); + if ((int16_t)m_state.vi[is] >= 0) + { + uint32_t target = (m_state.pc + 8 + imm * 8) & 0x3FFF; + m_state.pc = target - 8; + } + return; + } + + case 0x40: // Lower special (opcode in bits 5:0) + { + uint8_t funct = instr & 0x3F; + uint8_t it = LIT(instr); + uint8_t is = LIS(instr); + uint8_t id = LID(instr); + uint8_t dest = (instr >> 21) & 0xF; + + switch (funct) + { + case 0x30: // IADD + if (id != 0) + m_state.vi[id] = (int16_t)(m_state.vi[is] + m_state.vi[it]); + return; + case 0x31: // ISUB + if (id != 0) + m_state.vi[id] = (int16_t)(m_state.vi[is] - m_state.vi[it]); + return; + case 0x32: // IADDI + { + int16_t imm5 = (int16_t)((int32_t)((instr >> 6) & 0x1F) << 27 >> 27); + if (it != 0) + m_state.vi[it] = (int16_t)(m_state.vi[is] + imm5); + return; + } + case 0x34: // IAND + if (id != 0) + m_state.vi[id] = m_state.vi[is] & m_state.vi[it]; + return; + case 0x35: // IOR + if (id != 0) + m_state.vi[id] = m_state.vi[is] | m_state.vi[it]; + return; + + case 0x3C: // Lower special2 + { + uint8_t funct2 = (instr >> 6) & 0x1F; + switch (funct2) + { + case 0x00: // MOVE + { + float tmp[4]; + std::memcpy(tmp, m_state.vf[is], 16); + applyDest(m_state.vf[it], tmp, dest); + return; + } + case 0x01: // MR32 (rotate right by 32 bits = shift xyzw -> yzwx) + { + float tmp[4] = { m_state.vf[is][1], m_state.vf[is][2], m_state.vf[is][3], m_state.vf[is][0] }; + applyDest(m_state.vf[it], tmp, dest); + return; + } + case 0x03: // MFIR (Move From Integer Register) + { + float result[4]; + int32_t val = (int32_t)(int16_t)(m_state.vi[is] & 0xFFFF); + std::memcpy(&result[0], &val, 4); + result[1] = result[0]; result[2] = result[0]; result[3] = result[0]; + applyDest(m_state.vf[it], result, dest); + return; + } + case 0x04: // MTIR (Move To Integer Register) + { + int comp = 0; + if (dest & 0x8) comp = 0; + else if (dest & 0x4) comp = 1; + else if (dest & 0x2) comp = 2; + else comp = 3; + uint32_t fval; + std::memcpy(&fval, &m_state.vf[is][comp], 4); + if (it != 0) m_state.vi[it] = (int32_t)(int16_t)(fval & 0xFFFF); + return; + } + case 0x05: // RNEXT + return; + case 0x06: // RGET + return; + case 0x07: // RINIT + return; + case 0x10: // LQI (Load Quadword, post-increment) + { + uint32_t addr = ((uint32_t)(uint16_t)m_state.vi[is]) * 16u; + addr &= (dataSize - 1); + if (addr + 16 <= dataSize) + { + float tmp[4]; + std::memcpy(tmp, vuData + addr, 16); + applyDest(m_state.vf[it], tmp, dest); + } + if (is != 0) m_state.vi[is] = (int16_t)(m_state.vi[is] + 1); + return; + } + case 0x11: // SQI (Store Quadword, post-increment) + { + uint32_t addr = ((uint32_t)(uint16_t)m_state.vi[it]) * 16u; + addr &= (dataSize - 1); + if (addr + 16 <= dataSize) + { + float tmp[4]; + std::memcpy(tmp, vuData + addr, 16); + if (dest & 0x8) tmp[0] = m_state.vf[is][0]; + if (dest & 0x4) tmp[1] = m_state.vf[is][1]; + if (dest & 0x2) tmp[2] = m_state.vf[is][2]; + if (dest & 0x1) tmp[3] = m_state.vf[is][3]; + std::memcpy(vuData + addr, tmp, 16); + } + if (it != 0) m_state.vi[it] = (int16_t)(m_state.vi[it] + 1); + return; + } + case 0x12: // LQD (Load Quadword, pre-decrement) + { + if (is != 0) m_state.vi[is] = (int16_t)(m_state.vi[is] - 1); + uint32_t addr = ((uint32_t)(uint16_t)m_state.vi[is]) * 16u; + addr &= (dataSize - 1); + if (addr + 16 <= dataSize) + { + float tmp[4]; + std::memcpy(tmp, vuData + addr, 16); + applyDest(m_state.vf[it], tmp, dest); + } + return; + } + case 0x13: // SQD (Store Quadword, pre-decrement) + { + if (it != 0) m_state.vi[it] = (int16_t)(m_state.vi[it] - 1); + uint32_t addr = ((uint32_t)(uint16_t)m_state.vi[it]) * 16u; + addr &= (dataSize - 1); + if (addr + 16 <= dataSize) + { + float tmp[4]; + std::memcpy(tmp, vuData + addr, 16); + if (dest & 0x8) tmp[0] = m_state.vf[is][0]; + if (dest & 0x4) tmp[1] = m_state.vf[is][1]; + if (dest & 0x2) tmp[2] = m_state.vf[is][2]; + if (dest & 0x1) tmp[3] = m_state.vf[is][3]; + std::memcpy(vuData + addr, tmp, 16); + } + return; + } + case 0x14: // DIV + { + int fsf = (instr >> 21) & 0x3; + int ftf = (instr >> 23) & 0x3; + float num = m_state.vf[is][fsf]; + float den = m_state.vf[it][ftf]; + if (den != 0.0f) + m_state.q = num / den; + else + m_state.q = (num >= 0.0f) ? std::numeric_limits::max() : -std::numeric_limits::max(); + return; + } + case 0x15: // SQRT + { + int ftf = (instr >> 23) & 0x3; + float val = m_state.vf[it][ftf]; + m_state.q = std::sqrt(std::fabs(val)); + return; + } + case 0x16: // RSQRT + { + int fsf = (instr >> 21) & 0x3; + int ftf = (instr >> 23) & 0x3; + float num = m_state.vf[is][fsf]; + float den = std::sqrt(std::fabs(m_state.vf[it][ftf])); + if (den != 0.0f) + m_state.q = num / den; + else + m_state.q = std::numeric_limits::max(); + return; + } + case 0x17: // WAITQ + return; + case 0x18: // ESADD + return; + case 0x19: // ERSADD + return; + case 0x1B: // ELENG + { + float s = m_state.vf[is][0]*m_state.vf[is][0] + m_state.vf[is][1]*m_state.vf[is][1] + m_state.vf[is][2]*m_state.vf[is][2]; + m_state.p = std::sqrt(s); + return; + } + case 0x1C: // ERCPR + { + int fsf = (instr >> 21) & 0x3; + float val = m_state.vf[is][fsf]; + m_state.p = (val != 0.0f) ? (1.0f / val) : std::numeric_limits::max(); + return; + } + case 0x1D: // ERLENG + { + float s = m_state.vf[is][0]*m_state.vf[is][0] + m_state.vf[is][1]*m_state.vf[is][1] + m_state.vf[is][2]*m_state.vf[is][2]; + float len = std::sqrt(s); + m_state.p = (len != 0.0f) ? (1.0f / len) : std::numeric_limits::max(); + return; + } + case 0x1E: // WAITP + return; + case 0x1A: // EATAN / EATANxy / EATANxz + return; + case 0x1F: // MFP (Move From P register) + { + float result[4] = { m_state.p, m_state.p, m_state.p, m_state.p }; + applyDest(m_state.vf[it], result, dest); + return; + } + default: + return; + } + } + case 0x3D: // XGKICK — send GIF packet from VU1 data memory + { + uint32_t addr = ((uint32_t)(uint16_t)m_state.vi[is]) * 16u; + addr &= (dataSize - 1); + // Walk the GIF packet to find its total size + uint32_t pktOff = addr; + uint32_t totalBytes = 0; + bool done = false; + for (int safety = 0; safety < 256 && !done; ++safety) + { + if (pktOff + 16 > dataSize) break; + uint64_t tagLo; + std::memcpy(&tagLo, vuData + pktOff, 8); + uint32_t nloop = (uint32_t)(tagLo & 0x7FFF); + uint8_t flg = (uint8_t)((tagLo >> 58) & 0x3); + uint32_t nreg = (uint32_t)((tagLo >> 60) & 0xF); + if (nreg == 0) nreg = 16; + bool eop = (tagLo >> 15) & 1; + + uint32_t pktSize = 16; // GIF tag + if (flg == 0) // PACKED + pktSize += nloop * nreg * 16; + else if (flg == 1) // REGLIST + { + uint32_t regs = nloop * nreg; + pktSize += regs * 8; + if (regs & 1) pktSize += 8; // pad to 128-bit + } + else if (flg == 2) // IMAGE + pktSize += nloop * 16; + + pktOff += pktSize; + totalBytes += pktSize; + if (eop) done = true; + } + if (totalBytes > 0 && addr + totalBytes <= dataSize) + { + if (memory) + memory->submitGifPacket(GifPathId::Path1, vuData + addr, totalBytes); + else + gs.processGIFPacket(vuData + addr, totalBytes); + } + return; + } + case 0x3E: // XTOP + { + if (it != 0) m_state.vi[it] = (int32_t)m_state.itop; + return; + } + case 0x3F: // XITOP + { + if (it != 0) m_state.vi[it] = (int32_t)m_state.itop; + return; + } + default: + return; + } + } + default: + break; + } +} diff --git a/ps2xRuntime/src/lib/stubs/helpers/ps2_stubs_helpers.inl b/ps2xRuntime/src/lib/stubs/helpers/ps2_stubs_helpers.inl index bc689ad9..642cc9f2 100644 --- a/ps2xRuntime/src/lib/stubs/helpers/ps2_stubs_helpers.inl +++ b/ps2xRuntime/src/lib/stubs/helpers/ps2_stubs_helpers.inl @@ -1279,6 +1279,15 @@ namespace uint32_t toDmaPhys(uint32_t addr) { + if ((addr & 0x80000000u) != 0) + { + uint32_t lower = addr & 0x7FFFFFFFu; + if (lower >= PS2_SCRATCHPAD_BASE && + lower < PS2_SCRATCHPAD_BASE + PS2_SCRATCHPAD_SIZE) + { + return lower; + } + } return addr & 0x1FFFFFFFu; } @@ -1395,28 +1404,7 @@ namespace } else { - const ParsedDmaTag tag = tryParseDmaTag(rdram, payloadPhys); - if (tag.valid && tag.qwc != 0) - { - qwc = tag.qwc; - switch (tag.id) - { - case 0: // REFE - case 3: // REF - case 4: // REFS - madr = toDmaPhys(tag.addr); - break; - default: - // CNT/NEXT/CALL/RET-style tags carry payload inline after the tag. - madr = toDmaPhys(payloadPhys + 0x10u); - break; - } - } - else - { - // Fall back to chain mode so the runtime DMA path can walk TADR. - chcr = 0x00000185u; // MODE=1 chain, DIR=1, TIE=1, STR=1. - } + chcr = 0x00000185u; // MODE=1 chain, DIR=1, TIE=1, STR=1. } PS2Memory &mem = runtime->memory(); @@ -1497,8 +1485,11 @@ namespace struct GsDispEnvMem { - uint64_t display; + uint64_t pmode; + uint64_t smode2; uint64_t dispfb; + uint64_t display; + uint64_t bgcolor; }; struct GsImageMem @@ -1704,7 +1695,10 @@ namespace uint8_t *ptr = getMemPtr(rdram, addr); if (!ptr) return false; - GsDispEnvMem env{display, dispfb}; + GsDispEnvMem env{}; + std::memcpy(&env, ptr, sizeof(env)); + env.dispfb = dispfb; + env.display = display; std::memcpy(ptr, &env, sizeof(env)); return true; } diff --git a/ps2xRuntime/src/lib/stubs/ps2_stubs_gs.inl b/ps2xRuntime/src/lib/stubs/ps2_stubs_gs.inl index 9b39d67d..e2466faa 100644 --- a/ps2xRuntime/src/lib/stubs/ps2_stubs_gs.inl +++ b/ps2xRuntime/src/lib/stubs/ps2_stubs_gs.inl @@ -18,54 +18,55 @@ void sceGsExecLoadImage(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) } uint32_t fbw = img.vram_width ? img.vram_width : std::max(1, (img.width + 63) / 64); - uint32_t base = static_cast(img.vram_addr) * 2048u; - uint32_t stride = bytesForPixels(img.psm, fbw * 64u); - if (stride == 0) + const uint32_t totalImageBytes = rowBytes * static_cast(img.height); + const uint32_t headerQwc = 12u; + const uint32_t imageQwc = (totalImageBytes + 15u) / 16u; + const uint32_t totalQwc = headerQwc + imageQwc; + + uint32_t pktAddr = runtime->guestMalloc(totalQwc * 16u, 16u); + if (pktAddr == 0) { setReturnS32(ctx, -1); return; } - uint8_t *gsvram = runtime->memory().getGSVRAM(); - uint8_t *src = getMemPtr(rdram, srcAddr); - if (!gsvram || !src) + uint8_t *pkt = getMemPtr(rdram, pktAddr); + const uint8_t *src = getConstMemPtr(rdram, srcAddr); + if (!pkt || !src) { setReturnS32(ctx, -1); return; } - static int logCount = 0; - if (logCount < 8) - { - std::cout << "ps2_stub sceGsExecLoadImage: x=" << img.x - << " y=" << img.y - << " w=" << img.width - << " h=" << img.height - << " vram=0x" << std::hex << img.vram_addr - << " fbw=" << std::dec << static_cast(fbw) - << " psm=" << static_cast(img.psm) - << " src=0x" << std::hex << srcAddr << std::dec << std::endl; - ++logCount; - } - - for (uint32_t row = 0; row < img.height; ++row) - { - uint32_t dstOff = base + (static_cast(img.y) + row) * stride + bytesForPixels(img.psm, static_cast(img.x)); - uint32_t srcOff = row * rowBytes; - if (dstOff >= PS2_GS_VRAM_SIZE) - break; - uint32_t copyBytes = rowBytes; - if (dstOff + copyBytes > PS2_GS_VRAM_SIZE) - copyBytes = PS2_GS_VRAM_SIZE - dstOff; - std::memcpy(gsvram + dstOff, src + srcOff, copyBytes); - } - - if (img.width >= 320 && img.height >= 200) - { - auto &gs = runtime->memory().gs(); - gs.dispfb1 = makeDispFb(img.vram_addr, fbw, img.psm, 0, 0); - gs.display1 = makeDisplay(0, 0, 0, 0, img.width - 1, img.height - 1); - } + uint32_t dbp = (static_cast(img.vram_addr) * 2048u) / 256u; + uint32_t dsax = static_cast(img.x); + uint32_t dsay = static_cast(img.y); + + uint64_t *q = reinterpret_cast(pkt); + q[0] = 0x1000000000000004ULL; + q[1] = 0x0E0E0E0E0E0E0E0EULL; + q[2] = (static_cast(img.psm & 0x3Fu) << 24) | (static_cast(1u) << 16) | + (static_cast(dbp & 0x3FFFu) << 32) | (static_cast(fbw & 0x3Fu) << 48) | + (static_cast(img.psm & 0x3Fu) << 56); + q[3] = 0x50ULL; + q[4] = (static_cast(dsay & 0x7FFu) << 48) | (static_cast(dsax & 0x7FFu) << 32); + q[5] = 0x51ULL; + q[6] = (static_cast(img.height) << 32) | static_cast(img.width); + q[7] = 0x52ULL; + q[8] = 0ULL; + q[9] = 0x53ULL; + q[10] = (static_cast(2) << 58) | (static_cast(imageQwc) & 0x7FFF) | + (1ULL << 15); + q[11] = 0ULL; + + std::memcpy(pkt + 12 * 8, src, totalImageBytes); + + constexpr uint32_t GIF_CHANNEL = 0x1000A000; + constexpr uint32_t CHCR_STR_MODE0 = 0x101u; + auto &mem = runtime->memory(); + mem.writeIORegister(GIF_CHANNEL + 0x10u, pktAddr); + mem.writeIORegister(GIF_CHANNEL + 0x20u, totalQwc & 0xFFFFu); + mem.writeIORegister(GIF_CHANNEL + 0x00u, CHCR_STR_MODE0); setReturnS32(ctx, 0); } @@ -90,47 +91,63 @@ void sceGsExecStoreImage(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) } uint32_t fbw = img.vram_width ? img.vram_width : std::max(1, (img.width + 63) / 64); - uint32_t base = static_cast(img.vram_addr) * 2048u; - uint32_t stride = bytesForPixels(img.psm, fbw * 64u); - if (stride == 0) + const uint32_t totalImageBytes = rowBytes * static_cast(img.height); + + uint8_t *dst = getMemPtr(rdram, dstAddr); + if (!dst) { setReturnS32(ctx, -1); return; } - uint8_t *gsvram = runtime->memory().getGSVRAM(); - uint8_t *dst = getMemPtr(rdram, dstAddr); - if (!gsvram || !dst) + uint32_t sbp = (static_cast(img.vram_addr) * 2048u) / 256u; + uint64_t bitbltbuf = (static_cast(sbp & 0x3FFFu) << 0) | + (static_cast(fbw & 0x3Fu) << 16) | + (static_cast(img.psm & 0x3Fu) << 24) | + (static_cast(0u) << 32) | + (static_cast(1u) << 48) | + (static_cast(0u) << 56); + uint64_t trxpos = (static_cast(img.x & 0x7FFu) << 0) | + (static_cast(img.y & 0x7FFu) << 16) | + (static_cast(0u) << 32) | + (static_cast(0u) << 48); + uint64_t trxreg = static_cast(img.height) << 32 | static_cast(img.width); + + uint32_t pktAddr = runtime->guestMalloc(80u, 16u); + if (pktAddr == 0) { setReturnS32(ctx, -1); return; } - static int logCount = 0; - if (logCount < 8) + uint8_t *pkt = getMemPtr(rdram, pktAddr); + if (!pkt) { - std::cout << "ps2_stub sceGsExecStoreImage: x=" << img.x - << " y=" << img.y - << " w=" << img.width - << " h=" << img.height - << " vram=0x" << std::hex << img.vram_addr - << " fbw=" << std::dec << static_cast(fbw) - << " psm=" << static_cast(img.psm) - << " dst=0x" << std::hex << dstAddr << std::dec << std::endl; - ++logCount; + setReturnS32(ctx, -1); + return; } - for (uint32_t row = 0; row < img.height; ++row) - { - uint32_t srcOff = base + (static_cast(img.y) + row) * stride + bytesForPixels(img.psm, static_cast(img.x)); - uint32_t dstOff = row * rowBytes; - if (srcOff >= PS2_GS_VRAM_SIZE) - break; - uint32_t copyBytes = rowBytes; - if (srcOff + copyBytes > PS2_GS_VRAM_SIZE) - copyBytes = PS2_GS_VRAM_SIZE - srcOff; - std::memcpy(dst + dstOff, gsvram + srcOff, copyBytes); - } + uint64_t *q = reinterpret_cast(pkt); + q[0] = 0x1000000000000004ULL; + q[1] = 0x0E0E0E0E0E0E0E0EULL; + q[2] = bitbltbuf; + q[3] = 0x50ULL; + q[4] = trxpos; + q[5] = 0x51ULL; + q[6] = trxreg; + q[7] = 0x52ULL; + q[8] = 1ULL; + q[9] = 0x53ULL; + + constexpr uint32_t GIF_CHANNEL = 0x1000A000; + constexpr uint32_t CHCR_STR_MODE0 = 0x101u; + auto &mem = runtime->memory(); + mem.writeIORegister(GIF_CHANNEL + 0x10u, pktAddr); + mem.writeIORegister(GIF_CHANNEL + 0x20u, 5u); + mem.writeIORegister(GIF_CHANNEL + 0x00u, CHCR_STR_MODE0); + mem.processPendingTransfers(); + + runtime->gs().consumeLocalToHostBytes(dst, totalImageBytes); setReturnS32(ctx, 0); } @@ -144,53 +161,40 @@ void sceGsGetGParam(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) void sceGsPutDispEnv(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { uint32_t envAddr = getRegU32(ctx, 4); - GsDispEnvMem env{}; - if (readGsDispEnv(rdram, envAddr, env)) + uint8_t *ptr = getMemPtr(rdram, envAddr); + if (!ptr) { - auto &gs = runtime->memory().gs(); - gs.display1 = env.display; - gs.dispfb1 = env.dispfb; + setReturnS32(ctx, -1); + return; } + constexpr uint32_t GIF_CHANNEL = 0x1000A000; + constexpr uint32_t QWC = 5; + constexpr uint32_t CHCR_STR_MODE0 = 0x101u; + auto &mem = runtime->memory(); + mem.writeIORegister(GIF_CHANNEL + 0x10u, envAddr); + mem.writeIORegister(GIF_CHANNEL + 0x20u, QWC); + mem.writeIORegister(GIF_CHANNEL + 0x00u, CHCR_STR_MODE0); setReturnS32(ctx, 0); } void sceGsPutDrawEnv(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { uint32_t envAddr = getRegU32(ctx, 4); - uint32_t psm = getRegU32(ctx, 5); - uint32_t w = getRegU32(ctx, 6); - uint32_t h = getRegU32(ctx, 7); - - if (w == 0) - w = 640; - if (h == 0) - h = 448; - - GsDrawEnvMem env{}; - env.offset_x = static_cast(2048 - (w / 2)); - env.offset_y = static_cast(2048 - (h / 2)); - env.clip_x = 0; - env.clip_y = 0; - env.clip_w = static_cast(w); - env.clip_h = static_cast(h); - env.vram_addr = 0; - env.fbw = static_cast((w + 63) / 64); - env.psm = static_cast(psm); - env.vram_x = 0; - env.vram_y = 0; - env.draw_mask = 0; - env.auto_clear = 1; - env.bg_r = 1; - env.bg_g = 1; - env.bg_b = 1; - env.bg_a = 0x80; - env.bg_q = 0.0f; - uint8_t *ptr = getMemPtr(rdram, envAddr); - if (ptr) + if (!ptr) { - std::memcpy(ptr, &env, sizeof(env)); + setReturnS32(ctx, -1); + return; } + + constexpr uint32_t GIF_CHANNEL = 0x1000A000; + constexpr uint32_t QWC = 9; + constexpr uint32_t CHCR_STR_MODE0 = 0x101u; + auto &mem = runtime->memory(); + mem.writeIORegister(GIF_CHANNEL + 0x10u, envAddr); + mem.writeIORegister(GIF_CHANNEL + 0x20u, QWC); + mem.writeIORegister(GIF_CHANNEL + 0x00u, CHCR_STR_MODE0); + setReturnS32(ctx, 0); } @@ -208,11 +212,42 @@ void sceGsResetGraph(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) g_gparam.ffmode = static_cast(ffmode & 0x1); writeGsGParamToScratch(runtime); - auto &gs = runtime->memory().gs(); - gs.pmode = makePmode(1, 0, 0, 0, 0, 0x80); - gs.smode2 = (interlace & 0x1) | ((ffmode & 0x1) << 1); - gs.dispfb1 = makeDispFb(0, 10, 0, 0, 0); - gs.display1 = makeDisplay(0, 0, 0, 0, 639, 447); + uint64_t pmode = makePmode(1, 0, 0, 0, 0, 0x80); + uint64_t smode2 = (interlace & 0x1) | ((ffmode & 0x1) << 1); + uint64_t dispfb = makeDispFb(0, 10, 0, 0, 0); + uint64_t display = makeDisplay(0, 0, 0, 0, 639, 447); + uint64_t bgcolor = 0ULL; + + if (runtime) + { + uint32_t pktAddr = runtime->guestMalloc(192u, 16u); + if (pktAddr != 0u) + { + uint8_t *pkt = getMemPtr(rdram, pktAddr); + if (pkt) + { + uint64_t *q = reinterpret_cast(pkt); + q[0] = 0x1000000000000005ULL; + q[1] = 0x0E0E0E0E0E0E0E0EULL; + q[2] = pmode; + q[3] = 0x41ULL; + q[4] = smode2; + q[5] = 0x42ULL; + q[6] = dispfb; + q[7] = 0x59ULL; + q[8] = display; + q[9] = 0x5aULL; + q[10] = bgcolor; + q[11] = 0x5fULL; + constexpr uint32_t GIF_CHANNEL = 0x1000A000; + constexpr uint32_t CHCR_STR_MODE0 = 0x101u; + auto &mem = runtime->memory(); + mem.writeIORegister(GIF_CHANNEL + 0x10u, pktAddr); + mem.writeIORegister(GIF_CHANNEL + 0x20u, 12u); + mem.writeIORegister(GIF_CHANNEL + 0x00u, CHCR_STR_MODE0); + } + } + } } setReturnS32(ctx, 0); @@ -225,11 +260,9 @@ void sceGsResetPath(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) void sceGsSetDefClear(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - const uint32_t clearAddr = getRegU32(ctx, 4); - if (uint8_t *clear = getMemPtr(rdram, clearAddr)) - { - std::memset(clear, 0, 64); - } + (void)rdram; + (void)ctx; + (void)runtime; setReturnS32(ctx, 0); } @@ -262,45 +295,65 @@ void sceGsSetDefDispEnv(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) void sceGsSetDefDrawEnv(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - const uint32_t envAddr = getRegU32(ctx, 4); - uint32_t psm = getRegU32(ctx, 5); - uint32_t w = getRegU32(ctx, 6); - uint32_t h = getRegU32(ctx, 7); - const uint32_t vramAddr = readStackU32(rdram, ctx, 16); - const uint32_t vramX = readStackU32(rdram, ctx, 20); - const uint32_t vramY = readStackU32(rdram, ctx, 24); + uint32_t envAddr = getRegU32(ctx, 4); + uint32_t param_2 = getRegU32(ctx, 5); + int32_t w = static_cast(static_cast(getRegU32(ctx, 6) & 0xFFFF)); + int32_t h = static_cast(static_cast(getRegU32(ctx, 7) & 0xFFFF)); + uint32_t param_5 = readStackU32(rdram, ctx, 16); + uint32_t param_6 = readStackU32(rdram, ctx, 20); - if (w == 0) + if (w <= 0) w = 640; - if (h == 0) + if (h <= 0) h = 448; - GsDrawEnvMem env{}; - env.offset_x = static_cast(2048 - (w / 2)); - env.offset_y = static_cast(2048 - (h / 2)); - env.clip_x = 0; - env.clip_y = 0; - env.clip_w = static_cast(w); - env.clip_h = static_cast(h); - env.vram_addr = static_cast(vramAddr & 0xFFFFu); - env.fbw = static_cast((w + 63u) / 64u); - env.psm = static_cast(psm & 0xFFu); - env.vram_x = static_cast(vramX & 0xFFFFu); - env.vram_y = static_cast(vramY & 0xFFFFu); - env.draw_mask = 0; - env.auto_clear = 1; - env.bg_r = 0; - env.bg_g = 0; - env.bg_b = 0; - env.bg_a = 0x80; - env.bg_q = 0.0f; - - if (uint8_t *ptr = getMemPtr(rdram, envAddr)) + uint32_t psm = param_2 & 0xFU; + uint32_t fbw = ((static_cast(w) + 63u) >> 6) & 0x3FU; + sceGszbufaddr(rdram, ctx, runtime); + int32_t zbuf = static_cast(static_cast(getRegU32(ctx, 2) & 0xFFFF)); + + uint8_t *const ptr = getMemPtr(rdram, envAddr); + if (!ptr) { - std::memcpy(ptr, &env, sizeof(env)); + setReturnS32(ctx, 8); + return; } - setReturnS32(ctx, 0); + uint64_t *const words = reinterpret_cast(ptr); + + words[0] = 0x1000000000008008ULL; + words[1] = 0x000000000000000EULL; + + words[2] = (static_cast(fbw) << 16) | (static_cast(psm) << 24); + words[3] = 0x4c; + + words[4] = (static_cast(zbuf) & 0xFFFFULL) | (static_cast(param_6 & 0xF) << 24) | + (param_5 == 0 ? 0x100000000ULL : 0ULL); + words[5] = 0x4e; + + int32_t off_x = 0x800 - (w >> 1); + int32_t off_y = 0x800 - (h >> 1); + words[6] = (static_cast(static_cast(off_y) & 0xFFFF) << 36) | + (static_cast(off_x) & 0xFFFF) * 16ULL; + words[7] = 0x18; + + words[8] = (static_cast(static_cast(h - 1) & 0xFFFF) << 48) | + (static_cast(static_cast(w - 1) & 0xFFFF) << 16); + words[9] = 0x40; + + words[10] = 1; + words[11] = 0x1a; + + words[12] = 1; + words[13] = 0x46; + + words[14] = (param_2 & 2) ? 1ULL : 0ULL; + words[15] = 0x45; + + words[16] = (param_5 == 0) ? 0x30000ULL : ((static_cast(param_5 & 3) << 17) | 0x10000ULL); + words[17] = 0x47; + + setReturnS32(ctx, 8); } void sceGsSetDefDrawEnv2(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) @@ -333,7 +386,6 @@ void sceGsSetDefStoreImage(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtim void sceGsSwapDBuffDc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - // can we get away with that ? kkkk static int cur = 0; cur ^= 1; setReturnS32(ctx, cur); @@ -341,22 +393,123 @@ void sceGsSwapDBuffDc(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) void sceGsSyncPath(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - setReturnS32(ctx, 0); + int32_t mode = static_cast(getRegU32(ctx, 4)); + auto &mem = runtime->memory(); + + if (mode == 0) + { + mem.processPendingTransfers(); + + uint32_t count = 0; + constexpr uint32_t kTimeout = 0x1000000; + + while ((mem.readIORegister(0x10009000) & 0x100) != 0) + { + if (++count > kTimeout) + { + setReturnS32(ctx, -1); + return; + } + } + + while ((mem.readIORegister(0x1000A000) & 0x100) != 0) + { + if (++count > kTimeout) + { + setReturnS32(ctx, -1); + return; + } + } + + while ((mem.readIORegister(0x10003C00) & 0x1F000003) != 0) + { + if (++count > kTimeout) + { + setReturnS32(ctx, -1); + return; + } + } + + while ((mem.readIORegister(0x10003020) & 0xC00) != 0) + { + if (++count > kTimeout) + { + setReturnS32(ctx, -1); + return; + } + } + + setReturnS32(ctx, 0); + } + else + { + uint32_t result = 0; + + if ((mem.readIORegister(0x10009000) & 0x100) != 0) + result |= 1; + if ((mem.readIORegister(0x1000A000) & 0x100) != 0) + result |= 2; + if ((mem.readIORegister(0x10003C00) & 0x1F000003) != 0) + result |= 4; + if ((mem.readIORegister(0x10003020) & 0xC00) != 0) + result |= 0x10; + + setReturnS32(ctx, result); + } } void sceGsSyncV(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - ps2_syscalls::WaitVSyncTick(rdram, runtime); setReturnS32(ctx, 0); } void sceGsSyncVCallback(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - ps2_syscalls::WaitVSyncTick(rdram, runtime); setReturnS32(ctx, 0); } void sceGszbufaddr(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - setReturnU32(ctx, getRegU32(ctx, 4)); + (void)rdram; + uint32_t param_1 = getRegU32(ctx, 4); + int32_t w = static_cast(static_cast(getRegU32(ctx, 6) & 0xFFFF)); + int32_t h = static_cast(static_cast(getRegU32(ctx, 7) & 0xFFFF)); + + int32_t width_blocks = (w + 63) >> 6; + if (w + 63 < 0) + width_blocks = (w + 126) >> 6; + + int32_t height_blocks; + if ((param_1 & 2) != 0) + { + int32_t v = (h + 63) >> 6; + if (h + 63 < 0) + v = (h + 126) >> 6; + height_blocks = v; + } + else + { + int32_t v = (h + 31) >> 5; + if (h + 31 < 0) + v = (h + 62) >> 5; + height_blocks = v; + } + + int32_t product = width_blocks * height_blocks; + + uint64_t gparam_val = 0; + if (runtime) + { + uint8_t *scratch = runtime->memory().getScratchpad(); + if (scratch) + { + std::memcpy(&gparam_val, scratch + 0x100, sizeof(gparam_val)); + } + } + if ((gparam_val & 0xFFFF0000FFFFULL) == 1ULL) + product = (product * 0x10000) >> 16; + else + product = (product * 0x20000) >> 16; + + setReturnS32(ctx, product); } diff --git a/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl b/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl index e9ed2e42..7ea590fe 100644 --- a/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl +++ b/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl @@ -942,6 +942,298 @@ void sceFsReset(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, 0); } +static void writeU32AtGp(uint8_t *rdram, uint32_t gp, int32_t offset, uint32_t value) +{ + const uint32_t addr = gp + static_cast(offset); + if (uint8_t *p = getMemPtr(rdram, addr)) + *reinterpret_cast(p) = value; +} + +void sceeFontInit(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + const uint32_t gp = getRegU32(ctx, 28); + const uint32_t a0 = getRegU32(ctx, 4); + const uint32_t a1 = getRegU32(ctx, 5); + const uint32_t a2 = getRegU32(ctx, 6); + const uint32_t a3 = getRegU32(ctx, 7); + writeU32AtGp(rdram, gp, -0x7b60, a1); + writeU32AtGp(rdram, gp, -0x7b5c, a2); + writeU32AtGp(rdram, gp, -0x7b64, a0); + writeU32AtGp(rdram, gp, -0x7c98, a3); + writeU32AtGp(rdram, gp, -0x7b4c, 0x7f7f7f7f); + writeU32AtGp(rdram, gp, -0x7b50, 0x3f800000); + writeU32AtGp(rdram, gp, -0x7b54, 0x3f800000); + writeU32AtGp(rdram, gp, -0x7b58, 0); + + if (runtime && a0 != 0u) + { + if ((a0 * 256u) + 64u <= PS2_GS_VRAM_SIZE) + { + uint32_t clutData[16]; + for (uint32_t i = 0; i < 16u; ++i) + { + uint8_t alpha = static_cast((i * 0x80u) / 15u); + clutData[i] = (i == 0) + ? 0x00000000u + : (0x80u | (0x80u << 8) | (0x80u << 16) | (static_cast(alpha) << 24)); + } + constexpr uint32_t kClutQwc = 4u; + constexpr uint32_t kHeaderQwc = 6u; + constexpr uint32_t kTotalQwc = kHeaderQwc + kClutQwc; + uint32_t pktAddr = runtime->guestMalloc(kTotalQwc * 16u, 16u); + if (pktAddr != 0u) + { + uint8_t *pkt = getMemPtr(rdram, pktAddr); + if (pkt) + { + uint64_t *q = reinterpret_cast(pkt); + const uint32_t dbp = a0 & 0x3FFFu; + constexpr uint8_t psm = 0u; + q[0] = (4ULL << 60) | (1ULL << 56) | 1ULL; + q[1] = 0x0E0E0E0E0E0E0E0EULL; + q[2] = (static_cast(dbp) << 32) | (1ULL << 48) | (static_cast(psm) << 56); + q[3] = 0x50ULL; + q[4] = 0ULL; + q[5] = 0x51ULL; + q[6] = 16ULL | (1ULL << 32); + q[7] = 0x52ULL; + q[8] = 0ULL; + q[9] = 0x53ULL; + q[10] = (2ULL << 58) | (kClutQwc & 0x7FFF) | (1ULL << 15); + q[11] = 0ULL; + std::memcpy(pkt + 12u * 8u, clutData, 64u); + constexpr uint32_t GIF_CHANNEL = 0x1000A000; + constexpr uint32_t CHCR_STR_MODE0 = 0x101u; + runtime->memory().writeIORegister(GIF_CHANNEL + 0x10u, pktAddr); + runtime->memory().writeIORegister(GIF_CHANNEL + 0x20u, kTotalQwc & 0xFFFFu); + runtime->memory().writeIORegister(GIF_CHANNEL + 0x00u, CHCR_STR_MODE0); + runtime->memory().processPendingTransfers(); + } + } + } + } + + setReturnS32(ctx, static_cast(a0 + 4)); +} + +void sceeFontLoadFont(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + static constexpr uint32_t kFontBase = 0x176148u; + static constexpr uint32_t kFontEntrySz = 0x24u; + + const uint32_t fontDataAddr = getRegU32(ctx, 4); + const int fontId = static_cast(getRegU32(ctx, 5)); + const int tbp0 = static_cast(getRegU32(ctx, 7)); + + if (!fontDataAddr || !runtime) + { + setReturnS32(ctx, tbp0); + return; + } + + const uint8_t *fontPtr = getConstMemPtr(rdram, fontDataAddr); + if (!fontPtr) + { + setReturnS32(ctx, tbp0); + return; + } + + int width = static_cast(*reinterpret_cast(fontPtr + 0x00u)); + int height = static_cast(*reinterpret_cast(fontPtr + 0x04u)); + uint32_t raw8 = *reinterpret_cast(fontPtr + 0x08u); + int fontDataSz = static_cast(*reinterpret_cast(fontPtr + 0x0cu)); + + uint32_t pointsize = raw8; + uint32_t fontOff = static_cast(fontId * static_cast(kFontEntrySz)); + if (raw8 & 0x40000000u) + { + pointsize = raw8 - 0x40000000u; + if (uint8_t *p = getMemPtr(rdram, kFontBase + fontOff + 0x20u)) + *reinterpret_cast(p) = 1u; + } + else + { + if (uint8_t *p = getMemPtr(rdram, kFontBase + fontOff + 0x20u)) + *reinterpret_cast(p) = 0u; + } + + int tw = (width >= 0) ? (width >> 6) : ((width + 0x3f) >> 6); + int qwc = (fontDataSz >= 0) ? (fontDataSz >> 4) : ((fontDataSz + 0xf) >> 4); + + uint32_t glyphSrc = fontDataAddr + static_cast(fontDataSz) + 0x10u; + uint32_t glyphAlloc = runtime->guestMalloc(0x2010u, 0x40u); + if (uint8_t *p = getMemPtr(rdram, kFontBase + fontOff)) + *reinterpret_cast(p) = glyphAlloc; + + if (glyphAlloc != 0u) + { + uint8_t *dst = getMemPtr(rdram, glyphAlloc); + const uint8_t *src = getConstMemPtr(rdram, glyphSrc); + if (dst && src) + std::memcpy(dst, src, 0x2010u); + } + + uint32_t isDoubleByte = 0; + if (const uint8_t *p = getConstMemPtr(rdram, kFontBase + fontOff + 0x20u)) + isDoubleByte = *reinterpret_cast(p); + if (isDoubleByte == 0u) + { + uint32_t kernSrc = glyphSrc + 0x2010u; + uint32_t kernAlloc = runtime->guestMalloc(0xc400u, 0x40u); + if (glyphAlloc != 0u) + { + uint8_t *kernSlot = getMemPtr(rdram, glyphAlloc + 0x2000u); + if (kernSlot) + *reinterpret_cast(kernSlot) = kernAlloc; + } + if (kernAlloc != 0u) + { + uint8_t *dst = getMemPtr(rdram, kernAlloc); + const uint8_t *src = getConstMemPtr(rdram, kernSrc); + if (dst && src) + std::memcpy(dst, src, 0xc400u); + } + } + + auto writeFontField = [&](uint32_t off, uint32_t val) + { + if (uint8_t *p = getMemPtr(rdram, kFontBase + fontOff + off)) + *reinterpret_cast(p) = val; + }; + writeFontField(0x18u, pointsize); + writeFontField(0x08u, static_cast(tbp0)); + writeFontField(0x0cu, static_cast(tw)); + + int logW = 0; + for (int w = width; w != 1 && w != 0; w = static_cast(static_cast(w) >> 1)) + logW++; + writeFontField(0x10u, static_cast(logW)); + + int logH = 0; + for (int h = height; h != 1 && h != 0; h = static_cast(static_cast(h) >> 1)) + logH++; + writeFontField(0x14u, static_cast(logH)); + writeFontField(0x04u, 0u); + writeFontField(0x1cu, getRegU32(ctx, 6)); + + if (qwc > 0) + { + const uint32_t imageBytes = static_cast(qwc) * 16u; + const uint8_t psm = 20u; + const uint32_t headerQwc = 12u; + const uint32_t imageQwc = static_cast(qwc); + const uint32_t totalQwc = headerQwc + imageQwc; + uint32_t pktAddr = runtime->guestMalloc(totalQwc * 16u, 16u); + if (pktAddr != 0u) + { + uint8_t *pkt = getMemPtr(rdram, pktAddr); + const uint8_t *imgSrc = getConstMemPtr(rdram, fontDataAddr + 0x10u); + if (pkt && imgSrc) + { + uint64_t *q = reinterpret_cast(pkt); + const uint32_t dbp = static_cast(tbp0) & 0x3FFFu; + const uint32_t dbw = static_cast(tw > 0 ? tw : 1) & 0x3Fu; + const uint32_t rrw = static_cast(width > 0 ? width : 64); + const uint32_t rrh = static_cast(height > 0 ? height : 1); + + q[0] = (4ULL << 60) | (1ULL << 56) | 1ULL; + q[1] = 0x0E0E0E0E0E0E0E0EULL; + q[2] = (static_cast(psm) << 24) | (1ULL << 16) | + (static_cast(dbp) << 32) | (static_cast(dbw) << 48) | + (static_cast(psm) << 56); + q[3] = 0x50ULL; + q[4] = 0ULL; + q[5] = 0x51ULL; + q[6] = (static_cast(rrh) << 32) | static_cast(rrw); + q[7] = 0x52ULL; + q[8] = 0ULL; + q[9] = 0x53ULL; + q[10] = (2ULL << 58) | (imageQwc & 0x7FFF) | (1ULL << 15); + q[11] = 0ULL; + std::memcpy(pkt + 12 * 8, imgSrc, imageBytes); + + constexpr uint32_t GIF_CHANNEL = 0x1000A000; + constexpr uint32_t CHCR_STR_MODE0 = 0x101u; + runtime->memory().writeIORegister(GIF_CHANNEL + 0x10u, pktAddr); + runtime->memory().writeIORegister(GIF_CHANNEL + 0x20u, totalQwc & 0xFFFFu); + runtime->memory().writeIORegister(GIF_CHANNEL + 0x00u, CHCR_STR_MODE0); + } + } + } + + int retTbp = tbp0 + ((fontDataSz >= 0 ? fontDataSz : fontDataSz + 0x7f) >> 7); + setReturnS32(ctx, retTbp); +} + +void sceeFontPrintfAt(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + TODO_NAMED("sceeFontPrintfAt", rdram, ctx, runtime); +} + +void sceeFontPrintfAt2(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + setReturnS32(ctx, 0); +} + +void sceeFontClose(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + static constexpr uint32_t kFontBase = 0x176148u; + static constexpr uint32_t kFontEntrySz = 0x24u; + const int fontId = static_cast(getRegU32(ctx, 4)); + const uint32_t fontOff = static_cast(fontId * static_cast(kFontEntrySz)); + uint32_t glyphPtr = 0; + if (const uint8_t *p = getConstMemPtr(rdram, kFontBase + fontOff)) + glyphPtr = *reinterpret_cast(p); + if (glyphPtr != 0u) + { + if (runtime) + { + uint32_t kernPtr = 0; + if (const uint8_t *kp = getConstMemPtr(rdram, glyphPtr + 0x2000u)) + kernPtr = *reinterpret_cast(kp); + if (kernPtr != 0u) + runtime->guestFree(kernPtr); + runtime->guestFree(glyphPtr); + } + if (uint8_t *p = getMemPtr(rdram, kFontBase + fontOff)) + *reinterpret_cast(p) = 0u; + setReturnS32(ctx, 0); + } + else + { + setReturnS32(ctx, -1); + } +} + +void sceeFontSetColour(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + const uint32_t gp = getRegU32(ctx, 28); + writeU32AtGp(rdram, gp, -0x7b4c, getRegU32(ctx, 4)); +} + +void sceeFontSetMode(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + const uint32_t gp = getRegU32(ctx, 28); + writeU32AtGp(rdram, gp, -0x7c98, getRegU32(ctx, 4)); + setReturnS32(ctx, 0); +} + +void sceeFontSetFont(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + const uint32_t gp = getRegU32(ctx, 28); + writeU32AtGp(rdram, gp, -0x7b58, getRegU32(ctx, 4)); +} + +void sceeFontSetScale(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + const uint32_t gp = getRegU32(ctx, 28); + uint32_t sclx_bits, scly_bits; + std::memcpy(&sclx_bits, &ctx->f[12], sizeof(float)); + std::memcpy(&scly_bits, &ctx->f[13], sizeof(float)); + writeU32AtGp(rdram, gp, -0x7b54, sclx_bits); + writeU32AtGp(rdram, gp, -0x7b50, scly_bits); +} + void sceIoctl(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { setReturnS32(ctx, 0); From 944909618faafb538d17f87b835c0ddf7e9b1b28 Mon Sep 17 00:00:00 2001 From: Aslan Hud Date: Tue, 24 Feb 2026 22:29:35 +0100 Subject: [PATCH 2/3] added: added more scee font stubs in stubs misc fix: fixed flickering error when rendering in memory.cpp --- ps2xRuntime/include/ps2_call_list.h | 1 + ps2xRuntime/include/ps2_syscalls.h | 3 + ps2xRuntime/src/lib/ps2_gs_gpu.cpp | 12 + ps2xRuntime/src/lib/ps2_memory.cpp | 60 +- ps2xRuntime/src/lib/ps2_runtime.cpp | 19 +- ps2xRuntime/src/lib/ps2_stubs.cpp | 1 + ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl | 616 ++++++++++++++++++- 7 files changed, 629 insertions(+), 83 deletions(-) diff --git a/ps2xRuntime/include/ps2_call_list.h b/ps2xRuntime/include/ps2_call_list.h index 18ae4415..87b56a37 100644 --- a/ps2xRuntime/include/ps2_call_list.h +++ b/ps2xRuntime/include/ps2_call_list.h @@ -340,6 +340,7 @@ X(sceeFontLoadFont) \ X(sceeFontPrintfAt) \ X(sceeFontPrintfAt2) \ + X(sceeFontGenerateString) \ X(sceeFontClose) \ X(sceeFontSetColour) \ X(sceeFontSetMode) \ diff --git a/ps2xRuntime/include/ps2_syscalls.h b/ps2xRuntime/include/ps2_syscalls.h index 4a7bea37..6a78c848 100644 --- a/ps2xRuntime/include/ps2_syscalls.h +++ b/ps2xRuntime/include/ps2_syscalls.h @@ -7,6 +7,9 @@ #include #include #include +#include + +std::string translatePs2Path(const char *ps2Path); extern std::atomic g_activeThreads; diff --git a/ps2xRuntime/src/lib/ps2_gs_gpu.cpp b/ps2xRuntime/src/lib/ps2_gs_gpu.cpp index 105f4ed7..920d23fe 100644 --- a/ps2xRuntime/src/lib/ps2_gs_gpu.cpp +++ b/ps2xRuntime/src/lib/ps2_gs_gpu.cpp @@ -611,6 +611,18 @@ void GS::writeRegister(uint8_t regAddr, uint64_t value) } break; } + case 0x59: + if (m_privRegs) + m_privRegs->dispfb1 = value; + break; + case 0x5a: + if (m_privRegs) + m_privRegs->display1 = value; + break; + case 0x5f: + if (m_privRegs) + m_privRegs->bgcolor = value; + break; default: break; } diff --git a/ps2xRuntime/src/lib/ps2_memory.cpp b/ps2xRuntime/src/lib/ps2_memory.cpp index 7215eaf8..afb73e32 100644 --- a/ps2xRuntime/src/lib/ps2_memory.cpp +++ b/ps2xRuntime/src/lib/ps2_memory.cpp @@ -192,6 +192,8 @@ bool PS2Memory::initialize(size_t ramSize) // Initialize GS registers memset(&gs_regs, 0, sizeof(gs_regs)); + gs_regs.dispfb1 = (0ULL << 0) | (10ULL << 9) | (0ULL << 15) | (0ULL << 32) | (0ULL << 43); + gs_regs.display1 = (0ULL << 0) | (0ULL << 12) | (0ULL << 23) | (0ULL << 27) | (639ULL << 32) | (447ULL << 44); // Allocate GS VRAM (4MB) m_gsVRAM = new uint8_t[PS2_GS_VRAM_SIZE]; @@ -634,27 +636,12 @@ void PS2Memory::write128(uint32_t address, __m128i value) bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) { - // ── IPU registers (0x10002000-0x10002030) ────────────────── - // On real PS2, IPU_CTRL bit 31 (BUSY) is READ-ONLY — set by hardware. - // We must NOT store the raw value for IPU_CTRL because the game - // might write 0x40000000 (RST) and we'd return 0 with no BUSY, - // but if any stale value had bit 31, the polling loop would hang. if (address >= 0x10002000 && address <= 0x10002030) { - static int ipuWriteLog = 0; - if (ipuWriteLog < 30) - { - std::cerr << "[IPU] write addr=0x" << std::hex << address - << " val=0x" << value << std::dec << std::endl; - ++ipuWriteLog; - } if (address == 0x10002010) { - // IPU_CTRL write: bit 30 = RST (reset). After reset, - // all status bits clear. Never store BUSY (bit 31). if (value & (1u << 30)) { - // Reset IPU — clear all IPU registers m_ioRegisters[0x10002000] = 0; m_ioRegisters[0x10002010] = 0; m_ioRegisters[0x10002020] = 0; @@ -662,13 +649,11 @@ bool PS2Memory::writeIORegister(uint32_t address, uint32_t value) } else { - // Store without BUSY bit m_ioRegisters[address] = value & ~(1u << 31); } } else { - // IPU_CMD (0x10002000) — store command, don't set busy m_ioRegisters[address] = value; } return true; @@ -1033,37 +1018,25 @@ int PS2Memory::pollDmaRegisters() uint32_t PS2Memory::readIORegister(uint32_t address) { - // ── IPU registers (0x10002000-0x10002030) ────────────────── - // IPU_CMD 0x10002000: command result / FIFO output - // IPU_CTRL 0x10002010: status — bit 31=BUSY (always 0: we don't decode) - // IPU_BP 0x10002020: bitstream pointer - // IPU_TOP 0x10002030: top 32 bits of FIFO if (address >= 0x10002000 && address <= 0x10002030) { - static int ipuReadLog = 0; uint32_t val = 0; switch (address) { - case 0x10002000: // IPU_CMD — command result + case 0x10002000: val = m_ioRegisters[address]; break; - case 0x10002010: // IPU_CTRL — always NOT busy, ECD=0 - val = m_ioRegisters[address] & ~(1u << 31); // clear BUSY + case 0x10002010: + val = m_ioRegisters[address] & ~(1u << 31); break; - case 0x10002020: // IPU_BP - case 0x10002030: // IPU_TOP + case 0x10002020: + case 0x10002030: val = m_ioRegisters[address]; break; default: val = 0; break; } - if (ipuReadLog < 30) - { - std::cerr << "[IPU] read addr=0x" << std::hex << address - << " val=0x" << val << std::dec << std::endl; - ++ipuReadLog; - } return val; } if (address >= 0x10000000 && address < 0x10010000) @@ -1080,9 +1053,9 @@ uint32_t PS2Memory::readIORegister(uint32_t address) { if ((address & 0xFF) == 0x00) { - // Return CHCR as-is. STR (bit 8) is cleared after DMA - // completion in writeIORegister, not on read. - return m_ioRegisters[address]; + uint32_t channelStatus = m_ioRegisters[address] & ~0x100u; + m_ioRegisters[address] = channelStatus; + return channelStatus; } } @@ -1091,21 +1064,8 @@ uint32_t PS2Memory::readIORegister(uint32_t address) return 0; } - // SIF hardware registers — HLE: pretend IOP is always ready - // 0x1000F200: SIF_SMCOM — IOP communication status - // 0x1000F210: SIF_MSCOM — EE→IOP command - // 0x1000F220: SIF_MSFLG — Main→Sub flags - // 0x1000F230: SIF_SMFLG — Sub→Main flags (IOP ready bits) - // 0x1000F240: SIF_CTRL — SIF control if (address >= 0x1000F200 && address <= 0x1000F260) { - static std::atomic sifReads{0}; - uint64_t n = sifReads.fetch_add(1); - if (n < 5 || (n % 100000) == 0) - { - std::cerr << "[SIF-HW] read 0x" << std::hex << address - << " #" << std::dec << n << std::endl; - } if (address == 0x1000F230) { return 0x60000; diff --git a/ps2xRuntime/src/lib/ps2_runtime.cpp b/ps2xRuntime/src/lib/ps2_runtime.cpp index 262e3c50..394d1e52 100644 --- a/ps2xRuntime/src/lib/ps2_runtime.cpp +++ b/ps2xRuntime/src/lib/ps2_runtime.cpp @@ -1466,20 +1466,7 @@ void PS2Runtime::run() uint64_t tick = 0; while (!gameThreadFinished.load(std::memory_order_acquire)) { - const uint32_t pc = m_debugPc.load(std::memory_order_relaxed); - const uint32_t ra = m_debugRa.load(std::memory_order_relaxed); - const uint32_t sp = m_debugSp.load(std::memory_order_relaxed); - const uint32_t gp = m_debugGp.load(std::memory_order_relaxed); - - if ((tick++ % 120) == 0) - { - std::cout << "[run] activeThreads=" << g_activeThreads.load(std::memory_order_relaxed); - std::cout << " pc=0x" << std::hex << pc - << " ra=0x" << ra - << " sp=0x" << sp - << " gp=0x" << gp - << std::dec << std::endl; - } + tick++; if ((tick % 600) == 0) { static uint64_t lastDma = 0, lastGif = 0, lastGs = 0, lastVif = 0; @@ -1489,10 +1476,6 @@ void PS2Runtime::run() uint64_t curVif = m_memory.vifWriteCount(); if (curDma != lastDma || curGif != lastGif || curGs != lastGs || curVif != lastVif) { - std::cout << "[hw] dma_starts=" << curDma - << " gif_copies=" << curGif - << " gs_writes=" << curGs - << " vif_writes=" << curVif << std::endl; lastDma = curDma; lastGif = curGif; lastGs = curGs; diff --git a/ps2xRuntime/src/lib/ps2_stubs.cpp b/ps2xRuntime/src/lib/ps2_stubs.cpp index 7b48003b..527681f3 100644 --- a/ps2xRuntime/src/lib/ps2_stubs.cpp +++ b/ps2xRuntime/src/lib/ps2_stubs.cpp @@ -1,5 +1,6 @@ #include "ps2_stubs.h" #include "ps2_runtime.h" +#include "ps2_runtime_macros.h" #include "ps2_syscalls.h" #include #include diff --git a/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl b/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl index 7ea590fe..077ff136 100644 --- a/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl +++ b/ps2xRuntime/src/lib/stubs/ps2_stubs_misc.inl @@ -1081,11 +1081,7 @@ void sceeFontLoadFont(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) uint32_t kernSrc = glyphSrc + 0x2010u; uint32_t kernAlloc = runtime->guestMalloc(0xc400u, 0x40u); if (glyphAlloc != 0u) - { - uint8_t *kernSlot = getMemPtr(rdram, glyphAlloc + 0x2000u); - if (kernSlot) - *reinterpret_cast(kernSlot) = kernAlloc; - } + *reinterpret_cast(getMemPtr(rdram, glyphAlloc + 0x2000u)) = kernAlloc; if (kernAlloc != 0u) { uint8_t *dst = getMemPtr(rdram, kernAlloc); @@ -1165,14 +1161,388 @@ void sceeFontLoadFont(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) setReturnS32(ctx, retTbp); } +static constexpr uint32_t kFontBase = 0x176148u; +static constexpr uint32_t kFontEntrySz = 0x24u; + +void sceeFontGenerateString(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) +{ + const float sclx = ctx->f[12]; + const float scly = ctx->f[13]; + const uint32_t bufAddr = getRegU32(ctx, 4); + const uint64_t paramX = GPR_U64(ctx, 5); + const int64_t paramY = GPR_S64(ctx, 6); + const int paramW = static_cast(getRegU32(ctx, 7)); + const int paramH = static_cast(getRegU32(ctx, 8)); + const uint32_t colour = getRegU32(ctx, 9); + const int alignCh = static_cast(getRegU32(ctx, 10) & 0xffu); + const int fontId = static_cast(getRegU32(ctx, 11)); + + const uint32_t sp = getRegU32(ctx, 29); + const uint32_t strAddr = FAST_READ32(sp + 0x00u); + const uint32_t param14 = FAST_READ32(sp + 0x18u); + + if (bufAddr == 0u) + { + setReturnS32(ctx, 0); + ctx->pc = getRegU32(ctx, 31); + return; + } + + const uint32_t gp = getRegU32(ctx, 28); + const uint32_t fontModeAdj = FAST_READ32(gp + static_cast(static_cast(-0x7c98))); + const uint32_t shiftAmt = fontModeAdj & 0x1fu; + const int scrHeight = static_cast(FAST_READ32(gp + static_cast(static_cast(-0x7b5c)))); + const int scrWidth = static_cast(FAST_READ32(gp + static_cast(static_cast(-0x7b60)))); + const uint32_t fontClut = FAST_READ32(gp + static_cast(static_cast(-0x7b64))); + + const uint32_t fontOff = static_cast(fontId * static_cast(kFontEntrySz)); + const int lineH = static_cast(FAST_READ32(kFontBase + fontOff + 0x18u)); + + int iVar21 = 0; + int iStack_dc = 0; + uint32_t uStack_d8 = 0; + int iVar15 = 0; + + int16_t sVar8; + { + int yStepRaw = static_cast(static_cast((lineH + 6) * 16) * scly); + sVar8 = static_cast((static_cast(paramY) + 0x700) * 16) + static_cast(yStepRaw >> static_cast(shiftAmt)); + } + + int16_t baseX = static_cast((static_cast(paramX) + 0x6c0) * 16); + + if (param14 != 0u) + { + int64_t clipY1 = static_cast(static_cast(paramY) + paramH); + int64_t clipX1 = static_cast(static_cast(paramX) + paramW); + if (clipY1 > scrHeight - 1) clipY1 = static_cast(scrHeight - 1); + if (clipX1 > scrWidth - 1) clipX1 = static_cast(scrWidth - 1); + int64_t clipY0 = 0; + if (paramY > 0) clipY0 = paramY; + uint64_t clipX0 = 0; + if (static_cast(paramX) > 0) clipX0 = paramX; + + uint64_t scissor = clipX0 | (static_cast(static_cast(clipX1)) << 16) + | (static_cast(static_cast(clipY0)) << 32) | (static_cast(static_cast(clipY1)) << 48); + + FAST_WRITE64(bufAddr + 0x00, 0x1000000000000005ull); + FAST_WRITE64(bufAddr + 0x08, 0x0eull); + FAST_WRITE64(bufAddr + 0x10, scissor); + FAST_WRITE64(bufAddr + 0x18, 0x40ull); + FAST_WRITE64(bufAddr + 0x20, 0x20000ull); + FAST_WRITE64(bufAddr + 0x28, 0x47ull); + FAST_WRITE64(bufAddr + 0x30, 0x44ull); + FAST_WRITE64(bufAddr + 0x38, 0x42ull); + FAST_WRITE64(bufAddr + 0x40, 0x160ull); + FAST_WRITE64(bufAddr + 0x48, 0x14ull); + FAST_WRITE64(bufAddr + 0x50, 0x156ull); + FAST_WRITE64(bufAddr + 0x58, 0ull); + FAST_WRITE64(bufAddr + 0x60, 0x1000000000000001ull); + FAST_WRITE64(bufAddr + 0x68, 0x0eull); + + uint64_t iVar5 = static_cast(FAST_READ32(kFontBase + fontOff + 0x08u)); + uint64_t iVar22 = static_cast(FAST_READ32(kFontBase + fontOff + 0x0cu)); + uint64_t iVar3 = static_cast(FAST_READ32(kFontBase + fontOff + 0x10u)); + uint64_t iVar4 = static_cast(FAST_READ32(kFontBase + fontOff + 0x14u)); + + uint64_t tex0 = iVar5 + | 0x2000000000000000ull + | (iVar22 << 14) + | 0x400000000ull + | (iVar3 << 26) + | 0x1400000ull + | (iVar4 << 30) + | (static_cast(fontClut) << 37); + + FAST_WRITE64(bufAddr + 0x70, tex0); + FAST_WRITE64(bufAddr + 0x78, 6ull); + FAST_WRITE64(bufAddr + 0x80, 0x1000000000000001ull); + FAST_WRITE64(bufAddr + 0x88, 0x0eull); + FAST_WRITE64(bufAddr + 0x90, static_cast(colour)); + FAST_WRITE64(bufAddr + 0x98, 1ull); + + iVar21 = 10; + } + + int iVar22_qw = iVar21 + 1; + uint32_t s2 = bufAddr + static_cast(iVar22_qw * 16); + uint32_t uVar20 = 0; + + size_t sLen = 0; + { + const char *hostStr = reinterpret_cast(getConstMemPtr(rdram, strAddr)); + if (hostStr) sLen = ::strlen(hostStr); + } + + while (uVar20 < sLen) + { + uint8_t bVar1 = FAST_READ8(strAddr + uVar20); + uint32_t uVar9 = static_cast(bVar1); + int8_t chSigned = static_cast(bVar1); + + if (uStack_d8 < 0x21u) + { + goto label_check_printable; + } + + if (uVar9 > 0x20u) + { + uint32_t dat176168 = FAST_READ32(kFontBase + fontOff + 0x20u); + if (dat176168 == 0u) + { + uint32_t fontPtr0 = FAST_READ32(kFontBase + fontOff); + uint32_t tableAddr = FAST_READ32(fontPtr0 + 0x2000u); + int8_t kern = static_cast(FAST_READ8(tableAddr - 0x1c20u + uStack_d8 * 0xe0u + uVar9)); + iVar15 += static_cast(static_cast(static_cast(kern)) * sclx); + } + goto label_check_printable; + } + + goto label_space; + +label_check_printable: + if (uVar9 < 0x21u) + { + goto label_space; + } + + { + int glyphIdx = static_cast(chSigned); + uint32_t iVar19_off = static_cast(glyphIdx * 0x20); + + if (param14 != 0u) + { + uint32_t fontPtr = FAST_READ32(kFontBase + fontOff); + int16_t sVar7 = baseX + static_cast(iVar15); + + iVar22_qw += 2; + iStack_dc += 1; + + uint16_t wU0 = FAST_READ16(fontPtr + iVar19_off + 0); + uint16_t wV0 = FAST_READ16(fontPtr + iVar19_off + 2); + FAST_WRITE16(s2 + 0x00, wU0); + FAST_WRITE16(s2 + 0x02, wV0); + + int16_t dx0 = static_cast(FAST_READ16(fontPtr + iVar19_off + 8)); + int16_t dy0 = static_cast(FAST_READ16(fontPtr + iVar19_off + 10)); + uint16_t wX0 = static_cast(sVar7 + static_cast(static_cast(static_cast(static_cast(dx0)) * sclx))); + int yVal0 = static_cast(static_cast(static_cast(dy0)) * scly) >> static_cast(shiftAmt); + uint16_t wY0 = static_cast(sVar8 + static_cast(yVal0)); + FAST_WRITE16(s2 + 0x08, wX0); + FAST_WRITE16(s2 + 0x0a, wY0); + FAST_WRITE32(s2 + 0x0c, 1u); + + s2 += 0x10u; + + uint16_t wU1 = FAST_READ16(fontPtr + iVar19_off + 4); + uint16_t wV1 = FAST_READ16(fontPtr + iVar19_off + 6); + FAST_WRITE16(s2 + 0x00, wU1); + FAST_WRITE16(s2 + 0x02, wV1); + + int16_t dx1 = static_cast(FAST_READ16(fontPtr + iVar19_off + 12)); + int16_t dy1 = static_cast(FAST_READ16(fontPtr + iVar19_off + 14)); + uint16_t wX1 = static_cast(sVar7 + static_cast(static_cast(static_cast(static_cast(dx1)) * sclx))); + int yVal1 = static_cast(static_cast(static_cast(dy1)) * scly) >> static_cast(shiftAmt); + uint16_t wY1 = static_cast(sVar8 + static_cast(yVal1)); + FAST_WRITE16(s2 + 0x08, wX1); + FAST_WRITE16(s2 + 0x0a, wY1); + FAST_WRITE32(s2 + 0x0c, 1u); + + s2 += 0x10u; + } + + { + uint32_t fontPtr = FAST_READ32(kFontBase + fontOff); + uint32_t advOff = static_cast((glyphIdx * 2 + 1) * 16 + 8); + int16_t advW = static_cast(FAST_READ16(fontPtr + advOff)); + iVar15 += static_cast(static_cast(static_cast(advW)) * sclx); + } + } + goto label_next; + +label_space: + { + int spaceW = static_cast(FAST_READ32(kFontBase + fontOff + 0x1cu)); + iVar15 += static_cast(static_cast(spaceW) * sclx); + } + +label_next: + uStack_d8 = uVar9; + uVar20++; + } + + if (param14 != 0u) + { + if (alignCh != 'L') + { + if (alignCh == 'C' || alignCh == 'R') + { + int shift = paramW * 16 - iVar15; + if (alignCh == 'C') shift >>= 1; + if (iStack_dc > 0) + { + uint32_t adj = bufAddr + static_cast(iVar21 * 16) + 0x20u; + for (int k = 0; k < iStack_dc; k++) + { + int16_t oldX0 = static_cast(FAST_READ16(adj - 8u)); + int16_t oldX1 = static_cast(FAST_READ16(adj + 8u)); + FAST_WRITE16(adj - 8u, static_cast(oldX0 + static_cast(shift))); + FAST_WRITE16(adj + 8u, static_cast(oldX1 + static_cast(shift))); + adj += 0x20u; + } + } + } + else if (alignCh == 'J' && sLen > 1) + { + int iVar19_div = static_cast(sLen) - 1; + if (iVar19_div == 0) iVar19_div = 1; + int spacePer = (paramW * 16 - iVar15) / iVar19_div; + uint32_t adj = bufAddr + static_cast(iVar21 * 16) + 0x20u; + int accum = 0; + for (uint32_t jj = 0; jj < sLen; jj++) + { + int8_t jch = static_cast(FAST_READ8(strAddr + jj)); + if (jch > 0x20) + { + int16_t oldX0 = static_cast(FAST_READ16(adj - 8u)); + int16_t oldX1 = static_cast(FAST_READ16(adj + 8u)); + FAST_WRITE16(adj - 8u, static_cast(oldX0 + static_cast(accum))); + FAST_WRITE16(adj + 8u, static_cast(oldX1 + static_cast(accum))); + adj += 0x20u; + } + accum += spacePer; + } + } + } + + if (param14 != 0u) + { + uint32_t tagAddr = bufAddr + static_cast(iVar21 * 16); + FAST_WRITE64(tagAddr + 0x00, static_cast(static_cast(iStack_dc)) | 0x4400000000000000ull); + FAST_WRITE64(tagAddr + 0x08, 0x5353ull); + + uint32_t endAddr = bufAddr + static_cast(iVar22_qw * 16); + FAST_WRITE64(endAddr + 0x00, 0x1000000000008001ull); + FAST_WRITE64(endAddr + 0x08, 0x0eull); + + int iVar19_end = iVar22_qw + 1; + uint32_t endAddr2 = bufAddr + static_cast(iVar19_end * 16); + FAST_WRITE64(endAddr2 + 0x00, 0x01ff0000027f0000ull); + FAST_WRITE64(endAddr2 + 0x08, 0x40ull); + + iVar22_qw += 2; + } + } + + int ret = 0; + if (param14 != 0u) ret = iVar22_qw; + setReturnS32(ctx, ret); + ctx->pc = getRegU32(ctx, 31); +} + void sceeFontPrintfAt(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - TODO_NAMED("sceeFontPrintfAt", rdram, ctx, runtime); + const uint32_t oldSp = getRegU32(ctx, 29); + const uint32_t frame = oldSp - 0x900u; + + const uint32_t bufAddr = getRegU32(ctx, 4); + const uint32_t paramX = getRegU32(ctx, 5); + const uint32_t paramY = getRegU32(ctx, 6); + const uint32_t fmtAddr = getRegU32(ctx, 7); + + const uint8_t *callerVa = getConstMemPtr(rdram, oldSp + 16u); + uint8_t *frameVa = getMemPtr(rdram, frame + 0x8f8u); + if (callerVa && frameVa) + std::memcpy(frameVa, callerVa, 64u); + + SET_GPR_U32(ctx, 4, frame + 0x20u); + SET_GPR_U32(ctx, 5, fmtAddr); + SET_GPR_U32(ctx, 6, frame + 0x8f8u); + vsprintf(rdram, ctx, runtime); + + const uint32_t gp = getRegU32(ctx, 28); + uint32_t defaultSclxBits = FAST_READ32(gp + static_cast(static_cast(-0x7b54))); + uint32_t defaultSclyBits = FAST_READ32(gp + static_cast(static_cast(-0x7b50))); + uint32_t defaultColour = FAST_READ32(gp + static_cast(static_cast(-0x7b4c))); + uint32_t defaultFontId = FAST_READ32(gp + static_cast(static_cast(-0x7b58))); + uint32_t scrWidth = FAST_READ32(gp + static_cast(static_cast(-0x7b60))); + uint32_t scrHeight = FAST_READ32(gp + static_cast(static_cast(-0x7b5c))); + + std::memcpy(&ctx->f[12], &defaultSclxBits, sizeof(float)); + std::memcpy(&ctx->f[13], &defaultSclyBits, sizeof(float)); + + FAST_WRITE32(frame + 0x00u, frame + 0x20u); + FAST_WRITE32(frame + 0x08u, frame + 0x820u); + FAST_WRITE32(frame + 0x10u, frame + 0x824u); + FAST_WRITE32(frame + 0x18u, 1u); + + SET_GPR_U32(ctx, 29, frame); + SET_GPR_U32(ctx, 4, bufAddr); + SET_GPR_U32(ctx, 5, paramX); + SET_GPR_U32(ctx, 6, paramY); + SET_GPR_U32(ctx, 7, scrWidth); + SET_GPR_U32(ctx, 8, scrHeight); + SET_GPR_U32(ctx, 9, defaultColour); + SET_GPR_U32(ctx, 10, 0x4cu); + SET_GPR_U32(ctx, 11, defaultFontId); + + sceeFontGenerateString(rdram, ctx, runtime); + + SET_GPR_U32(ctx, 29, oldSp); + ctx->pc = getRegU32(ctx, 31); } void sceeFontPrintfAt2(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - setReturnS32(ctx, 0); + const uint32_t oldSp = getRegU32(ctx, 29); + const uint32_t frame = oldSp - 0x900u; + + const uint32_t bufAddr = getRegU32(ctx, 4); + const uint32_t paramX = getRegU32(ctx, 5); + const uint32_t paramY = getRegU32(ctx, 6); + const uint32_t paramW = getRegU32(ctx, 7); + const uint32_t paramH = getRegU32(ctx, 8); + const uint32_t alignRaw = getRegU32(ctx, 9); + const uint32_t fmtAddr = getRegU32(ctx, 10); + const uint64_t param8 = GPR_U64(ctx, 11); + + int8_t alignChar = static_cast(alignRaw & 0xffu); + + FAST_WRITE64(frame + 0x8f8u, param8); + + SET_GPR_U32(ctx, 4, frame + 0x20u); + SET_GPR_U32(ctx, 5, fmtAddr); + SET_GPR_U32(ctx, 6, frame + 0x8f8u); + vsprintf(rdram, ctx, runtime); + + const uint32_t gp = getRegU32(ctx, 28); + uint32_t defaultSclxBits = FAST_READ32(gp + static_cast(static_cast(-0x7b54))); + uint32_t defaultSclyBits = FAST_READ32(gp + static_cast(static_cast(-0x7b50))); + uint32_t defaultColour = FAST_READ32(gp + static_cast(static_cast(-0x7b4c))); + uint32_t defaultFontId = FAST_READ32(gp + static_cast(static_cast(-0x7b58))); + + std::memcpy(&ctx->f[12], &defaultSclxBits, sizeof(float)); + std::memcpy(&ctx->f[13], &defaultSclyBits, sizeof(float)); + + FAST_WRITE32(frame + 0x00u, frame + 0x20u); + FAST_WRITE32(frame + 0x08u, frame + 0x820u); + FAST_WRITE32(frame + 0x10u, frame + 0x824u); + FAST_WRITE32(frame + 0x18u, 1u); + + SET_GPR_U32(ctx, 29, frame); + SET_GPR_U32(ctx, 4, bufAddr); + SET_GPR_U32(ctx, 5, paramX); + SET_GPR_U32(ctx, 6, paramY); + SET_GPR_U32(ctx, 7, paramW); + SET_GPR_U32(ctx, 8, paramH); + SET_GPR_U32(ctx, 9, defaultColour); + SET_GPR_U32(ctx, 10, static_cast(static_cast(alignChar))); + SET_GPR_U32(ctx, 11, defaultFontId); + + sceeFontGenerateString(rdram, ctx, runtime); + + SET_GPR_U32(ctx, 29, oldSp); + ctx->pc = getRegU32(ctx, 31); } void sceeFontClose(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) @@ -1241,7 +1611,56 @@ void sceIoctl(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) void sceIpuInit(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - setReturnS32(ctx, 0); + static constexpr uint32_t REG_IPU_CTRL = 0x10002010u; + static constexpr uint32_t REG_IPU_CMD = 0x10002000u; + static constexpr uint32_t REG_IPU_IN_FIFO = 0x10007010u; + static constexpr uint32_t IQVAL_BASE = 0x1721e0u; + static constexpr uint32_t VQVAL_BASE = 0x172230u; + static constexpr uint32_t SETD4_CHCR_ENTRY = 0x126428u; + + if (!runtime) + return; + + PS2Memory &mem = runtime->memory(); + + auto setD4 = runtime->lookupFunction(SETD4_CHCR_ENTRY); + if (setD4) + { + ctx->r[4] = _mm_set_epi64x(0, 1); + setD4(rdram, ctx, runtime); + } + + mem.write32(REG_IPU_CTRL, 0x40000000u); + mem.write32(REG_IPU_CMD, 0u); + + __m128i v; + v = runtime->Load128(rdram, ctx, IQVAL_BASE + 0x00u); + mem.write128(REG_IPU_IN_FIFO, v); + v = runtime->Load128(rdram, ctx, IQVAL_BASE + 0x10u); + mem.write128(REG_IPU_IN_FIFO, v); + v = runtime->Load128(rdram, ctx, IQVAL_BASE + 0x20u); + mem.write128(REG_IPU_IN_FIFO, v); + v = runtime->Load128(rdram, ctx, IQVAL_BASE + 0x30u); + mem.write128(REG_IPU_IN_FIFO, v); + v = runtime->Load128(rdram, ctx, IQVAL_BASE + 0x40u); + mem.write128(REG_IPU_IN_FIFO, v); + mem.write128(REG_IPU_IN_FIFO, v); + mem.write128(REG_IPU_IN_FIFO, v); + mem.write128(REG_IPU_IN_FIFO, v); + + mem.write32(REG_IPU_CMD, 0x50000000u); + mem.write32(REG_IPU_CMD, 0x58000000u); + + v = runtime->Load128(rdram, ctx, VQVAL_BASE + 0x00u); + mem.write128(REG_IPU_IN_FIFO, v); + v = runtime->Load128(rdram, ctx, VQVAL_BASE + 0x10u); + mem.write128(REG_IPU_IN_FIFO, v); + + mem.write32(REG_IPU_CMD, 0x60000000u); + mem.write32(REG_IPU_CMD, 0x90000000u); + + mem.write32(REG_IPU_CTRL, 0x40000000u); + mem.write32(REG_IPU_CMD, 0u); } void sceIpuRestartDMA(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) @@ -1514,17 +1933,122 @@ void sceMpegAddCallback(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) void sceMpegAddStrCallback(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - TODO_NAMED("sceMpegAddStrCallback", rdram, ctx, runtime); + (void)rdram; + (void)runtime; + setReturnU32(ctx, 0u); } void sceMpegClearRefBuff(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - TODO_NAMED("sceMpegClearRefBuff", rdram, ctx, runtime); + (void)ctx; + (void)runtime; + static const uint32_t kRefGlobalAddrs[] = { + 0x171800u, 0x17180Cu, 0x171818u, 0x171804u, 0x171810u, 0x17181Cu + }; + for (uint32_t addr : kRefGlobalAddrs) + { + uint8_t *p = getMemPtr(rdram, addr); + if (!p) + continue; + uint32_t ptr = *reinterpret_cast(p); + if (ptr != 0u) + { + uint8_t *q = getMemPtr(rdram, ptr + 0x28u); + if (q) + *reinterpret_cast(q) = 0u; + } + } + setReturnU32(ctx, 1u); +} + +static void mpegGuestWrite32(uint8_t *rdram, uint32_t addr, uint32_t value) +{ + if (uint8_t *p = getMemPtr(rdram, addr)) + *reinterpret_cast(p) = value; +} +static void mpegGuestWrite64(uint8_t *rdram, uint32_t addr, uint64_t value) +{ + if (uint8_t *p = getMemPtr(rdram, addr)) + { + *reinterpret_cast(p) = static_cast(value); + *reinterpret_cast(p + 4) = static_cast(value >> 32); + } } void sceMpegCreate(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - TODO_NAMED("sceMpegCreate", rdram, ctx, runtime); + const uint32_t param_1 = getRegU32(ctx, 4); // a0 + const uint32_t param_2 = getRegU32(ctx, 5); // a1 + const uint32_t param_3 = getRegU32(ctx, 6); // a2 + + const uint32_t uVar3 = (param_2 + 3u) & 0xFFFFFFFCu; + const int32_t iVar2_signed = static_cast(param_3) - static_cast(uVar3 - param_2); + + if (iVar2_signed <= 0x117) + { + setReturnU32(ctx, 0u); + return; + } + + const uint32_t puVar4 = uVar3 + 0x108u; + const uint32_t innerSize = static_cast(iVar2_signed) - 0x118u; + + mpegGuestWrite32(rdram, param_1 + 0x40, uVar3); + + const uint32_t a1_init = uVar3 + 0x118u; + mpegGuestWrite32(rdram, puVar4 + 0x0, a1_init); + mpegGuestWrite32(rdram, puVar4 + 0x4, innerSize); + mpegGuestWrite32(rdram, puVar4 + 0x8, a1_init); + mpegGuestWrite32(rdram, puVar4 + 0xC, a1_init); + + const uint32_t allocResult = runtime ? runtime->guestMalloc(0x600, 8u) : (uVar3 + 0x200u); + mpegGuestWrite32(rdram, uVar3 + 0x44, allocResult); + + // param_1[0..2] = 0; param_1[4..0xe] = 0xffffffff/0 as per decompilation + mpegGuestWrite32(rdram, param_1 + 0x00, 0); + mpegGuestWrite32(rdram, param_1 + 0x04, 0); + mpegGuestWrite32(rdram, param_1 + 0x08, 0); + mpegGuestWrite64(rdram, param_1 + 0x10, 0xFFFFFFFFFFFFFFFFULL); + mpegGuestWrite64(rdram, param_1 + 0x18, 0xFFFFFFFFFFFFFFFFULL); + mpegGuestWrite64(rdram, param_1 + 0x20, 0); + mpegGuestWrite64(rdram, param_1 + 0x28, 0xFFFFFFFFFFFFFFFFULL); + mpegGuestWrite64(rdram, param_1 + 0x30, 0xFFFFFFFFFFFFFFFFULL); + mpegGuestWrite64(rdram, param_1 + 0x38, 0); + + static const unsigned s_zeroOffsets[] = { + 0xB4, 0xB8, 0xBC, 0xC0, 0xC4, 0xC8, 0xCC, 0xD0, 0xD4, 0xD8, 0xDC, 0xE0, 0xE4, 0xE8, 0xF8, + 0x0C, 0x14, 0x2C, 0x34, 0x3C, + 0x48, 0xFC, 0x100, 0x104, 0x70, 0x90, 0xAC + }; + for (unsigned off : s_zeroOffsets) + mpegGuestWrite32(rdram, uVar3 + off, 0u); + mpegGuestWrite64(rdram, uVar3 + 0x78, 0); + mpegGuestWrite64(rdram, uVar3 + 0x88, 0); + + mpegGuestWrite64(rdram, uVar3 + 0xF0, 0xFFFFFFFFFFFFFFFFULL); + mpegGuestWrite32(rdram, uVar3 + 0x1C, 0x1209F8u); + mpegGuestWrite32(rdram, uVar3 + 0x24, 0x120A08u); + mpegGuestWrite32(rdram, uVar3 + 0xB0, 1u); + mpegGuestWrite32(rdram, uVar3 + 0x9C, 0xFFFFFFFFu); + mpegGuestWrite32(rdram, uVar3 + 0x80, 0xFFFFFFFFu); + mpegGuestWrite32(rdram, uVar3 + 0x94, 0xFFFFFFFFu); + mpegGuestWrite32(rdram, uVar3 + 0x98, 0xFFFFFFFFu); + + mpegGuestWrite32(rdram, 0x1717BCu, param_1); + + static const uint32_t s_refValues[] = { + 0x171A50u, 0x171C58u, 0x171CC0u, 0x171D28u, 0x171D90u, + 0x171AB8u, 0x171B20u, 0x171B88u, 0x171BF0u + }; + for (unsigned i = 0; i < 9u; ++i) + mpegGuestWrite32(rdram, 0x171800u + i * 4u, s_refValues[i]); + + uint32_t setDynamicRet = a1_init; + if (uint8_t *p = getMemPtr(rdram, puVar4 + 8)) + setDynamicRet = *reinterpret_cast(p); + mpegGuestWrite32(rdram, puVar4 + 12, setDynamicRet); + + setReturnU32(ctx, setDynamicRet); } void sceMpegDelete(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) @@ -1569,7 +2093,21 @@ void sceMpegGetDecodeMode(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime void sceMpegGetPicture(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - TODO_NAMED("sceMpegGetPicture", rdram, ctx, runtime); + (void)runtime; + const uint32_t param_1 = getRegU32(ctx, 4); + if (uint8_t *base = getMemPtr(rdram, param_1)) + { + const uint32_t iVar1 = *reinterpret_cast(base + 0x40); + if (uint8_t *inner = getMemPtr(rdram, iVar1)) + { + *reinterpret_cast(inner + 0xb0) = 1; + *reinterpret_cast(inner + 0xd8) = (getRegU32(ctx, 5) & 0x0FFFFFFFu) | 0x20000000u; + *reinterpret_cast(inner + 0xe4) = getRegU32(ctx, 6); + *reinterpret_cast(inner + 0xdc) = 0; + *reinterpret_cast(inner + 0xe0) = 0; + } + } + setReturnU32(ctx, 0u); } void sceMpegGetPictureRAW8(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) @@ -1589,7 +2127,20 @@ void sceMpegInit(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) void sceMpegIsEnd(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - TODO_NAMED("sceMpegIsEnd", rdram, ctx, runtime); + (void)runtime; + const uint32_t param_1 = getRegU32(ctx, 4); + uint8_t *base = getMemPtr(rdram, param_1 + 0x40u); + if (base) + { + uint32_t ptrAddr = *reinterpret_cast(base); + if (ptrAddr != 0u) + { + uint8_t *p = getMemPtr(rdram, ptrAddr); + if (p) + *reinterpret_cast(p) = 1u; + } + } + setReturnS32(ctx, 1); } void sceMpegIsRefBuffEmpty(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) @@ -1599,7 +2150,21 @@ void sceMpegIsRefBuffEmpty(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtim void sceMpegReset(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - TODO_NAMED("sceMpegReset", rdram, ctx, runtime); + (void)runtime; + const uint32_t param_1 = getRegU32(ctx, 4); + uint8_t *base = getMemPtr(rdram, param_1); + if (!base) + return; + uint32_t inner = *reinterpret_cast(base + 0x40); + if (inner == 0u) + return; + mpegGuestWrite32(rdram, inner + 0x00, 0u); + mpegGuestWrite32(rdram, inner + 0x04, 0u); + mpegGuestWrite32(rdram, inner + 0x08, 0u); + mpegGuestWrite32(rdram, param_1 + 0x08, 0u); + mpegGuestWrite32(rdram, inner + 0x80, 0xFFFFFFFFu); + mpegGuestWrite32(rdram, inner + 0xAC, 0u); + mpegGuestWrite32(rdram, 0x171904u, 0u); } void sceMpegResetDefaultPtsGap(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) @@ -1889,7 +2454,28 @@ void scePadStateIntToStr(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) void scePrintf(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) { - TODO_NAMED("scePrintf", rdram, ctx, runtime); + uint32_t format_addr = getRegU32(ctx, 4); + const std::string formatOwned = readPs2CStringBounded(rdram, runtime, format_addr, 1024); + if (format_addr == 0) + return; + std::string rendered = formatPs2StringWithArgs(rdram, ctx, runtime, formatOwned.c_str(), 1); + if (rendered.size() > 2048) + rendered.resize(2048); + const std::string logLine = sanitizeForLog(rendered); + uint32_t count = 0; + { + std::lock_guard lock(g_printfLogMutex); + count = ++g_printfLogCount; + } + if (count <= kMaxPrintfLogs) + { + std::cout << "PS2 scePrintf: " << logLine; + std::cout << std::flush; + } + else if (count == kMaxPrintfLogs + 1) + { + std::cerr << "PS2 printf logging suppressed after " << kMaxPrintfLogs << " lines" << std::endl; + } } void sceRead(uint8_t *rdram, R5900Context *ctx, PS2Runtime *runtime) From e599749b419619eac4c7a6b9ee0109ef648e8acb Mon Sep 17 00:00:00 2001 From: Aslan Hud Date: Tue, 24 Feb 2026 22:55:52 +0100 Subject: [PATCH 3/3] fix: fixed linux build error --- ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp b/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp index 2e07bda7..4cdc407d 100644 --- a/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp +++ b/ps2xRuntime/src/lib/ps2_gs_rasterizer.cpp @@ -2,6 +2,7 @@ #include "ps2_gs_gpu.h" #include "ps2_gs_common.h" #include "ps2_gs_psmt4.h" +#include #include #include #include