From 57c4d9d7709c1e183b08e07912ce298df54d7415 Mon Sep 17 00:00:00 2001 From: kikugrave Date: Sun, 15 Feb 2026 10:47:41 -0800 Subject: [PATCH 01/13] Add PSP port with shared fast software renderer --- .gitignore | 5 + Makefile | 42 +- README.md | 1 + asm/macros/portable.inc | 3 + include/config.h | 14 +- include/gba/defines.h | 6 + .../shared/rendering/sw_renderer_common.h | 56 + libagbsyscall/Makefile | 4 + src/background.c | 4 +- src/core.c | 4 +- src/lib/m4a/m4a.c | 9 +- src/platform/pret_sdl/sdl2.c | 554 ++++---- src/platform/psp/psp_module.c | 41 + src/platform/shared/audio/m4a_sound_mixer.c | 20 +- src/platform/shared/dma.c | 22 +- .../shared/rendering/sw_renderer_fast.c | 1167 +++++++++++++++++ 16 files changed, 1708 insertions(+), 244 deletions(-) create mode 100644 include/platform/shared/rendering/sw_renderer_common.h create mode 100644 src/platform/psp/psp_module.c create mode 100644 src/platform/shared/rendering/sw_renderer_fast.c diff --git a/.gitignore b/.gitignore index 60cc4e932..8d9dfac69 100644 --- a/.gitignore +++ b/.gitignore @@ -86,5 +86,10 @@ libagbsyscall/*.s *.dll *.sdl +# PSP build outputs +EBOOT.PBP +PARAM.SFO +sa2_debug.log + # third party deps /ext diff --git a/Makefile b/Makefile index b972750e0..16f001474 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,12 @@ else ifeq ($(CPU_ARCH),i386) TOOLCHAIN := /usr/x86_64-w64-mingw32/ PREFIX := x86_64-w64-mingw32- endif +# PSP +else ifeq ($(PLATFORM),psp) + PSPDEV ?= $(HOME)/pspdev + PSPSDK := $(PSPDEV)/psp/sdk + export PATH := $(PSPDEV)/bin:$(PATH) + PREFIX := psp- else # Native ifneq ($(PLATFORM),sdl) @@ -120,6 +126,10 @@ else ifeq ($(PLATFORM),sdl) ROM := $(BUILD_NAME).sdl ELF := $(ROM).elf MAP := $(ROM).map +else ifeq ($(PLATFORM),psp) +ROM := EBOOT.PBP +ELF := $(BUILD_NAME).psp.elf +MAP := $(BUILD_NAME).psp.map else ROM := $(BUILD_NAME).$(PLATFORM).exe ELF := $(ROM:.exe=.elf) @@ -156,11 +166,13 @@ TILESETS_SUBDIR = graphics/tilesets/ ifeq ($(PLATFORM),gba) C_SRCS := $(shell find $(C_SUBDIR) 
-name "*.c" -not -path "*/platform/*") else ifeq ($(PLATFORM),sdl) +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") +else ifeq ($(PLATFORM),psp) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*") else ifeq ($(PLATFORM),sdl_win32) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") else ifeq ($(PLATFORM),win32) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/pret_sdl/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/pret_sdl/*" -not -path "*/platform/psp/*") else C_SRCS := $(shell find $(C_SUBDIR) -name "*.c") endif @@ -225,6 +237,9 @@ else ifeq ($(PLATFORM),sdl) CC1FLAGS += -Wno-parentheses-equality -Wno-unused-value CPPFLAGS += -D TITLE_BAR=$(BUILD_NAME).$(PLATFORM) -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 $(shell sdl2-config --cflags) + else ifeq ($(PLATFORM),psp) + CC1FLAGS += -G0 + CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 -D SDL_MAIN_HANDLED -I$(PSPDEV)/psp/include/SDL2 -I$(PSPDEV)/psp/include -I$(PSPSDK)/include -D_PSP_FW_VERSION=600 else ifeq ($(PLATFORM),sdl_win32) CPPFLAGS += -D TITLE_BAR=$(BUILD_NAME).$(PLATFORM) -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 $(SDL_MINGW_FLAGS) else ifeq ($(PLATFORM),win32) @@ -249,6 +264,8 @@ else # for modern we are using a modern compiler # so instead of CPP we can use gcc -E to "preprocess only" CPP := $(CC1) -E + else ifeq ($(PLATFORM), psp) + CPP := $(CC1) -E endif # Allow file input through stdin on modern GCC and set it to "compile only" CC1FLAGS += -x c -S @@ -258,7 +275,12 @@ ifeq ($(DEBUG),1) CC1FLAGS += -g3 -O0 CPPFLAGS += -D DEBUG=1 else - CC1FLAGS += -O2 + ifeq ($(PLATFORM),psp) + # -O3 for PSP (Allegrex MIPS, small D-cache) + CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer + else 
+ CC1FLAGS += -O2 + endif CPPFLAGS += -D DEBUG=0 endif @@ -297,6 +319,9 @@ else ifeq ($(PLATFORM),sdl) else MAP_FLAG := -Xlinker -Map= endif +# PSP +else ifeq ($(PLATFORM),psp) + MAP_FLAG := -Xlinker -Map= # Win32 else MAP_FLAG := -Xlinker -Map= @@ -307,6 +332,8 @@ ifeq ($(PLATFORM),gba) LIBS := $(ROOT_DIR)/tools/agbcc/lib/libgcc.a $(ROOT_DIR)/tools/agbcc/lib/libc.a $(LIBABGSYSCALL_LIBS) else ifeq ($(PLATFORM),sdl) LIBS := $(shell sdl2-config --cflags --libs) +else ifeq ($(PLATFORM),psp) + LIBS := -L$(PSPDEV)/psp/lib -L$(PSPSDK)/lib -lSDL2 -lm -lGL -lpspvram -lpspaudio -lpspvfpu -lpspdisplay -lpspgu -lpspge -lpsphprm -lpspctrl -lpsppower -lpspdebug -lpspnet -lpspnet_apctl -Wl,-zmax-page-size=128 else ifeq ($(PLATFORM),sdl_win32) LIBS := -mwin32 -lkernel32 -lwinmm -lmingw32 -lxinput $(SDL_MINGW_LIBS) else ifeq ($(PLATFORM), win32) @@ -398,6 +425,7 @@ tidy: $(RM) -r build/* $(RM) SDL2.dll $(RM) $(BUILD_NAME)*.exe $(BUILD_NAME)*.elf $(BUILD_NAME)*.map $(BUILD_NAME)*.sdl $(BUILD_NAME)*.gba + $(RM) EBOOT.PBP PARAM.SFO usa_beta: ; @$(MAKE) GAME_REGION=USA GAME_VARIANT=BETA @@ -409,6 +437,8 @@ europe: ; @$(MAKE) GAME_REGION=EUROPE sdl: ; @$(MAKE) PLATFORM=sdl +psp: ; @$(MAKE) PLATFORM=psp + tas_sdl: ; @$(MAKE) sdl TAS_TESTING=1 sdl_win32: @@ -476,6 +506,12 @@ ifeq ($(PLATFORM),gba) $(FIX) $@ -p -t"$(TITLE)" -c$(GAME_CODE) -m$(MAKER_CODE) -r$(GAME_REVISION) --silent else ifeq ($(PLATFORM),sdl) cp $< $@ +else ifeq ($(PLATFORM),psp) + psp-fixup-imports $< + mksfoex 'Sonic Advance 2' PARAM.SFO + psp-strip $< -o $(BUILD_NAME).psp_strip.elf + pack-pbp $@ PARAM.SFO NULL NULL NULL NULL NULL $(BUILD_NAME).psp_strip.elf NULL + -rm -f $(BUILD_NAME).psp_strip.elf else $(OBJCOPY) -O pei-x86-64 $< $@ endif diff --git a/README.md b/README.md index 382027ade..1dd07df9f 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ It can also build: * **sa2.sdl** `make sdl` (Linux/MacOS SDL 64bit port) * **sa2.sdl_win32.exe** `make sdl_win32` (Windows SDL 64bit port) * :construction: 
**sa2.win32.exe** `make win32` (Win32 native port, not functional) +* **EBOOT.PBP** `make psp` (PlayStation Portable homebrew port, requires [PSPDEV](https://github.com/pspdev/pspdev)) ## Current state diff --git a/asm/macros/portable.inc b/asm/macros/portable.inc index b389fb26a..e3f80ed9d 100644 --- a/asm/macros/portable.inc +++ b/asm/macros/portable.inc @@ -10,6 +10,9 @@ .macro mPtr value #if defined(__aarch64__) || defined(__x86_64__) .quad \value +#elif defined(__mips__) + .balign 4 + .int \value #else .int \value #endif diff --git a/include/config.h b/include/config.h index 15b9df4ec..845e61104 100644 --- a/include/config.h +++ b/include/config.h @@ -39,14 +39,20 @@ #define TAS_TESTING_WIDESCREEN_HACK 1 -#define RENDERER_SOFTWARE 0 -#define RENDERER_OPENGL 1 -#define RENDERER_COUNT 2 -#if PLATFORM_WIN32 && !PLATFORM_SDL +#define RENDERER_SOFTWARE 0 +#define RENDERER_OPENGL 1 +#define RENDERER_SOFTWARE_FAST 2 +#define RENDERER_COUNT 3 + +#ifndef RENDERER +#if defined(__PSP__) || defined(__PS2__) +#define RENDERER RENDERER_SOFTWARE_FAST +#elif PLATFORM_WIN32 && !PLATFORM_SDL // TODO: Only win32 for now #define RENDERER RENDERER_OPENGL #else #define RENDERER RENDERER_SOFTWARE #endif +#endif #endif // GUARD_SA2_CONFIG_H diff --git a/include/gba/defines.h b/include/gba/defines.h index b904ee74d..190284510 100644 --- a/include/gba/defines.h +++ b/include/gba/defines.h @@ -39,8 +39,14 @@ #define OAM_ENTRY_COUNT 128 #if PORTABLE // NOTE: Used in gba/types.h, so they have to be defined before the #include +#ifdef __PSP__ +// PSP: Use GBA-native resolution, SDL scales to 480x272 +#define DISPLAY_WIDTH 240 +#define DISPLAY_HEIGHT 160 +#else #define DISPLAY_WIDTH 426 #define DISPLAY_HEIGHT 240 +#endif // NOTE: We shouldn't consider WIDESCREEN_HACK a permanent thing. // This hack should best be removed once there's a "native" platform layer. 
diff --git a/include/platform/shared/rendering/sw_renderer_common.h b/include/platform/shared/rendering/sw_renderer_common.h new file mode 100644 index 000000000..1e1a626d9 --- /dev/null +++ b/include/platform/shared/rendering/sw_renderer_common.h @@ -0,0 +1,56 @@ +#ifndef GUARD_SW_RENDERER_COMMON_H +#define GUARD_SW_RENDERER_COMMON_H + +// shared color math for the gba ppu blend unit +// used by both the normal (multi-pass) and fast (single-pass) software renderers + +#include <stdint.h> + +// bgr555 channel extraction +#define getAlphaBit(x) (((x) >> 15) & 1) +#define getRedChannel(x) (((x) >> 0) & 0x1F) +#define getGreenChannel(x) (((x) >> 5) & 0x1F) +#define getBlueChannel(x) (((x) >> 10) & 0x1F) +#define COLOR_OPAQUE 0x8000 + +static inline uint16_t alphaBlendColor(uint16_t targetA, uint16_t targetB, + unsigned int eva, unsigned int evb) +{ + unsigned int r = ((getRedChannel(targetA) * eva) + (getRedChannel(targetB) * evb)) >> 4; + unsigned int g = ((getGreenChannel(targetA) * eva) + (getGreenChannel(targetB) * evb)) >> 4; + unsigned int b = ((getBlueChannel(targetA) * eva) + (getBlueChannel(targetB) * evb)) >> 4; + + if (r > 31) r = 31; + if (g > 31) g = 31; + if (b > 31) b = 31; + + return r | (g << 5) | (b << 10) | COLOR_OPAQUE; +} + +static inline uint16_t alphaBrightnessIncrease(uint16_t targetA, unsigned int evy) +{ + unsigned int r = getRedChannel(targetA) + (31 - getRedChannel(targetA)) * evy / 16; + unsigned int g = getGreenChannel(targetA) + (31 - getGreenChannel(targetA)) * evy / 16; + unsigned int b = getBlueChannel(targetA) + (31 - getBlueChannel(targetA)) * evy / 16; + + if (r > 31) r = 31; + if (g > 31) g = 31; + if (b > 31) b = 31; + + return r | (g << 5) | (b << 10) | COLOR_OPAQUE; +} + +static inline uint16_t alphaBrightnessDecrease(uint16_t targetA, unsigned int evy) +{ + unsigned int r = getRedChannel(targetA) - getRedChannel(targetA) * evy / 16; + unsigned int g = getGreenChannel(targetA) - getGreenChannel(targetA) * evy / 16; + unsigned int b = 
getBlueChannel(targetA) - getBlueChannel(targetA) * evy / 16; + + if (r > 31) r = 31; + if (g > 31) g = 31; + if (b > 31) b = 31; + + return r | (g << 5) | (b << 10) | COLOR_OPAQUE; +} + +#endif // GUARD_SW_RENDERER_COMMON_H diff --git a/libagbsyscall/Makefile b/libagbsyscall/Makefile index 654a44e4b..7f6c55693 100644 --- a/libagbsyscall/Makefile +++ b/libagbsyscall/Makefile @@ -34,6 +34,10 @@ else ifeq ($(CPU_ARCH),i386) TOOLCHAIN := /usr/x86_64-w64-mingw32/ PREFIX := x86_64-w64-mingw32- endif +else ifeq ($(PLATFORM),psp) + PSPDEV ?= $(HOME)/pspdev + export PATH := $(PSPDEV)/bin:$(PATH) + PREFIX := psp- else ifneq ($(PLATFORM),sdl) $(error Unknown CPU architecture $(CPU_ARCH)) endif # (PLATFORM == gba) diff --git a/src/background.c b/src/background.c index 7fba66626..190160224 100644 --- a/src/background.c +++ b/src/background.c @@ -650,7 +650,7 @@ END_NONMATCH void UpdateBgAnimationTiles(Background *bg) { -#if (RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != RENDERER_OPENGL) Tilemap *tilemap = gTilemapsRef[bg->tilemapId]; if (tilemap->animFrameCount > 0) { if (tilemap->animDelay <= ++bg->animDelayCounter) { @@ -872,7 +872,7 @@ NONMATCH("asm/non_matching/engine/sub_80039E4.inc", bool32 sub_80039E4(void)) return TRUE; #endif -#if (RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != RENDERER_OPENGL) if (gBgSpritesCount != 0) { OamDataShort oam; s32 r5; diff --git a/src/core.c b/src/core.c index ea52547b7..a5ffe374d 100644 --- a/src/core.c +++ b/src/core.c @@ -924,7 +924,7 @@ bool32 ProcessVramGraphicsCopyQueue(void) if ((graphics->src != 0) && (graphics->dest != 0)) #endif { -#if (RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != RENDERER_OPENGL) DmaCopy16(3, (void *)(graphics->src + offset), (void *)(graphics->dest + offset), COPY_CHUNK_SIZE); #endif graphics->size -= COPY_CHUNK_SIZE; @@ -939,7 +939,7 @@ bool32 ProcessVramGraphicsCopyQueue(void) if ((graphics->src != 0) && (graphics->dest != 0)) #endif { -#if (RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != 
RENDERER_OPENGL) DmaCopy16(3, (void *)(graphics->src + offset), (void *)(graphics->dest + offset), graphics->size); #endif } diff --git a/src/lib/m4a/m4a.c b/src/lib/m4a/m4a.c index db161254b..a7119e5a3 100644 --- a/src/lib/m4a/m4a.c +++ b/src/lib/m4a/m4a.c @@ -1377,8 +1377,15 @@ cond_true : { return; } -cond_false: +cond_false: { +#ifdef __mips__ + // Align to 4 bytes (mPtr adds .balign 4 on MIPS) + u8 *ptrStart = (u8 *)(((uintptr_t)track->cmdPtr + 3) & ~(uintptr_t)3); + track->cmdPtr = ptrStart + 4; +#else track->cmdPtr += 4; +#endif +} } void MP2K_event_xcmd(struct MP2KPlayerState *mplayInfo, struct MP2KTrack *track) diff --git a/src/platform/pret_sdl/sdl2.c b/src/platform/pret_sdl/sdl2.c index f4ceaf79a..f93af8ed5 100644 --- a/src/platform/pret_sdl/sdl2.c +++ b/src/platform/pret_sdl/sdl2.c @@ -10,6 +10,11 @@ #include #endif +#ifdef __PSP__ +#include +extern int setupPspCallbacks(void); +#endif + #include #include "global.h" @@ -21,6 +26,7 @@ #include "lib/agb_flash/flash_internal.h" #include "platform/shared/dma.h" #include "platform/shared/input.h" +#include "platform/shared/rendering/sw_renderer_common.h" #if ENABLE_AUDIO #include "platform/shared/audio/cgb_audio.h" @@ -92,6 +98,13 @@ bool paused = false; bool stepOneFrame = false; bool headless = false; +#ifdef __PSP__ +static SDL_Joystick *pspJoystick = NULL; +#define PSP_SCREEN_W 480 +#define PSP_SCREEN_H 272 +static SDL_Rect pspDestRect; +#endif + double lastGameTime = 0; double curGameTime = 0; double fixedTimestep = 1.0 / 60.0; // 16.666667ms @@ -122,6 +135,10 @@ void Platform_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); } int main(int argc, char **argv) { +#ifdef __PSP__ + setupPspCallbacks(); +#endif + const char *headlessEnv = getenv("HEADLESS"); if (headlessEnv && strcmp(headlessEnv, "true") == 0) { @@ -162,14 +179,24 @@ int main(int argc, char **argv) return 1; } +#ifdef __PSP__ + if (SDL_NumJoysticks() > 0) { + pspJoystick = SDL_JoystickOpen(0); + } +#endif + #ifdef TITLE_BAR const char 
*title = STR(TITLE_BAR); #else const char *title = "SAT-R sa2"; #endif +#ifdef __PSP__ + sdlWindow = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 480, 272, SDL_WINDOW_SHOWN); +#else sdlWindow = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, DISPLAY_WIDTH * videoScale, DISPLAY_HEIGHT * videoScale, SDL_WINDOW_SHOWN | SDL_WINDOW_RESIZABLE); +#endif if (sdlWindow == NULL) { fprintf(stderr, "Window could not be created! SDL_Error: %s\n", SDL_GetError()); return 1; @@ -191,7 +218,15 @@ int main(int argc, char **argv) } #endif +#ifdef __PSP__ + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC); + if (sdlRenderer == NULL) + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_ACCELERATED); + if (sdlRenderer == NULL) + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, 0); +#else sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_PRESENTVSYNC); +#endif if (sdlRenderer == NULL) { fprintf(stderr, "Renderer could not be created! SDL_Error: %s\n", SDL_GetError()); return 1; @@ -208,7 +243,12 @@ int main(int argc, char **argv) SDL_SetRenderDrawColor(sdlRenderer, 0, 0, 0, 255); SDL_RenderClear(sdlRenderer); SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "0"); +#ifdef __PSP__ + // SDL_RenderSetLogicalSize is broken on PSP, stretch to fill manually + pspDestRect = (SDL_Rect){ 0, 0, PSP_SCREEN_W, PSP_SCREEN_H }; +#else SDL_RenderSetLogicalSize(sdlRenderer, DISPLAY_WIDTH, DISPLAY_HEIGHT); +#endif #if ENABLE_VRAM_VIEW SDL_SetRenderDrawColor(vramRenderer, 0, 0, 0, 255); SDL_RenderClear(vramRenderer); @@ -239,9 +279,9 @@ int main(int argc, char **argv) want.samples = (want.freq / 60); cgb_audio_init(want.freq); - if (SDL_OpenAudio(&want, 0) < 0) + if (SDL_OpenAudio(&want, 0) < 0) { SDL_Log("Failed to open audio: %s", SDL_GetError()); - else { + } else { if (want.format != AUDIO_F32) /* we let this one thing change. 
*/ SDL_Log("We didn't get Float32 audio format."); SDL_PauseAudio(0); @@ -259,12 +299,10 @@ int main(int argc, char **argv) bool newFrameRequested = FALSE; -// Every GBA frame we process the SDL events and render the number of times -// SDL requires us to for vsync. When we need another frame we break out of -// the loop via a return +// called every gba frame. we process sdl events and render as many times +// as vsync needs, then return when a new game frame is needed. void VBlankIntrWait(void) { - // ((struct MultiSioPacket *)gMultiSioArea.nextSendBufp) #define HANDLE_VBLANK_INTRS() \ ({ \ REG_DISPSTAT |= INTR_FLAG_VBLANK; \ @@ -281,6 +319,11 @@ void VBlankIntrWait(void) } bool frameAvailable = TRUE; + bool frameDrawn = false; +#ifdef __PSP__ + static int psp_frames_skipped = 0; +#define PSP_MAX_FRAME_SKIP 2 +#endif while (isRunning) { ProcessSDLEvents(); @@ -288,9 +331,8 @@ void VBlankIntrWait(void) if (!paused || stepOneFrame) { double dt = fixedTimestep / timeScale; // TODO: Fix speedup - // Hack to emulate the behaviour of threaded sdl - // it will not add any new values to the accumulator - // when a new frame was requested within a frame cycle + // don't accumulate time if we already requested a new frame + // this frame cycle (emulates threaded sdl behavior) if (!newFrameRequested) { double deltaTime = 0; @@ -312,8 +354,21 @@ void VBlankIntrWait(void) while (accumulator >= dt) { REG_KEYINPUT = KEYS_MASK ^ Platform_GetKeyInput(); if (frameAvailable) { +#ifdef __PSP__ + // frame skip: let game logic catch up when behind + if (accumulator >= dt * 2.0 && psp_frames_skipped < PSP_MAX_FRAME_SKIP) { + psp_frames_skipped++; + frameAvailable = FALSE; + HANDLE_VBLANK_INTRS(); + accumulator -= dt; + newFrameRequested = TRUE; + return; + } + psp_frames_skipped = 0; +#endif VDraw(sdlTexture); frameAvailable = FALSE; + frameDrawn = true; HANDLE_VBLANK_INTRS(); @@ -329,6 +384,17 @@ void VBlankIntrWait(void) } } + // present +#ifdef __PSP__ + // manual blit since 
SDL_RenderSetLogicalSize doesn't work on psp + if (frameDrawn) { + SDL_RenderCopy(sdlRenderer, sdlTexture, NULL, &pspDestRect); + SDL_RenderPresent(sdlRenderer); + frameDrawn = false; + } else { + SDL_Delay(1); + } +#else SDL_RenderClear(sdlRenderer); SDL_RenderCopy(sdlRenderer, sdlTexture, NULL, NULL); @@ -345,6 +411,7 @@ void VBlankIntrWait(void) SDL_RenderPresent(sdlRenderer); #if ENABLE_VRAM_VIEW SDL_RenderPresent(vramRenderer); +#endif #endif } @@ -352,8 +419,11 @@ void VBlankIntrWait(void) SDL_DestroyWindow(sdlWindow); SDL_Quit(); +#ifdef __PSP__ + sceKernelExitGame(); +#endif exit(0); -#undef RUN_VBLANK_INTRS +#undef HANDLE_VBLANK_INTRS } static void ReadSaveFile(char *path) @@ -421,6 +491,55 @@ static void CloseSaveFile() static u16 keys; +#ifdef __PSP__ +#define PSP_BTN_TRIANGLE 0 +#define PSP_BTN_CIRCLE 1 +#define PSP_BTN_CROSS 2 +#define PSP_BTN_SQUARE 3 +#define PSP_BTN_LTRIGGER 4 +#define PSP_BTN_RTRIGGER 5 +#define PSP_BTN_DOWN 6 +#define PSP_BTN_LEFT 7 +#define PSP_BTN_UP 8 +#define PSP_BTN_RIGHT 9 +#define PSP_BTN_SELECT 10 +#define PSP_BTN_START 11 + +static u16 PollPSPButtons(void) +{ + u16 pspKeys = 0; + if (pspJoystick == NULL) + return pspKeys; + + SDL_JoystickUpdate(); + + if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_CROSS)) + pspKeys |= A_BUTTON; + if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_CIRCLE)) + pspKeys |= B_BUTTON; + if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_SQUARE)) + pspKeys |= B_BUTTON; // Square also B + if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_START)) + pspKeys |= START_BUTTON; + if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_SELECT)) + pspKeys |= SELECT_BUTTON; + if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_LTRIGGER)) + pspKeys |= L_BUTTON; + if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_RTRIGGER)) + pspKeys |= R_BUTTON; + if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_UP)) + pspKeys |= DPAD_UP; + if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_DOWN)) + pspKeys |= DPAD_DOWN; + if 
(SDL_JoystickGetButton(pspJoystick, PSP_BTN_LEFT)) + pspKeys |= DPAD_LEFT; + if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_RIGHT)) + pspKeys |= DPAD_RIGHT; + + return pspKeys; +} +#endif + u32 fullScreenFlags = 0; static SDL_DisplayMode sdlDispMode = { 0 }; @@ -561,18 +680,33 @@ u16 Platform_GetKeyInput(void) return (gamepadKeys != 0) ? gamepadKeys : keys; #endif +#ifdef __PSP__ + return keys | PollPSPButtons(); +#endif + return keys; } // BIOS function implementations are based on the VBA-M source code. -static uint32_t CPUReadMemory(const void *src) { return *(uint32_t *)src; } +// safe unaligned access for MIPS +static uint32_t CPUReadMemory(const void *src) +{ + uint32_t val; + memcpy(&val, src, sizeof(val)); + return val; +} -static void CPUWriteMemory(void *dest, uint32_t val) { *(uint32_t *)dest = val; } +static void CPUWriteMemory(void *dest, uint32_t val) { memcpy(dest, &val, sizeof(val)); } -static uint16_t CPUReadHalfWord(const void *src) { return *(uint16_t *)src; } +static uint16_t CPUReadHalfWord(const void *src) +{ + uint16_t val; + memcpy(&val, src, sizeof(val)); + return val; +} -static void CPUWriteHalfWord(void *dest, uint16_t val) { *(uint16_t *)dest = val; } +static void CPUWriteHalfWord(void *dest, uint16_t val) { memcpy(dest, &val, sizeof(val)); } static uint8_t CPUReadByte(const void *src) { return *(uint8_t *)src; } @@ -968,25 +1102,26 @@ static const uint16_t bgMapSizes[][2] = { #define applySpriteHorizontalMosaicEffect(x) (x - (x % (mosaicSpriteEffectX + 1))) #define applySpriteVerticalMosaicEffect(y) (y - (y % (mosaicSpriteEffectY + 1))) -// NOTE: This is the corrected function. static void RenderBGScanline(int bgNum, uint16_t control, uint16_t hoffs, uint16_t voffs, int lineNum, uint16_t *line) { unsigned int charBaseBlock = (control >> 2) & 3; unsigned int screenBaseBlock = (control & BGCNT_SCREENBASE_MASK) >> 8; - unsigned int bitsPerPixel = ((control >> 7) & 1) ? 
8 : 4; + unsigned int is8bpp = (control >> 7) & 1; // Determine background dimensions from the control register unsigned int mapWidth = bgMapSizes[control >> 14][0]; // in tiles - unsigned int mapHeight = bgMapSizes[control >> 14][1]; // in tiles - unsigned int mapPixelWidth = mapWidth * TILE_WIDTH; - unsigned int mapPixelHeight = mapHeight * TILE_WIDTH; + unsigned int mapPixelWidth = mapWidth << 3; + unsigned int mapPixelHeight = bgMapSizes[control >> 14][1] << 3; + unsigned int pixelWidthMask = mapPixelWidth - 1; + unsigned int pixelHeightMask = mapPixelHeight - 1; uint8_t *bgtiles = (uint8_t *)BG_CHAR_ADDR(charBaseBlock); uint16_t *bgmap = (uint16_t *)BG_SCREEN_ADDR(screenBaseBlock); uint16_t *pal = (uint16_t *)PLTT; // Apply vertical mosaic effect to the entire scanline if enabled - if (control & BGCNT_MOSAIC) { + bool hasMosaic = control & BGCNT_MOSAIC; + if (hasMosaic) { lineNum = applyBGVerticalMosaicEffect(lineNum); } @@ -994,29 +1129,22 @@ static void RenderBGScanline(int bgNum, uint16_t control, uint16_t hoffs, uint16 hoffs &= 0x1FF; voffs &= 0x1FF; + unsigned int yy = (lineNum + voffs) & pixelHeightMask; + unsigned int mapY = yy >> 3; + unsigned int tileY = yy & 7; + unsigned int mapRowBase = mapY * mapWidth; + for (unsigned int x = 0; x < DISPLAY_WIDTH; x++) { - unsigned int xx, yy; + unsigned int xx; - // Calculate the source coordinate in the background map, applying scroll and mosaic - if (control & BGCNT_MOSAIC) { - xx = applyBGHorizontalMosaicEffect(x) + hoffs; + if (hasMosaic) { + xx = (applyBGHorizontalMosaicEffect(x) + hoffs) & pixelWidthMask; } else { - xx = x + hoffs; + xx = (x + hoffs) & pixelWidthMask; } - yy = lineNum + voffs; - // Wrap the coordinates based on the background's actual pixel dimensions. - // This fixes issues with backgrounds that are not 256x256. 
- xx &= (mapPixelWidth - 1); - yy &= (mapPixelHeight - 1); - - // Convert pixel coordinates to tile coordinates - unsigned int mapX = xx / TILE_WIDTH; - unsigned int mapY = yy / TILE_WIDTH; - - // Calculate the 1D index into the tilemap. This was the primary source of bugs, - // as the original code used a hardcoded map width of 32 tiles. - unsigned int mapIndex = mapY * mapWidth + mapX; + unsigned int mapX = xx >> 3; + unsigned int mapIndex = mapRowBase + mapX; uint16_t entry = bgmap[mapIndex]; unsigned int tileNum = entry & 0x3FF; @@ -1026,40 +1154,30 @@ static void RenderBGScanline(int bgNum, uint16_t control, uint16_t hoffs, uint16 vramPalIdBuffer[tileNum] = paletteNum; #endif - // Get the coordinate within the specific tile - unsigned int tileX = xx % TILE_WIDTH; - unsigned int tileY = yy % TILE_WIDTH; + unsigned int tx = xx & 7; + unsigned int ty = tileY; - // Handle horizontal and vertical tile flipping if (entry & (1 << 10)) - tileX = (TILE_WIDTH - 1) - tileX; // H-flip + tx = 7 - tx; if (entry & (1 << 11)) - tileY = (TILE_WIDTH - 1) - tileY; // V-flip + ty = 7 - ty; - // Calculate address of the pixel data and extract the color - if (bitsPerPixel == 4) { - uint32_t tileDataOffset = tileNum * TILE_SIZE_4BPP; - uint32_t pixelByteOffset = (tileY * TILE_WIDTH + tileX) / 2; + if (!is8bpp) { + uint32_t tileDataOffset = tileNum << 5; + uint32_t pixelByteOffset = (ty << 2) + (tx >> 1); uint8_t pixelPair = bgtiles[tileDataOffset + pixelByteOffset]; - uint8_t pixel; - if (tileX & 1) { - pixel = pixelPair >> 4; - } else { - pixel = pixelPair & 0xF; - } + uint8_t pixel = (tx & 1) ? 
(pixelPair >> 4) : (pixelPair & 0xF); if (pixel != 0) { - line[x] = pal[16 * paletteNum + pixel] | 0x8000; + line[x] = pal[(paletteNum << 4) + pixel] | 0x8000; } } else { // 8 bits per pixel - uint32_t tileDataOffset = tileNum * TILE_SIZE_8BPP; - uint32_t pixelByteOffset = tileY * TILE_WIDTH + tileX; + uint32_t tileDataOffset = tileNum << 6; + uint32_t pixelByteOffset = (ty << 3) + tx; uint8_t pixel = bgtiles[tileDataOffset + pixelByteOffset]; if (pixel != 0) { - // For 8bpp tiles, the palette number in the tile entry is ignored. - // The pixel value is a direct index into the 256-color palette. line[x] = pal[pixel] | 0x8000; } } @@ -1257,65 +1375,8 @@ const u8 spriteSizes[][2] = { { 32, 64 }, }; -#define getAlphaBit(x) ((x >> 15) & 1) -#define getRedChannel(x) ((x >> 0) & 0x1F) -#define getGreenChannel(x) ((x >> 5) & 0x1F) -#define getBlueChannel(x) ((x >> 10) & 0x1F) #define isbgEnabled(x) ((REG_DISPCNT >> 8) & 0xF) & (1 << x) -static uint16_t alphaBlendColor(uint16_t targetA, uint16_t targetB) -{ - unsigned int eva = REG_BLDALPHA & 0x1F; - unsigned int evb = (REG_BLDALPHA >> 8) & 0x1F; - // shift right by 4 = division by 16 - unsigned int r = ((getRedChannel(targetA) * eva) + (getRedChannel(targetB) * evb)) >> 4; - unsigned int g = ((getGreenChannel(targetA) * eva) + (getGreenChannel(targetB) * evb)) >> 4; - unsigned int b = ((getBlueChannel(targetA) * eva) + (getBlueChannel(targetB) * evb)) >> 4; - - if (r > 31) - r = 31; - if (g > 31) - g = 31; - if (b > 31) - b = 31; - - return r | (g << 5) | (b << 10) | (1 << 15); -} - -static uint16_t alphaBrightnessIncrease(uint16_t targetA) -{ - unsigned int evy = (REG_BLDY & 0x1F); - unsigned int r = getRedChannel(targetA) + (31 - getRedChannel(targetA)) * evy / 16; - unsigned int g = getGreenChannel(targetA) + (31 - getGreenChannel(targetA)) * evy / 16; - unsigned int b = getBlueChannel(targetA) + (31 - getBlueChannel(targetA)) * evy / 16; - - if (r > 31) - r = 31; - if (g > 31) - g = 31; - if (b > 31) - b = 31; - - 
return r | (g << 5) | (b << 10) | (1 << 15); -} - -static uint16_t alphaBrightnessDecrease(uint16_t targetA) -{ - unsigned int evy = (REG_BLDY & 0x1F); - unsigned int r = getRedChannel(targetA) - getRedChannel(targetA) * evy / 16; - unsigned int g = getGreenChannel(targetA) - getGreenChannel(targetA) * evy / 16; - unsigned int b = getBlueChannel(targetA) - getBlueChannel(targetA) * evy / 16; - - if (r > 31) - r = 31; - if (g > 31) - g = 31; - if (b > 31) - b = 31; - - return r | (g << 5) | (b << 10) | (1 << 15); -} - // outputs the blended pixel in colorOutput, the prxxx are the bg priority and // subpriority, pixelpos is pixel offset in scanline static bool alphaBlendSelectTargetB(struct scanlineData *scanline, uint16_t *colorOutput, char prnum, char prsub, int pixelpos, @@ -1396,8 +1457,6 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool bool isAffine = oam->split.affineMode & 1; bool doubleSizeOrDisabled = (oam->split.affineMode >> 1) & 1; - bool isSemiTransparent = (oam->split.objMode == 1); - bool isObjWin = (oam->split.objMode == 2); if (!(isAffine) && doubleSizeOrDisabled) // disable for non-affine { @@ -1408,31 +1467,41 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool width = gOamShapesSizes[index][0]; height = gOamShapesSizes[index][1]; - int rect_width = width; - int rect_height = height; - int half_width = width / 2; int half_height = height / 2; - pixels = scanline->spriteLayers[oam->split.priority]; - int32_t x = oam->split.x; int32_t y = oam->split.y; #if !EXTENDED_OAM - // The regular, unextended values are 9 and 8 unsigned bits for x and y respectively. - // Once they have exceeded the screen's right or bottom, they get treated as signed values on original hardware. - // This is done so that, for example, a sprite at 0 on either axis that moves left or up will not suddenly disappear. 
- // - // With EXTENDED_OAM we are using signed 16 bit values, so we don't want to change the raw value. if (x >= DISPLAY_WIDTH) x -= 512; if (y >= DISPLAY_HEIGHT) y -= 256; #endif + if (isAffine && doubleSizeOrDisabled) { + half_width *= 2; + half_height *= 2; + } + + int spriteTop = y; + int spriteBottom = y + (half_height * 2); + if ((int)vcount < spriteTop || (int)vcount >= spriteBottom) + continue; + + int spriteLeft = x; + int spriteRight = x + (half_width * 2); + if (spriteRight < 0 || spriteLeft >= DISPLAY_WIDTH) + continue; + + bool isSemiTransparent = (oam->split.objMode == 1); + bool isObjWin = (oam->split.objMode == 2); + + int rect_width = width; + int rect_height = height; + if (isAffine) { - // TODO: there is probably a better way to do this u8 matrixNum = oam->split.matrixNum * 4; OamData *oam1 = &((OamData *)OAM)[matrixNum]; @@ -1445,26 +1514,22 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool matrix[1][0] = oam3->all.affineParam; matrix[1][1] = oam4->all.affineParam; - if (doubleSizeOrDisabled) // double size for affine - { + if (doubleSizeOrDisabled) { rect_width *= 2; rect_height *= 2; - half_width *= 2; - half_height *= 2; } } else { - // Identity matrix[0][0] = 0x100; matrix[0][1] = 0; matrix[1][0] = 0; matrix[1][1] = 0x100; } + pixels = scanline->spriteLayers[oam->split.priority]; x += half_width; y += half_height; - // Does this sprite actually draw on this scanline? - if (vcount >= (y - half_height) && vcount < (y + half_height)) { + { int local_y = (oam->split.mosaic == 1) ? 
applySpriteVerticalMosaicEffect(vcount) - y : vcount - y; int number = oam->split.tileNum; int palette = oam->split.paletteNum; @@ -1472,9 +1537,10 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool bool flipY = !isAffine && ((oam->split.matrixNum >> 4) & 1); bool is8BPP = oam->split.bpp & 1; + { + uint8_t *tiledata = (uint8_t *)objtiles; + uint16_t *sprpal = (uint16_t *)(PLTT + (0x200 / 2)); for (int local_x = -half_width; local_x <= half_width; local_x++) { - uint8_t *tiledata = (uint8_t *)objtiles; - uint16_t *palette = (uint16_t *)(PLTT + (0x200 / 2)); int local_mosaicX; int tex_x; int tex_y; @@ -1505,30 +1571,32 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool if (flipY) tex_y = height - tex_y - 1; - int tile_x = tex_x % 8; - int tile_y = tex_y % 8; - int block_x = tex_x / 8; - int block_y = tex_y / 8; - int block_offset = ((block_y * (REG_DISPCNT & 0x40 ? (width / 8) : 16)) + block_x); + int tile_x = tex_x & 7; + int tile_y = tex_y & 7; + int block_x = tex_x >> 3; + int block_y = tex_y >> 3; + int block_offset = ((block_y * (REG_DISPCNT & 0x40 ? 
(width >> 3) : 16)) + block_x); uint16_t pixel = 0; + uint16_t *pixpal; if (!is8BPP) { - int tileDataIndex = (block_offset + oam->split.tileNum) * 32 + (tile_y * 4) + (tile_x / 2); + int tileDataIndex = ((block_offset + oam->split.tileNum) << 5) + (tile_y << 2) + (tile_x >> 1); pixel = tiledata[tileDataIndex]; if (tile_x & 1) pixel >>= 4; else pixel &= 0xF; - palette += oam->split.paletteNum * 16; + pixpal = sprpal + (oam->split.paletteNum << 4); #if ENABLE_VRAM_VIEW - vramPalIdBuffer[0x800 + (tileDataIndex / 32)] = 16 + oam->split.paletteNum; + vramPalIdBuffer[0x800 + (tileDataIndex >> 5)] = 16 + oam->split.paletteNum; #endif } else { - pixel = tiledata[(block_offset * 2 + oam->split.tileNum) * 32 + (tile_y * 8) + tile_x]; + pixel = tiledata[((block_offset * 2 + oam->split.tileNum) << 5) + (tile_y << 3) + tile_x]; + pixpal = sprpal; } if (pixel != 0) { - uint16_t color = palette[pixel]; + uint16_t color = pixpal[pixel]; // if sprite mode is 2 then write to the window mask instead if (isObjWin) { @@ -1547,15 +1615,15 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool uint16_t targetA = color; uint16_t targetB = 0; if (alphaBlendSelectTargetB(scanline, &targetB, oam->split.priority, 0, global_x, false)) { - color = alphaBlendColor(targetA, targetB); + color = alphaBlendColor(targetA, targetB, REG_BLDALPHA & 0x1F, (REG_BLDALPHA >> 8) & 0x1F); } } else if (REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) { switch (blendMode) { case 2: - color = alphaBrightnessIncrease(color); + color = alphaBrightnessIncrease(color, REG_BLDY & 0x1F); break; case 3: - color = alphaBrightnessDecrease(color); + color = alphaBrightnessDecrease(color, REG_BLDY & 0x1F); break; } } @@ -1565,6 +1633,7 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool } } } + } } } } @@ -1574,14 +1643,19 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) unsigned int mode = REG_DISPCNT & 3; unsigned char numOfBgs = (mode == 0 ? 
4 : 3); int bgnum, prnum; - struct scanlineData scanline; + static struct scanlineData scanline; unsigned int blendMode = (REG_BLDCNT >> 6) & 3; unsigned int xpos; + unsigned int enabledBgs = (REG_DISPCNT >> 8) & 0xF; - // initialize all priority bookkeeping data - memset(scanline.layers, 0, sizeof(scanline.layers)); - memset(scanline.winMask, 0, sizeof(scanline.winMask)); - memset(scanline.spriteLayers, 0, sizeof(scanline.spriteLayers)); + // Only zero the layers that are actually enabled, + // instead of blindly zeroing all 4+4 layers (~8KB total) every scanline. + for (bgnum = 0; bgnum < numOfBgs; bgnum++) { + if (enabledBgs & (1 << bgnum)) + memset(scanline.layers[bgnum], 0, sizeof(scanline.layers[bgnum])); + } + if (REG_DISPCNT & DISPCNT_OBJ_ON) + memset(scanline.spriteLayers, 0, sizeof(scanline.spriteLayers)); memset(scanline.prioritySortedBgsCount, 0, sizeof(scanline.prioritySortedBgsCount)); for (bgnum = 0; bgnum < numOfBgs; bgnum++) { @@ -1696,63 +1770,89 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) if (REG_DISPCNT & DISPCNT_OBJ_ON) DrawOamSprites(&scanline, vcount, windowsEnabled); - // iterate trough every priority in order - for (prnum = 3; prnum >= 0; prnum--) { - for (char prsub = scanline.prioritySortedBgsCount[prnum] - 1; prsub >= 0; prsub--) { - char bgnum = scanline.prioritySortedBgs[prnum][prsub]; - // if background is enabled then draw it - if (isbgEnabled(bgnum)) { - uint16_t *src = scanline.layers[bgnum]; - // copy all pixels to framebuffer + // iterate through every priority in order + if (blendMode == 0 && !windowsEnabled) { + for (prnum = 3; prnum >= 0; prnum--) { + for (char prsub = scanline.prioritySortedBgsCount[prnum] - 1; prsub >= 0; prsub--) { + char bgnum = scanline.prioritySortedBgs[prnum][prsub]; + if (isbgEnabled(bgnum)) { + uint16_t *src = scanline.layers[bgnum]; + for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { + uint16_t color = src[xpos]; + if (color & 0x8000) // alpha bit set = opaque + pixels[xpos] = color; 
+ } + } + } + // draw sprites on current priority + if (REG_DISPCNT & DISPCNT_OBJ_ON) { + uint16_t *src = scanline.spriteLayers[prnum]; for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { - uint16_t color = src[xpos]; - bool winEffectEnable = true; - - if (!getAlphaBit(color)) - continue; // do nothing if alpha bit is not set + if (src[xpos] & 0x8000) + pixels[xpos] = src[xpos]; + } + } + } + } else { + // FULL PATH: blending and/or windows are active + for (prnum = 3; prnum >= 0; prnum--) { + for (char prsub = scanline.prioritySortedBgsCount[prnum] - 1; prsub >= 0; prsub--) { + char bgnum = scanline.prioritySortedBgs[prnum][prsub]; + // if background is enabled then draw it + if (isbgEnabled(bgnum)) { + uint16_t *src = scanline.layers[bgnum]; + // copy all pixels to framebuffer + for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { + uint16_t color = src[xpos]; + bool winEffectEnable = true; + + if (!getAlphaBit(color)) + continue; // do nothing if alpha bit is not set + + if (windowsEnabled) { + winEffectEnable = ((scanline.winMask[xpos] & WINMASK_CLR) >> 5); + // if bg is disabled inside the window then do not draw the pixel + if (!(scanline.winMask[xpos] & 1 << bgnum)) + continue; + } - if (windowsEnabled) { - winEffectEnable = ((scanline.winMask[xpos] & WINMASK_CLR) >> 5); - // if bg is disabled inside the window then do not draw the pixel - if (!(scanline.winMask[xpos] & 1 << bgnum)) - continue; - } + // blending code + if (blendMode != 0 && REG_BLDCNT & (1 << bgnum) && winEffectEnable) { + uint16_t targetA = color; + uint16_t targetB = 0; - // blending code - if (blendMode != 0 && REG_BLDCNT & (1 << bgnum) && winEffectEnable) { - uint16_t targetA = color; - uint16_t targetB = 0; - - switch (blendMode) { - case 1: { - char isSpriteBlendingEnabled = REG_BLDCNT & BLDCNT_TGT2_OBJ ? 
1 : 0; - // find targetB and blend it - if (alphaBlendSelectTargetB(&scanline, &targetB, prnum, prsub + 1, xpos, isSpriteBlendingEnabled)) { - color = alphaBlendColor(targetA, targetB); - } - } break; - case 2: - color = alphaBrightnessIncrease(targetA); - break; - case 3: - color = alphaBrightnessDecrease(targetA); - break; + switch (blendMode) { + case 1: { + char isSpriteBlendingEnabled = REG_BLDCNT & BLDCNT_TGT2_OBJ ? 1 : 0; + // find targetB and blend it + if (alphaBlendSelectTargetB(&scanline, &targetB, prnum, prsub + 1, xpos, + isSpriteBlendingEnabled)) { + color = alphaBlendColor(targetA, targetB, REG_BLDALPHA & 0x1F, (REG_BLDALPHA >> 8) & 0x1F); + } + } break; + case 2: + color = alphaBrightnessIncrease(targetA, REG_BLDY & 0x1F); + break; + case 3: + color = alphaBrightnessDecrease(targetA, REG_BLDY & 0x1F); + break; + } } + // write the pixel to scanline buffer output + pixels[xpos] = color; } - // write the pixel to scanline buffer output - pixels[xpos] = color; } } - } - // draw sprites on current priority - uint16_t *src = scanline.spriteLayers[prnum]; - for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { - if (getAlphaBit(src[xpos])) { - // check if sprite pixel draws inside window - if (windowsEnabled && !(scanline.winMask[xpos] & WINMASK_OBJ)) - continue; - // draw the pixel - pixels[xpos] = src[xpos]; + // draw sprites on current priority + uint16_t *src = scanline.spriteLayers[prnum]; + for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { + if (getAlphaBit(src[xpos])) { + // check if sprite pixel draws inside window + if (windowsEnabled && !(scanline.winMask[xpos] & WINMASK_OBJ)) + continue; + // draw the pixel + pixels[xpos] = src[xpos]; + } } } } @@ -1760,21 +1860,25 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) uint16_t *memsetu16(uint16_t *dst, uint16_t fill, size_t count) { - for (int i = 0; i < count; i++) { - *dst++ = fill; + uint32_t fill32 = ((uint32_t)fill << 16) | fill; + uint32_t *dst32 = (uint32_t *)dst; + size_t pairs = count 
>> 1; + for (size_t i = 0; i < pairs; i++) { + dst32[i] = fill32; } - - return 0; + if (count & 1) { + dst[count - 1] = fill; + } + return dst; } static void DrawFrame(uint16_t *pixels) { int i; - int j; - static uint16_t scanlines[DISPLAY_HEIGHT][DISPLAY_WIDTH]; - unsigned int blendMode = (REG_BLDCNT >> 6) & 3; for (i = 0; i < DISPLAY_HEIGHT; i++) { + uint16_t *scanline = &pixels[i * DISPLAY_WIDTH]; + REG_VCOUNT = i; if (((REG_DISPSTAT >> 8) & 0xFF) == REG_VCOUNT) { REG_DISPSTAT |= INTR_FLAG_VCOUNT; @@ -1782,10 +1886,10 @@ static void DrawFrame(uint16_t *pixels) gIntrTable[INTR_INDEX_VCOUNT](); } - // Render the backdrop color before the each individual scanline. - // HBlank interrupt code could have changed it inbetween lines. - memsetu16(scanlines[i], *(uint16_t *)PLTT, DISPLAY_WIDTH); - DrawScanline(scanlines[i], i); + // Render the backdrop color before each individual scanline. + // HBlank interrupt code could have changed it in between lines. + memsetu16(scanline, *(uint16_t *)PLTT, DISPLAY_WIDTH); + DrawScanline(scanline, i); REG_DISPSTAT |= INTR_FLAG_HBLANK; @@ -1797,14 +1901,6 @@ static void DrawFrame(uint16_t *pixels) REG_DISPSTAT &= ~INTR_FLAG_HBLANK; REG_DISPSTAT &= ~INTR_FLAG_VCOUNT; } - - // Copy to screen - for (i = 0; i < DISPLAY_HEIGHT; i++) { - uint16_t *src = scanlines[i]; - for (j = 0; j < DISPLAY_WIDTH; j++) { - pixels[i * DISPLAY_WIDTH + j] = src[j]; - } - } } #if ENABLE_VRAM_VIEW @@ -1845,8 +1941,14 @@ void VramDraw(SDL_Texture *texture) void VDraw(SDL_Texture *texture) { - memset(gameImage, 0, sizeof(gameImage)); +#if RENDERER == RENDERER_SOFTWARE_FAST + { + extern void DrawFrame_Fast(uint16_t *pixels); + DrawFrame_Fast(gameImage); + } +#else DrawFrame(gameImage); +#endif SDL_UpdateTexture(texture, NULL, gameImage, DISPLAY_WIDTH * sizeof(Uint16)); REG_VCOUNT = DISPLAY_HEIGHT + 1; // prep for being in VBlank period } diff --git a/src/platform/psp/psp_module.c b/src/platform/psp/psp_module.c new file mode 100644 index 000000000..a070da463 --- 
/dev/null +++ b/src/platform/psp/psp_module.c @@ -0,0 +1,41 @@ +#include +#include +#include +#include + +PSP_MODULE_INFO("SonicAdvance2", 0, 1, 0); +PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER | THREAD_ATTR_VFPU); +PSP_HEAP_SIZE_KB(-1024); + +unsigned int sce_newlib_stack_size = 512 * 1024; + +extern bool isRunning; + +int exitCallback(int arg1, int arg2, void *common) +{ + (void)arg1; + (void)arg2; + (void)common; + isRunning = false; + return 0; +} + +int callbackThread(SceSize args, void *argp) +{ + (void)args; + (void)argp; + int cbid = sceKernelCreateCallback("Exit Callback", exitCallback, NULL); + sceKernelRegisterExitCallback(cbid); + sceKernelSleepThreadCB(); + return 0; +} + +int setupPspCallbacks(void) +{ + int thid = sceKernelCreateThread("update_thread", callbackThread, 0x11, 0xFA0, 0, 0); + if (thid >= 0) + { + sceKernelStartThread(thid, 0, 0); + } + return thid; +} diff --git a/src/platform/shared/audio/m4a_sound_mixer.c b/src/platform/shared/audio/m4a_sound_mixer.c index 49b0d65bc..b9236e984 100644 --- a/src/platform/shared/audio/m4a_sound_mixer.c +++ b/src/platform/shared/audio/m4a_sound_mixer.c @@ -368,10 +368,20 @@ void MP2K_event_fine(struct MP2KPlayerState *unused, struct MP2KTrack *track) track->status = 0; } +// mPtr aligns to 4 bytes on MIPS; match that here before reading pointer data +#ifdef __mips__ +static inline u8 *alignCmdPtr4(u8 *p) +{ + return (u8 *)(((uintptr_t)p + 3) & ~(uintptr_t)3); +} +#else +#define alignCmdPtr4(p) (p) +#endif + // Sets the track's cmdPtr to the specified address. 
void MP2K_event_goto(struct MP2KPlayerState *unused, struct MP2KTrack *track) { - u8 *cmdPtr = track->cmdPtr; + u8 *cmdPtr = alignCmdPtr4(track->cmdPtr); uintptr_t addr = 0; for (size_t i = sizeof(uintptr_t) - 1; i > 0; i--) { addr |= cmdPtr[i]; @@ -386,7 +396,9 @@ void MP2K_event_patt(struct MP2KPlayerState *unused, struct MP2KTrack *track) { u8 level = track->patternLevel; if (level < 3) { - track->patternStack[level] = track->cmdPtr + sizeof(u8 *); + // Return address is past the aligned pointer data + u8 *ptrStart = alignCmdPtr4(track->cmdPtr); + track->patternStack[level] = ptrStart + sizeof(u8 *); track->patternLevel++; MP2K_event_goto(unused, track); } else { @@ -419,7 +431,9 @@ void MP2K_event_rept(struct MP2KPlayerState *unused, struct MP2KTrack *track) MP2K_event_goto(unused, track); } else { track->repeatCount = 0; - track->cmdPtr += sizeof(u8) + sizeof(u8 *); + // Skip past the aligned pointer data + u8 *ptrStart = alignCmdPtr4(track->cmdPtr); + track->cmdPtr = ptrStart + sizeof(u8 *); } } } diff --git a/src/platform/shared/dma.c b/src/platform/shared/dma.c index 60ad6144f..d2f55e045 100644 --- a/src/platform/shared/dma.c +++ b/src/platform/shared/dma.c @@ -1,10 +1,26 @@ #include +#include #include "global.h" #include "platform/shared/dma.h" +// safe unaligned access for MIPS +static inline void dma_copy32(void *dst, const void *src) +{ + u32 tmp; + memcpy(&tmp, src, 4); + memcpy(dst, &tmp, 4); +} + +static inline void dma_copy16(void *dst, const void *src) +{ + u16 tmp; + memcpy(&tmp, src, 2); + memcpy(dst, &tmp, 2); +} + struct DMATransfer DMAList[DMA_COUNT] = { 0 }; -void RunDMAs(u32 type) +void RunDMAs(DmaStartTypes type) { for (int dmaNum = 0; dmaNum < DMA_COUNT; dmaNum++) { struct DMATransfer *dma = &DMAList[dmaNum]; @@ -23,9 +39,9 @@ void RunDMAs(u32 type) // printf("DMA%d src=%p, dest=%p, control=%d\n", dmaNum, dma->src, dma->dst, dma->control); for (int i = 0; i < dma->size; i++) { if ((dma->control) & DMA_32BIT) - *dma->dst32 = *dma->src32; + 
dma_copy32(dma->dst, dma->src); else - *dma->dst16 = *dma->src16; + dma_copy16(dma->dst, dma->src); // process destination pointer changes if (((dma->control) & DMA_DEST_MASK) == DMA_DEST_INC) { diff --git a/src/platform/shared/rendering/sw_renderer_fast.c b/src/platform/shared/rendering/sw_renderer_fast.c new file mode 100644 index 000000000..72327a2de --- /dev/null +++ b/src/platform/shared/rendering/sw_renderer_fast.c @@ -0,0 +1,1167 @@ +// sw_renderer_fast.c -- single-pass back-to-front gba ppu renderer +// +// the default renderer does multiple passes per scanline which thrashes +// the data cache on older platforms with tiny L1 and no L2 + +// this one composites everything in one pass per scanline, painting +// layers directly into the output buffer from back to front. a +// layerIds[] side-buffer tracks what wrote each pixel so alpha +// blending can find its target-b inline. +// +// 4bpp text bgs get a batched path that reads one u32 per 8 pixels. +// 8bpp and mosaic bgs fall back to per-pixel. sprites are pre-filtered +// per scanline so we only touch the ones that actually matter. 
+ +#include "config.h" + +#if RENDERER == RENDERER_SOFTWARE_FAST + +#include +#include +#include +#include + +#include "global.h" +#include "core.h" +#include "gba/defines.h" +#include "gba/io_reg.h" +#include "gba/types.h" +#include "platform/shared/dma.h" +#include "platform/shared/rendering/sw_renderer_common.h" + +extern IntrFunc gIntrTable[16]; +extern uint8_t REG_BASE[IO_SIZE]; +extern uint16_t PLTT[PLTT_SIZE / sizeof(uint16_t)]; +extern uint8_t VRAM[VRAM_SIZE]; +extern uint8_t OAM[OAM_SIZE]; +extern const u8 gOamShapesSizes[12][2]; + +#ifndef TILE_WIDTH +#define TILE_WIDTH 8 +#endif + +#define IsBGEnabled(n) (((REG_DISPCNT >> 8) & 0xF) & (1 << (n))) + +// mosaic +#define MOSAIC_BG_X (REG_MOSAIC & 0xF) +#define MOSAIC_BG_Y ((REG_MOSAIC >> 4) & 0xF) +#define MOSAIC_SPR_X ((REG_MOSAIC >> 8) & 0xF) +#define MOSAIC_SPR_Y ((REG_MOSAIC >> 12) & 0xF) +#define ApplyMosaicBGX(x) ((x) - ((x) % (MOSAIC_BG_X + 1))) +#define ApplyMosaicBGY(y) ((y) - ((y) % (MOSAIC_BG_Y + 1))) +#define ApplyMosaicSprX(x) ((x) - ((x) % (MOSAIC_SPR_X + 1))) +#define ApplyMosaicSprY(y) ((y) - ((y) % (MOSAIC_SPR_Y + 1))) + +// tilemap entry fields +#define TILE_NUM(e) ((e) & 0x3FF) +#define TILE_PALETTE(e) (((e) >> 12) & 0xF) +#define TILE_HFLIP(e) ((e) & (1 << 10)) +#define TILE_VFLIP(e) ((e) & (1 << 11)) + +// window mask bits +#define WINMASK_BG0 (1 << 0) +#define WINMASK_BG1 (1 << 1) +#define WINMASK_BG2 (1 << 2) +#define WINMASK_BG3 (1 << 3) +#define WINMASK_OBJ (1 << 4) +#define WINMASK_CLR (1 << 5) +#define WINMASK_WINOUT (1 << 6) + +// layer ids for blend target tracking +#define LAYER_BG0 0 +#define LAYER_BG1 1 +#define LAYER_BG2 2 +#define LAYER_BG3 3 +#define LAYER_OBJ 4 +#define LAYER_BACKDROP 5 + +static const uint16_t bgMapSizes[][2] = { + { 32, 32 }, { 64, 32 }, { 32, 64 }, { 64, 64 }, +}; + + +// 16-bit fill using 32-bit writes +static inline void Memset16(uint16_t *dst, uint16_t fill, unsigned int count) +{ + uint32_t fill32 = ((uint32_t)fill << 16) | fill; + uint32_t *dst32 = 
(uint32_t *)dst; + unsigned int pairs = count >> 1; + for (unsigned int i = 0; i < pairs; i++) + dst32[i] = fill32; + if (count & 1) + dst[count - 1] = fill; +} + +static inline uint32_t GetBgRefX(int bg) { return (bg == 2) ? REG_BG2X : (bg == 3) ? REG_BG3X : 0; } +static inline uint32_t GetBgRefY(int bg) { return (bg == 2) ? REG_BG2Y : (bg == 3) ? REG_BG3Y : 0; } +static inline uint16_t GetBgPA(int bg) { return (bg == 2) ? REG_BG2PA : (bg == 3) ? REG_BG3PA : 0; } +static inline uint16_t GetBgPB(int bg) { return (bg == 2) ? REG_BG2PB : (bg == 3) ? REG_BG3PB : 0; } +static inline uint16_t GetBgPC(int bg) { return (bg == 2) ? REG_BG2PC : (bg == 3) ? REG_BG3PC : 0; } +static inline uint16_t GetBgPD(int bg) { return (bg == 2) ? REG_BG2PD : (bg == 3) ? REG_BG3PD : 0; } + +// handles the wraparound case where left > right +static inline bool WindowContainsX(u16 left, u16 right, u16 x) +{ + if (left > right) + return (x >= left || x < right); + return (x >= left && x < right); +} + +// check if a layer can be the target-b for alpha blending +static inline bool IsBlendTargetB(uint8_t layerId, unsigned int bldcnt) +{ + if (layerId <= 3) + return (bldcnt & (1 << (8 + layerId))) != 0; + if (layerId == LAYER_OBJ) + return (bldcnt & BLDCNT_TGT2_OBJ) != 0; + if (layerId == LAYER_BACKDROP) + return (bldcnt & BLDCNT_TGT2_BD) != 0; + return false; +} + +// sprites with oam mode 1 always try alpha blending regardless of bldcnt +static inline uint16_t BlendSpritePixel( + uint16_t color, unsigned int x, + uint16_t *output, uint8_t *layerIds, + bool isSemiTransparent, + unsigned int blendMode, unsigned int bldcnt, + bool windowsEnabled, uint16_t *winMask, + unsigned int eva, unsigned int evb, unsigned int evy) +{ + bool winAllowsBlend = !windowsEnabled + || (winMask && (winMask[x] & WINMASK_CLR)); + + bool doAlpha = (blendMode == 1 && (bldcnt & BLDCNT_TGT1_OBJ) && winAllowsBlend) + || isSemiTransparent; + + if (doAlpha) { + if (IsBlendTargetB(layerIds[x], bldcnt)) + return 
alphaBlendColor(color, output[x], eva, evb); + } else if ((bldcnt & BLDCNT_TGT1_OBJ) && winAllowsBlend) { + if (blendMode == 2) + return alphaBrightnessIncrease(color, evy); + if (blendMode == 3) + return alphaBrightnessDecrease(color, evy); + } + + return color; +} + +// write a bg pixel with inline blend resolution +static inline void WriteBGPixelBlended( + unsigned int x, uint8_t pixel, + const uint16_t *palBase, int bgNum, + uint16_t *output, uint8_t *layerIds, + unsigned int blendMode, bool bgIsTargetA, + bool useWindows, unsigned int winBgBit, uint16_t *winMask, + unsigned int bldcnt, + unsigned int eva, unsigned int evb, unsigned int evy) +{ + uint16_t color = palBase[pixel] | COLOR_OPAQUE; + + if (useWindows && !(winMask[x] & winBgBit)) + return; + + if (bgIsTargetA && (!useWindows || (winMask[x] & WINMASK_CLR))) { + uint16_t src = color; + switch (blendMode) { + case 1: + if (IsBlendTargetB(layerIds[x], bldcnt)) + color = alphaBlendColor(src, output[x], eva, evb); + break; + case 2: color = alphaBrightnessIncrease(src, evy); break; + case 3: color = alphaBrightnessDecrease(src, evy); break; + } + } + + output[x] = color; + layerIds[x] = bgNum; +} + +static void RenderTextBG(int bgNum, uint16_t control, + uint16_t hoffs, uint16_t voffs, + int lineNum, uint16_t *output) +{ + unsigned int charBase = (control >> 2) & 3; + unsigned int screenBase = (control & BGCNT_SCREENBASE_MASK) >> 8; + unsigned int is8bpp = (control >> 7) & 1; + + unsigned int mapW = bgMapSizes[control >> 14][0]; + unsigned int mapPxW = mapW << 3; + unsigned int mapPxH = bgMapSizes[control >> 14][1] << 3; + unsigned int wMask = mapPxW - 1; + unsigned int hMask = mapPxH - 1; + + uint8_t *tiles = (uint8_t *)BG_CHAR_ADDR(charBase); + uint16_t *map = (uint16_t *)BG_SCREEN_ADDR(screenBase); + uint16_t *pal = (uint16_t *)PLTT; + + bool hasMosaic = control & BGCNT_MOSAIC; + if (hasMosaic) + lineNum = ApplyMosaicBGY(lineNum); + + hoffs &= 0x1FF; + voffs &= 0x1FF; + + unsigned int yy = (lineNum + 
voffs) & hMask; + unsigned int mapY = yy >> 3; + unsigned int tileY = yy & 7; + unsigned int rowBase = mapY * mapW; + + // slow path: 8bpp or mosaic, one pixel at a time + if (hasMosaic || is8bpp) { + for (unsigned int x = 0; x < DISPLAY_WIDTH; x++) { + unsigned int xx = hasMosaic + ? (ApplyMosaicBGX(x) + hoffs) & wMask + : (x + hoffs) & wMask; + + uint16_t entry = map[rowBase + (xx >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int tx = xx & 7; + unsigned int ty = tileY; + if (TILE_HFLIP(entry)) tx = 7 - tx; + if (TILE_VFLIP(entry)) ty = 7 - ty; + + if (!is8bpp) { + uint8_t pair = tiles[(tileNum << 5) + (ty << 2) + (tx >> 1)]; + uint8_t pixel = (tx & 1) ? (pair >> 4) : (pair & 0xF); + if (pixel) + output[x] = pal[(palNum << 4) + pixel] | COLOR_OPAQUE; + } else { + uint8_t pixel = tiles[(tileNum << 6) + (ty << 3) + tx]; + if (pixel) + output[x] = pal[pixel] | COLOR_OPAQUE; + } + } + return; + } + + // fast path: 4bpp, read one u32 per tile row, unroll 8 pixels + unsigned int x = 0; + + // left edge: partial tile if scroll isn't tile-aligned + { + unsigned int startX = hoffs & wMask; + unsigned int startOff = startX & 7; + + if (startOff != 0) { + uint16_t entry = map[rowBase + (startX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) ty = 7 - ty; + bool hflip = TILE_HFLIP(entry); + + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + + unsigned int partial = 8 - startOff; + if (partial > DISPLAY_WIDTH) partial = DISPLAY_WIDTH; + + for (unsigned int t = 0; t < partial && x < DISPLAY_WIDTH; t++, x++) { + unsigned int tx = startOff + t; + if (hflip) tx = 7 - tx; + uint8_t pixel = (row >> (tx << 2)) & 0xF; + if (pixel) + output[x] = pal[(palNum << 4) + pixel] | COLOR_OPAQUE; + } + } + } + + // middle: full tiles, 8 pixels at a time + while (x + 8 <= DISPLAY_WIDTH) { + unsigned int srcX = (x + hoffs) 
& wMask; + uint16_t entry = map[rowBase + (srcX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) ty = 7 - ty; + + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint16_t *palBase = pal + (palNum << 4); + + if (!TILE_HFLIP(entry)) { + uint8_t p; + p = row & 0xF; if (p) output[x ] = palBase[p] | COLOR_OPAQUE; + p = (row >> 4) & 0xF; if (p) output[x+1] = palBase[p] | COLOR_OPAQUE; + p = (row >> 8) & 0xF; if (p) output[x+2] = palBase[p] | COLOR_OPAQUE; + p = (row >> 12) & 0xF; if (p) output[x+3] = palBase[p] | COLOR_OPAQUE; + p = (row >> 16) & 0xF; if (p) output[x+4] = palBase[p] | COLOR_OPAQUE; + p = (row >> 20) & 0xF; if (p) output[x+5] = palBase[p] | COLOR_OPAQUE; + p = (row >> 24) & 0xF; if (p) output[x+6] = palBase[p] | COLOR_OPAQUE; + p = (row >> 28) & 0xF; if (p) output[x+7] = palBase[p] | COLOR_OPAQUE; + } else { + uint8_t p; + p = (row >> 28) & 0xF; if (p) output[x ] = palBase[p] | COLOR_OPAQUE; + p = (row >> 24) & 0xF; if (p) output[x+1] = palBase[p] | COLOR_OPAQUE; + p = (row >> 20) & 0xF; if (p) output[x+2] = palBase[p] | COLOR_OPAQUE; + p = (row >> 16) & 0xF; if (p) output[x+3] = palBase[p] | COLOR_OPAQUE; + p = (row >> 12) & 0xF; if (p) output[x+4] = palBase[p] | COLOR_OPAQUE; + p = (row >> 8) & 0xF; if (p) output[x+5] = palBase[p] | COLOR_OPAQUE; + p = (row >> 4) & 0xF; if (p) output[x+6] = palBase[p] | COLOR_OPAQUE; + p = row & 0xF; if (p) output[x+7] = palBase[p] | COLOR_OPAQUE; + } + x += 8; + } + + // right edge: leftover partial tile + if (x < DISPLAY_WIDTH) { + unsigned int srcX = (x + hoffs) & wMask; + uint16_t entry = map[rowBase + (srcX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) ty = 7 - ty; + bool hflip = TILE_HFLIP(entry); + + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + + for (unsigned int t = 0; x < 
DISPLAY_WIDTH; t++, x++) { + unsigned int tx = hflip ? (7 - t) : t; + uint8_t pixel = (row >> (tx << 2)) & 0xF; + if (pixel) + output[x] = pal[(palNum << 4) + pixel] | COLOR_OPAQUE; + } + } +} + +// same thing but with blend/window tracking baked in +static void RenderTextBGBlend(int bgNum, uint16_t control, + uint16_t hoffs, uint16_t voffs, + int lineNum, uint16_t *output, + uint8_t *layerIds, + unsigned int blendMode, + bool windowsEnabled, + uint16_t *winMask, + unsigned int bldcnt, + unsigned int eva, unsigned int evb, + unsigned int evy) +{ + unsigned int charBase = (control >> 2) & 3; + unsigned int screenBase = (control & BGCNT_SCREENBASE_MASK) >> 8; + unsigned int is8bpp = (control >> 7) & 1; + + unsigned int mapW = bgMapSizes[control >> 14][0]; + unsigned int mapPxW = mapW << 3; + unsigned int mapPxH = bgMapSizes[control >> 14][1] << 3; + unsigned int wMask = mapPxW - 1; + unsigned int hMask = mapPxH - 1; + + uint8_t *tiles = (uint8_t *)BG_CHAR_ADDR(charBase); + uint16_t *map = (uint16_t *)BG_SCREEN_ADDR(screenBase); + uint16_t *pal = (uint16_t *)PLTT; + + bool hasMosaic = control & BGCNT_MOSAIC; + if (hasMosaic) + lineNum = ApplyMosaicBGY(lineNum); + + hoffs &= 0x1FF; + voffs &= 0x1FF; + + unsigned int yy = (lineNum + voffs) & hMask; + unsigned int mapY = yy >> 3; + unsigned int tileY = yy & 7; + unsigned int rowBase = mapY * mapW; + + bool bgIsTargetA = (blendMode != 0) && (bldcnt & (1 << bgNum)); + bool useWindows = windowsEnabled && (winMask != NULL); + unsigned int winBgBit = 1 << bgNum; + + // slow path: 8bpp or mosaic + if (hasMosaic || is8bpp) { + for (unsigned int x = 0; x < DISPLAY_WIDTH; x++) { + unsigned int xx = hasMosaic + ? 
(ApplyMosaicBGX(x) + hoffs) & wMask + : (x + hoffs) & wMask; + + uint16_t entry = map[rowBase + (xx >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int tx = xx & 7; + unsigned int ty = tileY; + if (TILE_HFLIP(entry)) tx = 7 - tx; + if (TILE_VFLIP(entry)) ty = 7 - ty; + + uint8_t pixel; + if (!is8bpp) { + uint8_t pair = tiles[(tileNum << 5) + (ty << 2) + (tx >> 1)]; + pixel = (tx & 1) ? (pair >> 4) : (pair & 0xF); + } else { + pixel = tiles[(tileNum << 6) + (ty << 3) + tx]; + } + + if (pixel == 0) + continue; + + uint16_t color = !is8bpp + ? pal[(palNum << 4) + pixel] | COLOR_OPAQUE + : pal[pixel] | COLOR_OPAQUE; + + if (useWindows && !(winMask[x] & winBgBit)) + continue; + + if (bgIsTargetA && (!useWindows || (winMask[x] & WINMASK_CLR))) { + uint16_t src = color; + switch (blendMode) { + case 1: + if (IsBlendTargetB(layerIds[x], bldcnt)) + color = alphaBlendColor(src, output[x], eva, evb); + break; + case 2: color = alphaBrightnessIncrease(src, evy); break; + case 3: color = alphaBrightnessDecrease(src, evy); break; + } + } + + output[x] = color; + layerIds[x] = bgNum; + } + return; + } + + // fast path: 4bpp batched with inline blend + unsigned int x = 0; + + // left edge partial tile + { + unsigned int startX = hoffs & wMask; + unsigned int startOff = startX & 7; + + if (startOff != 0) { + uint16_t entry = map[rowBase + (startX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) ty = 7 - ty; + bool hflip = TILE_HFLIP(entry); + + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint16_t *palBase = pal + (palNum << 4); + + unsigned int partial = 8 - startOff; + if (partial > DISPLAY_WIDTH) partial = DISPLAY_WIDTH; + + for (unsigned int t = 0; t < partial && x < DISPLAY_WIDTH; t++, x++) { + unsigned int tx = startOff + t; + if (hflip) tx = 7 - tx; + uint8_t pixel = (row >> (tx << 2)) & 0xF; + if 
(pixel) + WriteBGPixelBlended(x, pixel, palBase, bgNum, output, + layerIds, blendMode, bgIsTargetA, + useWindows, winBgBit, winMask, + bldcnt, eva, evb, evy); + } + } + } + + // middle: full tiles + while (x + 8 <= DISPLAY_WIDTH) { + unsigned int srcX = (x + hoffs) & wMask; + uint16_t entry = map[rowBase + (srcX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) ty = 7 - ty; + + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint16_t *palBase = pal + (palNum << 4); + + #define BLEND_PX(off, shift) do { \ + uint8_t p = (row >> (shift)) & 0xF; \ + if (p) WriteBGPixelBlended(x+(off), p, palBase, bgNum, \ + output, layerIds, blendMode, bgIsTargetA, \ + useWindows, winBgBit, winMask, \ + bldcnt, eva, evb, evy); \ + } while (0) + + if (!TILE_HFLIP(entry)) { + BLEND_PX(0, 0); BLEND_PX(1, 4); + BLEND_PX(2, 8); BLEND_PX(3, 12); + BLEND_PX(4, 16); BLEND_PX(5, 20); + BLEND_PX(6, 24); BLEND_PX(7, 28); + } else { + BLEND_PX(0, 28); BLEND_PX(1, 24); + BLEND_PX(2, 20); BLEND_PX(3, 16); + BLEND_PX(4, 12); BLEND_PX(5, 8); + BLEND_PX(6, 4); BLEND_PX(7, 0); + } + + #undef BLEND_PX + + x += 8; + } + + // right edge partial tile + if (x < DISPLAY_WIDTH) { + unsigned int srcX = (x + hoffs) & wMask; + uint16_t entry = map[rowBase + (srcX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) ty = 7 - ty; + bool hflip = TILE_HFLIP(entry); + + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint16_t *palBase = pal + (palNum << 4); + + for (unsigned int t = 0; x < DISPLAY_WIDTH; t++, x++) { + unsigned int tx = hflip ? 
(7 - t) : t; + uint8_t pixel = (row >> (tx << 2)) & 0xF; + if (pixel) + WriteBGPixelBlended(x, pixel, palBase, bgNum, output, + layerIds, blendMode, bgIsTargetA, + useWindows, winBgBit, winMask, + bldcnt, eva, evb, evy); + } + } +} + +static void RenderAffineBG(int bgNum, uint16_t control, + int lineNum, uint16_t *output) +{ + vBgCnt *bgcnt = (vBgCnt *)&control; + + uint8_t *tiles = (uint8_t *)(VRAM + bgcnt->charBaseBlock * 0x4000); + uint8_t *map = (uint8_t *)(VRAM + bgcnt->screenBaseBlock * 0x800); + uint16_t *pal = (uint16_t *)PLTT; + + if (control & BGCNT_MOSAIC) + lineNum = ApplyMosaicBGY(lineNum); + + s16 pa = GetBgPA(bgNum); + s16 pb = GetBgPB(bgNum); + s16 pc = GetBgPC(bgNum); + s16 pd = GetBgPD(bgNum); + + // always square: 128/256/512/1024 + int size = 128; + switch (bgcnt->screenSize) { + case 1: size = 256; break; + case 2: size = 512; break; + case 3: size = 1024; break; + } + int mask = size - 1; + int yshift = ((control >> 14) & 3) + 4; + + // sign-extend 28-bit reference point, advance by scanline + s32 refX = GetBgRefX(bgNum); + s32 refY = GetBgRefY(bgNum); + refX = (refX & (1 << 27)) ? refX | 0xF0000000 : refX; + refY = (refY & (1 << 27)) ? 
refY | 0xF0000000 : refY; + refX += lineNum * pb; + refY += lineNum * pd; + + int curX = refX; + int curY = refY; + + if (bgcnt->areaOverflowMode) { + // wraparound + for (int x = 0; x < DISPLAY_WIDTH; x++) { + int tx = (curX >> 8) & mask; + int ty = (curY >> 8) & mask; + int tile = map[(tx >> 3) + ((ty >> 3) << yshift)]; + uint8_t pixel = tiles[(tile << 6) + ((ty & 7) << 3) + (tx & 7)]; + if (pixel) + output[x] = pal[pixel] | COLOR_OPAQUE; + curX += pa; + curY += pc; + } + } else { + // clamp: outside the map = transparent + for (int x = 0; x < DISPLAY_WIDTH; x++) { + int tx = curX >> 8; + int ty = curY >> 8; + if (tx >= 0 && ty >= 0 && tx < size && ty < size) { + int tile = map[(tx >> 3) + ((ty >> 3) << yshift)]; + uint8_t pixel = tiles[(tile << 6) + ((ty & 7) << 3) + (tx & 7)]; + if (pixel) + output[x] = pal[pixel] | COLOR_OPAQUE; + } + curX += pa; + curY += pc; + } + } + + // horizontal mosaic as a post-pass + if ((control & BGCNT_MOSAIC) && MOSAIC_BG_X > 0) { + for (int x = 0; x < DISPLAY_WIDTH; x++) + output[x] = output[ApplyMosaicBGX(x)]; + } +} + +// same deal with blend/window support +static void RenderAffineBGBlend(int bgNum, uint16_t control, + int lineNum, uint16_t *output, + uint8_t *layerIds, + unsigned int blendMode, + bool windowsEnabled, + uint16_t *winMask, + unsigned int bldcnt, + unsigned int eva, unsigned int evb, + unsigned int evy) +{ + vBgCnt *bgcnt = (vBgCnt *)&control; + + uint8_t *tiles = (uint8_t *)(VRAM + bgcnt->charBaseBlock * 0x4000); + uint8_t *map = (uint8_t *)(VRAM + bgcnt->screenBaseBlock * 0x800); + uint16_t *pal = (uint16_t *)PLTT; + + if (control & BGCNT_MOSAIC) + lineNum = ApplyMosaicBGY(lineNum); + + s16 pa = GetBgPA(bgNum); + s16 pb = GetBgPB(bgNum); + s16 pc = GetBgPC(bgNum); + s16 pd = GetBgPD(bgNum); + + int size = 128; + switch (bgcnt->screenSize) { + case 1: size = 256; break; + case 2: size = 512; break; + case 3: size = 1024; break; + } + int mask = size - 1; + int yshift = ((control >> 14) & 3) + 4; + + s32 refX = 
GetBgRefX(bgNum); + s32 refY = GetBgRefY(bgNum); + refX = (refX & (1 << 27)) ? refX | 0xF0000000 : refX; + refY = (refY & (1 << 27)) ? refY | 0xF0000000 : refY; + refX += lineNum * pb; + refY += lineNum * pd; + + int curX = refX; + int curY = refY; + + bool bgIsTargetA = (blendMode != 0) && (bldcnt & (1 << bgNum)); + + for (int x = 0; x < DISPLAY_WIDTH; x++) { + int tx, ty; + + if (bgcnt->areaOverflowMode) { + tx = (curX >> 8) & mask; + ty = (curY >> 8) & mask; + } else { + tx = curX >> 8; + ty = curY >> 8; + if (tx < 0 || ty < 0 || tx >= size || ty >= size) { + curX += pa; + curY += pc; + continue; + } + } + + int tile = map[(tx >> 3) + ((ty >> 3) << yshift)]; + uint8_t pixel = tiles[(tile << 6) + ((ty & 7) << 3) + (tx & 7)]; + + curX += pa; + curY += pc; + + if (pixel == 0) + continue; + + uint16_t color = pal[pixel] | COLOR_OPAQUE; + + if (windowsEnabled && winMask && !(winMask[x] & (1 << bgNum))) + continue; + + bool winAllowsBlend = true; + if (windowsEnabled && winMask) + winAllowsBlend = (winMask[x] & WINMASK_CLR) >> 5; + + if (bgIsTargetA && winAllowsBlend) { + uint16_t src = color; + switch (blendMode) { + case 1: + if (IsBlendTargetB(layerIds[x], bldcnt)) + color = alphaBlendColor(src, output[x], eva, evb); + break; + case 2: color = alphaBrightnessIncrease(src, evy); break; + case 3: color = alphaBrightnessDecrease(src, evy); break; + } + } + + output[x] = color; + layerIds[x] = bgNum; + } + + if ((control & BGCNT_MOSAIC) && MOSAIC_BG_X > 0) { + for (int x = 0; x < DISPLAY_WIDTH; x++) + output[x] = output[ApplyMosaicBGX(x)]; + } +} + +#define MAX_SPRITES_PER_PRIORITY 32 + +typedef struct { + uint8_t oamIndex; +} ActiveSprite; + +static ActiveSprite sActiveSprites[4][MAX_SPRITES_PER_PRIORITY]; +static int sActiveSpriteCount[4]; + +static void PrefilterSprites(uint16_t vcount) +{ + sActiveSpriteCount[0] = 0; + sActiveSpriteCount[1] = 0; + sActiveSpriteCount[2] = 0; + sActiveSpriteCount[3] = 0; + + if (!(REG_DISPCNT & DISPCNT_OBJ_ON)) + return; + + // 
back-to-front so lower oam indices (higher hw priority) draw last + for (int i = OAM_ENTRY_COUNT - 1; i >= 0; i--) { + OamData *oam = &((OamData *)OAM)[i]; + + bool isAffine = oam->split.affineMode & 1; + bool isDisabled = (oam->split.affineMode >> 1) & 1; + + if (!isAffine && isDisabled) + continue; + + s32 idx = (oam->split.shape << 2) | oam->split.size; + unsigned int width = gOamShapesSizes[idx][0]; + unsigned int height = gOamShapesSizes[idx][1]; + int halfW = width / 2; + int halfH = height / 2; + + int32_t sx = oam->split.x; + int32_t sy = oam->split.y; +#if !EXTENDED_OAM + if (sx >= DISPLAY_WIDTH) sx -= 512; + if (sy >= DISPLAY_HEIGHT) sy -= 256; +#endif + + // double-size affine sprites have 2x bounding box + if (isAffine && isDisabled) { + halfW *= 2; + halfH *= 2; + } + + if ((int)vcount < sy || (int)vcount >= sy + halfH * 2) + continue; + if (sx + halfW * 2 < 0 || sx >= DISPLAY_WIDTH) + continue; + + int pri = oam->split.priority; + if (sActiveSpriteCount[pri] < MAX_SPRITES_PER_PRIORITY) { + sActiveSprites[pri][sActiveSpriteCount[pri]].oamIndex = i; + sActiveSpriteCount[pri]++; + } + } +} + +static void DrawSpritesAtPriority(int priority, uint16_t vcount, + uint16_t *output, uint8_t *layerIds, + bool windowsEnabled, uint16_t *winMask, + unsigned int blendMode, bool objWinOnly, + unsigned int bldcnt, + unsigned int eva, unsigned int evb, + unsigned int evy) +{ + uint8_t *tiledata = (uint8_t *)OBJ_VRAM0; + uint16_t *sprpal = (uint16_t *)PLTT + (0x200 / 2); + int16_t matrix[2][2]; + + // only 1-D tile mapping supported + if (!(REG_DISPCNT & (1 << 6))) + return; + + for (int s = 0; s < sActiveSpriteCount[priority]; s++) { + int i = sActiveSprites[priority][s].oamIndex; + OamData *oam = &((OamData *)OAM)[i]; + + bool isAffine = oam->split.affineMode & 1; + bool doubleSize = (oam->split.affineMode >> 1) & 1; + + s32 idx = (oam->split.shape << 2) | oam->split.size; + unsigned int width = gOamShapesSizes[idx][0]; + unsigned int height = gOamShapesSizes[idx][1]; 
+ int halfW = width / 2; + int halfH = height / 2; + + int32_t x = oam->split.x; + int32_t y = oam->split.y; +#if !EXTENDED_OAM + if (x >= DISPLAY_WIDTH) x -= 512; + if (y >= DISPLAY_HEIGHT) y -= 256; +#endif + if (isAffine && doubleSize) { + halfW *= 2; + halfH *= 2; + } + + bool isSemiTransparent = (oam->split.objMode == 1); + bool isObjWin = (oam->split.objMode == 2); + + if (objWinOnly && !isObjWin) continue; + if (!objWinOnly && isObjWin) continue; + + int rectWidth = width; + int rectHeight = height; + + if (isAffine) { + u8 matrixNum = oam->split.matrixNum * 4; + OamData *m0 = &((OamData *)OAM)[matrixNum]; + OamData *m1 = &((OamData *)OAM)[matrixNum + 1]; + OamData *m2 = &((OamData *)OAM)[matrixNum + 2]; + OamData *m3 = &((OamData *)OAM)[matrixNum + 3]; + matrix[0][0] = m0->all.affineParam; + matrix[0][1] = m1->all.affineParam; + matrix[1][0] = m2->all.affineParam; + matrix[1][1] = m3->all.affineParam; + if (doubleSize) { + rectWidth *= 2; + rectHeight *= 2; + } + } else { + matrix[0][0] = 0x100; // identity in 8.8 fixed point + matrix[0][1] = 0; + matrix[1][0] = 0; + matrix[1][1] = 0x100; + } + + x += halfW; + y += halfH; + + int localY = (oam->split.mosaic == 1) + ? ApplyMosaicSprY(vcount) - y + : vcount - y; + bool flipX = !isAffine && ((oam->split.matrixNum >> 3) & 1); + bool flipY = !isAffine && ((oam->split.matrixNum >> 4) & 1); + bool is8bpp = oam->split.bpp & 1; + + int startLX = -halfW; + int endLX = halfW; + if (startLX + x < 0) startLX = -x; + if (endLX + x >= DISPLAY_WIDTH) endLX = DISPLAY_WIDTH - 1 - x; + + // fast path: non-affine 4bpp, no mosaic -- batched tile row reads + if (!isAffine && !is8bpp && !oam->split.mosaic) { + int texY = localY + halfH; + if (flipY) texY = height - texY - 1; + if (texY < 0 || texY >= (int)height) + continue; + + int tileRowY = texY & 7; + int blockY = texY >> 3; + int tilesPerRow = (REG_DISPCNT & 0x40) ? 
((int)width >> 3) : 16; + int tileBase = blockY * tilesPerRow + oam->split.tileNum; + int rowByteOff = tileRowY << 2; + uint16_t *pixpal = sprpal + (oam->split.paletteNum << 4); + + int lx = startLX; + while (lx <= endLX) { + int rawX = lx + halfW; + int texX = flipX ? ((int)width - 1 - rawX) : rawX; + + if (texX < 0 || texX >= (int)width) { + lx++; + continue; + } + + int blockX = texX >> 3; + int tileXStart = texX & 7; + + uint32_t rowData = *(uint32_t *)(tiledata + + ((tileBase + blockX) << 5) + rowByteOff); + + int pixelsInTile = !flipX ? (8 - tileXStart) : (tileXStart + 1); + int remain = endLX - lx + 1; + if (pixelsInTile > remain) pixelsInTile = remain; + + if (!flipX) { + int texRemain = (int)width - texX; + if (pixelsInTile > texRemain) pixelsInTile = texRemain; + } else { + int texRemain = texX + 1; + if (pixelsInTile > texRemain) pixelsInTile = texRemain; + } + + for (int p = 0; p < pixelsInTile; p++, lx++) { + int curTX = flipX ? (tileXStart - p) : (tileXStart + p); + uint8_t pixel = (rowData >> (curTX << 2)) & 0xF; + if (pixel == 0) + continue; + + int gx = lx + x; + uint16_t color = pixpal[pixel]; + + // obj window sprites modify the window mask, not the framebuffer + if (isObjWin) { + if (windowsEnabled && winMask && (winMask[gx] & WINMASK_WINOUT)) + winMask[gx] = (REG_WINOUT >> 8) & 0x3F; + continue; + } + + if (layerIds && blendMode != 0) + color = BlendSpritePixel(color, gx, output, layerIds, + isSemiTransparent, blendMode, + bldcnt, windowsEnabled, winMask, + eva, evb, evy); + + if (windowsEnabled && winMask && !(winMask[gx] & WINMASK_OBJ)) + continue; + + output[gx] = color | COLOR_OPAQUE; + if (layerIds) + layerIds[gx] = LAYER_OBJ; + } + } + continue; + } + + // generic path: affine, 8bpp, or mosaic -- per pixel + for (int localX = startLX; localX <= endLX; localX++) { + int gx = localX + x; + int texX, texY; + + if (!isAffine) { + int lmx = localX; + if (oam->split.mosaic == 1) + lmx = ApplyMosaicSprX(gx) - x; + texX = lmx + halfW; + texY = 
localY + halfH; + if (flipX) texX = width - texX - 1; + if (flipY) texY = height - texY - 1; + } else { + int lmx = localX; + int lmy = localY; + if (oam->split.mosaic == 1) { + lmx = ApplyMosaicSprX(gx) - x; + lmy = ApplyMosaicSprY(vcount) - y; + } + // apply 2x2 affine matrix (8.8 fixed point) + texX = ((matrix[0][0] * lmx + matrix[0][1] * lmy) >> 8) + (width / 2); + texY = ((matrix[1][0] * lmx + matrix[1][1] * lmy) >> 8) + (height / 2); + } + + if (texX < 0 || texY < 0 || texX >= (int)width || texY >= (int)height) + continue; + + int tileX = texX & 7; + int tileY = texY & 7; + int blockX = texX >> 3; + int blockY = texY >> 3; + int blockOffset = blockY * (REG_DISPCNT & 0x40 ? ((int)width >> 3) : 16) + blockX; + + uint16_t pixel = 0; + uint16_t *pixpal; + + if (!is8bpp) { + int tdi = ((blockOffset + oam->split.tileNum) << 5) + (tileY << 2) + (tileX >> 1); + pixel = tiledata[tdi]; + if (tileX & 1) pixel >>= 4; + else pixel &= 0xF; + pixpal = sprpal + (oam->split.paletteNum << 4); + } else { + pixel = tiledata[((blockOffset * 2 + oam->split.tileNum) << 5) + (tileY << 3) + tileX]; + pixpal = sprpal; + } + + if (pixel == 0) + continue; + + uint16_t color = pixpal[pixel]; + + if (isObjWin) { + if (windowsEnabled && winMask && (winMask[gx] & WINMASK_WINOUT)) + winMask[gx] = (REG_WINOUT >> 8) & 0x3F; + continue; + } + + if (layerIds && blendMode != 0) + color = BlendSpritePixel(color, gx, output, layerIds, + isSemiTransparent, blendMode, + bldcnt, windowsEnabled, winMask, + eva, evb, evy); + + if (windowsEnabled && winMask && !(winMask[gx] & WINMASK_OBJ)) + continue; + + output[gx] = color | COLOR_OPAQUE; + if (layerIds) + layerIds[gx] = LAYER_OBJ; + } + } +} + +static void DrawScanline(uint16_t *pixels, uint16_t vcount) +{ + unsigned int mode = REG_DISPCNT & 3; + unsigned int numBGs = (mode == 0) ? 
4 : 3; + unsigned int blendMode = (REG_BLDCNT >> 6) & 3; + unsigned int enabledBgs = (REG_DISPCNT >> 8) & 0xF; + + // sort bgs by priority + uint16_t bgcnts[4]; + char bgPriority[4]; + char bgsByPri[4][4]; + char bgsByPriCount[4] = {0, 0, 0, 0}; + + for (int bg = 0; bg < (int)numBGs; bg++) { + uint16_t cnt = *(uint16_t *)(REG_ADDR_BG0CNT + bg * 2); + bgcnts[bg] = cnt; + uint16_t pri = cnt & 3; + bgPriority[bg] = pri; + bgsByPri[pri][bgsByPriCount[pri]] = bg; + bgsByPriCount[pri]++; + } + + // window setup + bool windowsEnabled = false; + u16 win0Bot, win0Top, win0Right, win0Left; + u16 win1Bot, win1Top, win1Right, win1Left; + bool win0Active = false, win1Active = false; + static uint16_t winMask[DISPLAY_WIDTH]; + + if (REG_DISPCNT & DISPCNT_WIN0_ON) { + win0Bot = WIN_GET_HIGHER(REG_WIN0V); + win0Top = WIN_GET_LOWER(REG_WIN0V); + win0Right = WIN_GET_HIGHER(REG_WIN0H); + win0Left = WIN_GET_LOWER(REG_WIN0H); + if (win0Top > win0Bot) + win0Active = (vcount >= win0Top || vcount < win0Bot); + else + win0Active = (vcount >= win0Top && vcount < win0Bot); + windowsEnabled = true; + } + if (REG_DISPCNT & DISPCNT_WIN1_ON) { + win1Bot = WIN_GET_HIGHER(REG_WIN1V); + win1Top = WIN_GET_LOWER(REG_WIN1V); + win1Right = WIN_GET_HIGHER(REG_WIN1H); + win1Left = WIN_GET_LOWER(REG_WIN1H); + if (win1Top > win1Bot) + win1Active = (vcount >= win1Top || vcount < win1Bot); + else + win1Active = (vcount >= win1Top && vcount < win1Bot); + windowsEnabled = true; + } + if ((REG_DISPCNT & DISPCNT_OBJWIN_ON) && (REG_DISPCNT & DISPCNT_OBJ_ON)) + windowsEnabled = true; + + // build per-pixel window mask + if (windowsEnabled) { + for (unsigned int xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { + if (win0Active && WindowContainsX(win0Left, win0Right, xpos)) + winMask[xpos] = REG_WININ & 0x3F; + else if (win1Active && WindowContainsX(win1Left, win1Right, xpos)) + winMask[xpos] = (REG_WININ >> 8) & 0x3F; + else + winMask[xpos] = (REG_WINOUT & 0x3F) | WINMASK_WINOUT; + } + } + + PrefilterSprites(vcount); + + 
// layerIds tracks who wrote each pixel so alpha blend can find target-b + static uint8_t layerIds[DISPLAY_WIDTH]; + bool needLayerIds = (blendMode != 0 || windowsEnabled); + uint8_t *lids = needLayerIds ? layerIds : NULL; + uint16_t *wmask = windowsEnabled ? winMask : NULL; + + if (needLayerIds) + memset(layerIds, LAYER_BACKDROP, DISPLAY_WIDTH); + + // grab blend regs once per scanline + unsigned int bldcnt = REG_BLDCNT; + unsigned int bld_eva = REG_BLDALPHA & 0x1F; + unsigned int bld_evb = (REG_BLDALPHA >> 8) & 0x1F; + unsigned int bld_evy = REG_BLDY & 0x1F; + + // obj window pass -- these sprites modify the window mask, not the framebuffer + if (windowsEnabled && (REG_DISPCNT & DISPCNT_OBJWIN_ON) && (REG_DISPCNT & DISPCNT_OBJ_ON)) { + for (int pri = 0; pri < 4; pri++) + DrawSpritesAtPriority(pri, vcount, pixels, lids, + windowsEnabled, wmask, blendMode, + /*objWinOnly=*/true, + bldcnt, bld_eva, bld_evb, bld_evy); + } + + // back-to-front: priority 3 first, 0 last (0 is topmost) + for (int pri = 3; pri >= 0; pri--) { + for (int sub = bgsByPriCount[pri] - 1; sub >= 0; sub--) { + int bg = bgsByPri[pri][sub]; + if (!IsBGEnabled(bg)) + continue; + + if (!needLayerIds) { + switch (mode) { + case 0: + RenderTextBG(bg, bgcnts[bg], + *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), + *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), + vcount, pixels); + break; + case 1: + if (bg == 2) + RenderAffineBG(bg, bgcnts[bg], vcount, pixels); + else + RenderTextBG(bg, bgcnts[bg], + *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), + *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), + vcount, pixels); + break; + } + } else { + switch (mode) { + case 0: + RenderTextBGBlend(bg, bgcnts[bg], + *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), + *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), + vcount, pixels, lids, blendMode, + windowsEnabled, wmask, + bldcnt, bld_eva, bld_evb, bld_evy); + break; + case 1: + if (bg == 2) + RenderAffineBGBlend(bg, bgcnts[bg], + vcount, pixels, lids, blendMode, + windowsEnabled, wmask, + bldcnt, 
bld_eva, bld_evb, bld_evy); + else + RenderTextBGBlend(bg, bgcnts[bg], + *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), + *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), + vcount, pixels, lids, blendMode, + windowsEnabled, wmask, + bldcnt, bld_eva, bld_evb, bld_evy); + break; + } + } + } + + if (REG_DISPCNT & DISPCNT_OBJ_ON) + DrawSpritesAtPriority(pri, vcount, pixels, lids, + windowsEnabled, wmask, blendMode, + /*objWinOnly=*/false, + bldcnt, bld_eva, bld_evb, bld_evy); + } +} + +void DrawFrame_Fast(uint16_t *pixels) +{ + for (int i = 0; i < DISPLAY_HEIGHT; i++) { + uint16_t *scanline = &pixels[i * DISPLAY_WIDTH]; + + REG_VCOUNT = i; + if (((REG_DISPSTAT >> 8) & 0xFF) == REG_VCOUNT) { + REG_DISPSTAT |= INTR_FLAG_VCOUNT; + if (REG_DISPSTAT & DISPSTAT_VCOUNT_INTR) + gIntrTable[INTR_INDEX_VCOUNT](); + } + + Memset16(scanline, *(uint16_t *)PLTT, DISPLAY_WIDTH); + DrawScanline(scanline, i); + + REG_DISPSTAT |= INTR_FLAG_HBLANK; + RunDMAs(DMA_HBLANK); + if (REG_DISPSTAT & DISPSTAT_HBLANK_INTR) + gIntrTable[INTR_INDEX_HBLANK](); + + REG_DISPSTAT &= ~INTR_FLAG_HBLANK; + REG_DISPSTAT &= ~INTR_FLAG_VCOUNT; + } +} + +#endif From 347875845cd077530464de5c9c985a43b57f193e Mon Sep 17 00:00:00 2001 From: kikugrave Date: Sun, 15 Feb 2026 10:54:56 -0800 Subject: [PATCH 02/13] Fix lint --- .../shared/rendering/sw_renderer_common.h | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/include/platform/shared/rendering/sw_renderer_common.h b/include/platform/shared/rendering/sw_renderer_common.h index 1e1a626d9..ddb85d7ff 100644 --- a/include/platform/shared/rendering/sw_renderer_common.h +++ b/include/platform/shared/rendering/sw_renderer_common.h @@ -8,21 +8,23 @@ // bgr555 channel extraction #define getAlphaBit(x) (((x) >> 15) & 1) -#define getRedChannel(x) (((x) >> 0) & 0x1F) -#define getGreenChannel(x) (((x) >> 5) & 0x1F) +#define getRedChannel(x) (((x) >> 0) & 0x1F) +#define getGreenChannel(x) (((x) >> 5) & 0x1F) #define getBlueChannel(x) (((x) >> 10) & 
0x1F) -#define COLOR_OPAQUE 0x8000 +#define COLOR_OPAQUE 0x8000 -static inline uint16_t alphaBlendColor(uint16_t targetA, uint16_t targetB, - unsigned int eva, unsigned int evb) +static inline uint16_t alphaBlendColor(uint16_t targetA, uint16_t targetB, unsigned int eva, unsigned int evb) { unsigned int r = ((getRedChannel(targetA) * eva) + (getRedChannel(targetB) * evb)) >> 4; unsigned int g = ((getGreenChannel(targetA) * eva) + (getGreenChannel(targetB) * evb)) >> 4; unsigned int b = ((getBlueChannel(targetA) * eva) + (getBlueChannel(targetB) * evb)) >> 4; - if (r > 31) r = 31; - if (g > 31) g = 31; - if (b > 31) b = 31; + if (r > 31) + r = 31; + if (g > 31) + g = 31; + if (b > 31) + b = 31; return r | (g << 5) | (b << 10) | COLOR_OPAQUE; } @@ -33,9 +35,12 @@ static inline uint16_t alphaBrightnessIncrease(uint16_t targetA, unsigned int ev unsigned int g = getGreenChannel(targetA) + (31 - getGreenChannel(targetA)) * evy / 16; unsigned int b = getBlueChannel(targetA) + (31 - getBlueChannel(targetA)) * evy / 16; - if (r > 31) r = 31; - if (g > 31) g = 31; - if (b > 31) b = 31; + if (r > 31) + r = 31; + if (g > 31) + g = 31; + if (b > 31) + b = 31; return r | (g << 5) | (b << 10) | COLOR_OPAQUE; } @@ -46,9 +51,12 @@ static inline uint16_t alphaBrightnessDecrease(uint16_t targetA, unsigned int ev unsigned int g = getGreenChannel(targetA) - getGreenChannel(targetA) * evy / 16; unsigned int b = getBlueChannel(targetA) - getBlueChannel(targetA) * evy / 16; - if (r > 31) r = 31; - if (g > 31) g = 31; - if (b > 31) b = 31; + if (r > 31) + r = 31; + if (g > 31) + g = 31; + if (b > 31) + b = 31; return r | (g << 5) | (b << 10) | COLOR_OPAQUE; } From f881e7c588345506471599312bb374e6a3ec7603 Mon Sep 17 00:00:00 2001 From: kikugrave Date: Sun, 15 Feb 2026 10:57:27 -0800 Subject: [PATCH 03/13] Fix lint 2 --- src/lib/m4a/m4a.c | 2 +- src/platform/pret_sdl/sdl2.c | 181 +++-- src/platform/psp/psp_module.c | 3 +- src/platform/shared/audio/m4a_sound_mixer.c | 5 +- 
.../shared/rendering/sw_renderer_fast.c | 648 +++++++++--------- 5 files changed, 433 insertions(+), 406 deletions(-) diff --git a/src/lib/m4a/m4a.c b/src/lib/m4a/m4a.c index a7119e5a3..108dae897 100644 --- a/src/lib/m4a/m4a.c +++ b/src/lib/m4a/m4a.c @@ -1369,7 +1369,7 @@ void MP2K_event_memacc(struct MP2KPlayerState *mplayInfo, struct MP2KTrack *trac return; } -cond_true : { +cond_true: { { void (*func)(struct MP2KPlayerState *, struct MP2KTrack *) = *(&gMPlayJumpTable[1]); func(mplayInfo, track); diff --git a/src/platform/pret_sdl/sdl2.c b/src/platform/pret_sdl/sdl2.c index f93af8ed5..044d2ab4c 100644 --- a/src/platform/pret_sdl/sdl2.c +++ b/src/platform/pret_sdl/sdl2.c @@ -114,7 +114,7 @@ double accumulator = 0.0; static FILE *sSaveFile = NULL; extern void AgbMain(void); -void DoSoftReset(void) {}; +void DoSoftReset(void) { }; void ProcessSDLEvents(void); void VDraw(SDL_Texture *texture); @@ -245,7 +245,7 @@ int main(int argc, char **argv) SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "0"); #ifdef __PSP__ // SDL_RenderSetLogicalSize is broken on PSP, stretch to fill manually - pspDestRect = (SDL_Rect){ 0, 0, PSP_SCREEN_W, PSP_SCREEN_H }; + pspDestRect = (SDL_Rect) { 0, 0, PSP_SCREEN_W, PSP_SCREEN_H }; #else SDL_RenderSetLogicalSize(sdlRenderer, DISPLAY_WIDTH, DISPLAY_HEIGHT); #endif @@ -1375,7 +1375,7 @@ const u8 spriteSizes[][2] = { { 32, 64 }, }; -#define isbgEnabled(x) ((REG_DISPCNT >> 8) & 0xF) & (1 << x) +#define isbgEnabled(x) ((REG_DISPCNT >> 8) & 0xF) & (1 << x) // outputs the blended pixel in colorOutput, the prxxx are the bg priority and // subpriority, pixelpos is pixel offset in scanline @@ -1538,102 +1538,102 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool bool is8BPP = oam->split.bpp & 1; { - uint8_t *tiledata = (uint8_t *)objtiles; - uint16_t *sprpal = (uint16_t *)(PLTT + (0x200 / 2)); - for (int local_x = -half_width; local_x <= half_width; local_x++) { - int local_mosaicX; - int tex_x; - int tex_y; - - unsigned 
int global_x = local_x + x; - - if (global_x < 0 || global_x >= DISPLAY_WIDTH) - continue; - - if (oam->split.mosaic == 1) { - // mosaic effect has to be applied to global coordinates otherwise - // the mosaic will scroll - local_mosaicX = applySpriteHorizontalMosaicEffect(global_x) - x; - tex_x = ((matrix[0][0] * local_mosaicX + matrix[0][1] * local_y) >> 8) + (width / 2); - tex_y = ((matrix[1][0] * local_mosaicX + matrix[1][1] * local_y) >> 8) + (height / 2); - } else { - tex_x = ((matrix[0][0] * local_x + matrix[0][1] * local_y) >> 8) + (width / 2); - tex_y = ((matrix[1][0] * local_x + matrix[1][1] * local_y) >> 8) + (height / 2); - } + uint8_t *tiledata = (uint8_t *)objtiles; + uint16_t *sprpal = (uint16_t *)(PLTT + (0x200 / 2)); + for (int local_x = -half_width; local_x <= half_width; local_x++) { + int local_mosaicX; + int tex_x; + int tex_y; - /* Check if transformed coordinates are inside bounds. */ - - if (tex_x >= width || tex_y >= height || tex_x < 0 || tex_y < 0) - continue; - - if (flipX) - tex_x = width - tex_x - 1; - if (flipY) - tex_y = height - tex_y - 1; - - int tile_x = tex_x & 7; - int tile_y = tex_y & 7; - int block_x = tex_x >> 3; - int block_y = tex_y >> 3; - int block_offset = ((block_y * (REG_DISPCNT & 0x40 ? 
(width >> 3) : 16)) + block_x); - uint16_t pixel = 0; - - uint16_t *pixpal; - if (!is8BPP) { - int tileDataIndex = ((block_offset + oam->split.tileNum) << 5) + (tile_y << 2) + (tile_x >> 1); - pixel = tiledata[tileDataIndex]; - if (tile_x & 1) - pixel >>= 4; - else - pixel &= 0xF; - pixpal = sprpal + (oam->split.paletteNum << 4); -#if ENABLE_VRAM_VIEW - vramPalIdBuffer[0x800 + (tileDataIndex >> 5)] = 16 + oam->split.paletteNum; -#endif - } else { - pixel = tiledata[((block_offset * 2 + oam->split.tileNum) << 5) + (tile_y << 3) + tile_x]; - pixpal = sprpal; - } + unsigned int global_x = local_x + x; - if (pixel != 0) { - uint16_t color = pixpal[pixel]; + if (global_x < 0 || global_x >= DISPLAY_WIDTH) + continue; - // if sprite mode is 2 then write to the window mask instead - if (isObjWin) { - if (scanline->winMask[global_x] & WINMASK_WINOUT) - scanline->winMask[global_x] = (REG_WINOUT >> 8) & 0x3F; + if (oam->split.mosaic == 1) { + // mosaic effect has to be applied to global coordinates otherwise + // the mosaic will scroll + local_mosaicX = applySpriteHorizontalMosaicEffect(global_x) - x; + tex_x = ((matrix[0][0] * local_mosaicX + matrix[0][1] * local_y) >> 8) + (width / 2); + tex_y = ((matrix[1][0] * local_mosaicX + matrix[1][1] * local_y) >> 8) + (height / 2); + } else { + tex_x = ((matrix[0][0] * local_x + matrix[0][1] * local_y) >> 8) + (width / 2); + tex_y = ((matrix[1][0] * local_x + matrix[1][1] * local_y) >> 8) + (height / 2); + } + + /* Check if transformed coordinates are inside bounds. */ + + if (tex_x >= width || tex_y >= height || tex_x < 0 || tex_y < 0) continue; + + if (flipX) + tex_x = width - tex_x - 1; + if (flipY) + tex_y = height - tex_y - 1; + + int tile_x = tex_x & 7; + int tile_y = tex_y & 7; + int block_x = tex_x >> 3; + int block_y = tex_y >> 3; + int block_offset = ((block_y * (REG_DISPCNT & 0x40 ? 
(width >> 3) : 16)) + block_x); + uint16_t pixel = 0; + + uint16_t *pixpal; + if (!is8BPP) { + int tileDataIndex = ((block_offset + oam->split.tileNum) << 5) + (tile_y << 2) + (tile_x >> 1); + pixel = tiledata[tileDataIndex]; + if (tile_x & 1) + pixel >>= 4; + else + pixel &= 0xF; + pixpal = sprpal + (oam->split.paletteNum << 4); +#if ENABLE_VRAM_VIEW + vramPalIdBuffer[0x800 + (tileDataIndex >> 5)] = 16 + oam->split.paletteNum; +#endif + } else { + pixel = tiledata[((block_offset * 2 + oam->split.tileNum) << 5) + (tile_y << 3) + tile_x]; + pixpal = sprpal; } - // this code runs if pixel is to be drawn - if (global_x < DISPLAY_WIDTH && global_x >= 0) { - // check if its enabled in the window (if window is enabled) - winShouldBlendPixel = (windowsEnabled == false || scanline->winMask[global_x] & WINMASK_CLR); - - // has to be separated from the blend mode switch statement - // because of OBJ semi transparancy feature - if ((blendMode == 1 && REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) || isSemiTransparent) { - uint16_t targetA = color; - uint16_t targetB = 0; - if (alphaBlendSelectTargetB(scanline, &targetB, oam->split.priority, 0, global_x, false)) { - color = alphaBlendColor(targetA, targetB, REG_BLDALPHA & 0x1F, (REG_BLDALPHA >> 8) & 0x1F); - } - } else if (REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) { - switch (blendMode) { - case 2: - color = alphaBrightnessIncrease(color, REG_BLDY & 0x1F); - break; - case 3: - color = alphaBrightnessDecrease(color, REG_BLDY & 0x1F); - break; - } + + if (pixel != 0) { + uint16_t color = pixpal[pixel]; + + // if sprite mode is 2 then write to the window mask instead + if (isObjWin) { + if (scanline->winMask[global_x] & WINMASK_WINOUT) + scanline->winMask[global_x] = (REG_WINOUT >> 8) & 0x3F; + continue; } + // this code runs if pixel is to be drawn + if (global_x < DISPLAY_WIDTH && global_x >= 0) { + // check if its enabled in the window (if window is enabled) + winShouldBlendPixel = (windowsEnabled == false || 
scanline->winMask[global_x] & WINMASK_CLR); + + // has to be separated from the blend mode switch statement + // because of OBJ semi transparancy feature + if ((blendMode == 1 && REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) || isSemiTransparent) { + uint16_t targetA = color; + uint16_t targetB = 0; + if (alphaBlendSelectTargetB(scanline, &targetB, oam->split.priority, 0, global_x, false)) { + color = alphaBlendColor(targetA, targetB, REG_BLDALPHA & 0x1F, (REG_BLDALPHA >> 8) & 0x1F); + } + } else if (REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) { + switch (blendMode) { + case 2: + color = alphaBrightnessIncrease(color, REG_BLDY & 0x1F); + break; + case 3: + color = alphaBrightnessDecrease(color, REG_BLDY & 0x1F); + break; + } + } - // write pixel to pixel framebuffer - pixels[global_x] = color | (1 << 15); + // write pixel to pixel framebuffer + pixels[global_x] = color | (1 << 15); + } } } } - } } } } @@ -1825,8 +1825,7 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) case 1: { char isSpriteBlendingEnabled = REG_BLDCNT & BLDCNT_TGT2_OBJ ? 
1 : 0; // find targetB and blend it - if (alphaBlendSelectTargetB(&scanline, &targetB, prnum, prsub + 1, xpos, - isSpriteBlendingEnabled)) { + if (alphaBlendSelectTargetB(&scanline, &targetB, prnum, prsub + 1, xpos, isSpriteBlendingEnabled)) { color = alphaBlendColor(targetA, targetB, REG_BLDALPHA & 0x1F, (REG_BLDALPHA >> 8) & 0x1F); } } break; diff --git a/src/platform/psp/psp_module.c b/src/platform/psp/psp_module.c index a070da463..18040b1e1 100644 --- a/src/platform/psp/psp_module.c +++ b/src/platform/psp/psp_module.c @@ -33,8 +33,7 @@ int callbackThread(SceSize args, void *argp) int setupPspCallbacks(void) { int thid = sceKernelCreateThread("update_thread", callbackThread, 0x11, 0xFA0, 0, 0); - if (thid >= 0) - { + if (thid >= 0) { sceKernelStartThread(thid, 0, 0); } return thid; diff --git a/src/platform/shared/audio/m4a_sound_mixer.c b/src/platform/shared/audio/m4a_sound_mixer.c index b9236e984..f7a8927b3 100644 --- a/src/platform/shared/audio/m4a_sound_mixer.c +++ b/src/platform/shared/audio/m4a_sound_mixer.c @@ -370,10 +370,7 @@ void MP2K_event_fine(struct MP2KPlayerState *unused, struct MP2KTrack *track) // mPtr aligns to 4 bytes on MIPS; match that here before reading pointer data #ifdef __mips__ -static inline u8 *alignCmdPtr4(u8 *p) -{ - return (u8 *)(((uintptr_t)p + 3) & ~(uintptr_t)3); -} +static inline u8 *alignCmdPtr4(u8 *p) { return (u8 *)(((uintptr_t)p + 3) & ~(uintptr_t)3); } #else #define alignCmdPtr4(p) (p) #endif diff --git a/src/platform/shared/rendering/sw_renderer_fast.c b/src/platform/shared/rendering/sw_renderer_fast.c index 72327a2de..484ddf042 100644 --- a/src/platform/shared/rendering/sw_renderer_fast.c +++ b/src/platform/shared/rendering/sw_renderer_fast.c @@ -30,33 +30,33 @@ #include "platform/shared/rendering/sw_renderer_common.h" extern IntrFunc gIntrTable[16]; -extern uint8_t REG_BASE[IO_SIZE]; +extern uint8_t REG_BASE[IO_SIZE]; extern uint16_t PLTT[PLTT_SIZE / sizeof(uint16_t)]; -extern uint8_t VRAM[VRAM_SIZE]; -extern uint8_t 
OAM[OAM_SIZE]; +extern uint8_t VRAM[VRAM_SIZE]; +extern uint8_t OAM[OAM_SIZE]; extern const u8 gOamShapesSizes[12][2]; #ifndef TILE_WIDTH #define TILE_WIDTH 8 #endif -#define IsBGEnabled(n) (((REG_DISPCNT >> 8) & 0xF) & (1 << (n))) +#define IsBGEnabled(n) (((REG_DISPCNT >> 8) & 0xF) & (1 << (n))) // mosaic -#define MOSAIC_BG_X (REG_MOSAIC & 0xF) -#define MOSAIC_BG_Y ((REG_MOSAIC >> 4) & 0xF) -#define MOSAIC_SPR_X ((REG_MOSAIC >> 8) & 0xF) -#define MOSAIC_SPR_Y ((REG_MOSAIC >> 12) & 0xF) -#define ApplyMosaicBGX(x) ((x) - ((x) % (MOSAIC_BG_X + 1))) -#define ApplyMosaicBGY(y) ((y) - ((y) % (MOSAIC_BG_Y + 1))) -#define ApplyMosaicSprX(x) ((x) - ((x) % (MOSAIC_SPR_X + 1))) -#define ApplyMosaicSprY(y) ((y) - ((y) % (MOSAIC_SPR_Y + 1))) +#define MOSAIC_BG_X (REG_MOSAIC & 0xF) +#define MOSAIC_BG_Y ((REG_MOSAIC >> 4) & 0xF) +#define MOSAIC_SPR_X ((REG_MOSAIC >> 8) & 0xF) +#define MOSAIC_SPR_Y ((REG_MOSAIC >> 12) & 0xF) +#define ApplyMosaicBGX(x) ((x) - ((x) % (MOSAIC_BG_X + 1))) +#define ApplyMosaicBGY(y) ((y) - ((y) % (MOSAIC_BG_Y + 1))) +#define ApplyMosaicSprX(x) ((x) - ((x) % (MOSAIC_SPR_X + 1))) +#define ApplyMosaicSprY(y) ((y) - ((y) % (MOSAIC_SPR_Y + 1))) // tilemap entry fields -#define TILE_NUM(e) ((e) & 0x3FF) -#define TILE_PALETTE(e) (((e) >> 12) & 0xF) -#define TILE_HFLIP(e) ((e) & (1 << 10)) -#define TILE_VFLIP(e) ((e) & (1 << 11)) +#define TILE_NUM(e) ((e) & 0x3FF) +#define TILE_PALETTE(e) (((e) >> 12) & 0xF) +#define TILE_HFLIP(e) ((e) & (1 << 10)) +#define TILE_VFLIP(e) ((e) & (1 << 11)) // window mask bits #define WINMASK_BG0 (1 << 0) @@ -68,18 +68,20 @@ extern const u8 gOamShapesSizes[12][2]; #define WINMASK_WINOUT (1 << 6) // layer ids for blend target tracking -#define LAYER_BG0 0 -#define LAYER_BG1 1 -#define LAYER_BG2 2 -#define LAYER_BG3 3 -#define LAYER_OBJ 4 -#define LAYER_BACKDROP 5 +#define LAYER_BG0 0 +#define LAYER_BG1 1 +#define LAYER_BG2 2 +#define LAYER_BG3 3 +#define LAYER_OBJ 4 +#define LAYER_BACKDROP 5 static const uint16_t bgMapSizes[][2] 
= { - { 32, 32 }, { 64, 32 }, { 32, 64 }, { 64, 64 }, + { 32, 32 }, + { 64, 32 }, + { 32, 64 }, + { 64, 64 }, }; - // 16-bit fill using 32-bit writes static inline void Memset16(uint16_t *dst, uint16_t fill, unsigned int count) { @@ -94,10 +96,10 @@ static inline void Memset16(uint16_t *dst, uint16_t fill, unsigned int count) static inline uint32_t GetBgRefX(int bg) { return (bg == 2) ? REG_BG2X : (bg == 3) ? REG_BG3X : 0; } static inline uint32_t GetBgRefY(int bg) { return (bg == 2) ? REG_BG2Y : (bg == 3) ? REG_BG3Y : 0; } -static inline uint16_t GetBgPA(int bg) { return (bg == 2) ? REG_BG2PA : (bg == 3) ? REG_BG3PA : 0; } -static inline uint16_t GetBgPB(int bg) { return (bg == 2) ? REG_BG2PB : (bg == 3) ? REG_BG3PB : 0; } -static inline uint16_t GetBgPC(int bg) { return (bg == 2) ? REG_BG2PC : (bg == 3) ? REG_BG3PC : 0; } -static inline uint16_t GetBgPD(int bg) { return (bg == 2) ? REG_BG2PD : (bg == 3) ? REG_BG3PD : 0; } +static inline uint16_t GetBgPA(int bg) { return (bg == 2) ? REG_BG2PA : (bg == 3) ? REG_BG3PA : 0; } +static inline uint16_t GetBgPB(int bg) { return (bg == 2) ? REG_BG2PB : (bg == 3) ? REG_BG3PB : 0; } +static inline uint16_t GetBgPC(int bg) { return (bg == 2) ? REG_BG2PC : (bg == 3) ? REG_BG3PC : 0; } +static inline uint16_t GetBgPD(int bg) { return (bg == 2) ? REG_BG2PD : (bg == 3) ? 
REG_BG3PD : 0; } // handles the wraparound case where left > right static inline bool WindowContainsX(u16 left, u16 right, u16 x) @@ -120,19 +122,13 @@ static inline bool IsBlendTargetB(uint8_t layerId, unsigned int bldcnt) } // sprites with oam mode 1 always try alpha blending regardless of bldcnt -static inline uint16_t BlendSpritePixel( - uint16_t color, unsigned int x, - uint16_t *output, uint8_t *layerIds, - bool isSemiTransparent, - unsigned int blendMode, unsigned int bldcnt, - bool windowsEnabled, uint16_t *winMask, - unsigned int eva, unsigned int evb, unsigned int evy) +static inline uint16_t BlendSpritePixel(uint16_t color, unsigned int x, uint16_t *output, uint8_t *layerIds, bool isSemiTransparent, + unsigned int blendMode, unsigned int bldcnt, bool windowsEnabled, uint16_t *winMask, + unsigned int eva, unsigned int evb, unsigned int evy) { - bool winAllowsBlend = !windowsEnabled - || (winMask && (winMask[x] & WINMASK_CLR)); + bool winAllowsBlend = !windowsEnabled || (winMask && (winMask[x] & WINMASK_CLR)); - bool doAlpha = (blendMode == 1 && (bldcnt & BLDCNT_TGT1_OBJ) && winAllowsBlend) - || isSemiTransparent; + bool doAlpha = (blendMode == 1 && (bldcnt & BLDCNT_TGT1_OBJ) && winAllowsBlend) || isSemiTransparent; if (doAlpha) { if (IsBlendTargetB(layerIds[x], bldcnt)) @@ -148,14 +144,9 @@ static inline uint16_t BlendSpritePixel( } // write a bg pixel with inline blend resolution -static inline void WriteBGPixelBlended( - unsigned int x, uint8_t pixel, - const uint16_t *palBase, int bgNum, - uint16_t *output, uint8_t *layerIds, - unsigned int blendMode, bool bgIsTargetA, - bool useWindows, unsigned int winBgBit, uint16_t *winMask, - unsigned int bldcnt, - unsigned int eva, unsigned int evb, unsigned int evy) +static inline void WriteBGPixelBlended(unsigned int x, uint8_t pixel, const uint16_t *palBase, int bgNum, uint16_t *output, + uint8_t *layerIds, unsigned int blendMode, bool bgIsTargetA, bool useWindows, unsigned int winBgBit, + uint16_t *winMask, 
unsigned int bldcnt, unsigned int eva, unsigned int evb, unsigned int evy) { uint16_t color = palBase[pixel] | COLOR_OPAQUE; @@ -169,8 +160,12 @@ static inline void WriteBGPixelBlended( if (IsBlendTargetB(layerIds[x], bldcnt)) color = alphaBlendColor(src, output[x], eva, evb); break; - case 2: color = alphaBrightnessIncrease(src, evy); break; - case 3: color = alphaBrightnessDecrease(src, evy); break; + case 2: + color = alphaBrightnessIncrease(src, evy); + break; + case 3: + color = alphaBrightnessDecrease(src, evy); + break; } } @@ -178,23 +173,21 @@ static inline void WriteBGPixelBlended( layerIds[x] = bgNum; } -static void RenderTextBG(int bgNum, uint16_t control, - uint16_t hoffs, uint16_t voffs, - int lineNum, uint16_t *output) +static void RenderTextBG(int bgNum, uint16_t control, uint16_t hoffs, uint16_t voffs, int lineNum, uint16_t *output) { - unsigned int charBase = (control >> 2) & 3; + unsigned int charBase = (control >> 2) & 3; unsigned int screenBase = (control & BGCNT_SCREENBASE_MASK) >> 8; - unsigned int is8bpp = (control >> 7) & 1; + unsigned int is8bpp = (control >> 7) & 1; - unsigned int mapW = bgMapSizes[control >> 14][0]; - unsigned int mapPxW = mapW << 3; - unsigned int mapPxH = bgMapSizes[control >> 14][1] << 3; - unsigned int wMask = mapPxW - 1; - unsigned int hMask = mapPxH - 1; + unsigned int mapW = bgMapSizes[control >> 14][0]; + unsigned int mapPxW = mapW << 3; + unsigned int mapPxH = bgMapSizes[control >> 14][1] << 3; + unsigned int wMask = mapPxW - 1; + unsigned int hMask = mapPxH - 1; - uint8_t *tiles = (uint8_t *)BG_CHAR_ADDR(charBase); - uint16_t *map = (uint16_t *)BG_SCREEN_ADDR(screenBase); - uint16_t *pal = (uint16_t *)PLTT; + uint8_t *tiles = (uint8_t *)BG_CHAR_ADDR(charBase); + uint16_t *map = (uint16_t *)BG_SCREEN_ADDR(screenBase); + uint16_t *pal = (uint16_t *)PLTT; bool hasMosaic = control & BGCNT_MOSAIC; if (hasMosaic) @@ -203,25 +196,25 @@ static void RenderTextBG(int bgNum, uint16_t control, hoffs &= 0x1FF; voffs &= 
0x1FF; - unsigned int yy = (lineNum + voffs) & hMask; - unsigned int mapY = yy >> 3; - unsigned int tileY = yy & 7; + unsigned int yy = (lineNum + voffs) & hMask; + unsigned int mapY = yy >> 3; + unsigned int tileY = yy & 7; unsigned int rowBase = mapY * mapW; // slow path: 8bpp or mosaic, one pixel at a time if (hasMosaic || is8bpp) { for (unsigned int x = 0; x < DISPLAY_WIDTH; x++) { - unsigned int xx = hasMosaic - ? (ApplyMosaicBGX(x) + hoffs) & wMask - : (x + hoffs) & wMask; + unsigned int xx = hasMosaic ? (ApplyMosaicBGX(x) + hoffs) & wMask : (x + hoffs) & wMask; uint16_t entry = map[rowBase + (xx >> 3)]; unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); + unsigned int palNum = TILE_PALETTE(entry); unsigned int tx = xx & 7; unsigned int ty = tileY; - if (TILE_HFLIP(entry)) tx = 7 - tx; - if (TILE_VFLIP(entry)) ty = 7 - ty; + if (TILE_HFLIP(entry)) + tx = 7 - tx; + if (TILE_VFLIP(entry)) + ty = 7 - ty; if (!is8bpp) { uint8_t pair = tiles[(tileNum << 5) + (ty << 2) + (tx >> 1)]; @@ -242,25 +235,28 @@ static void RenderTextBG(int bgNum, uint16_t control, // left edge: partial tile if scroll isn't tile-aligned { - unsigned int startX = hoffs & wMask; + unsigned int startX = hoffs & wMask; unsigned int startOff = startX & 7; if (startOff != 0) { uint16_t entry = map[rowBase + (startX >> 3)]; unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) ty = 7 - ty; + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; bool hflip = TILE_HFLIP(entry); uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); unsigned int partial = 8 - startOff; - if (partial > DISPLAY_WIDTH) partial = DISPLAY_WIDTH; + if (partial > DISPLAY_WIDTH) + partial = DISPLAY_WIDTH; for (unsigned int t = 0; t < partial && x < DISPLAY_WIDTH; t++, x++) { unsigned int tx = startOff + t; - if (hflip) tx = 7 - tx; + if 
(hflip) + tx = 7 - tx; uint8_t pixel = (row >> (tx << 2)) & 0xF; if (pixel) output[x] = pal[(palNum << 4) + pixel] | COLOR_OPAQUE; @@ -271,35 +267,68 @@ static void RenderTextBG(int bgNum, uint16_t control, // middle: full tiles, 8 pixels at a time while (x + 8 <= DISPLAY_WIDTH) { unsigned int srcX = (x + hoffs) & wMask; - uint16_t entry = map[rowBase + (srcX >> 3)]; + uint16_t entry = map[rowBase + (srcX >> 3)]; unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) ty = 7 - ty; + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; - uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); uint16_t *palBase = pal + (palNum << 4); if (!TILE_HFLIP(entry)) { uint8_t p; - p = row & 0xF; if (p) output[x ] = palBase[p] | COLOR_OPAQUE; - p = (row >> 4) & 0xF; if (p) output[x+1] = palBase[p] | COLOR_OPAQUE; - p = (row >> 8) & 0xF; if (p) output[x+2] = palBase[p] | COLOR_OPAQUE; - p = (row >> 12) & 0xF; if (p) output[x+3] = palBase[p] | COLOR_OPAQUE; - p = (row >> 16) & 0xF; if (p) output[x+4] = palBase[p] | COLOR_OPAQUE; - p = (row >> 20) & 0xF; if (p) output[x+5] = palBase[p] | COLOR_OPAQUE; - p = (row >> 24) & 0xF; if (p) output[x+6] = palBase[p] | COLOR_OPAQUE; - p = (row >> 28) & 0xF; if (p) output[x+7] = palBase[p] | COLOR_OPAQUE; + p = row & 0xF; + if (p) + output[x] = palBase[p] | COLOR_OPAQUE; + p = (row >> 4) & 0xF; + if (p) + output[x + 1] = palBase[p] | COLOR_OPAQUE; + p = (row >> 8) & 0xF; + if (p) + output[x + 2] = palBase[p] | COLOR_OPAQUE; + p = (row >> 12) & 0xF; + if (p) + output[x + 3] = palBase[p] | COLOR_OPAQUE; + p = (row >> 16) & 0xF; + if (p) + output[x + 4] = palBase[p] | COLOR_OPAQUE; + p = (row >> 20) & 0xF; + if (p) + output[x + 5] = palBase[p] | COLOR_OPAQUE; + p = (row >> 24) & 0xF; + if (p) + output[x + 6] = palBase[p] | 
COLOR_OPAQUE; + p = (row >> 28) & 0xF; + if (p) + output[x + 7] = palBase[p] | COLOR_OPAQUE; } else { uint8_t p; - p = (row >> 28) & 0xF; if (p) output[x ] = palBase[p] | COLOR_OPAQUE; - p = (row >> 24) & 0xF; if (p) output[x+1] = palBase[p] | COLOR_OPAQUE; - p = (row >> 20) & 0xF; if (p) output[x+2] = palBase[p] | COLOR_OPAQUE; - p = (row >> 16) & 0xF; if (p) output[x+3] = palBase[p] | COLOR_OPAQUE; - p = (row >> 12) & 0xF; if (p) output[x+4] = palBase[p] | COLOR_OPAQUE; - p = (row >> 8) & 0xF; if (p) output[x+5] = palBase[p] | COLOR_OPAQUE; - p = (row >> 4) & 0xF; if (p) output[x+6] = palBase[p] | COLOR_OPAQUE; - p = row & 0xF; if (p) output[x+7] = palBase[p] | COLOR_OPAQUE; + p = (row >> 28) & 0xF; + if (p) + output[x] = palBase[p] | COLOR_OPAQUE; + p = (row >> 24) & 0xF; + if (p) + output[x + 1] = palBase[p] | COLOR_OPAQUE; + p = (row >> 20) & 0xF; + if (p) + output[x + 2] = palBase[p] | COLOR_OPAQUE; + p = (row >> 16) & 0xF; + if (p) + output[x + 3] = palBase[p] | COLOR_OPAQUE; + p = (row >> 12) & 0xF; + if (p) + output[x + 4] = palBase[p] | COLOR_OPAQUE; + p = (row >> 8) & 0xF; + if (p) + output[x + 5] = palBase[p] | COLOR_OPAQUE; + p = (row >> 4) & 0xF; + if (p) + output[x + 6] = palBase[p] | COLOR_OPAQUE; + p = row & 0xF; + if (p) + output[x + 7] = palBase[p] | COLOR_OPAQUE; } x += 8; } @@ -307,11 +336,12 @@ static void RenderTextBG(int bgNum, uint16_t control, // right edge: leftover partial tile if (x < DISPLAY_WIDTH) { unsigned int srcX = (x + hoffs) & wMask; - uint16_t entry = map[rowBase + (srcX >> 3)]; + uint16_t entry = map[rowBase + (srcX >> 3)]; unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) ty = 7 - ty; + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; bool hflip = TILE_HFLIP(entry); uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); @@ -326,30 +356,23 @@ static void RenderTextBG(int 
bgNum, uint16_t control, } // same thing but with blend/window tracking baked in -static void RenderTextBGBlend(int bgNum, uint16_t control, - uint16_t hoffs, uint16_t voffs, - int lineNum, uint16_t *output, - uint8_t *layerIds, - unsigned int blendMode, - bool windowsEnabled, - uint16_t *winMask, - unsigned int bldcnt, - unsigned int eva, unsigned int evb, - unsigned int evy) +static void RenderTextBGBlend(int bgNum, uint16_t control, uint16_t hoffs, uint16_t voffs, int lineNum, uint16_t *output, uint8_t *layerIds, + unsigned int blendMode, bool windowsEnabled, uint16_t *winMask, unsigned int bldcnt, unsigned int eva, + unsigned int evb, unsigned int evy) { - unsigned int charBase = (control >> 2) & 3; + unsigned int charBase = (control >> 2) & 3; unsigned int screenBase = (control & BGCNT_SCREENBASE_MASK) >> 8; - unsigned int is8bpp = (control >> 7) & 1; + unsigned int is8bpp = (control >> 7) & 1; - unsigned int mapW = bgMapSizes[control >> 14][0]; - unsigned int mapPxW = mapW << 3; - unsigned int mapPxH = bgMapSizes[control >> 14][1] << 3; - unsigned int wMask = mapPxW - 1; - unsigned int hMask = mapPxH - 1; + unsigned int mapW = bgMapSizes[control >> 14][0]; + unsigned int mapPxW = mapW << 3; + unsigned int mapPxH = bgMapSizes[control >> 14][1] << 3; + unsigned int wMask = mapPxW - 1; + unsigned int hMask = mapPxH - 1; - uint8_t *tiles = (uint8_t *)BG_CHAR_ADDR(charBase); - uint16_t *map = (uint16_t *)BG_SCREEN_ADDR(screenBase); - uint16_t *pal = (uint16_t *)PLTT; + uint8_t *tiles = (uint8_t *)BG_CHAR_ADDR(charBase); + uint16_t *map = (uint16_t *)BG_SCREEN_ADDR(screenBase); + uint16_t *pal = (uint16_t *)PLTT; bool hasMosaic = control & BGCNT_MOSAIC; if (hasMosaic) @@ -358,29 +381,29 @@ static void RenderTextBGBlend(int bgNum, uint16_t control, hoffs &= 0x1FF; voffs &= 0x1FF; - unsigned int yy = (lineNum + voffs) & hMask; - unsigned int mapY = yy >> 3; - unsigned int tileY = yy & 7; + unsigned int yy = (lineNum + voffs) & hMask; + unsigned int mapY = yy >> 3; + 
unsigned int tileY = yy & 7; unsigned int rowBase = mapY * mapW; - bool bgIsTargetA = (blendMode != 0) && (bldcnt & (1 << bgNum)); - bool useWindows = windowsEnabled && (winMask != NULL); + bool bgIsTargetA = (blendMode != 0) && (bldcnt & (1 << bgNum)); + bool useWindows = windowsEnabled && (winMask != NULL); unsigned int winBgBit = 1 << bgNum; // slow path: 8bpp or mosaic if (hasMosaic || is8bpp) { for (unsigned int x = 0; x < DISPLAY_WIDTH; x++) { - unsigned int xx = hasMosaic - ? (ApplyMosaicBGX(x) + hoffs) & wMask - : (x + hoffs) & wMask; + unsigned int xx = hasMosaic ? (ApplyMosaicBGX(x) + hoffs) & wMask : (x + hoffs) & wMask; uint16_t entry = map[rowBase + (xx >> 3)]; unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); + unsigned int palNum = TILE_PALETTE(entry); unsigned int tx = xx & 7; unsigned int ty = tileY; - if (TILE_HFLIP(entry)) tx = 7 - tx; - if (TILE_VFLIP(entry)) ty = 7 - ty; + if (TILE_HFLIP(entry)) + tx = 7 - tx; + if (TILE_VFLIP(entry)) + ty = 7 - ty; uint8_t pixel; if (!is8bpp) { @@ -393,9 +416,7 @@ static void RenderTextBGBlend(int bgNum, uint16_t control, if (pixel == 0) continue; - uint16_t color = !is8bpp - ? pal[(palNum << 4) + pixel] | COLOR_OPAQUE - : pal[pixel] | COLOR_OPAQUE; + uint16_t color = !is8bpp ? 
pal[(palNum << 4) + pixel] | COLOR_OPAQUE : pal[pixel] | COLOR_OPAQUE; if (useWindows && !(winMask[x] & winBgBit)) continue; @@ -407,8 +428,12 @@ static void RenderTextBGBlend(int bgNum, uint16_t control, if (IsBlendTargetB(layerIds[x], bldcnt)) color = alphaBlendColor(src, output[x], eva, evb); break; - case 2: color = alphaBrightnessIncrease(src, evy); break; - case 3: color = alphaBrightnessDecrease(src, evy); break; + case 2: + color = alphaBrightnessIncrease(src, evy); + break; + case 3: + color = alphaBrightnessDecrease(src, evy); + break; } } @@ -423,31 +448,32 @@ static void RenderTextBGBlend(int bgNum, uint16_t control, // left edge partial tile { - unsigned int startX = hoffs & wMask; + unsigned int startX = hoffs & wMask; unsigned int startOff = startX & 7; if (startOff != 0) { uint16_t entry = map[rowBase + (startX >> 3)]; unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) ty = 7 - ty; + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; bool hflip = TILE_HFLIP(entry); - uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); uint16_t *palBase = pal + (palNum << 4); unsigned int partial = 8 - startOff; - if (partial > DISPLAY_WIDTH) partial = DISPLAY_WIDTH; + if (partial > DISPLAY_WIDTH) + partial = DISPLAY_WIDTH; for (unsigned int t = 0; t < partial && x < DISPLAY_WIDTH; t++, x++) { unsigned int tx = startOff + t; - if (hflip) tx = 7 - tx; + if (hflip) + tx = 7 - tx; uint8_t pixel = (row >> (tx << 2)) & 0xF; if (pixel) - WriteBGPixelBlended(x, pixel, palBase, bgNum, output, - layerIds, blendMode, bgIsTargetA, - useWindows, winBgBit, winMask, + WriteBGPixelBlended(x, pixel, palBase, bgNum, output, layerIds, blendMode, bgIsTargetA, useWindows, winBgBit, winMask, bldcnt, eva, evb, evy); } } @@ -456,36 +482,45 @@ static void 
RenderTextBGBlend(int bgNum, uint16_t control, // middle: full tiles while (x + 8 <= DISPLAY_WIDTH) { unsigned int srcX = (x + hoffs) & wMask; - uint16_t entry = map[rowBase + (srcX >> 3)]; + uint16_t entry = map[rowBase + (srcX >> 3)]; unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) ty = 7 - ty; + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; - uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); uint16_t *palBase = pal + (palNum << 4); - #define BLEND_PX(off, shift) do { \ - uint8_t p = (row >> (shift)) & 0xF; \ - if (p) WriteBGPixelBlended(x+(off), p, palBase, bgNum, \ - output, layerIds, blendMode, bgIsTargetA, \ - useWindows, winBgBit, winMask, \ - bldcnt, eva, evb, evy); \ - } while (0) +#define BLEND_PX(off, shift) \ + do { \ + uint8_t p = (row >> (shift)) & 0xF; \ + if (p) \ + WriteBGPixelBlended(x + (off), p, palBase, bgNum, output, layerIds, blendMode, bgIsTargetA, useWindows, winBgBit, winMask, \ + bldcnt, eva, evb, evy); \ + } while (0) if (!TILE_HFLIP(entry)) { - BLEND_PX(0, 0); BLEND_PX(1, 4); - BLEND_PX(2, 8); BLEND_PX(3, 12); - BLEND_PX(4, 16); BLEND_PX(5, 20); - BLEND_PX(6, 24); BLEND_PX(7, 28); + BLEND_PX(0, 0); + BLEND_PX(1, 4); + BLEND_PX(2, 8); + BLEND_PX(3, 12); + BLEND_PX(4, 16); + BLEND_PX(5, 20); + BLEND_PX(6, 24); + BLEND_PX(7, 28); } else { - BLEND_PX(0, 28); BLEND_PX(1, 24); - BLEND_PX(2, 20); BLEND_PX(3, 16); - BLEND_PX(4, 12); BLEND_PX(5, 8); - BLEND_PX(6, 4); BLEND_PX(7, 0); + BLEND_PX(0, 28); + BLEND_PX(1, 24); + BLEND_PX(2, 20); + BLEND_PX(3, 16); + BLEND_PX(4, 12); + BLEND_PX(5, 8); + BLEND_PX(6, 4); + BLEND_PX(7, 0); } - #undef BLEND_PX +#undef BLEND_PX x += 8; } @@ -493,36 +528,34 @@ static void RenderTextBGBlend(int bgNum, uint16_t control, // right edge partial tile if (x < DISPLAY_WIDTH) { 
unsigned int srcX = (x + hoffs) & wMask; - uint16_t entry = map[rowBase + (srcX >> 3)]; + uint16_t entry = map[rowBase + (srcX >> 3)]; unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) ty = 7 - ty; + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; bool hflip = TILE_HFLIP(entry); - uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); uint16_t *palBase = pal + (palNum << 4); for (unsigned int t = 0; x < DISPLAY_WIDTH; t++, x++) { unsigned int tx = hflip ? (7 - t) : t; uint8_t pixel = (row >> (tx << 2)) & 0xF; if (pixel) - WriteBGPixelBlended(x, pixel, palBase, bgNum, output, - layerIds, blendMode, bgIsTargetA, - useWindows, winBgBit, winMask, + WriteBGPixelBlended(x, pixel, palBase, bgNum, output, layerIds, blendMode, bgIsTargetA, useWindows, winBgBit, winMask, bldcnt, eva, evb, evy); } } } -static void RenderAffineBG(int bgNum, uint16_t control, - int lineNum, uint16_t *output) +static void RenderAffineBG(int bgNum, uint16_t control, int lineNum, uint16_t *output) { vBgCnt *bgcnt = (vBgCnt *)&control; - uint8_t *tiles = (uint8_t *)(VRAM + bgcnt->charBaseBlock * 0x4000); - uint8_t *map = (uint8_t *)(VRAM + bgcnt->screenBaseBlock * 0x800); - uint16_t *pal = (uint16_t *)PLTT; + uint8_t *tiles = (uint8_t *)(VRAM + bgcnt->charBaseBlock * 0x4000); + uint8_t *map = (uint8_t *)(VRAM + bgcnt->screenBaseBlock * 0x800); + uint16_t *pal = (uint16_t *)PLTT; if (control & BGCNT_MOSAIC) lineNum = ApplyMosaicBGY(lineNum); @@ -535,11 +568,17 @@ static void RenderAffineBG(int bgNum, uint16_t control, // always square: 128/256/512/1024 int size = 128; switch (bgcnt->screenSize) { - case 1: size = 256; break; - case 2: size = 512; break; - case 3: size = 1024; break; + case 1: + size = 256; + break; + case 2: + size = 512; + break; + case 3: + size = 1024; + 
break; } - int mask = size - 1; + int mask = size - 1; int yshift = ((control >> 14) & 3) + 4; // sign-extend 28-bit reference point, advance by scanline @@ -589,21 +628,15 @@ static void RenderAffineBG(int bgNum, uint16_t control, } // same deal with blend/window support -static void RenderAffineBGBlend(int bgNum, uint16_t control, - int lineNum, uint16_t *output, - uint8_t *layerIds, - unsigned int blendMode, - bool windowsEnabled, - uint16_t *winMask, - unsigned int bldcnt, - unsigned int eva, unsigned int evb, +static void RenderAffineBGBlend(int bgNum, uint16_t control, int lineNum, uint16_t *output, uint8_t *layerIds, unsigned int blendMode, + bool windowsEnabled, uint16_t *winMask, unsigned int bldcnt, unsigned int eva, unsigned int evb, unsigned int evy) { vBgCnt *bgcnt = (vBgCnt *)&control; - uint8_t *tiles = (uint8_t *)(VRAM + bgcnt->charBaseBlock * 0x4000); - uint8_t *map = (uint8_t *)(VRAM + bgcnt->screenBaseBlock * 0x800); - uint16_t *pal = (uint16_t *)PLTT; + uint8_t *tiles = (uint8_t *)(VRAM + bgcnt->charBaseBlock * 0x4000); + uint8_t *map = (uint8_t *)(VRAM + bgcnt->screenBaseBlock * 0x800); + uint16_t *pal = (uint16_t *)PLTT; if (control & BGCNT_MOSAIC) lineNum = ApplyMosaicBGY(lineNum); @@ -615,11 +648,17 @@ static void RenderAffineBGBlend(int bgNum, uint16_t control, int size = 128; switch (bgcnt->screenSize) { - case 1: size = 256; break; - case 2: size = 512; break; - case 3: size = 1024; break; + case 1: + size = 256; + break; + case 2: + size = 512; + break; + case 3: + size = 1024; + break; } - int mask = size - 1; + int mask = size - 1; int yshift = ((control >> 14) & 3) + 4; s32 refX = GetBgRefX(bgNum); @@ -675,8 +714,12 @@ static void RenderAffineBGBlend(int bgNum, uint16_t control, if (IsBlendTargetB(layerIds[x], bldcnt)) color = alphaBlendColor(src, output[x], eva, evb); break; - case 2: color = alphaBrightnessIncrease(src, evy); break; - case 3: color = alphaBrightnessDecrease(src, evy); break; + case 2: + color = 
alphaBrightnessIncrease(src, evy); + break; + case 3: + color = alphaBrightnessDecrease(src, evy); + break; } } @@ -697,7 +740,7 @@ typedef struct { } ActiveSprite; static ActiveSprite sActiveSprites[4][MAX_SPRITES_PER_PRIORITY]; -static int sActiveSpriteCount[4]; +static int sActiveSpriteCount[4]; static void PrefilterSprites(uint16_t vcount) { @@ -713,14 +756,14 @@ static void PrefilterSprites(uint16_t vcount) for (int i = OAM_ENTRY_COUNT - 1; i >= 0; i--) { OamData *oam = &((OamData *)OAM)[i]; - bool isAffine = oam->split.affineMode & 1; + bool isAffine = oam->split.affineMode & 1; bool isDisabled = (oam->split.affineMode >> 1) & 1; if (!isAffine && isDisabled) continue; s32 idx = (oam->split.shape << 2) | oam->split.size; - unsigned int width = gOamShapesSizes[idx][0]; + unsigned int width = gOamShapesSizes[idx][0]; unsigned int height = gOamShapesSizes[idx][1]; int halfW = width / 2; int halfH = height / 2; @@ -728,8 +771,10 @@ static void PrefilterSprites(uint16_t vcount) int32_t sx = oam->split.x; int32_t sy = oam->split.y; #if !EXTENDED_OAM - if (sx >= DISPLAY_WIDTH) sx -= 512; - if (sy >= DISPLAY_HEIGHT) sy -= 256; + if (sx >= DISPLAY_WIDTH) + sx -= 512; + if (sy >= DISPLAY_HEIGHT) + sy -= 256; #endif // double-size affine sprites have 2x bounding box @@ -751,16 +796,12 @@ static void PrefilterSprites(uint16_t vcount) } } -static void DrawSpritesAtPriority(int priority, uint16_t vcount, - uint16_t *output, uint8_t *layerIds, - bool windowsEnabled, uint16_t *winMask, - unsigned int blendMode, bool objWinOnly, - unsigned int bldcnt, - unsigned int eva, unsigned int evb, - unsigned int evy) +static void DrawSpritesAtPriority(int priority, uint16_t vcount, uint16_t *output, uint8_t *layerIds, bool windowsEnabled, + uint16_t *winMask, unsigned int blendMode, bool objWinOnly, unsigned int bldcnt, unsigned int eva, + unsigned int evb, unsigned int evy) { - uint8_t *tiledata = (uint8_t *)OBJ_VRAM0; - uint16_t *sprpal = (uint16_t *)PLTT + (0x200 / 2); + uint8_t 
*tiledata = (uint8_t *)OBJ_VRAM0; + uint16_t *sprpal = (uint16_t *)PLTT + (0x200 / 2); int16_t matrix[2][2]; // only 1-D tile mapping supported @@ -771,11 +812,11 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, int i = sActiveSprites[priority][s].oamIndex; OamData *oam = &((OamData *)OAM)[i]; - bool isAffine = oam->split.affineMode & 1; + bool isAffine = oam->split.affineMode & 1; bool doubleSize = (oam->split.affineMode >> 1) & 1; s32 idx = (oam->split.shape << 2) | oam->split.size; - unsigned int width = gOamShapesSizes[idx][0]; + unsigned int width = gOamShapesSizes[idx][0]; unsigned int height = gOamShapesSizes[idx][1]; int halfW = width / 2; int halfH = height / 2; @@ -783,8 +824,10 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, int32_t x = oam->split.x; int32_t y = oam->split.y; #if !EXTENDED_OAM - if (x >= DISPLAY_WIDTH) x -= 512; - if (y >= DISPLAY_HEIGHT) y -= 256; + if (x >= DISPLAY_WIDTH) + x -= 512; + if (y >= DISPLAY_HEIGHT) + y -= 256; #endif if (isAffine && doubleSize) { halfW *= 2; @@ -792,12 +835,14 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, } bool isSemiTransparent = (oam->split.objMode == 1); - bool isObjWin = (oam->split.objMode == 2); + bool isObjWin = (oam->split.objMode == 2); - if (objWinOnly && !isObjWin) continue; - if (!objWinOnly && isObjWin) continue; + if (objWinOnly && !isObjWin) + continue; + if (!objWinOnly && isObjWin) + continue; - int rectWidth = width; + int rectWidth = width; int rectHeight = height; if (isAffine) { @@ -811,7 +856,7 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, matrix[1][0] = m2->all.affineParam; matrix[1][1] = m3->all.affineParam; if (doubleSize) { - rectWidth *= 2; + rectWidth *= 2; rectHeight *= 2; } } else { @@ -824,30 +869,31 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, x += halfW; y += halfH; - int localY = (oam->split.mosaic == 1) - ? 
ApplyMosaicSprY(vcount) - y - : vcount - y; + int localY = (oam->split.mosaic == 1) ? ApplyMosaicSprY(vcount) - y : vcount - y; bool flipX = !isAffine && ((oam->split.matrixNum >> 3) & 1); bool flipY = !isAffine && ((oam->split.matrixNum >> 4) & 1); bool is8bpp = oam->split.bpp & 1; int startLX = -halfW; - int endLX = halfW; - if (startLX + x < 0) startLX = -x; - if (endLX + x >= DISPLAY_WIDTH) endLX = DISPLAY_WIDTH - 1 - x; + int endLX = halfW; + if (startLX + x < 0) + startLX = -x; + if (endLX + x >= DISPLAY_WIDTH) + endLX = DISPLAY_WIDTH - 1 - x; // fast path: non-affine 4bpp, no mosaic -- batched tile row reads if (!isAffine && !is8bpp && !oam->split.mosaic) { int texY = localY + halfH; - if (flipY) texY = height - texY - 1; + if (flipY) + texY = height - texY - 1; if (texY < 0 || texY >= (int)height) continue; - int tileRowY = texY & 7; - int blockY = texY >> 3; + int tileRowY = texY & 7; + int blockY = texY >> 3; int tilesPerRow = (REG_DISPCNT & 0x40) ? ((int)width >> 3) : 16; - int tileBase = blockY * tilesPerRow + oam->split.tileNum; - int rowByteOff = tileRowY << 2; + int tileBase = blockY * tilesPerRow + oam->split.tileNum; + int rowByteOff = tileRowY << 2; uint16_t *pixpal = sprpal + (oam->split.paletteNum << 4); int lx = startLX; @@ -860,22 +906,24 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, continue; } - int blockX = texX >> 3; + int blockX = texX >> 3; int tileXStart = texX & 7; - uint32_t rowData = *(uint32_t *)(tiledata - + ((tileBase + blockX) << 5) + rowByteOff); + uint32_t rowData = *(uint32_t *)(tiledata + ((tileBase + blockX) << 5) + rowByteOff); int pixelsInTile = !flipX ? 
(8 - tileXStart) : (tileXStart + 1); int remain = endLX - lx + 1; - if (pixelsInTile > remain) pixelsInTile = remain; + if (pixelsInTile > remain) + pixelsInTile = remain; if (!flipX) { int texRemain = (int)width - texX; - if (pixelsInTile > texRemain) pixelsInTile = texRemain; + if (pixelsInTile > texRemain) + pixelsInTile = texRemain; } else { int texRemain = texX + 1; - if (pixelsInTile > texRemain) pixelsInTile = texRemain; + if (pixelsInTile > texRemain) + pixelsInTile = texRemain; } for (int p = 0; p < pixelsInTile; p++, lx++) { @@ -895,9 +943,7 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, } if (layerIds && blendMode != 0) - color = BlendSpritePixel(color, gx, output, layerIds, - isSemiTransparent, blendMode, - bldcnt, windowsEnabled, winMask, + color = BlendSpritePixel(color, gx, output, layerIds, isSemiTransparent, blendMode, bldcnt, windowsEnabled, winMask, eva, evb, evy); if (windowsEnabled && winMask && !(winMask[gx] & WINMASK_OBJ)) @@ -922,8 +968,10 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, lmx = ApplyMosaicSprX(gx) - x; texX = lmx + halfW; texY = localY + halfH; - if (flipX) texX = width - texX - 1; - if (flipY) texY = height - texY - 1; + if (flipX) + texX = width - texX - 1; + if (flipY) + texY = height - texY - 1; } else { int lmx = localX; int lmy = localY; @@ -932,17 +980,17 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, lmy = ApplyMosaicSprY(vcount) - y; } // apply 2x2 affine matrix (8.8 fixed point) - texX = ((matrix[0][0] * lmx + matrix[0][1] * lmy) >> 8) + (width / 2); + texX = ((matrix[0][0] * lmx + matrix[0][1] * lmy) >> 8) + (width / 2); texY = ((matrix[1][0] * lmx + matrix[1][1] * lmy) >> 8) + (height / 2); } if (texX < 0 || texY < 0 || texX >= (int)width || texY >= (int)height) continue; - int tileX = texX & 7; - int tileY = texY & 7; - int blockX = texX >> 3; - int blockY = texY >> 3; + int tileX = texX & 7; + int tileY = texY & 7; + int blockX = texX >> 3; + int 
blockY = texY >> 3; int blockOffset = blockY * (REG_DISPCNT & 0x40 ? ((int)width >> 3) : 16) + blockX; uint16_t pixel = 0; @@ -951,8 +999,10 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, if (!is8bpp) { int tdi = ((blockOffset + oam->split.tileNum) << 5) + (tileY << 2) + (tileX >> 1); pixel = tiledata[tdi]; - if (tileX & 1) pixel >>= 4; - else pixel &= 0xF; + if (tileX & 1) + pixel >>= 4; + else + pixel &= 0xF; pixpal = sprpal + (oam->split.paletteNum << 4); } else { pixel = tiledata[((blockOffset * 2 + oam->split.tileNum) << 5) + (tileY << 3) + tileX]; @@ -971,10 +1021,8 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, } if (layerIds && blendMode != 0) - color = BlendSpritePixel(color, gx, output, layerIds, - isSemiTransparent, blendMode, - bldcnt, windowsEnabled, winMask, - eva, evb, evy); + color = BlendSpritePixel(color, gx, output, layerIds, isSemiTransparent, blendMode, bldcnt, windowsEnabled, winMask, eva, + evb, evy); if (windowsEnabled && winMask && !(winMask[gx] & WINMASK_OBJ)) continue; @@ -988,21 +1036,21 @@ static void DrawSpritesAtPriority(int priority, uint16_t vcount, static void DrawScanline(uint16_t *pixels, uint16_t vcount) { - unsigned int mode = REG_DISPCNT & 3; - unsigned int numBGs = (mode == 0) ? 4 : 3; - unsigned int blendMode = (REG_BLDCNT >> 6) & 3; + unsigned int mode = REG_DISPCNT & 3; + unsigned int numBGs = (mode == 0) ? 
4 : 3; + unsigned int blendMode = (REG_BLDCNT >> 6) & 3; unsigned int enabledBgs = (REG_DISPCNT >> 8) & 0xF; // sort bgs by priority uint16_t bgcnts[4]; - char bgPriority[4]; - char bgsByPri[4][4]; - char bgsByPriCount[4] = {0, 0, 0, 0}; + char bgPriority[4]; + char bgsByPri[4][4]; + char bgsByPriCount[4] = { 0, 0, 0, 0 }; for (int bg = 0; bg < (int)numBGs; bg++) { uint16_t cnt = *(uint16_t *)(REG_ADDR_BG0CNT + bg * 2); - bgcnts[bg] = cnt; - uint16_t pri = cnt & 3; + bgcnts[bg] = cnt; + uint16_t pri = cnt & 3; bgPriority[bg] = pri; bgsByPri[pri][bgsByPriCount[pri]] = bg; bgsByPriCount[pri]++; @@ -1016,10 +1064,10 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) static uint16_t winMask[DISPLAY_WIDTH]; if (REG_DISPCNT & DISPCNT_WIN0_ON) { - win0Bot = WIN_GET_HIGHER(REG_WIN0V); - win0Top = WIN_GET_LOWER(REG_WIN0V); + win0Bot = WIN_GET_HIGHER(REG_WIN0V); + win0Top = WIN_GET_LOWER(REG_WIN0V); win0Right = WIN_GET_HIGHER(REG_WIN0H); - win0Left = WIN_GET_LOWER(REG_WIN0H); + win0Left = WIN_GET_LOWER(REG_WIN0H); if (win0Top > win0Bot) win0Active = (vcount >= win0Top || vcount < win0Bot); else @@ -1027,10 +1075,10 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) windowsEnabled = true; } if (REG_DISPCNT & DISPCNT_WIN1_ON) { - win1Bot = WIN_GET_HIGHER(REG_WIN1V); - win1Top = WIN_GET_LOWER(REG_WIN1V); + win1Bot = WIN_GET_HIGHER(REG_WIN1V); + win1Top = WIN_GET_LOWER(REG_WIN1V); win1Right = WIN_GET_HIGHER(REG_WIN1H); - win1Left = WIN_GET_LOWER(REG_WIN1H); + win1Left = WIN_GET_LOWER(REG_WIN1H); if (win1Top > win1Bot) win1Active = (vcount >= win1Top || vcount < win1Bot); else @@ -1057,14 +1105,14 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) // layerIds tracks who wrote each pixel so alpha blend can find target-b static uint8_t layerIds[DISPLAY_WIDTH]; bool needLayerIds = (blendMode != 0 || windowsEnabled); - uint8_t *lids = needLayerIds ? layerIds : NULL; + uint8_t *lids = needLayerIds ? 
layerIds : NULL; uint16_t *wmask = windowsEnabled ? winMask : NULL; if (needLayerIds) memset(layerIds, LAYER_BACKDROP, DISPLAY_WIDTH); // grab blend regs once per scanline - unsigned int bldcnt = REG_BLDCNT; + unsigned int bldcnt = REG_BLDCNT; unsigned int bld_eva = REG_BLDALPHA & 0x1F; unsigned int bld_evb = (REG_BLDALPHA >> 8) & 0x1F; unsigned int bld_evy = REG_BLDY & 0x1F; @@ -1072,10 +1120,8 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) // obj window pass -- these sprites modify the window mask, not the framebuffer if (windowsEnabled && (REG_DISPCNT & DISPCNT_OBJWIN_ON) && (REG_DISPCNT & DISPCNT_OBJ_ON)) { for (int pri = 0; pri < 4; pri++) - DrawSpritesAtPriority(pri, vcount, pixels, lids, - windowsEnabled, wmask, blendMode, - /*objWinOnly=*/true, - bldcnt, bld_eva, bld_evb, bld_evy); + DrawSpritesAtPriority(pri, vcount, pixels, lids, windowsEnabled, wmask, blendMode, + /*objWinOnly=*/true, bldcnt, bld_eva, bld_evb, bld_evy); } // back-to-front: priority 3 first, 0 last (0 is topmost) @@ -1088,54 +1134,40 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) if (!needLayerIds) { switch (mode) { case 0: - RenderTextBG(bg, bgcnts[bg], - *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), - *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), - vcount, pixels); + RenderTextBG(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), + vcount, pixels); break; case 1: if (bg == 2) RenderAffineBG(bg, bgcnts[bg], vcount, pixels); else - RenderTextBG(bg, bgcnts[bg], - *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), - *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), - vcount, pixels); + RenderTextBG(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), + vcount, pixels); break; } } else { switch (mode) { case 0: - RenderTextBGBlend(bg, bgcnts[bg], - *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), - *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), - vcount, pixels, lids, blendMode, - windowsEnabled, 
wmask, - bldcnt, bld_eva, bld_evb, bld_evy); + RenderTextBGBlend(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), + *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), vcount, pixels, lids, blendMode, windowsEnabled, wmask, + bldcnt, bld_eva, bld_evb, bld_evy); break; case 1: if (bg == 2) - RenderAffineBGBlend(bg, bgcnts[bg], - vcount, pixels, lids, blendMode, - windowsEnabled, wmask, - bldcnt, bld_eva, bld_evb, bld_evy); + RenderAffineBGBlend(bg, bgcnts[bg], vcount, pixels, lids, blendMode, windowsEnabled, wmask, bldcnt, bld_eva, + bld_evb, bld_evy); else - RenderTextBGBlend(bg, bgcnts[bg], - *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), - *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), - vcount, pixels, lids, blendMode, - windowsEnabled, wmask, - bldcnt, bld_eva, bld_evb, bld_evy); + RenderTextBGBlend(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), + *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), vcount, pixels, lids, blendMode, windowsEnabled, + wmask, bldcnt, bld_eva, bld_evb, bld_evy); break; } } } if (REG_DISPCNT & DISPCNT_OBJ_ON) - DrawSpritesAtPriority(pri, vcount, pixels, lids, - windowsEnabled, wmask, blendMode, - /*objWinOnly=*/false, - bldcnt, bld_eva, bld_evb, bld_evy); + DrawSpritesAtPriority(pri, vcount, pixels, lids, windowsEnabled, wmask, blendMode, + /*objWinOnly=*/false, bldcnt, bld_eva, bld_evb, bld_evy); } } @@ -1164,4 +1196,4 @@ void DrawFrame_Fast(uint16_t *pixels) } } -#endif +#endif From 6b3bca250e892c61d0c52ac4e35b9b1d8211214f Mon Sep 17 00:00:00 2001 From: kikugrave Date: Sun, 15 Feb 2026 11:17:47 -0800 Subject: [PATCH 04/13] Fix lint 3 --- src/lib/m4a/m4a.c | 4 ++-- src/platform/pret_sdl/sdl2.c | 4 ++-- src/platform/shared/rendering/sw_renderer_fast.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib/m4a/m4a.c b/src/lib/m4a/m4a.c index 108dae897..329741bf3 100644 --- a/src/lib/m4a/m4a.c +++ b/src/lib/m4a/m4a.c @@ -1369,7 +1369,7 @@ void MP2K_event_memacc(struct MP2KPlayerState *mplayInfo, struct MP2KTrack *trac 
return; } -cond_true: { +cond_true : { { void (*func)(struct MP2KPlayerState *, struct MP2KTrack *) = *(&gMPlayJumpTable[1]); func(mplayInfo, track); @@ -1377,7 +1377,7 @@ cond_true: { return; } -cond_false: { +cond_false : { #ifdef __mips__ // Align to 4 bytes (mPtr adds .balign 4 on MIPS) u8 *ptrStart = (u8 *)(((uintptr_t)track->cmdPtr + 3) & ~(uintptr_t)3); diff --git a/src/platform/pret_sdl/sdl2.c b/src/platform/pret_sdl/sdl2.c index 044d2ab4c..d253bd418 100644 --- a/src/platform/pret_sdl/sdl2.c +++ b/src/platform/pret_sdl/sdl2.c @@ -114,7 +114,7 @@ double accumulator = 0.0; static FILE *sSaveFile = NULL; extern void AgbMain(void); -void DoSoftReset(void) { }; +void DoSoftReset(void) {}; void ProcessSDLEvents(void); void VDraw(SDL_Texture *texture); @@ -1942,7 +1942,7 @@ void VDraw(SDL_Texture *texture) { #if RENDERER == RENDERER_SOFTWARE_FAST { - extern void DrawFrame_Fast(uint16_t *pixels); + extern void DrawFrame_Fast(uint16_t * pixels); DrawFrame_Fast(gameImage); } #else diff --git a/src/platform/shared/rendering/sw_renderer_fast.c b/src/platform/shared/rendering/sw_renderer_fast.c index 484ddf042..9a1283d2e 100644 --- a/src/platform/shared/rendering/sw_renderer_fast.c +++ b/src/platform/shared/rendering/sw_renderer_fast.c @@ -53,7 +53,7 @@ extern const u8 gOamShapesSizes[12][2]; #define ApplyMosaicSprY(y) ((y) - ((y) % (MOSAIC_SPR_Y + 1))) // tilemap entry fields -#define TILE_NUM(e) ((e) & 0x3FF) +#define TILE_NUM(e) ((e)&0x3FF) #define TILE_PALETTE(e) (((e) >> 12) & 0xF) #define TILE_HFLIP(e) ((e) & (1 << 10)) #define TILE_VFLIP(e) ((e) & (1 << 11)) From efeebe0df70196a3127e63a5db3e52a7b84031d7 Mon Sep 17 00:00:00 2001 From: Oliver Bell Date: Mon, 16 Feb 2026 00:40:34 +0000 Subject: [PATCH 05/13] fixes, add ps2 build --- .gitignore | 1 + Makefile | 32 +++- asm/macros/portable.inc | 3 +- config.mk | 1 + include/gba/defines.h | 2 +- include/gba/types.h | 4 + ps2/ntsc/SYSTEM.CNF | 3 + src/lib/m4a/m4a.c | 6 - src/platform/pret_sdl/sdl2.c | 190 
++++++++++++++------ src/platform/shared/audio/m4a_sound_mixer.c | 17 +- 10 files changed, 179 insertions(+), 80 deletions(-) create mode 100644 ps2/ntsc/SYSTEM.CNF diff --git a/.gitignore b/.gitignore index 8d9dfac69..cf418a746 100644 --- a/.gitignore +++ b/.gitignore @@ -85,6 +85,7 @@ libagbsyscall/*.s *.exe *.dll *.sdl +*.iso # PSP build outputs EBOOT.PBP diff --git a/Makefile b/Makefile index 16f001474..c7979cd8f 100644 --- a/Makefile +++ b/Makefile @@ -58,6 +58,8 @@ else ifeq ($(PLATFORM),psp) PSPSDK := $(PSPDEV)/psp/sdk export PATH := $(PSPDEV)/bin:$(PATH) PREFIX := psp- +else ifeq ($(PLATFORM),ps2) + PREFIX := mips64r5900el-ps2-elf- else # Native ifneq ($(PLATFORM),sdl) @@ -130,6 +132,10 @@ else ifeq ($(PLATFORM),psp) ROM := EBOOT.PBP ELF := $(BUILD_NAME).psp.elf MAP := $(BUILD_NAME).psp.map +else ifeq ($(PLATFORM),ps2) +ROM := $(BUILD_NAME).$(PLATFORM).iso +ELF := $(ROM:.iso=.elf) +MAP := $(ROM:.iso=.map) else ROM := $(BUILD_NAME).$(PLATFORM).exe ELF := $(ROM:.exe=.elf) @@ -169,6 +175,8 @@ else ifeq ($(PLATFORM),sdl) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") else ifeq ($(PLATFORM),psp) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*") +else ifeq ($(PLATFORM),ps2) +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") else ifeq ($(PLATFORM),sdl_win32) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") else ifeq ($(PLATFORM),win32) @@ -240,6 +248,9 @@ else else ifeq ($(PLATFORM),psp) CC1FLAGS += -G0 CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 -D SDL_MAIN_HANDLED -I$(PSPDEV)/psp/include/SDL2 -I$(PSPDEV)/psp/include -I$(PSPSDK)/include -D_PSP_FW_VERSION=600 + else ifeq ($(PLATFORM),ps2) + CC1FLAGS += -G0 -Wno-parentheses-equality -Wno-unused-value -ffast-math + CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 
-D SDL_MAIN_HANDLED -D_EE -D__PS2__ -I$(PS2SDK)/common/include -I$(PS2SDK)/ee/include -I$(PS2SDK)/ports/include $(shell $(PS2SDK)/ports/bin/sdl2-config --cflags) else ifeq ($(PLATFORM),sdl_win32) CPPFLAGS += -D TITLE_BAR=$(BUILD_NAME).$(PLATFORM) -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 $(SDL_MINGW_FLAGS) else ifeq ($(PLATFORM),win32) @@ -266,6 +277,8 @@ else CPP := $(CC1) -E else ifeq ($(PLATFORM), psp) CPP := $(CC1) -E + else ifeq ($(PLATFORM), ps2) + ASFLAGS += -msingle-float endif # Allow file input through stdin on modern GCC and set it to "compile only" CC1FLAGS += -x c -S @@ -278,6 +291,8 @@ else ifeq ($(PLATFORM),psp) # -O3 for PSP (Allegrex MIPS, small D-cache) CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer + else ifeq ($(PLATFORM),ps2) + CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer else CC1FLAGS += -O2 endif @@ -322,6 +337,8 @@ else ifeq ($(PLATFORM),sdl) # PSP else ifeq ($(PLATFORM),psp) MAP_FLAG := -Xlinker -Map= +else ifeq ($(PLATFORM),ps2) + MAP_FLAG := -Xlinker -Map= # Win32 else MAP_FLAG := -Xlinker -Map= @@ -334,6 +351,8 @@ else ifeq ($(PLATFORM),sdl) LIBS := $(shell sdl2-config --cflags --libs) else ifeq ($(PLATFORM),psp) LIBS := -L$(PSPDEV)/psp/lib -L$(PSPSDK)/lib -lSDL2 -lm -lGL -lpspvram -lpspaudio -lpspvfpu -lpspdisplay -lpspgu -lpspge -lpsphprm -lpspctrl -lpsppower -lpspdebug -lpspnet -lpspnet_apctl -Wl,-zmax-page-size=128 +else ifeq ($(PLATFORM),ps2) + LIBS := -lSDL2 $(shell $(PS2SDK)/ports/bin/sdl2-config --libs) -T$(PS2SDK)/ee/startup/linkfile -L$(PS2SDK)/common/lib -L$(PS2SDK)/ee/lib -L$(PS2DEV)/gsKit/lib -Wl,-zmax-page-size=128 else ifeq ($(PLATFORM),sdl_win32) LIBS := -mwin32 -lkernel32 -lwinmm -lmingw32 -lxinput $(SDL_MINGW_LIBS) else ifeq ($(PLATFORM), win32) @@ -343,7 +362,7 @@ endif #### MAIN TARGETS #### # these commands will run regardless of deps being completed -.PHONY: clean tools tidy clean-tools $(TOOLDIRS) libagbsyscall +.PHONY: clean tools tidy clean-tools $(TOOLDIRS) libagbsyscall ps2 # Ensure 
required directories exist $(shell mkdir -p $(C_BUILDDIR) $(ASM_BUILDDIR) $(DATA_ASM_BUILDDIR) $(SOUND_ASM_BUILDDIR) $(SONG_BUILDDIR) $(MID_BUILDDIR)) @@ -424,7 +443,7 @@ clean-tools: tidy: $(RM) -r build/* $(RM) SDL2.dll - $(RM) $(BUILD_NAME)*.exe $(BUILD_NAME)*.elf $(BUILD_NAME)*.map $(BUILD_NAME)*.sdl $(BUILD_NAME)*.gba + $(RM) $(BUILD_NAME)*.exe $(BUILD_NAME)*.elf $(BUILD_NAME)*.map $(BUILD_NAME)*.sdl $(BUILD_NAME)*.gba $(BUILD_NAME)*.iso $(RM) EBOOT.PBP PARAM.SFO usa_beta: ; @$(MAKE) GAME_REGION=USA GAME_VARIANT=BETA @@ -439,6 +458,8 @@ sdl: ; @$(MAKE) PLATFORM=sdl psp: ; @$(MAKE) PLATFORM=psp +ps2: ; @$(MAKE) PLATFORM=ps2 + tas_sdl: ; @$(MAKE) sdl TAS_TESTING=1 sdl_win32: @@ -489,7 +510,7 @@ data/mb_chao_garden_japan.gba.lz: data/mb_chao_garden_japan.gba %.bin: %.aif ; $(AIF) $< $@ -$(ELF): $(OBJS) libagbsyscall +$(ELF): $(OBJS) ifeq ($(PLATFORM),gba) @echo "$(LD) -T $(LDSCRIPT) $(MAP_FLAG) $(MAP) -o $@" @$(CPP) -P $(CPPFLAGS) $(LDSCRIPT) > $(OBJ_DIR)/$(LDSCRIPT) @@ -512,6 +533,11 @@ else ifeq ($(PLATFORM),psp) psp-strip $< -o $(BUILD_NAME).psp_strip.elf pack-pbp $@ PARAM.SFO NULL NULL NULL NULL NULL $(BUILD_NAME).psp_strip.elf NULL -rm -f $(BUILD_NAME).psp_strip.elf +else ifeq ($(PLATFORM),ps2) + @echo Creating $(ROM) from $(ELF) + @cp -r ps2/ntsc $(OBJ_DIR)/iso + @cp $< $(OBJ_DIR)/iso/$(PS2_GAME_CODE) + @mkisofs -o $(ROM) $(OBJ_DIR)/iso/ else $(OBJCOPY) -O pei-x86-64 $< $@ endif diff --git a/asm/macros/portable.inc b/asm/macros/portable.inc index e3f80ed9d..fd66638b8 100644 --- a/asm/macros/portable.inc +++ b/asm/macros/portable.inc @@ -11,8 +11,7 @@ #if defined(__aarch64__) || defined(__x86_64__) .quad \value #elif defined(__mips__) - .balign 4 - .int \value + .4byte \value #else .int \value #endif diff --git a/config.mk b/config.mk index 5f687443a..15affde18 100644 --- a/config.mk +++ b/config.mk @@ -63,6 +63,7 @@ MAKER_CODE := 78 BUILD_NAME := sa2 TITLE := SONICADVANC2 GAME_CODE := A2N +PS2_GAME_CODE := SLUS_054.02 # Revision diff --git 
a/include/gba/defines.h b/include/gba/defines.h index 190284510..6b9d09afc 100644 --- a/include/gba/defines.h +++ b/include/gba/defines.h @@ -39,7 +39,7 @@ #define OAM_ENTRY_COUNT 128 #if PORTABLE // NOTE: Used in gba/types.h, so they have to be defined before the #include -#ifdef __PSP__ +#if defined(__PSP__) // PSP: Use GBA-native resolution, SDL scales to 480x272 #define DISPLAY_WIDTH 240 #define DISPLAY_HEIGHT 160 diff --git a/include/gba/types.h b/include/gba/types.h index 72e721ad7..fa3234357 100644 --- a/include/gba/types.h +++ b/include/gba/types.h @@ -20,6 +20,9 @@ typedef struct __attribute__((packed)) name struct_body name; #endif +#ifdef __PS2__ +#include +#else typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; @@ -28,6 +31,7 @@ typedef int8_t s8; typedef int16_t s16; typedef int32_t s32; typedef int64_t s64; +#endif #if (GAME == GAME_SA1) typedef u8 MetatileIndexType; diff --git a/ps2/ntsc/SYSTEM.CNF b/ps2/ntsc/SYSTEM.CNF new file mode 100644 index 000000000..9c440527d --- /dev/null +++ b/ps2/ntsc/SYSTEM.CNF @@ -0,0 +1,3 @@ +BOOT2 = cdrom0:\SLUS_054.02;1 +VER = 1.00 +VMODE = NTSC \ No newline at end of file diff --git a/src/lib/m4a/m4a.c b/src/lib/m4a/m4a.c index 329741bf3..7b90fe3a9 100644 --- a/src/lib/m4a/m4a.c +++ b/src/lib/m4a/m4a.c @@ -1378,13 +1378,7 @@ cond_true : { } cond_false : { -#ifdef __mips__ - // Align to 4 bytes (mPtr adds .balign 4 on MIPS) - u8 *ptrStart = (u8 *)(((uintptr_t)track->cmdPtr + 3) & ~(uintptr_t)3); - track->cmdPtr = ptrStart + 4; -#else track->cmdPtr += 4; -#endif } } diff --git a/src/platform/pret_sdl/sdl2.c b/src/platform/pret_sdl/sdl2.c index d253bd418..1a77546ea 100644 --- a/src/platform/pret_sdl/sdl2.c +++ b/src/platform/pret_sdl/sdl2.c @@ -98,8 +98,11 @@ bool paused = false; bool stepOneFrame = false; bool headless = false; +#if defined(__PSP__) || defined(__PS2__) +static SDL_Joystick *joystick = NULL; +#endif + #ifdef __PSP__ -static SDL_Joystick *pspJoystick = NULL; #define PSP_SCREEN_W 480 #define 
PSP_SCREEN_H 272 static SDL_Rect pspDestRect; @@ -133,12 +136,57 @@ void *Platform_malloc(size_t numBytes) { return HeapAlloc(GetProcessHeap(), HEAP void Platform_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); } #endif +#ifdef __PS2__ +// TODO: clean these for what is needed +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +void reset_IOP() +{ + SifInitRpc(0); + while (!SifIopReset(NULL, 0)) { } // Comment this line if you want to "debug" through ps2link + while (!SifIopSync()) { } +} + +static void prepare_IOP() +{ + reset_IOP(); + SifInitRpc(0); + sbv_patch_enable_lmb(); + sbv_patch_disable_prefix_check(); +} + +static void init_drivers() +{ + init_only_boot_ps2_filesystem_driver(); + init_memcard_driver(true); +} + +static void deinit_drivers() +{ + deinit_memcard_driver(true); + deinit_only_boot_ps2_filesystem_driver(); +} +#endif + int main(int argc, char **argv) { #ifdef __PSP__ setupPspCallbacks(); #endif +#ifdef __PS2__ + prepare_IOP(); +#endif + const char *headlessEnv = getenv("HEADLESS"); if (headlessEnv && strcmp(headlessEnv, "true") == 0) { @@ -161,7 +209,9 @@ int main(int argc, char **argv) freopen("CON", "w", stdout); #endif +#ifndef __PS2__ ReadSaveFile("sa2.sav"); +#endif // Prevent the multiplayer screen from being drawn ( see core.c:EngineInit() ) REG_RCNT = 0x8000; @@ -179,9 +229,9 @@ int main(int argc, char **argv) return 1; } -#ifdef __PSP__ +#if defined(__PSP__) || defined(__PS2__) if (SDL_NumJoysticks() > 0) { - pspJoystick = SDL_JoystickOpen(0); + joystick = SDL_JoystickOpen(0); } #endif @@ -193,6 +243,8 @@ int main(int argc, char **argv) #ifdef __PSP__ sdlWindow = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 480, 272, SDL_WINDOW_SHOWN); +#elif defined(__PS2__) + sdlWindow = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 640, 448, SDL_WINDOW_SHOWN); #else sdlWindow = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 
DISPLAY_WIDTH * videoScale, DISPLAY_HEIGHT * videoScale, SDL_WINDOW_SHOWN | SDL_WINDOW_RESIZABLE); @@ -224,6 +276,8 @@ int main(int argc, char **argv) sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_ACCELERATED); if (sdlRenderer == NULL) sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, 0); +#elif defined(__PS2__) + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_ACCELERATED); #else sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_PRESENTVSYNC); #endif @@ -269,6 +323,12 @@ int main(int argc, char **argv) } #endif +#ifdef __PS2__ + SDL_SetTextureScaleMode(sdlTexture, SDL_ScaleModeLinear); + // For some reason we are WAY blown out on the PS2 + SDL_SetTextureColorMod(sdlTexture, 140, 140, 140); +#endif + #if ENABLE_AUDIO SDL_AudioSpec want; @@ -320,13 +380,15 @@ void VBlankIntrWait(void) bool frameAvailable = TRUE; bool frameDrawn = false; -#ifdef __PSP__ - static int psp_frames_skipped = 0; -#define PSP_MAX_FRAME_SKIP 2 +#if defined(__PSP__) || defined(__PS2__) + static int frames_skipped = 0; +#define MAX_FRAME_SKIP 2 #endif while (isRunning) { +#if !defined(__PS2__) && !defined(__PSP__) ProcessSDLEvents(); +#endif if (!paused || stepOneFrame) { double dt = fixedTimestep / timeScale; // TODO: Fix speedup @@ -354,17 +416,17 @@ void VBlankIntrWait(void) while (accumulator >= dt) { REG_KEYINPUT = KEYS_MASK ^ Platform_GetKeyInput(); if (frameAvailable) { -#ifdef __PSP__ +#if defined(__PSP__) || defined(__PS2__) // frame skip: let game logic catch up when behind - if (accumulator >= dt * 2.0 && psp_frames_skipped < PSP_MAX_FRAME_SKIP) { - psp_frames_skipped++; + if (accumulator >= dt * 2.0 && frames_skipped < MAX_FRAME_SKIP) { + frames_skipped++; frameAvailable = FALSE; HANDLE_VBLANK_INTRS(); accumulator -= dt; newFrameRequested = TRUE; return; } - psp_frames_skipped = 0; + frames_skipped = 0; #endif VDraw(sdlTexture); frameAvailable = FALSE; @@ -395,6 +457,10 @@ void VBlankIntrWait(void) SDL_Delay(1); } #else +#ifdef __PS2__ + // 
Allow audio to play + DelayThread(800); +#endif SDL_RenderClear(sdlRenderer); SDL_RenderCopy(sdlRenderer, sdlTexture, NULL, NULL); @@ -491,52 +557,68 @@ static void CloseSaveFile() static u16 keys; -#ifdef __PSP__ -#define PSP_BTN_TRIANGLE 0 -#define PSP_BTN_CIRCLE 1 -#define PSP_BTN_CROSS 2 -#define PSP_BTN_SQUARE 3 -#define PSP_BTN_LTRIGGER 4 -#define PSP_BTN_RTRIGGER 5 -#define PSP_BTN_DOWN 6 -#define PSP_BTN_LEFT 7 -#define PSP_BTN_UP 8 -#define PSP_BTN_RIGHT 9 -#define PSP_BTN_SELECT 10 -#define PSP_BTN_START 11 - -static u16 PollPSPButtons(void) +#if defined(__PSP__) || defined(__PS2__) + +#ifdef __PS2__ +#define BTN_TRIANGLE 12 +#define BTN_CIRCLE 13 +#define BTN_CROSS 14 +#define BTN_SQUARE 15 +#define BTN_LTRIGGER 10 +#define BTN_RTRIGGER 11 +#define BTN_DOWN 6 +#define BTN_LEFT 7 +#define BTN_UP 4 +#define BTN_RIGHT 5 +#define BTN_SELECT 0 +#define BTN_START 3 +#else +#define BTN_TRIANGLE 0 +#define BTN_CIRCLE 1 +#define BTN_CROSS 2 +#define BTN_SQUARE 3 +#define BTN_LTRIGGER 4 +#define BTN_RTRIGGER 5 +#define BTN_DOWN 6 +#define BTN_LEFT 7 +#define BTN_UP 8 +#define BTN_RIGHT 9 +#define BTN_SELECT 10 +#define BTN_START 11 +#endif + +static u16 PollJoystickButtons(void) { - u16 pspKeys = 0; - if (pspJoystick == NULL) - return pspKeys; + u16 keys = 0; + if (joystick == NULL) + return keys; SDL_JoystickUpdate(); - if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_CROSS)) - pspKeys |= A_BUTTON; - if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_CIRCLE)) - pspKeys |= B_BUTTON; - if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_SQUARE)) - pspKeys |= B_BUTTON; // Square also B - if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_START)) - pspKeys |= START_BUTTON; - if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_SELECT)) - pspKeys |= SELECT_BUTTON; - if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_LTRIGGER)) - pspKeys |= L_BUTTON; - if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_RTRIGGER)) - pspKeys |= R_BUTTON; - if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_UP)) - 
pspKeys |= DPAD_UP; - if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_DOWN)) - pspKeys |= DPAD_DOWN; - if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_LEFT)) - pspKeys |= DPAD_LEFT; - if (SDL_JoystickGetButton(pspJoystick, PSP_BTN_RIGHT)) - pspKeys |= DPAD_RIGHT; - - return pspKeys; + if (SDL_JoystickGetButton(joystick, BTN_CROSS)) + keys |= A_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_CIRCLE)) + keys |= B_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_SQUARE)) + keys |= B_BUTTON; // Square also B + if (SDL_JoystickGetButton(joystick, BTN_START)) + keys |= START_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_SELECT)) + keys |= SELECT_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_LTRIGGER)) + keys |= L_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_RTRIGGER)) + keys |= R_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_UP)) + keys |= DPAD_UP; + if (SDL_JoystickGetButton(joystick, BTN_DOWN)) + keys |= DPAD_DOWN; + if (SDL_JoystickGetButton(joystick, BTN_LEFT)) + keys |= DPAD_LEFT; + if (SDL_JoystickGetButton(joystick, BTN_RIGHT)) + keys |= DPAD_RIGHT; + + return keys; } #endif @@ -680,8 +762,8 @@ u16 Platform_GetKeyInput(void) return (gamepadKeys != 0) ? 
gamepadKeys : keys; #endif -#ifdef __PSP__ - return keys | PollPSPButtons(); +#if defined(__PSP__) || defined(__PS2__) + return keys | PollJoystickButtons(); #endif return keys; diff --git a/src/platform/shared/audio/m4a_sound_mixer.c b/src/platform/shared/audio/m4a_sound_mixer.c index f7a8927b3..67e276fb1 100644 --- a/src/platform/shared/audio/m4a_sound_mixer.c +++ b/src/platform/shared/audio/m4a_sound_mixer.c @@ -368,17 +368,10 @@ void MP2K_event_fine(struct MP2KPlayerState *unused, struct MP2KTrack *track) track->status = 0; } -// mPtr aligns to 4 bytes on MIPS; match that here before reading pointer data -#ifdef __mips__ -static inline u8 *alignCmdPtr4(u8 *p) { return (u8 *)(((uintptr_t)p + 3) & ~(uintptr_t)3); } -#else -#define alignCmdPtr4(p) (p) -#endif - // Sets the track's cmdPtr to the specified address. void MP2K_event_goto(struct MP2KPlayerState *unused, struct MP2KTrack *track) { - u8 *cmdPtr = alignCmdPtr4(track->cmdPtr); + u8 *cmdPtr = track->cmdPtr; uintptr_t addr = 0; for (size_t i = sizeof(uintptr_t) - 1; i > 0; i--) { addr |= cmdPtr[i]; @@ -393,9 +386,7 @@ void MP2K_event_patt(struct MP2KPlayerState *unused, struct MP2KTrack *track) { u8 level = track->patternLevel; if (level < 3) { - // Return address is past the aligned pointer data - u8 *ptrStart = alignCmdPtr4(track->cmdPtr); - track->patternStack[level] = ptrStart + sizeof(u8 *); + track->patternStack[level] = track->cmdPtr + sizeof(u8 *); track->patternLevel++; MP2K_event_goto(unused, track); } else { @@ -428,9 +419,7 @@ void MP2K_event_rept(struct MP2KPlayerState *unused, struct MP2KTrack *track) MP2K_event_goto(unused, track); } else { track->repeatCount = 0; - // Skip past the aligned pointer data - u8 *ptrStart = alignCmdPtr4(track->cmdPtr); - track->cmdPtr = ptrStart + sizeof(u8 *); + track->cmdPtr = track->cmdPtr + sizeof(u8 *); } } } From 12e9621e6ef64162d55aea1987e6af7b8ac47ac5 Mon Sep 17 00:00:00 2001 From: Oliver Bell Date: Mon, 16 Feb 2026 19:33:01 +0000 Subject: [PATCH 06/13] 
convert to sdl_ps2 --- Makefile | 30 +++++++++++++++++------------- src/lib/m4a/m4a.c | 3 +-- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index c7979cd8f..3af4ea831 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ else ifeq ($(PLATFORM),psp) PSPSDK := $(PSPDEV)/psp/sdk export PATH := $(PSPDEV)/bin:$(PATH) PREFIX := psp- -else ifeq ($(PLATFORM),ps2) +else ifeq ($(PLATFORM),sdl_ps2) PREFIX := mips64r5900el-ps2-elf- else # Native @@ -132,7 +132,7 @@ else ifeq ($(PLATFORM),psp) ROM := EBOOT.PBP ELF := $(BUILD_NAME).psp.elf MAP := $(BUILD_NAME).psp.map -else ifeq ($(PLATFORM),ps2) +else ifeq ($(PLATFORM),sdl_ps2) ROM := $(BUILD_NAME).$(PLATFORM).iso ELF := $(ROM:.iso=.elf) MAP := $(ROM:.iso=.map) @@ -175,7 +175,7 @@ else ifeq ($(PLATFORM),sdl) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") else ifeq ($(PLATFORM),psp) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*") -else ifeq ($(PLATFORM),ps2) +else ifeq ($(PLATFORM),sdl_ps2) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") else ifeq ($(PLATFORM),sdl_win32) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") @@ -248,7 +248,7 @@ else else ifeq ($(PLATFORM),psp) CC1FLAGS += -G0 CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 -D SDL_MAIN_HANDLED -I$(PSPDEV)/psp/include/SDL2 -I$(PSPDEV)/psp/include -I$(PSPSDK)/include -D_PSP_FW_VERSION=600 - else ifeq ($(PLATFORM),ps2) + else ifeq ($(PLATFORM),sdl_ps2) CC1FLAGS += -G0 -Wno-parentheses-equality -Wno-unused-value -ffast-math CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 -D SDL_MAIN_HANDLED -D_EE -D__PS2__ -I$(PS2SDK)/common/include -I$(PS2SDK)/ee/include -I$(PS2SDK)/ports/include $(shell $(PS2SDK)/ports/bin/sdl2-config --cflags) else ifeq ($(PLATFORM),sdl_win32) @@ -277,7 +277,7 
@@ else CPP := $(CC1) -E else ifeq ($(PLATFORM), psp) CPP := $(CC1) -E - else ifeq ($(PLATFORM), ps2) + else ifeq ($(PLATFORM), sdl_ps2) ASFLAGS += -msingle-float endif # Allow file input through stdin on modern GCC and set it to "compile only" @@ -291,7 +291,7 @@ else ifeq ($(PLATFORM),psp) # -O3 for PSP (Allegrex MIPS, small D-cache) CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer - else ifeq ($(PLATFORM),ps2) + else ifeq ($(PLATFORM),sdl_ps2) CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer else CC1FLAGS += -O2 @@ -337,7 +337,7 @@ else ifeq ($(PLATFORM),sdl) # PSP else ifeq ($(PLATFORM),psp) MAP_FLAG := -Xlinker -Map= -else ifeq ($(PLATFORM),ps2) +else ifeq ($(PLATFORM),sdl_ps2) MAP_FLAG := -Xlinker -Map= # Win32 else @@ -351,7 +351,7 @@ else ifeq ($(PLATFORM),sdl) LIBS := $(shell sdl2-config --cflags --libs) else ifeq ($(PLATFORM),psp) LIBS := -L$(PSPDEV)/psp/lib -L$(PSPSDK)/lib -lSDL2 -lm -lGL -lpspvram -lpspaudio -lpspvfpu -lpspdisplay -lpspgu -lpspge -lpsphprm -lpspctrl -lpsppower -lpspdebug -lpspnet -lpspnet_apctl -Wl,-zmax-page-size=128 -else ifeq ($(PLATFORM),ps2) +else ifeq ($(PLATFORM),sdl_ps2) LIBS := -lSDL2 $(shell $(PS2SDK)/ports/bin/sdl2-config --libs) -T$(PS2SDK)/ee/startup/linkfile -L$(PS2SDK)/common/lib -L$(PS2SDK)/ee/lib -L$(PS2DEV)/gsKit/lib -Wl,-zmax-page-size=128 else ifeq ($(PLATFORM),sdl_win32) LIBS := -mwin32 -lkernel32 -lwinmm -lmingw32 -lxinput $(SDL_MINGW_LIBS) @@ -362,7 +362,7 @@ endif #### MAIN TARGETS #### # these commands will run regardless of deps being completed -.PHONY: clean tools tidy clean-tools $(TOOLDIRS) libagbsyscall ps2 +.PHONY: clean tools tidy clean-tools $(TOOLDIRS) libagbsyscall # Ensure required directories exist $(shell mkdir -p $(C_BUILDDIR) $(ASM_BUILDDIR) $(DATA_ASM_BUILDDIR) $(SOUND_ASM_BUILDDIR) $(SONG_BUILDDIR) $(MID_BUILDDIR)) @@ -458,7 +458,7 @@ sdl: ; @$(MAKE) PLATFORM=sdl psp: ; @$(MAKE) PLATFORM=psp -ps2: ; @$(MAKE) PLATFORM=ps2 +sdl_ps2: ; @$(MAKE) PLATFORM=sdl_ps2 tas_sdl: ; @$(MAKE) sdl 
TAS_TESTING=1 @@ -521,11 +521,14 @@ else @cd $(OBJ_DIR) && $(CC1) $(MAP_FLAG)$(ROOT_DIR)/$(MAP) $(OBJS_REL) $(LIBS) -o $(ROOT_DIR)/$@ endif -$(ROM): $(ELF) + ifeq ($(PLATFORM),gba) +$(ROM): $(ELF) libagbsyscall $(OBJCOPY) -O binary --pad-to 0x8400000 $< $@ $(FIX) $@ -p -t"$(TITLE)" -c$(GAME_CODE) -m$(MAKER_CODE) -r$(GAME_REVISION) --silent -else ifeq ($(PLATFORM),sdl) +else +$(ROM): $(ELF) +ifeq ($(PLATFORM),sdl) cp $< $@ else ifeq ($(PLATFORM),psp) psp-fixup-imports $< @@ -533,7 +536,7 @@ else ifeq ($(PLATFORM),psp) psp-strip $< -o $(BUILD_NAME).psp_strip.elf pack-pbp $@ PARAM.SFO NULL NULL NULL NULL NULL $(BUILD_NAME).psp_strip.elf NULL -rm -f $(BUILD_NAME).psp_strip.elf -else ifeq ($(PLATFORM),ps2) +else ifeq ($(PLATFORM),sdl_ps2) @echo Creating $(ROM) from $(ELF) @cp -r ps2/ntsc $(OBJ_DIR)/iso @cp $< $(OBJ_DIR)/iso/$(PS2_GAME_CODE) @@ -541,6 +544,7 @@ else ifeq ($(PLATFORM),ps2) else $(OBJCOPY) -O pei-x86-64 $< $@ endif +endif # Build c sources, and ensure alignment $(C_BUILDDIR)/%.o: $(C_SUBDIR)/%.c diff --git a/src/lib/m4a/m4a.c b/src/lib/m4a/m4a.c index 7b90fe3a9..db161254b 100644 --- a/src/lib/m4a/m4a.c +++ b/src/lib/m4a/m4a.c @@ -1377,10 +1377,9 @@ cond_true : { return; } -cond_false : { +cond_false: track->cmdPtr += 4; } -} void MP2K_event_xcmd(struct MP2KPlayerState *mplayInfo, struct MP2KTrack *track) { From 31419cafde30bc46589855880cdc0583eccb8db6 Mon Sep 17 00:00:00 2001 From: Oliver Bell Date: Tue, 17 Feb 2026 01:49:25 +0000 Subject: [PATCH 07/13] add ps2 non sdl --- Makefile | 45 +- include/lib/m4a/m4a_internal.h | 2 + include/platform/platform.h | 2 +- src/game/special_stage/world.c | 4 +- src/platform/pret_sdl/sdl2.c | 2 +- src/platform/ps2/ps2.c | 949 ++++++++++++++++++++ src/platform/shared/audio/m4a_sound_mixer.c | 13 +- src/platform/win32/win32.c | 2 +- src/sprite.c | 4 +- 9 files changed, 999 insertions(+), 24 deletions(-) create mode 100644 src/platform/ps2/ps2.c diff --git a/Makefile b/Makefile index 3af4ea831..e17045f1f 100644 --- 
a/Makefile +++ b/Makefile @@ -60,6 +60,8 @@ else ifeq ($(PLATFORM),psp) PREFIX := psp- else ifeq ($(PLATFORM),sdl_ps2) PREFIX := mips64r5900el-ps2-elf- +else ifeq ($(PLATFORM),ps2) + PREFIX := mips64r5900el-ps2-elf- else # Native ifneq ($(PLATFORM),sdl) @@ -136,6 +138,10 @@ else ifeq ($(PLATFORM),sdl_ps2) ROM := $(BUILD_NAME).$(PLATFORM).iso ELF := $(ROM:.iso=.elf) MAP := $(ROM:.iso=.map) +else ifeq ($(PLATFORM),ps2) +ROM := $(BUILD_NAME).$(PLATFORM).iso +ELF := $(ROM:.iso=.elf) +MAP := $(ROM:.iso=.map) else ROM := $(BUILD_NAME).$(PLATFORM).exe ELF := $(ROM:.exe=.elf) @@ -172,15 +178,17 @@ TILESETS_SUBDIR = graphics/tilesets/ ifeq ($(PLATFORM),gba) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/*") else ifeq ($(PLATFORM),sdl) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*" -not -path "*/platform/ps2/*") else ifeq ($(PLATFORM),psp) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/ps2/*") else ifeq ($(PLATFORM),sdl_ps2) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*" -not -path "*/platform/ps2/*") +else ifeq ($(PLATFORM),ps2) +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*" -not -path "*/platform/pret_sdl/*") else ifeq ($(PLATFORM),sdl_win32) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*" -not -path "*/platform/ps2/*") else ifeq 
($(PLATFORM),win32) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/pret_sdl/*" -not -path "*/platform/psp/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/pret_sdl/*" -not -path "*/platform/psp/*" -not -path "*/platform/ps2/*") else C_SRCS := $(shell find $(C_SUBDIR) -name "*.c") endif @@ -251,6 +259,9 @@ else else ifeq ($(PLATFORM),sdl_ps2) CC1FLAGS += -G0 -Wno-parentheses-equality -Wno-unused-value -ffast-math CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 -D SDL_MAIN_HANDLED -D_EE -D__PS2__ -I$(PS2SDK)/common/include -I$(PS2SDK)/ee/include -I$(PS2SDK)/ports/include $(shell $(PS2SDK)/ports/bin/sdl2-config --cflags) + else ifeq ($(PLATFORM),ps2) + CC1FLAGS += -G0 -Wno-parentheses-equality -Wno-unused-value -ffast-math + CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=0 -D PLATFORM_WIN32=0 -D_EE -D__PS2__ -I$(PS2SDK)/common/include -I$(PS2SDK)/ee/include -I$(PS2DEV)/gsKit/include -I$(PS2SDK)/ports/include else ifeq ($(PLATFORM),sdl_win32) CPPFLAGS += -D TITLE_BAR=$(BUILD_NAME).$(PLATFORM) -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 $(SDL_MINGW_FLAGS) else ifeq ($(PLATFORM),win32) @@ -279,6 +290,8 @@ else CPP := $(CC1) -E else ifeq ($(PLATFORM), sdl_ps2) ASFLAGS += -msingle-float + else ifeq ($(PLATFORM), ps2) + ASFLAGS += -msingle-float endif # Allow file input through stdin on modern GCC and set it to "compile only" CC1FLAGS += -x c -S @@ -293,6 +306,8 @@ else CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer else ifeq ($(PLATFORM),sdl_ps2) CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer + else ifeq ($(PLATFORM),ps2) + CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer else CC1FLAGS += -O2 endif @@ -339,6 +354,8 @@ else ifeq ($(PLATFORM),psp) MAP_FLAG := -Xlinker -Map= else ifeq ($(PLATFORM),sdl_ps2) MAP_FLAG := -Xlinker -Map= +else ifeq ($(PLATFORM),ps2) + MAP_FLAG := -Xlinker -Map= # Win32 else MAP_FLAG := -Xlinker -Map= @@ -353,6 +370,8 @@ else ifeq ($(PLATFORM),psp) LIBS := 
-L$(PSPDEV)/psp/lib -L$(PSPSDK)/lib -lSDL2 -lm -lGL -lpspvram -lpspaudio -lpspvfpu -lpspdisplay -lpspgu -lpspge -lpsphprm -lpspctrl -lpsppower -lpspdebug -lpspnet -lpspnet_apctl -Wl,-zmax-page-size=128 else ifeq ($(PLATFORM),sdl_ps2) LIBS := -lSDL2 $(shell $(PS2SDK)/ports/bin/sdl2-config --libs) -T$(PS2SDK)/ee/startup/linkfile -L$(PS2SDK)/common/lib -L$(PS2SDK)/ee/lib -L$(PS2DEV)/gsKit/lib -Wl,-zmax-page-size=128 +else ifeq ($(PLATFORM),ps2) + LIBS := -T$(PS2SDK)/ee/startup/linkfile -L$(PS2SDK)/common/lib -L$(PS2SDK)/ee/lib -L$(PS2DEV)/gsKit/lib -L$(PS2SDK)/ports/lib -lgskit -ldmakit -lps2_drivers -lmc -lpatches -Wl,-zmax-page-size=128 else ifeq ($(PLATFORM),sdl_win32) LIBS := -mwin32 -lkernel32 -lwinmm -lmingw32 -lxinput $(SDL_MINGW_LIBS) else ifeq ($(PLATFORM), win32) @@ -362,7 +381,7 @@ endif #### MAIN TARGETS #### # these commands will run regardless of deps being completed -.PHONY: clean tools tidy clean-tools $(TOOLDIRS) libagbsyscall +.PHONY: clean tools tidy clean-tools $(TOOLDIRS) libagbsyscall ps2 # Ensure required directories exist $(shell mkdir -p $(C_BUILDDIR) $(ASM_BUILDDIR) $(DATA_ASM_BUILDDIR) $(SOUND_ASM_BUILDDIR) $(SONG_BUILDDIR) $(MID_BUILDDIR)) @@ -460,6 +479,8 @@ psp: ; @$(MAKE) PLATFORM=psp sdl_ps2: ; @$(MAKE) PLATFORM=sdl_ps2 +ps2: ; @$(MAKE) PLATFORM=ps2 + tas_sdl: ; @$(MAKE) sdl TAS_TESTING=1 sdl_win32: @@ -526,8 +547,11 @@ ifeq ($(PLATFORM),gba) $(ROM): $(ELF) libagbsyscall $(OBJCOPY) -O binary --pad-to 0x8400000 $< $@ $(FIX) $@ -p -t"$(TITLE)" -c$(GAME_CODE) -m$(MAKER_CODE) -r$(GAME_REVISION) --silent +else ifeq ($(PLATFORM),win32) +$(ROM): $(ELF) libagbsyscall + $(OBJCOPY) -O pei-x86-64 $< $@ else -$(ROM): $(ELF) +$(ROM): $(ELF) ifeq ($(PLATFORM),sdl) cp $< $@ else ifeq ($(PLATFORM),psp) @@ -541,8 +565,11 @@ else ifeq ($(PLATFORM),sdl_ps2) @cp -r ps2/ntsc $(OBJ_DIR)/iso @cp $< $(OBJ_DIR)/iso/$(PS2_GAME_CODE) @mkisofs -o $(ROM) $(OBJ_DIR)/iso/ -else - $(OBJCOPY) -O pei-x86-64 $< $@ +else ifeq ($(PLATFORM),ps2) + @echo Creating $(ROM) from 
$(ELF) + @cp -r ps2/ntsc $(OBJ_DIR)/iso + @cp $< $(OBJ_DIR)/iso/$(PS2_GAME_CODE) + @mkisofs -o $(ROM) $(OBJ_DIR)/iso/ endif endif diff --git a/include/lib/m4a/m4a_internal.h b/include/lib/m4a/m4a_internal.h index 7755591c6..0ae01ef88 100644 --- a/include/lib/m4a/m4a_internal.h +++ b/include/lib/m4a/m4a_internal.h @@ -243,6 +243,8 @@ struct SoundMixerState { #if PLATFORM_GBA s8 pcmBuffer[PCM_DMA_BUF_SIZE * 2]; #else + // TODO: let's not make this float, they are slow + // on older systems float pcmBuffer[PCM_DMA_BUF_SIZE * 2]; #endif }; diff --git a/include/platform/platform.h b/include/platform/platform.h index 0a44b8f1f..504c23586 100644 --- a/include/platform/platform.h +++ b/include/platform/platform.h @@ -21,6 +21,6 @@ extern void Platform_RLFree(unsigned char *dest); extern void Platform_LZDecompressUnsafe(unsigned char *src, unsigned char *dest); extern void Platform_RLDecompressUnsafe(unsigned char *src, unsigned char *dest); -extern void Platform_QueueAudio(const void *data, u32 numBytes); +extern void Platform_QueueAudio(const float *data, u32 numBytes); #endif // GUARD_SA2_PLATFORM_H diff --git a/src/game/special_stage/world.c b/src/game/special_stage/world.c index eb293bf74..a41ac6d5c 100644 --- a/src/game/special_stage/world.c +++ b/src/game/special_stage/world.c @@ -239,14 +239,14 @@ void sub_806EA04(void) *unk1884++ = (Q_16_16_TO_INT(temp) * cos) >> 0x10; // BG2PA // HACK: in SDL we don't handle these PB and PD values properly -#if PLATFORM_SDL +#if !PLATFORM_GBA && (RENDERER == RENDERER_SOFTWARE_FAST || RENDERER == RENDERER_SOFTWARE) *unk1884++ = 0; #else *unk1884++ = (Q_16_16_TO_INT(temp) * sin) >> 0x10; // BG2PB #endif *unk1884++ = (Q_16_16_TO_INT(temp) * -sin) >> 0x10; // BG2PC -#if PLATFORM_SDL +#if !PLATFORM_GBA && (RENDERER == RENDERER_SOFTWARE_FAST || RENDERER == RENDERER_SOFTWARE) *unk1884++ = 0; #else *unk1884++ = (Q_16_16_TO_INT(temp) * cos) >> 0x10; // BG2PD diff --git a/src/platform/pret_sdl/sdl2.c b/src/platform/pret_sdl/sdl2.c index 
1a77546ea..4e4086ca9 100644 --- a/src/platform/pret_sdl/sdl2.c +++ b/src/platform/pret_sdl/sdl2.c @@ -625,7 +625,7 @@ static u16 PollJoystickButtons(void) u32 fullScreenFlags = 0; static SDL_DisplayMode sdlDispMode = { 0 }; -void Platform_QueueAudio(const void *data, uint32_t bytesCount) +void Platform_QueueAudio(const float *data, uint32_t bytesCount) { if (headless) { return; diff --git a/src/platform/ps2/ps2.c b/src/platform/ps2/ps2.c new file mode 100644 index 000000000..b4c09442e --- /dev/null +++ b/src/platform/ps2/ps2.c @@ -0,0 +1,949 @@ +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include "audsrv.h" + +#include "global.h" +#include "core.h" +#include "multi_sio.h" +#include "gba/defines.h" +#include "gba/io_reg.h" +#include "gba/types.h" +#include "lib/agb_flash/flash_internal.h" +#include "platform/shared/dma.h" + +static GSGLOBAL *gsGlobal; +static GSTEXTURE screen; + +#include "platform/shared/audio/cgb_audio.h" + +#ifndef TILE_WIDTH +#define TILE_WIDTH 8 +#endif + +extern IntrFunc gIntrTable[16]; + +ALIGNED(256) uint16_t gameImage[DISPLAY_WIDTH * DISPLAY_HEIGHT]; + +struct VidMode { + const char *name; + s16 mode; + s16 interlace; + s16 field; + int max_width; + int max_height; + int width; + int height; + int vck; + int iPassCount; + int x_off; + int y_off; +}; + +static const struct VidMode vid_modes[] = { + { "240p", GS_MODE_NTSC, GS_NONINTERLACED, GS_FRAME, 652, 224, 320, 224, 2, 1, 0, 0 }, +#if !defined(VERSION_EU) + // NTSC + { "480i", GS_MODE_NTSC, GS_INTERLACED, GS_FIELD, 704, 480, 704, 452, 4, 1, 0, 0 }, + { "480p", GS_MODE_DTV_480P, GS_NONINTERLACED, GS_FRAME, 704, 480, 704, 452, 2, 1, 0, 0 }, +#else + // PAL + { "576i", GS_MODE_PAL, GS_INTERLACED, GS_FIELD, 704, 576, 704, 536, 4, 1, 0, 0 }, + { "576p", GS_MODE_DTV_576P, GS_NONINTERLACED, GS_FRAME, 704, 576, 704, 536, 2, 1, 0, 0 }, +#endif + // HDTV + { "720p", GS_MODE_DTV_720P, GS_NONINTERLACED, GS_FRAME, 1280, 720, 1280, 720, 1, 2, 0, 0 }, + 
{ "1080i", GS_MODE_DTV_1080I, GS_INTERLACED, GS_FRAME, 1920, 1080, 1920, 1080, 1, 2, 0, 0 }, +}; + +static int vsync_sema_1st_id; +static int vsync_sema_2nd_id; +static int vsync_sema_id = -1; +static int vsync_id = -1; + +static const struct VidMode *vid_mode; +static bool use_hires = false; + +bool speedUp = false; +bool isRunning = true; +bool paused = false; +bool stepOneFrame = false; +bool headless = false; + +double lastGameTime = 0; +double curGameTime = 0; +double fixedTimestep = 1.0 / 60.0; // 16.666667ms +double timeScale = 1.0; +double accumulator = 0.0; + +static FILE *sSaveFile = NULL; + +extern void AgbMain(void); +void DoSoftReset(void) {}; + +void VDraw(void); +void UpdateTexture(void); + +static void ReadSaveFile(char *path); +static void StoreSaveFile(void); +static void CloseSaveFile(void); + +u16 Platform_GetKeyInput(void); + +#define SAMPLES_HIGH 544 +#define SAMPLES_LOW 528 + +static bool audio_ps2_init(void) +{ + if (init_audio_driver() != 0) + return false; + audsrv_set_volume(MAX_VOLUME); + + audsrv_fmt_t fmt; + + fmt.freq = 48000; + fmt.bits = 16; + fmt.channels = 2; + + if (audsrv_set_format(&fmt)) { + printf("audio_ps2: unsupported sound format\n"); + audsrv_quit(); + return false; + } + + return true; +} + +static int audio_ps2_buffered(void) { return audsrv_queued() / 4; } + +static void audio_ps2_play(const uint8_t *buf, size_t len) +{ + if (audio_ps2_buffered() < 6000) { + audsrv_play_audio(buf, len); + } +} + +void reset_IOP() +{ + SifInitRpc(0); + while (!SifIopReset(NULL, 0)) { } // Comment this line if you want to "debug" through ps2link + while (!SifIopSync()) { } +} + +static void prepare_IOP() +{ + reset_IOP(); + SifInitRpc(0); + sbv_patch_enable_lmb(); + sbv_patch_disable_prefix_check(); +} + +static void init_drivers() +{ + init_only_boot_ps2_filesystem_driver(); + init_memcard_driver(true); +} + +static void deinit_drivers() +{ + deinit_memcard_driver(true); + deinit_only_boot_ps2_filesystem_driver(); +} + +void 
platform_video_init(void) +{ + if (vid_mode == NULL) { + vid_mode = &vid_modes[2]; // Standard def 480p + } else { + if (use_hires) { + gsKit_hires_deinit_global(gsGlobal); + } else { + gsKit_deinit_global(gsGlobal); + if (vsync_id != -1) { + gsKit_remove_vsync_handler(vsync_id); + } + vsync_sema_id = -1; + } + } + use_hires = (vid_mode->mode == GS_MODE_DTV_720P || vid_mode->mode == GS_MODE_DTV_1080I); + + if (use_hires) { + gsGlobal = gsKit_hires_init_global(); + } else { + gsGlobal = gsKit_init_global(); + } + + dmaKit_init(D_CTRL_RELE_OFF, D_CTRL_MFD_OFF, D_CTRL_STS_UNSPEC, D_CTRL_STD_OFF, D_CTRL_RCYC_8, 1 << DMA_CHANNEL_GIF); + + dmaKit_chan_init(DMA_CHANNEL_GIF); + + gsGlobal->Mode = vid_mode->mode; + gsGlobal->Width = vid_mode->width; + gsGlobal->Height = vid_mode->height; + if (gsGlobal->Mode == GS_MODE_DTV_1080I) { + gsGlobal->Height /= 2; + } + + gsGlobal->Interlace = vid_mode->interlace; + gsGlobal->Field = vid_mode->field; + gsGlobal->ZBuffering = GS_SETTING_ON; + gsGlobal->DoubleBuffering = GS_SETTING_ON; + gsGlobal->PrimAAEnable = GS_SETTING_OFF; + gsGlobal->Dithering = GS_SETTING_OFF; + gsGlobal->PSM = GS_PSM_CT16; + gsGlobal->PSMZ = GS_PSMZ_16; + + if (use_hires) { + gsKit_hires_init_screen(gsGlobal, vid_mode->iPassCount); + } else { + gsKit_init_screen(gsGlobal); + } + // hires sets the texture pointer to the wrong location. Ensure it's correct. 
+ gsGlobal->TexturePointer = gsGlobal->CurrentPointer; + gsKit_TexManager_init(gsGlobal); + + screen.Width = DISPLAY_WIDTH; + screen.Height = DISPLAY_HEIGHT; + screen.PSM = GS_PSM_CT16; + screen.Mem = (void *)gameImage; +} + +int main(int argc, char **argv) +{ + prepare_IOP(); + + // ReadSaveFile("sa2.sav"); + + // Prevent the multiplayer screen from being drawn ( see core.c:EngineInit() ) + REG_RCNT = 0x8000; + REG_KEYINPUT = 0x3FF; + + audio_ps2_init(); + platform_video_init(); + // controller init + + cgb_audio_init(48000); + + VDraw(); + AgbMain(); + + return 0; +} + +bool newFrameRequested = FALSE; +int skipFrame = 0; + +// called every gba frame. we process sdl events and render as many times +// as vsync needs, then return when a new game frame is needed. +void VBlankIntrWait(void) +{ +#define HANDLE_VBLANK_INTRS() \ + ({ \ + REG_DISPSTAT |= INTR_FLAG_VBLANK; \ + RunDMAs(DMA_VBLANK); \ + if (REG_DISPSTAT & DISPSTAT_VBLANK_INTR) \ + gIntrTable[INTR_INDEX_VBLANK](); \ + REG_DISPSTAT &= ~INTR_FLAG_VBLANK; \ + }) + + bool frameAvailable = TRUE; + bool frameDrawn = false; + static int frames_skipped = 0; + if (isRunning) { + REG_KEYINPUT = KEYS_MASK ^ Platform_GetKeyInput(); + +// Only render 30fps when in widescreen as the draw func is too slow for the ps2 +#if DISPLAY_WIDTH > 240 + skipFrame++; + skipFrame %= 2; +#endif + if (skipFrame == 0) { + VDraw(); + } else { + UpdateTexture(); + } + HANDLE_VBLANK_INTRS(); + if (skipFrame != 0) { + return; + } + + if (use_hires) { + gsKit_hires_flip_ext(gsGlobal, GSFLIP_RATE_LIMIT_1); + } else { + gsKit_sync_flip(gsGlobal); + gsKit_queue_exec(gsGlobal); + } + gsKit_TexManager_nextFrame(gsGlobal); + return; + } + // #define MAX_FRAME_SKIP 2 + + // while (isRunning) { + // if (!paused || stepOneFrame) { + // double dt = fixedTimestep / timeScale; // TODO: Fix speedup + + // // don't accumulate time if we already requested a new frame + // // this frame cycle (emulates threaded sdl behavior) + // if (!newFrameRequested) { + 
// double deltaTime = 0; + + // // TODO: fix + // curGameTime += dt; + // if (stepOneFrame) { + // deltaTime = dt; + // } else { + // // TODO: divide by expected frequency + // deltaTime = (double)((curGameTime - lastGameTime) / 1); + // if (deltaTime > (dt * 5)) + // deltaTime = dt * 5; + // } + // lastGameTime = curGameTime; + + // accumulator += deltaTime; + // } else { + // newFrameRequested = FALSE; + // } + + // while (accumulator >= dt) { + // REG_KEYINPUT = KEYS_MASK ^ Platform_GetKeyInput(); + // if (frameAvailable) { + // // frame skip: let game logic catch up when behind + // if (accumulator >= dt * 2.0 && frames_skipped < MAX_FRAME_SKIP) { + // frames_skipped++; + // frameAvailable = FALSE; + // HANDLE_VBLANK_INTRS(); + // accumulator -= dt; + // newFrameRequested = TRUE; + // return; + // } + // frames_skipped = 0; + // VDraw(); + // frameAvailable = FALSE; + // frameDrawn = true; + + // HANDLE_VBLANK_INTRS(); + + // accumulator -= dt; + // } else { + // newFrameRequested = TRUE; + // return; + // } + // } + + // if (paused && stepOneFrame) { + // stepOneFrame = false; + // } + // } + + // if (use_hires) { + // gsKit_hires_flip_ext(gsGlobal, GSFLIP_RATE_LIMIT_1); + // } else { + // // gsKit_flip(gs_global); + // gsKit_sync_flip(gsGlobal); + // gsKit_queue_exec(gsGlobal); + // } + // gsKit_TexManager_nextFrame(gsGlobal); + // } + + CloseSaveFile(); + + deinit_drivers(); + exit(0); +#undef HANDLE_VBLANK_INTRS +} + +static void ReadSaveFile(char *path) +{ + // Check whether the saveFile exists, and create it if not + sSaveFile = fopen(path, "r+b"); + if (sSaveFile == NULL) { + sSaveFile = fopen(path, "w+b"); + } + + fseek(sSaveFile, 0, SEEK_END); + int fileSize = ftell(sSaveFile); + fseek(sSaveFile, 0, SEEK_SET); + + // Only read as many bytes as fit inside the buffer + // or as many bytes as are in the file + int bytesToRead = (fileSize < sizeof(FLASH_BASE)) ? 
fileSize : sizeof(FLASH_BASE); + + int bytesRead = fread(FLASH_BASE, 1, bytesToRead, sSaveFile); + + // Fill the buffer if the savefile was just created or smaller than the buffer itself + for (int i = bytesRead; i < sizeof(FLASH_BASE); i++) { + FLASH_BASE[i] = 0xFF; + } +} + +static void StoreSaveFile() +{ + if (sSaveFile != NULL) { + fseek(sSaveFile, 0, SEEK_SET); + fwrite(FLASH_BASE, 1, sizeof(FLASH_BASE), sSaveFile); + } +} + +void Platform_StoreSaveFile(void) { StoreSaveFile(); } + +static void CloseSaveFile() +{ + if (sSaveFile != NULL) { + fclose(sSaveFile); + } +} + +s16 converted_audio[4096]; + +void float_audio_to_s16(const float *input, int16_t *output, size_t length) +{ + if (!input || !output) + return; + + for (size_t i = 0; i < length; i++) { + float sample = input[i]; + + if (sample > 1.0f) + sample = 1.0f; + else if (sample < -1.0f) + sample = -1.0f; + + output[i] = (int16_t)(sample * 32767.0f + (sample >= 0 ? 0.5f : -0.5f)); + } +} + +void Platform_QueueAudio(const float *data, uint32_t bytesCount) +{ + float_audio_to_s16(data, converted_audio, bytesCount / sizeof(float)); + audio_ps2_play((void *)converted_audio, bytesCount / sizeof(float) * sizeof(u16)); +} + +// TODO: handle input +u16 Platform_GetKeyInput(void) { return 0; } + +// BIOS function implementations are based on the VBA-M source code. 
+ +// safe unaligned access for MIPS +static uint32_t CPUReadMemory(const void *src) +{ + uint32_t val; + memcpy(&val, src, sizeof(val)); + return val; +} + +static void CPUWriteMemory(void *dest, uint32_t val) { memcpy(dest, &val, sizeof(val)); } + +static uint16_t CPUReadHalfWord(const void *src) +{ + uint16_t val; + memcpy(&val, src, sizeof(val)); + return val; +} + +static void CPUWriteHalfWord(void *dest, uint16_t val) { memcpy(dest, &val, sizeof(val)); } + +static uint8_t CPUReadByte(const void *src) { return *(uint8_t *)src; } + +static void CPUWriteByte(void *dest, uint8_t val) { *(uint8_t *)dest = val; } + +void CpuSet(const void *src, void *dst, u32 cnt) +{ + if (dst == NULL) { + puts("Attempted to CpuSet to NULL\n"); + return; + } + + int count = cnt & 0x1FFFFF; + + const u8 *source = src; + u8 *dest = dst; + + // 32-bit ? + if ((cnt >> 26) & 1) { + // assert(((uintptr_t)src & ~3) == (uintptr_t)src); + // assert(((uintptr_t)dst & ~3) == (uintptr_t)dst); + + // needed for 32-bit mode! + // source = (u8 *)((uint32_t )source & ~3); + // dest = (u8 *)((uint32_t )dest & ~3); + + // fill ? + if ((cnt >> 24) & 1) { + uint32_t value = CPUReadMemory(source); + while (count) { + CPUWriteMemory(dest, value); + dest += 4; + count--; + } + } else { + // copy + while (count) { + CPUWriteMemory(dest, CPUReadMemory(source)); + source += 4; + dest += 4; + count--; + } + } + } else { + // No align on 16-bit fill? + // assert(((uintptr_t)src & ~1) == (uintptr_t)src); + // assert(((uintptr_t)dst & ~1) == (uintptr_t)dst); + + // 16-bit fill? 
+ if ((cnt >> 24) & 1) { + uint16_t value = CPUReadHalfWord(source); + while (count) { + CPUWriteHalfWord(dest, value); + dest += 2; + count--; + } + } else { + // copy + while (count) { + CPUWriteHalfWord(dest, CPUReadHalfWord(source)); + source += 2; + dest += 2; + count--; + } + } + } +} + +void CpuFastSet(const void *src, void *dst, u32 cnt) +{ + if (dst == NULL) { + puts("Attempted to CpuFastSet to NULL\n"); + return; + } + + int count = cnt & 0x1FFFFF; + + const u8 *source = src; + u8 *dest = dst; + + // source = (u8 *)((uint32_t )source & ~3); + // dest = (u8 *)((uint32_t )dest & ~3); + + // fill? + if ((cnt >> 24) & 1) { + uint32_t value = CPUReadMemory(source); + while (count > 0) { + // BIOS always transfers 32 bytes at a time + for (int i = 0; i < 8; i++) { + CPUWriteMemory(dest, value); + dest += 4; + } + count -= 8; + } + } else { + // copy + while (count > 0) { + // BIOS always transfers 32 bytes at a time + for (int i = 0; i < 8; i++) { + uint32_t value = CPUReadMemory(source); + CPUWriteMemory(dest, value); + source += 4; + dest += 4; + } + count -= 8; + } + } +} + +void LZ77UnCompVram(const void *src_, void *dest_) +{ + const u8 *src = (const u8 *)src_; + u8 *dest = dest_; + int destSize = (src[3] << 16) | (src[2] << 8) | src[1]; + int srcPos = 4; + int destPos = 0; + + for (;;) { + unsigned char flags = src[srcPos++]; + + for (int i = 0; i < 8; i++) { + if (flags & 0x80) { + int blockSize = (src[srcPos] >> 4) + 3; + int blockDistance = (((src[srcPos] & 0xF) << 8) | src[srcPos + 1]) + 1; + + srcPos += 2; + + int blockPos = destPos - blockDistance; + + // Some Ruby/Sapphire tilesets overflow. 
+ if (destPos + blockSize > destSize) { + blockSize = destSize - destPos; + // fprintf(stderr, "Destination buffer overflow.\n"); + puts("Destination buffer overflow.\n"); + } + + if (blockPos < 0) + goto fail; + + for (int j = 0; j < blockSize; j++) + dest[destPos++] = dest[blockPos + j]; + } else { + if (destPos >= destSize) + goto fail; + + dest[destPos++] = src[srcPos++]; + } + + if (destPos == destSize) { + return; + } + + flags <<= 1; + } + } + +fail: + puts("Fatal error while decompressing LZ file.\n"); +} + +void LZ77UnCompWram(const void *src, void *dst) +{ + const uint8_t *source = src; + uint8_t *dest = dst; + + uint32_t header = CPUReadMemory(source); + source += 4; + + int len = header >> 8; + + while (len > 0) { + uint8_t d = CPUReadByte(source++); + + if (d) { + for (int i = 0; i < 8; i++) { + if (d & 0x80) { + uint16_t data = CPUReadByte(source++) << 8; + data |= CPUReadByte(source++); + int length = (data >> 12) + 3; + int offset = (data & 0x0FFF); + uint8_t *windowOffset = dest - offset - 1; + for (int i2 = 0; i2 < length; i2++) { + CPUWriteByte(dest++, CPUReadByte(windowOffset++)); + len--; + if (len == 0) + return; + } + } else { + CPUWriteByte(dest++, CPUReadByte(source++)); + len--; + if (len == 0) + return; + } + d <<= 1; + } + } else { + for (int i = 0; i < 8; i++) { + CPUWriteByte(dest++, CPUReadByte(source++)); + len--; + if (len == 0) + return; + } + } + } +} + +void RLUnCompWram(const void *src, void *dest) +{ + int remaining = CPUReadMemory(src) >> 8; + int blockHeader; + int block; + src += 4; + while (remaining > 0) { + blockHeader = CPUReadByte(src); + src++; + if (blockHeader & 0x80) // Compressed? 
+ { + blockHeader &= 0x7F; + blockHeader += 3; + block = CPUReadByte(src); + src++; + while (blockHeader-- && remaining) { + remaining--; + CPUWriteByte(dest, block); + dest++; + } + } else // Uncompressed + { + blockHeader++; + while (blockHeader-- && remaining) { + remaining--; + u8 byte = CPUReadByte(src); + src++; + CPUWriteByte(dest, byte); + dest++; + } + } + } +} + +void RLUnCompVram(const void *src, void *dest) +{ + int remaining = CPUReadMemory(src) >> 8; + int padding = (4 - remaining) & 0x3; + int blockHeader; + int block; + int halfWord = 0; + src += 4; + while (remaining > 0) { + blockHeader = CPUReadByte(src); + src++; + if (blockHeader & 0x80) // Compressed? + { + blockHeader &= 0x7F; + blockHeader += 3; + block = CPUReadByte(src); + src++; + while (blockHeader-- && remaining) { + remaining--; + if ((uintptr_t)dest & 1) { + halfWord |= block << 8; + CPUWriteHalfWord((void *)((uintptr_t)dest ^ 1), halfWord); + } else + halfWord = block; + dest++; + } + } else // Uncompressed + { + blockHeader++; + while (blockHeader-- && remaining) { + remaining--; + u8 byte = CPUReadByte(src); + src++; + if ((uintptr_t)dest & 1) { + halfWord |= byte << 8; + CPUWriteHalfWord((void *)((uintptr_t)dest ^ 1), halfWord); + } else + halfWord = byte; + dest++; + } + } + } + if ((uintptr_t)dest & 1) { + padding--; + dest++; + } + for (; padding > 0; padding -= 2, dest += 2) + CPUWriteHalfWord(dest, 0); +} + +const s16 sineTable[256] + = { (s16)0x0000, (s16)0x0192, (s16)0x0323, (s16)0x04B5, (s16)0x0645, (s16)0x07D5, (s16)0x0964, (s16)0x0AF1, (s16)0x0C7C, (s16)0x0E05, + (s16)0x0F8C, (s16)0x1111, (s16)0x1294, (s16)0x1413, (s16)0x158F, (s16)0x1708, (s16)0x187D, (s16)0x19EF, (s16)0x1B5D, (s16)0x1CC6, + (s16)0x1E2B, (s16)0x1F8B, (s16)0x20E7, (s16)0x223D, (s16)0x238E, (s16)0x24DA, (s16)0x261F, (s16)0x275F, (s16)0x2899, (s16)0x29CD, + (s16)0x2AFA, (s16)0x2C21, (s16)0x2D41, (s16)0x2E5A, (s16)0x2F6B, (s16)0x3076, (s16)0x3179, (s16)0x3274, (s16)0x3367, (s16)0x3453, + (s16)0x3536, 
(s16)0x3612, (s16)0x36E5, (s16)0x37AF, (s16)0x3871, (s16)0x392A, (s16)0x39DA, (s16)0x3A82, (s16)0x3B20, (s16)0x3BB6, + (s16)0x3C42, (s16)0x3CC5, (s16)0x3D3E, (s16)0x3DAE, (s16)0x3E14, (s16)0x3E71, (s16)0x3EC5, (s16)0x3F0E, (s16)0x3F4E, (s16)0x3F84, + (s16)0x3FB1, (s16)0x3FD3, (s16)0x3FEC, (s16)0x3FFB, (s16)0x4000, (s16)0x3FFB, (s16)0x3FEC, (s16)0x3FD3, (s16)0x3FB1, (s16)0x3F84, + (s16)0x3F4E, (s16)0x3F0E, (s16)0x3EC5, (s16)0x3E71, (s16)0x3E14, (s16)0x3DAE, (s16)0x3D3E, (s16)0x3CC5, (s16)0x3C42, (s16)0x3BB6, + (s16)0x3B20, (s16)0x3A82, (s16)0x39DA, (s16)0x392A, (s16)0x3871, (s16)0x37AF, (s16)0x36E5, (s16)0x3612, (s16)0x3536, (s16)0x3453, + (s16)0x3367, (s16)0x3274, (s16)0x3179, (s16)0x3076, (s16)0x2F6B, (s16)0x2E5A, (s16)0x2D41, (s16)0x2C21, (s16)0x2AFA, (s16)0x29CD, + (s16)0x2899, (s16)0x275F, (s16)0x261F, (s16)0x24DA, (s16)0x238E, (s16)0x223D, (s16)0x20E7, (s16)0x1F8B, (s16)0x1E2B, (s16)0x1CC6, + (s16)0x1B5D, (s16)0x19EF, (s16)0x187D, (s16)0x1708, (s16)0x158F, (s16)0x1413, (s16)0x1294, (s16)0x1111, (s16)0x0F8C, (s16)0x0E05, + (s16)0x0C7C, (s16)0x0AF1, (s16)0x0964, (s16)0x07D5, (s16)0x0645, (s16)0x04B5, (s16)0x0323, (s16)0x0192, (s16)0x0000, (s16)0xFE6E, + (s16)0xFCDD, (s16)0xFB4B, (s16)0xF9BB, (s16)0xF82B, (s16)0xF69C, (s16)0xF50F, (s16)0xF384, (s16)0xF1FB, (s16)0xF074, (s16)0xEEEF, + (s16)0xED6C, (s16)0xEBED, (s16)0xEA71, (s16)0xE8F8, (s16)0xE783, (s16)0xE611, (s16)0xE4A3, (s16)0xE33A, (s16)0xE1D5, (s16)0xE075, + (s16)0xDF19, (s16)0xDDC3, (s16)0xDC72, (s16)0xDB26, (s16)0xD9E1, (s16)0xD8A1, (s16)0xD767, (s16)0xD633, (s16)0xD506, (s16)0xD3DF, + (s16)0xD2BF, (s16)0xD1A6, (s16)0xD095, (s16)0xCF8A, (s16)0xCE87, (s16)0xCD8C, (s16)0xCC99, (s16)0xCBAD, (s16)0xCACA, (s16)0xC9EE, + (s16)0xC91B, (s16)0xC851, (s16)0xC78F, (s16)0xC6D6, (s16)0xC626, (s16)0xC57E, (s16)0xC4E0, (s16)0xC44A, (s16)0xC3BE, (s16)0xC33B, + (s16)0xC2C2, (s16)0xC252, (s16)0xC1EC, (s16)0xC18F, (s16)0xC13B, (s16)0xC0F2, (s16)0xC0B2, (s16)0xC07C, (s16)0xC04F, (s16)0xC02D, + (s16)0xC014, (s16)0xC005, 
(s16)0xC000, (s16)0xC005, (s16)0xC014, (s16)0xC02D, (s16)0xC04F, (s16)0xC07C, (s16)0xC0B2, (s16)0xC0F2, + (s16)0xC13B, (s16)0xC18F, (s16)0xC1EC, (s16)0xC252, (s16)0xC2C2, (s16)0xC33B, (s16)0xC3BE, (s16)0xC44A, (s16)0xC4E0, (s16)0xC57E, + (s16)0xC626, (s16)0xC6D6, (s16)0xC78F, (s16)0xC851, (s16)0xC91B, (s16)0xC9EE, (s16)0xCACA, (s16)0xCBAD, (s16)0xCC99, (s16)0xCD8C, + (s16)0xCE87, (s16)0xCF8A, (s16)0xD095, (s16)0xD1A6, (s16)0xD2BF, (s16)0xD3DF, (s16)0xD506, (s16)0xD633, (s16)0xD767, (s16)0xD8A1, + (s16)0xD9E1, (s16)0xDB26, (s16)0xDC72, (s16)0xDDC3, (s16)0xDF19, (s16)0xE075, (s16)0xE1D5, (s16)0xE33A, (s16)0xE4A3, (s16)0xE611, + (s16)0xE783, (s16)0xE8F8, (s16)0xEA71, (s16)0xEBED, (s16)0xED6C, (s16)0xEEEF, (s16)0xF074, (s16)0xF1FB, (s16)0xF384, (s16)0xF50F, + (s16)0xF69C, (s16)0xF82B, (s16)0xF9BB, (s16)0xFB4B, (s16)0xFCDD, (s16)0xFE6E }; + +void BgAffineSet(struct BgAffineSrcData *src, struct BgAffineDstData *dest, s32 count) +{ + for (s32 i = 0; i < count; i++) { + s32 cx = src[i].texX; + s32 cy = src[i].texY; + s16 dispx = src[i].scrX; + s16 dispy = src[i].scrY; + s16 rx = src[i].sx; + s16 ry = src[i].sy; + u16 theta = src[i].alpha >> 8; + s32 a = sineTable[(theta + 0x40) & 255]; + s32 b = sineTable[theta]; + + s16 dx = (rx * a) >> 14; + s16 dmx = (rx * b) >> 14; + s16 dy = (ry * b) >> 14; + s16 dmy = (ry * a) >> 14; + + dest[i].pa = dx; + dest[i].pb = -dmx; + dest[i].pc = dy; + dest[i].pd = dmy; + + s32 startx = cx - dx * dispx + dmx * dispy; + s32 starty = cy - dy * dispx - dmy * dispy; + + dest[i].dx = startx; + dest[i].dy = starty; + } +} + +void ObjAffineSet(struct ObjAffineSrcData *src, void *dest, s32 count, s32 offset) +{ + for (s32 i = 0; i < count; i++) { + s16 rx = src[i].xScale; + s16 ry = src[i].yScale; + u16 theta = src[i].rotation >> 8; + + s32 a = (s32)sineTable[(theta + 64) & 255]; + s32 b = (s32)sineTable[theta]; + + s16 dx = ((s32)rx * a) >> 14; + s16 dmx = ((s32)rx * b) >> 14; + s16 dy = ((s32)ry * b) >> 14; + s16 dmy = ((s32)ry * a) >> 14; + + 
CPUWriteHalfWord(dest, dx); + dest += offset; + CPUWriteHalfWord(dest, -dmx); + dest += offset; + CPUWriteHalfWord(dest, dy); + dest += offset; + CPUWriteHalfWord(dest, dmy); + dest += offset; + } +} + +void SoftReset(u32 resetFlags) { } + +void SoftResetExram(u32 resetFlags) { } + +// Following functions taken from mGBA's source +u16 ArcTan(s16 i) +{ + s32 a = -((i * i) >> 14); + s32 b = ((0xA9 * a) >> 14) + 0x390; + b = ((b * a) >> 14) + 0x91C; + b = ((b * a) >> 14) + 0xFB6; + b = ((b * a) >> 14) + 0x16AA; + b = ((b * a) >> 14) + 0x2081; + b = ((b * a) >> 14) + 0x3651; + b = ((b * a) >> 14) + 0xA2F9; + + return (i * b) >> 16; +} + +u16 ArcTan2(s16 x, s16 y) +{ + if (!y) { + if (x >= 0) + return 0; + return 0x8000; + } + if (!x) { + if (y >= 0) + return 0x4000; + return 0xC000; + } + if (y >= 0) { + if (x >= 0) { + if (x >= y) + return ArcTan((y << 14) / x); + } else if (-x >= y) + return ArcTan((y << 14) / x) + 0x8000; + return 0x4000 - ArcTan((x << 14) / y); + } else { + if (x <= 0) { + if (-x > -y) + return ArcTan((y << 14) / x) + 0x8000; + } else if (x >= -y) + return ArcTan((y << 14) / x) + 0x10000; + return 0xC000 - ArcTan((x << 14) / y); + } +} + +u16 Sqrt(u32 num) +{ + if (!num) + return 0; + u32 lower; + u32 upper = num; + u32 bound = 1; + while (bound < upper) { + upper >>= 1; + bound <<= 1; + } + while (1) { + upper = num; + u32 accum = 0; + lower = bound; + while (1) { + u32 oldLower = lower; + if (lower <= upper >> 1) + lower <<= 1; + if (oldLower >= upper >> 1) + break; + } + while (1) { + accum <<= 1; + if (upper >= lower) { + ++accum; + upper -= lower; + } + if (lower == bound) + break; + lower >>= 1; + } + u32 oldBound = bound; + bound += accum; + bound >>= 1; + if (bound >= oldBound) { + bound = oldBound; + break; + } + } + return bound; +} + +int MultiBoot(struct MultiBootParam *mp) { return 0; } + +void VDraw(void) +{ + extern void DrawFrame_Fast(uint16_t * pixels); + DrawFrame_Fast(gameImage); + UpdateTexture(); + REG_VCOUNT = DISPLAY_HEIGHT + 
1; // prep for being in VBlank period +} + +void UpdateTexture(void) +{ + gsKit_TexManager_invalidate(gsGlobal, &screen); + gsKit_TexManager_bind(gsGlobal, &screen); + + int startX = (gsGlobal->Width); + int startY = (gsGlobal->Height); + + gsKit_clear(gsGlobal, GS_SETREG_RGBAQ(0, 0, 0, 0, 0)); + + // Table + gsKit_prim_sprite_texture(gsGlobal, &screen, + 0.0f, // X1 + 0.0f, // Y2 + 0.0f, // U1 + 0.0f, // V1 + startX, // X2 + startY, // Y2 + gsGlobal->Width, // U2 + gsGlobal->Height, // V2 + 0, GS_SETREG_RGBAQ(128, 128, 128, 0, 0)); +} diff --git a/src/platform/shared/audio/m4a_sound_mixer.c b/src/platform/shared/audio/m4a_sound_mixer.c index 67e276fb1..6482d6382 100644 --- a/src/platform/shared/audio/m4a_sound_mixer.c +++ b/src/platform/shared/audio/m4a_sound_mixer.c @@ -31,6 +31,9 @@ struct SoundMixerState *SOUND_INFO_PTR = &sSoundInfo; void SoundMain(void) { +#if !ENABLE_AUDIO + return; +#endif struct SoundMixerState *mixer = SOUND_INFO_PTR; if (mixer->lockStatus != ID_NUMBER) { @@ -419,7 +422,7 @@ void MP2K_event_rept(struct MP2KPlayerState *unused, struct MP2KTrack *track) MP2K_event_goto(unused, track); } else { track->repeatCount = 0; - track->cmdPtr = track->cmdPtr + sizeof(u8 *); + track->cmdPtr += sizeof(u8) + sizeof(u8 *); } } } @@ -908,7 +911,6 @@ void m4aSoundVSync(void) float *m4aBuffer = mixer->pcmBuffer; float *cgbBuffer = cgb_get_buffer(); s32 dmaCounter = mixer->dmaCounter; - bool8 shouldQueue = FALSE; if (dmaCounter > 1) { m4aBuffer += samplesPerFrame * (mixer->framesPerDmaCycle - (dmaCounter - 1)); @@ -916,14 +918,9 @@ void m4aSoundVSync(void) for (u32 i = 0; i < samplesPerFrame; i++) { audioBuffer[i] = m4aBuffer[i] + cgbBuffer[i]; - if (audioBuffer[i] != 0) { - shouldQueue = TRUE; - } } - if (shouldQueue) { - Platform_QueueAudio(audioBuffer, samplesPerFrame * 4); - } + Platform_QueueAudio(audioBuffer, samplesPerFrame * sizeof(float)); if ((s8)(--mixer->dmaCounter) <= 0) mixer->dmaCounter = mixer->framesPerDmaCycle; } diff --git 
a/src/platform/win32/win32.c b/src/platform/win32/win32.c index 261a18e60..60d375dfb 100644 --- a/src/platform/win32/win32.c +++ b/src/platform/win32/win32.c @@ -449,4 +449,4 @@ void *Platform_malloc(size_t numBytes) { return HeapAlloc(GetProcessHeap(), HEAP void Platform_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); } -void Platform_QueueAudio(const u8 *data, u32 numBytes) { } +void Platform_QueueAudio(const float *data, u32 numBytes) { } diff --git a/src/sprite.c b/src/sprite.c index 94f677da0..234ae233e 100644 --- a/src/sprite.c +++ b/src/sprite.c @@ -9,7 +9,7 @@ #include "animation_commands.h" #include "platform/platform.h" -#if !PLATFORM_GBA && !PLATFORM_SDL +#if !PLATFORM_GBA && RENDERER != RENDERER_SOFTWARE_FAST && RENDERER != RENDERER_SOFTWARE extern void Platform_DisplaySprite(Sprite *sprite, u8 oamPaletteNum); #endif @@ -722,7 +722,7 @@ void DisplaySprite(Sprite *sprite) oam->split.paletteNum += sprite->palId; #endif -#if !PLATFORM_GBA && !PLATFORM_SDL +#if !PLATFORM_GBA && (RENDERER != RENDERER_SOFTWARE_FAST && RENDERER != RENDERER_SOFTWARE) // TEMP // Quick hack for getting output in OpenGL test // The whole function call should be replaced by this! 
From d194ef36b1c9bf0158b6a00c953002c24a0049a7 Mon Sep 17 00:00:00 2001 From: Oliver Bell Date: Wed, 18 Feb 2026 02:09:08 +0000 Subject: [PATCH 08/13] replace fast renderer with gpsp renderer --- include/config.h | 2 +- include/gba/defines.h | 11 +- src/platform/ps2/ps2.c | 19 +- src/platform/shared/rendering/common.h | 143 + .../shared/rendering/sw_renderer_fast.c | 4643 +++++++++++++---- 5 files changed, 3756 insertions(+), 1062 deletions(-) create mode 100644 src/platform/shared/rendering/common.h diff --git a/include/config.h b/include/config.h index 845e61104..dd1e301d2 100644 --- a/include/config.h +++ b/include/config.h @@ -51,7 +51,7 @@ // TODO: Only win32 for now #define RENDERER RENDERER_OPENGL #else -#define RENDERER RENDERER_SOFTWARE +#define RENDERER RENDERER_SOFTWARE_FAST #endif #endif diff --git a/include/gba/defines.h b/include/gba/defines.h index 6b9d09afc..5ec55588c 100644 --- a/include/gba/defines.h +++ b/include/gba/defines.h @@ -43,9 +43,13 @@ // PSP: Use GBA-native resolution, SDL scales to 480x272 #define DISPLAY_WIDTH 240 #define DISPLAY_HEIGHT 160 +#elif defined(__PS2__) +// Runs at 60fps with the "fast draw" +#define DISPLAY_WIDTH 240 +#define DISPLAY_HEIGHT 160 #else -#define DISPLAY_WIDTH 426 -#define DISPLAY_HEIGHT 240 +#define DISPLAY_WIDTH 240 +#define DISPLAY_HEIGHT 160 #endif // NOTE: We shouldn't consider WIDESCREEN_HACK a permanent thing. 
@@ -54,7 +58,8 @@ #undef VRAM_SIZE #define VRAM_SIZE (0x18000 + (0x800 * (12))) #define WIDESCREEN_HACK TRUE -#define EXTENDED_OAM TRUE +// TODO: extend oam again once fast renderer supports +#define EXTENDED_OAM FALSE #else #define WIDESCREEN_HACK FALSE #define EXTENDED_OAM !TRUE diff --git a/src/platform/ps2/ps2.c b/src/platform/ps2/ps2.c index b4c09442e..a822b3753 100644 --- a/src/platform/ps2/ps2.c +++ b/src/platform/ps2/ps2.c @@ -161,7 +161,7 @@ static void deinit_drivers() void platform_video_init(void) { if (vid_mode == NULL) { - vid_mode = &vid_modes[2]; // Standard def 480p + vid_mode = &vid_modes[1]; // Standard def 480p } else { if (use_hires) { gsKit_hires_deinit_global(gsGlobal); @@ -219,6 +219,7 @@ void platform_video_init(void) int main(int argc, char **argv) { prepare_IOP(); + init_drivers(); // ReadSaveFile("sa2.sav"); @@ -233,6 +234,11 @@ int main(int argc, char **argv) cgb_audio_init(48000); VDraw(); + // while (true) { + // UpdateTexture(); + // gsKit_sync_flip(gsGlobal); + // gsKit_queue_exec(gsGlobal); + // } AgbMain(); return 0; @@ -260,11 +266,11 @@ void VBlankIntrWait(void) if (isRunning) { REG_KEYINPUT = KEYS_MASK ^ Platform_GetKeyInput(); -// Only render 30fps when in widescreen as the draw func is too slow for the ps2 -#if DISPLAY_WIDTH > 240 - skipFrame++; - skipFrame %= 2; -#endif + // Only render 30fps when in widescreen as the draw func is too slow for the ps2 + // #if DISPLAY_WIDTH > 240 + // skipFrame++; + // skipFrame %= 2; + // #endif if (skipFrame == 0) { VDraw(); } else { @@ -935,7 +941,6 @@ void UpdateTexture(void) gsKit_clear(gsGlobal, GS_SETREG_RGBAQ(0, 0, 0, 0, 0)); - // Table gsKit_prim_sprite_texture(gsGlobal, &screen, 0.0f, // X1 0.0f, // Y2 diff --git a/src/platform/shared/rendering/common.h b/src/platform/shared/rendering/common.h new file mode 100644 index 000000000..7592b4218 --- /dev/null +++ b/src/platform/shared/rendering/common.h @@ -0,0 +1,143 @@ +/* gameplaySP + * + * Copyright (C) 2006 Exophase + * + * This 
program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef COMMON_H +#define COMMON_H + +#define ror(dest, value, shift) dest = ((value) >> (shift)) | ((value) << (32 - (shift))) + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +#if defined(_WIN32) +#define PATH_SEPARATOR "\\" +#define PATH_SEPARATOR_CHAR '\\' +#else +#define PATH_SEPARATOR "/" +#define PATH_SEPARATOR_CHAR '/' +#endif + +/* On x86 we pass arguments via registers instead of stack */ +#ifdef X86_ARCH +#define function_cc __attribute__((regparm(2))) +#else +#define function_cc +#endif + +#ifdef ARM_ARCH + +#define _BSD_SOURCE // sync +#include +#include +#include +#include +#include +#include +#include +#include + +#endif /* ARM_ARCH */ + +// Huge thanks to pollux for the heads up on using native file I/O +// functions on PSP for vastly improved memstick performance. 
+ +#ifdef PSP +#include +#include +#include +#include +#include +#include +#include +#include +#else +typedef unsigned char u8; +typedef signed char s8; +typedef unsigned short int u16; +typedef signed short int s16; +typedef unsigned int u32; +typedef signed int s32; +typedef unsigned long long int u64; +typedef signed long long int s64; +#endif + +#ifdef USE_BGR_FORMAT +#define convert_palette(value) (((value & 0x7FE0) << 1) | (value & 0x1F)) +#elif defined(USE_XBGR1555_FORMAT) +#define convert_palette(value) (value & 0x7FFF) +#else +#define convert_palette(value) (((value & 0x1F) << 11) | ((value & 0x03E0) << 1) | ((value >> 10) & 0x1F)) +#endif + +#define GBA_SCREEN_WIDTH (240) +#define GBA_SCREEN_HEIGHT (160) +#define GBA_SCREEN_PITCH (240) + +typedef u32 fixed16_16; +typedef u32 fixed8_24; + +#define float_to_fp16_16(value) (fixed16_16)((value)*65536.0) + +#define fp16_16_to_float(value) (float)((value) / 65536.0) + +#define u32_to_fp16_16(value) ((value) << 16) + +#define fp16_16_to_u32(value) ((value) >> 16) + +#define fp16_16_fractional_part(value) ((value)&0xFFFF) + +#define float_to_fp8_24(value) (fixed8_24)((value)*16777216.0) + +#define fp8_24_fractional_part(value) ((value)&0xFFFFFF) + +#define fixed_div(numerator, denominator, bits) (((numerator * (1 << bits)) + (denominator / 2)) / denominator) + +#define address8(base, offset) *((u8 *)((u8 *)base + (offset))) + +#define address16(base, offset) *((u16 *)((u8 *)base + (offset))) + +#define address32(base, offset) *((u32 *)((u8 *)base + (offset))) + +#define eswap8(value) (value) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define eswap16(value) __builtin_bswap16(value) +#define eswap32(value) __builtin_bswap32(value) +#else +#define eswap16(value) (value) +#define eswap32(value) (value) +#endif + +#define readaddress8(base, offset) eswap8(address8(base, offset)) +#define readaddress16(base, offset) eswap16(address16(base, offset)) +#define readaddress32(base, offset) eswap32(address32(base, offset)) 
+ +#define read_ioreg(regnum) (eswap16(io_registers[(regnum)])) +#define write_ioreg(regnum, val) io_registers[(regnum)] = eswap16(val) +#define read_ioreg32(regnum) (read_ioreg(regnum) | (read_ioreg((regnum) + 1) << 16)) + +#define read_dmareg(regnum, dmachan) (eswap16(io_registers[(regnum) + (dmachan)*6])) +#define write_dmareg(regnum, dmachan, val) io_registers[(regnum) + (dmachan)*6] = eswap16(val) + +#include +#include +#include +#include +#include + +#endif \ No newline at end of file diff --git a/src/platform/shared/rendering/sw_renderer_fast.c b/src/platform/shared/rendering/sw_renderer_fast.c index 9a1283d2e..c2a839f0a 100644 --- a/src/platform/shared/rendering/sw_renderer_fast.c +++ b/src/platform/shared/rendering/sw_renderer_fast.c @@ -1,24 +1,27 @@ -// sw_renderer_fast.c -- single-pass back-to-front gba ppu renderer -// -// the default renderer does multiple passes per scanline which thrashes -// the data cache on older platforms with tiny L1 and no L2 - -// this one composites everything in one pass per scanline, painting -// layers directly into the output buffer from back to front. a -// layerIds[] side-buffer tracks what wrote each pixel so alpha -// blending can find its target-b inline. -// -// 4bpp text bgs get a batched path that reads one u32 per 8 pixels. -// 8bpp and mosaic bgs fall back to per-pixel. sprites are pre-filtered -// per scanline so we only touch the ones that actually matter. +/* gameplaySP + * + * Copyright (C) 2006 Exophase + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ #include "config.h" - #if RENDERER == RENDERER_SOFTWARE_FAST +#include "global.h" #include -#include -#include #include #include "global.h" @@ -26,1155 +29,3690 @@ #include "gba/defines.h" #include "gba/io_reg.h" #include "gba/types.h" + #include "platform/shared/dma.h" -#include "platform/shared/rendering/sw_renderer_common.h" -extern IntrFunc gIntrTable[16]; -extern uint8_t REG_BASE[IO_SIZE]; -extern uint16_t PLTT[PLTT_SIZE / sizeof(uint16_t)]; -extern uint8_t VRAM[VRAM_SIZE]; -extern uint8_t OAM[OAM_SIZE]; -extern const u8 gOamShapesSizes[12][2]; +#define eswap16(value) (value) +#define eswap32(value) (value) -#ifndef TILE_WIDTH -#define TILE_WIDTH 8 -#endif +#define GBA_SCREEN_PITCH DISPLAY_WIDTH + +typedef u32 fixed16_16; +typedef u32 fixed8_24; + +#define float_to_fp16_16(value) (fixed16_16)((value)*65536.0) + +#define fp16_16_to_float(value) (float)((value) / 65536.0) + +#define u32_to_fp16_16(value) ((value) << 16) + +#define fp16_16_to_u32(value) ((value) >> 16) + +#define fp16_16_fractional_part(value) ((value)&0xFFFF) + +#define float_to_fp8_24(value) (fixed8_24)((value)*16777216.0) + +#define fp8_24_fractional_part(value) ((value)&0xFFFFFF) + +#define fixed_div(numerator, denominator, bits) (((numerator * (1 << bits)) + (denominator / 2)) / denominator) + +#define read_ioreg(regaddr) (eswap16(*(u16 *)(regaddr))) +#define read_ioreg32(regaddr) (read_ioreg(regaddr) | (read_ioreg((regaddr) + sizeof(u16)) << 16)) + +#define REG_ADDR_BGxCNT(n) (REG_ADDR_BG0CNT + ((n) * sizeof(u16))) + +#define convert_palette(value) (value & 0x7FFF) + +u16 *gba_screen_pixels = NULL; + +#define get_screen_pixels() gba_screen_pixels +#define get_screen_pitch() GBA_SCREEN_PITCH + +typedef struct { + u16 attr0, attr1, attr2, attr3; +} t_oam; + +void 
update_scanline(void); +void video_reload_counters(void); + +extern s32 affine_reference_x[2]; +extern s32 affine_reference_y[2]; + +typedef void (*tile_render_function)(u32 layer_number, u32 start, u32 end, void *dest_ptr); +typedef void (*bitmap_render_function)(u32 start, u32 end, void *dest_ptr); + +typedef struct { + tile_render_function normal_render_base; + tile_render_function normal_render_transparent; + tile_render_function alpha_render_base; + tile_render_function alpha_render_transparent; + tile_render_function color16_render_base; + tile_render_function color16_render_transparent; + tile_render_function color32_render_base; + tile_render_function color32_render_transparent; +} tile_layer_render_struct; + +typedef struct { + bitmap_render_function normal_render; +} bitmap_layer_render_struct; + +typedef enum { filter_nearest, filter_bilinear } video_filter_type; + +static void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline, u32 enable_flags, u32 dispcnt, u32 bldcnt, + const tile_layer_render_struct *layer_renderers); +static void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline, u32 enable_flags, u32 dispcnt, u32 bldcnt, + const bitmap_layer_render_struct *layer_renderers); + +#define tile_expand_base_normal(index) \ + current_pixel = palette[current_pixel]; \ + dest_ptr[index] = current_pixel + +#define tile_expand_base_normal_mode4(index) \ + if (current_pixel != 0) { \ + current_pixel = palette[current_pixel]; \ + dest_ptr[index] = current_pixel; \ + } + +#define tile_expand_transparent_normal(index) tile_expand_base_normal(index) + +#define tile_expand_copy(index) dest_ptr[index] = copy_ptr[index] + +#define advance_dest_ptr_base(delta) dest_ptr += delta + +#define advance_dest_ptr_transparent(delta) advance_dest_ptr_base(delta) + +#define advance_dest_ptr_copy(delta) \ + advance_dest_ptr_base(delta); \ + copy_ptr += delta + +#define color_combine_mask_a(layer) ((read_ioreg(REG_ADDR_BLDCNT) >> layer) & 0x01) + 
+// For color blending operations, creates a mask with bit 10 set if the layer is target B +// and bit 9 set if the layer is target A; fully parenthesized so it composes in larger expressions. + +#define color_combine_mask(layer) ((color_combine_mask_a(layer) | ((read_ioreg(REG_ADDR_BLDCNT) >> ((layer) + 7)) & 0x02)) << 9) + +// For alpha blending renderers, draw the palette index (9bpp) and +// layer bits rather than the raw RGB. For the base this should write to +// the 32bit location directly. + +#define tile_expand_base_alpha(index) dest_ptr[index] = current_pixel | pixel_combine + +#define tile_expand_base_bg(index) dest_ptr[index] = bg_combine + +// For layered (transparent) writes this should shift the "stack" and write +// to the bottom. This will preserve the topmost pixel and the most recent +// one. + +#define tile_expand_transparent_alpha(index) dest_ptr[index] = (dest_ptr[index] << 16) | current_pixel | pixel_combine + +// OBJ should only shift if the top isn't already OBJ +#define tile_expand_transparent_alpha_obj(index) \ + dest = dest_ptr[index]; \ + if (dest & 0x00000100) \ + dest_ptr[index] = (dest & 0xFFFF0000) | current_pixel | pixel_combine; \ + else \ + dest_ptr[index] = (dest << 16) | current_pixel | pixel_combine; + +// For color effects that don't need to preserve the previous layer. +// The color32 version should be used with 32bit wide dest_ptr so as to be +// compatible with alpha combine on top of it. + +#define tile_expand_base_color16(index) dest_ptr[index] = current_pixel | pixel_combine + +#define tile_expand_transparent_color16(index) tile_expand_base_color16(index) + +#define tile_expand_base_color32(index) tile_expand_base_color16(index) + +#define tile_expand_transparent_color32(index) tile_expand_base_color16(index) + +// Operations for isolating 8bpp pixels within 32bit pixel blocks. 
+ +#define tile_8bpp_pixel_op_mask(op_param) current_pixel = current_pixels & 0xFF + +#define tile_8bpp_pixel_op_shift_mask(shift) current_pixel = (current_pixels >> shift) & 0xFF + +#define tile_8bpp_pixel_op_shift(shift) current_pixel = current_pixels >> shift + +#define tile_8bpp_pixel_op_none(shift) + +// Base should always draw raw in 8bpp mode; color 0 will be drawn where +// color 0 is. + +#define tile_8bpp_draw_base_normal(index) tile_expand_base_normal(index) + +#define tile_8bpp_draw_base_alpha(index) \ + if (current_pixel) { \ + tile_expand_base_alpha(index); \ + } else { \ + tile_expand_base_bg(index); \ + } + +#define tile_8bpp_draw_base_color16(index) tile_8bpp_draw_base_alpha(index) + +#define tile_8bpp_draw_base_color32(index) tile_8bpp_draw_base_alpha(index) + +#define tile_8bpp_draw_base(index, op, op_param, alpha_op) \ + tile_8bpp_pixel_op_##op(op_param); \ + tile_8bpp_draw_base_##alpha_op(index) + +// Transparent (layered) writes should only replace what is there if the +// pixel is not transparent (zero) + +#define tile_8bpp_draw_transparent(index, op, op_param, alpha_op) \ + tile_8bpp_pixel_op_##op(op_param); \ + if (current_pixel) { \ + tile_expand_transparent_##alpha_op(index); \ + } + +#define tile_8bpp_draw_copy(index, op, op_param, alpha_op) \ + tile_8bpp_pixel_op_##op(op_param); \ + if (current_pixel) { \ + tile_expand_copy(index); \ + } + +// Get the current tile from the map in 8bpp mode + +#define get_tile_8bpp() \ + current_tile = eswap16(*map_ptr); \ + tile_ptr = tile_base + ((current_tile & 0x3FF) * 64) + +// Draw half of a tile in 8bpp mode, for base renderer + +#define tile_8bpp_draw_four_noflip(index, combine_op, alpha_op) \ + tile_8bpp_draw_##combine_op(index + 0, mask, 0, alpha_op); \ + tile_8bpp_draw_##combine_op(index + 1, shift_mask, 8, alpha_op); \ + tile_8bpp_draw_##combine_op(index + 2, shift_mask, 16, alpha_op); \ + tile_8bpp_draw_##combine_op(index + 3, shift, 24, alpha_op) + +// Like the above, but draws the half-tile 
horizontally flipped + +#define tile_8bpp_draw_four_flip(index, combine_op, alpha_op) \ + tile_8bpp_draw_##combine_op(index + 3, mask, 0, alpha_op); \ + tile_8bpp_draw_##combine_op(index + 2, shift_mask, 8, alpha_op); \ + tile_8bpp_draw_##combine_op(index + 1, shift_mask, 16, alpha_op); \ + tile_8bpp_draw_##combine_op(index + 0, shift, 24, alpha_op) + +#define tile_8bpp_draw_four_base(index, alpha_op, flip_op) tile_8bpp_draw_four_##flip_op(index, base, alpha_op) + +// Draw half of a tile in 8bpp mode, for transparent renderer; as an +// optimization the entire thing is checked against zero (in transparent +// capable renders it is more likely for the pixels to be transparent than +// opaque) + +#define tile_8bpp_draw_four_transparent(index, alpha_op, flip_op) \ + if (current_pixels != 0) { \ + tile_8bpp_draw_four_##flip_op(index, transparent, alpha_op); \ + } + +#define tile_8bpp_draw_four_copy(index, alpha_op, flip_op) \ + if (current_pixels != 0) { \ + tile_8bpp_draw_four_##flip_op(index, copy, alpha_op); \ + } + +// Helper macro for drawing 8bpp tiles clipped against the edge of the screen + +#define partial_tile_8bpp(combine_op, alpha_op) \ + for (i = 0; i < partial_tile_run; i++) { \ + tile_8bpp_draw_##combine_op(0, mask, 0, alpha_op); \ + current_pixels >>= 8; \ + advance_dest_ptr_##combine_op(1); \ + } + +// Draws 8bpp tiles clipped against the left side of the screen, +// partial_tile_offset indicates how much clipped in it is, partial_tile_run +// indicates how much it should draw. 
+ +#define partial_tile_right_noflip_8bpp(combine_op, alpha_op) \ + if (partial_tile_offset >= 4) { \ + current_pixels = eswap32(*((u32 *)(tile_ptr + 4))) >> ((partial_tile_offset - 4) * 8); \ + partial_tile_8bpp(combine_op, alpha_op); \ + } else { \ + partial_tile_run -= 4; \ + current_pixels = eswap32(*((u32 *)tile_ptr)) >> (partial_tile_offset * 8); \ + partial_tile_8bpp(combine_op, alpha_op); \ + current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ + tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip); \ + advance_dest_ptr_##combine_op(4); \ + } + +// Draws 8bpp tiles clipped against both the left and right side of the +// screen, IE, runs of less than 8 - partial_tile_offset. + +#define partial_tile_mid_noflip_8bpp(combine_op, alpha_op) \ + if (partial_tile_offset >= 4) { \ + current_pixels = eswap32(*((u32 *)(tile_ptr + 4))) >> ((partial_tile_offset - 4) * 8); \ + } else { \ + current_pixels = eswap32(*((u32 *)tile_ptr)) >> (partial_tile_offset * 8); \ + if ((partial_tile_offset + partial_tile_run) > 4) { \ + u32 old_run = partial_tile_run; \ + partial_tile_run = 4 - partial_tile_offset; \ + partial_tile_8bpp(combine_op, alpha_op); \ + partial_tile_run = old_run - partial_tile_run; \ + current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ + } \ + } \ + partial_tile_8bpp(combine_op, alpha_op); + +// Draws 8bpp tiles clipped against the right side of the screen, +// partial_tile_run indicates how much there is to draw. + +#define partial_tile_left_noflip_8bpp(combine_op, alpha_op) \ + if (partial_tile_run >= 4) { \ + current_pixels = eswap32(*((u32 *)tile_ptr)); \ + tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip); \ + advance_dest_ptr_##combine_op(4); \ + tile_ptr += 4; \ + partial_tile_run -= 4; \ + } \ + \ + current_pixels = eswap32(*((u32 *)(tile_ptr))); \ + partial_tile_8bpp(combine_op, alpha_op) + +// Draws a non-clipped (complete) 8bpp tile. 
+ +#define tile_noflip_8bpp(combine_op, alpha_op) \ + current_pixels = eswap32(*((u32 *)tile_ptr)); \ + tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip); \ + current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ + tile_8bpp_draw_four_##combine_op(4, alpha_op, noflip) + +// Like the above versions but draws flipped tiles. When partial_tile_offset is below 4 the upper word is shifted left by partial_tile_offset * 8, which keeps the shift count in the 0..24 range. + +#define partial_tile_flip_8bpp(combine_op, alpha_op) \ + for (i = 0; i < partial_tile_run; i++) { \ + tile_8bpp_draw_##combine_op(0, shift, 24, alpha_op); \ + current_pixels <<= 8; \ + advance_dest_ptr_##combine_op(1); \ + } + +#define partial_tile_right_flip_8bpp(combine_op, alpha_op) \ + if (partial_tile_offset >= 4) { \ + current_pixels = eswap32(*((u32 *)tile_ptr)) << ((partial_tile_offset - 4) * 8); \ + partial_tile_flip_8bpp(combine_op, alpha_op); \ + } else { \ + partial_tile_run -= 4; \ + current_pixels = eswap32(*((u32 *)(tile_ptr + 4))) << (partial_tile_offset * 8); \ + partial_tile_flip_8bpp(combine_op, alpha_op); \ + current_pixels = eswap32(*((u32 *)tile_ptr)); \ + tile_8bpp_draw_four_##combine_op(0, alpha_op, flip); \ + advance_dest_ptr_##combine_op(4); \ + } + +#define partial_tile_mid_flip_8bpp(combine_op, alpha_op) \ + if (partial_tile_offset >= 4) \ + current_pixels = eswap32(*((u32 *)tile_ptr)) << ((partial_tile_offset - 4) * 8); \ + else { \ + current_pixels = eswap32(*((u32 *)(tile_ptr + 4))) << (partial_tile_offset * 8); \ + \ + if ((partial_tile_offset + partial_tile_run) > 4) { \ + u32 old_run = partial_tile_run; \ + partial_tile_run = 4 - partial_tile_offset; \ + partial_tile_flip_8bpp(combine_op, alpha_op); \ + partial_tile_run = old_run - partial_tile_run; \ + current_pixels = eswap32(*((u32 *)(tile_ptr))); \ + } \ + } \ + partial_tile_flip_8bpp(combine_op, alpha_op); + +#define partial_tile_left_flip_8bpp(combine_op, alpha_op) \ + if (partial_tile_run >= 4) { \ + current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ + tile_8bpp_draw_four_##combine_op(0, alpha_op, flip); \ + 
advance_dest_ptr_##combine_op(4); \ + tile_ptr -= 4; \ + partial_tile_run -= 4; \ + } \ + \ + current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ + partial_tile_flip_8bpp(combine_op, alpha_op) + +#define tile_flip_8bpp(combine_op, alpha_op) \ + current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ + tile_8bpp_draw_four_##combine_op(0, alpha_op, flip); \ + current_pixels = eswap32(*((u32 *)tile_ptr)); \ + tile_8bpp_draw_four_##combine_op(4, alpha_op, flip) + +// Operations for isolating 4bpp tiles in a 32bit block + +#define tile_4bpp_pixel_op_mask(op_param) current_pixel = current_pixels & 0x0F + +#define tile_4bpp_pixel_op_shift_mask(shift) current_pixel = (current_pixels >> shift) & 0x0F + +#define tile_4bpp_pixel_op_shift(shift) current_pixel = current_pixels >> shift + +#define tile_4bpp_pixel_op_none(op_param) + +// Draws a single 4bpp pixel as base, normal renderer; checks to see if the +// pixel is zero because if so the current palette should not be applied. +// These ifs can be replaced with a lookup table, may or may not be superior +// this way, should be benchmarked. The lookup table would be from 0-255 +// identity map except for multiples of 16, which would map to 0. + +#define tile_4bpp_draw_base_normal(index) \ + if (current_pixel) \ + current_pixel |= current_palette; \ + tile_expand_base_normal(index); + +#define tile_4bpp_draw_base_alpha(index) \ + if (current_pixel) { \ + current_pixel |= current_palette; \ + tile_expand_base_alpha(index); \ + } else { \ + tile_expand_base_bg(index); \ + } + +#define tile_4bpp_draw_base_color16(index) tile_4bpp_draw_base_alpha(index) + +#define tile_4bpp_draw_base_color32(index) tile_4bpp_draw_base_alpha(index) + +#define tile_4bpp_draw_base(index, op, op_param, alpha_op) \ + tile_4bpp_pixel_op_##op(op_param); \ + tile_4bpp_draw_base_##alpha_op(index) + +// Draws a single 4bpp pixel as layered, if not transparent. 
+ +#define tile_4bpp_draw_transparent(index, op, op_param, alpha_op) \ + tile_4bpp_pixel_op_##op(op_param); \ + if (current_pixel) { \ + current_pixel |= current_palette; \ + tile_expand_transparent_##alpha_op(index); \ + } + +#define tile_4bpp_draw_copy(index, op, op_param, alpha_op) \ + tile_4bpp_pixel_op_##op(op_param); \ + if (current_pixel) { \ + current_pixel |= current_palette; \ + tile_expand_copy(index); \ + } + +// Draws eight background pixels in transparent mode, for alpha or normal +// renderers. + +#define tile_4bpp_draw_eight_base_zero(value) \ + dest_ptr[0] = value; \ + dest_ptr[1] = value; \ + dest_ptr[2] = value; \ + dest_ptr[3] = value; \ + dest_ptr[4] = value; \ + dest_ptr[5] = value; \ + dest_ptr[6] = value; \ + dest_ptr[7] = value + +// Draws eight background pixels for the alpha renderer, basically color zero +// with the background flag high. + +#define tile_4bpp_draw_eight_base_zero_alpha() tile_4bpp_draw_eight_base_zero(bg_combine) + +#define tile_4bpp_draw_eight_base_zero_color16() tile_4bpp_draw_eight_base_zero_alpha() + +#define tile_4bpp_draw_eight_base_zero_color32() tile_4bpp_draw_eight_base_zero_alpha() + +// Draws eight background pixels for the normal renderer, just a bunch of +// zeros. + +#define tile_4bpp_draw_eight_base_zero_normal() \ + current_pixel = palette[0]; \ + tile_4bpp_draw_eight_base_zero(current_pixel) + +// Draws eight 4bpp pixels. 
+ +#define tile_4bpp_draw_eight_noflip(combine_op, alpha_op) \ + tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op); \ + tile_4bpp_draw_##combine_op(1, shift_mask, 4, alpha_op); \ + tile_4bpp_draw_##combine_op(2, shift_mask, 8, alpha_op); \ + tile_4bpp_draw_##combine_op(3, shift_mask, 12, alpha_op); \ + tile_4bpp_draw_##combine_op(4, shift_mask, 16, alpha_op); \ + tile_4bpp_draw_##combine_op(5, shift_mask, 20, alpha_op); \ + tile_4bpp_draw_##combine_op(6, shift_mask, 24, alpha_op); \ + tile_4bpp_draw_##combine_op(7, shift, 28, alpha_op) + +// Draws eight 4bpp pixels in reverse order (for hflip). + +#define tile_4bpp_draw_eight_flip(combine_op, alpha_op) \ + tile_4bpp_draw_##combine_op(7, mask, 0, alpha_op); \ + tile_4bpp_draw_##combine_op(6, shift_mask, 4, alpha_op); \ + tile_4bpp_draw_##combine_op(5, shift_mask, 8, alpha_op); \ + tile_4bpp_draw_##combine_op(4, shift_mask, 12, alpha_op); \ + tile_4bpp_draw_##combine_op(3, shift_mask, 16, alpha_op); \ + tile_4bpp_draw_##combine_op(2, shift_mask, 20, alpha_op); \ + tile_4bpp_draw_##combine_op(1, shift_mask, 24, alpha_op); \ + tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op) + +// Draws eight 4bpp pixels in base mode, checks if all are zero, if so draws +// the appropriate background pixels. + +#define tile_4bpp_draw_eight_base(alpha_op, flip_op) \ + if (current_pixels != 0) { \ + tile_4bpp_draw_eight_##flip_op(base, alpha_op); \ + } else { \ + tile_4bpp_draw_eight_base_zero_##alpha_op(); \ + } + +// Draws eight 4bpp pixels in transparent (layered) mode, checks if all are +// zero and if so draws nothing. + +#define tile_4bpp_draw_eight_transparent(alpha_op, flip_op) \ + if (current_pixels != 0) { \ + tile_4bpp_draw_eight_##flip_op(transparent, alpha_op); \ + } + +#define tile_4bpp_draw_eight_copy(alpha_op, flip_op) \ + if (current_pixels != 0) { \ + tile_4bpp_draw_eight_##flip_op(copy, alpha_op); \ + } + +// Gets the current tile in 4bpp mode, also getting the current palette and +// the pixel block. 
+ +#define get_tile_4bpp() \ + current_tile = eswap16(*map_ptr); \ + current_palette = (current_tile >> 12) << 4; \ + tile_ptr = tile_base + ((current_tile & 0x3FF) * 32); + +// Helper macro for drawing clipped 4bpp tiles. + +#define partial_tile_4bpp(combine_op, alpha_op) \ + for (i = 0; i < partial_tile_run; i++) { \ + tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op); \ + current_pixels >>= 4; \ + advance_dest_ptr_##combine_op(1); \ + } + +// Draws a 4bpp tile clipped against the left edge of the screen. +// partial_tile_offset is how far in it's clipped, partial_tile_run is +// how many to draw. + +#define partial_tile_right_noflip_4bpp(combine_op, alpha_op) \ + current_pixels = eswap32(*((u32 *)tile_ptr)) >> (partial_tile_offset * 4); \ + partial_tile_4bpp(combine_op, alpha_op) + +// Draws a 4bpp tile clipped against both edges of the screen, same as right. + +#define partial_tile_mid_noflip_4bpp(combine_op, alpha_op) partial_tile_right_noflip_4bpp(combine_op, alpha_op) + +// Draws a 4bpp tile clipped against the right edge of the screen. +// partial_tile_run is how many to draw. + +#define partial_tile_left_noflip_4bpp(combine_op, alpha_op) \ + current_pixels = eswap32(*((u32 *)tile_ptr)); \ + partial_tile_4bpp(combine_op, alpha_op) + +// Draws a complete 4bpp tile row (not clipped) +#define tile_noflip_4bpp(combine_op, alpha_op) \ + current_pixels = eswap32(*((u32 *)tile_ptr)); \ + tile_4bpp_draw_eight_##combine_op(alpha_op, noflip) + +// Like the above, but draws flipped tiles. 
+ +#define partial_tile_flip_4bpp(combine_op, alpha_op) \ + for (i = 0; i < partial_tile_run; i++) { \ + tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op); \ + current_pixels <<= 4; \ + advance_dest_ptr_##combine_op(1); \ + } + +#define partial_tile_right_flip_4bpp(combine_op, alpha_op) \ + current_pixels = eswap32(*((u32 *)tile_ptr)) << (partial_tile_offset * 4); \ + partial_tile_flip_4bpp(combine_op, alpha_op) + +#define partial_tile_mid_flip_4bpp(combine_op, alpha_op) partial_tile_right_flip_4bpp(combine_op, alpha_op) + +#define partial_tile_left_flip_4bpp(combine_op, alpha_op) \ + current_pixels = eswap32(*((u32 *)tile_ptr)); \ + partial_tile_flip_4bpp(combine_op, alpha_op) + +#define tile_flip_4bpp(combine_op, alpha_op) \ + current_pixels = eswap32(*((u32 *)tile_ptr)); \ + tile_4bpp_draw_eight_##combine_op(alpha_op, flip) + +// Draws a single (partial or complete) tile from the tilemap, flipping +// as necessary. + +#define single_tile_map(tile_type, combine_op, color_depth, alpha_op) \ + get_tile_##color_depth(); \ + if (current_tile & 0x800) \ + tile_ptr += vertical_pixel_flip; \ + \ + if (current_tile & 0x400) { \ + tile_type##_flip_##color_depth(combine_op, alpha_op); \ + } else { \ + tile_type##_noflip_##color_depth(combine_op, alpha_op); \ + } + +#define single_tile_map_base_4bpp_color16(tile_type) \ + get_tile_4bpp(); \ + if (current_tile & 0x800) \ + tile_ptr += vertical_pixel_flip; \ + \ + if (current_tile & 0x400) { \ + tile_type##_flip_4bpp(base, color16); \ + } else { \ + tile_type##_noflip_4bpp(base, color16); \ + } + +// Draws multiple sequential tiles from the tilemap, hflips and vflips as +// necessary. 
+ +#define multiple_tile_map(combine_op, color_depth, alpha_op) \ + for (i = 0; i < tile_run; i++) { \ + single_tile_map(tile, combine_op, color_depth, alpha_op); \ + advance_dest_ptr_##combine_op(8); \ + map_ptr++; \ + } + +#define multiple_tile_map_transparent_8bpp_color16() \ + for (i = 0; i < tile_run; i++) { \ + single_tile_map(tile, transparent, 8bpp, color16); \ + advance_dest_ptr_transparent(8); \ + map_ptr++; \ + } + +#define multiple_tile_map_transparent_4bpp_color16() \ + for (i = 0; i < tile_run; i++) { \ + single_tile_map(tile, transparent, 4bpp, color16); \ + advance_dest_ptr_transparent(8); \ + map_ptr++; \ + } + +#define multiple_tile_map_base_8bpp_color16() \ + for (i = 0; i < tile_run; i++) { \ + single_tile_map(tile, base, 8bpp, color16); \ + advance_dest_ptr_base(8); \ + map_ptr++; \ + } + +#define multiple_tile_map_base_4bpp_color16() \ + for (i = 0; i < tile_run; i++) { \ + single_tile_map_base_4bpp_color16(tile); \ + advance_dest_ptr_base(8); \ + map_ptr++; \ + } + +#define multiple_tile_map_transparent_8bpp_normal() \ + for (i = 0; i < tile_run; i++) { \ + single_tile_map(tile, transparent, 8bpp, normal); \ + advance_dest_ptr_transparent(8); \ + map_ptr++; \ + } + +#define multiple_tile_map_transparent_4bpp_normal() \ + for (i = 0; i < tile_run; i++) { \ + single_tile_map(tile, transparent, 4bpp, normal); \ + advance_dest_ptr_transparent(8); \ + map_ptr++; \ + } + +#define multiple_tile_map_base_8bpp_normal() \ + for (i = 0; i < tile_run; i++) { \ + single_tile_map(tile, base, 8bpp, normal); \ + advance_dest_ptr_base(8); \ + map_ptr++; \ + } + +#define multiple_tile_map_base_4bpp_normal() \ + for (i = 0; i < tile_run; i++) { \ + single_tile_map(tile, base, 4bpp, normal); \ + advance_dest_ptr_base(8); \ + map_ptr++; \ + } + +// Draws a partial tile from a tilemap clipped against the left edge of the +// screen. 
+ +#define partial_tile_right_map(combine_op, color_depth, alpha_op) \ + single_tile_map(partial_tile_right, combine_op, color_depth, alpha_op); \ + map_ptr++ + +// Draws a partial tile from a tilemap clipped against both edges of the +// screen. + +#define partial_tile_mid_map(combine_op, color_depth, alpha_op) single_tile_map(partial_tile_mid, combine_op, color_depth, alpha_op) + +// Draws a partial tile from a tilemap clipped against the right edge of the +// screen. + +#define partial_tile_left_map(combine_op, color_depth, alpha_op) single_tile_map(partial_tile_left, combine_op, color_depth, alpha_op) + +// Advances a non-flipped 4bpp obj to the next tile. + +#define obj_advance_noflip_4bpp() tile_ptr += 32 + +// Advances a non-flipped 8bpp obj to the next tile. + +#define obj_advance_noflip_8bpp() tile_ptr += 64 + +// Advances a flipped 4bpp obj to the next tile. + +#define obj_advance_flip_4bpp() tile_ptr -= 32 + +// Advances a flipped 8bpp obj to the next tile. + +#define obj_advance_flip_8bpp() tile_ptr -= 64 + +// Draws multiple sequential tiles from an obj, flip_op determines if it should +// be flipped or not (set to flip or noflip) + +#define multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op) \ + for (i = 0; i < tile_run; i++) { \ + tile_##flip_op##_##color_depth(combine_op, alpha_op); \ + obj_advance_##flip_op##_##color_depth(); \ + advance_dest_ptr_##combine_op(8); \ + } + +// Draws an obj's tile clipped against the left side of the screen + +#define partial_tile_right_obj(combine_op, color_depth, alpha_op, flip_op) \ + partial_tile_right_##flip_op##_##color_depth(combine_op, alpha_op); \ + obj_advance_##flip_op##_##color_depth() + +// Draws an obj's tile clipped against both sides of the screen + +#define partial_tile_mid_obj(combine_op, color_depth, alpha_op, flip_op) partial_tile_mid_##flip_op##_##color_depth(combine_op, alpha_op) + +// Draws an obj's tile clipped against the right side of the screen + +#define 
partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op) partial_tile_left_##flip_op##_##color_depth(combine_op, alpha_op) + +// Extra variables specific for 8bpp/4bpp tile renderers. + +#define tile_extra_variables_8bpp() + +#define tile_extra_variables_4bpp() u32 current_palette + +// Byte lengths of complete tiles and tile rows in 4bpp and 8bpp. + +#define tile_width_4bpp 4 +#define tile_size_4bpp 32 +#define tile_width_8bpp 8 +#define tile_size_8bpp 64 + +#define render_scanline_dest_normal u16 +#define render_scanline_dest_alpha u32 +#define render_scanline_dest_alpha_obj u32 +#define render_scanline_dest_color16 u16 +#define render_scanline_dest_color32 u32 +#define render_scanline_dest_partial_alpha u32 +#define render_scanline_dest_copy_tile u16 +#define render_scanline_dest_copy_bitmap u16 + +// If rendering a scanline that is not a target A then there's no point in +// keeping what's underneath it because it can't blend with it. + +#define render_scanline_skip_alpha(bg_type, combine_op) \ + if ((pixel_combine & 0x00000200) == 0) { \ + render_scanline_##bg_type##_##combine_op##_color32(layer, start, end, scanline); \ + return; \ + } + +#define render_scanline_extra_variables_base_normal(bg_type) u16 *palette = PLTT + +#define render_scanline_extra_variables_base_alpha(bg_type) \ + u32 bg_combine = color_combine_mask(5); \ + u32 pixel_combine = color_combine_mask(layer) | (bg_combine << 16); \ + render_scanline_skip_alpha(bg_type, base) + +#define render_scanline_extra_variables_base_color() \ + u32 bg_combine = color_combine_mask(5); \ + u32 pixel_combine = color_combine_mask(layer) + +#define render_scanline_extra_variables_base_color16(bg_type) render_scanline_extra_variables_base_color() + +#define render_scanline_extra_variables_base_color32(bg_type) render_scanline_extra_variables_base_color() + +#define render_scanline_extra_variables_transparent_normal(bg_type) render_scanline_extra_variables_base_normal(bg_type) + +#define 
render_scanline_extra_variables_transparent_alpha(bg_type) \ + u32 pixel_combine = color_combine_mask(layer); \ + render_scanline_skip_alpha(bg_type, transparent) + +#define render_scanline_extra_variables_transparent_color() u32 pixel_combine = color_combine_mask(layer) + +#define render_scanline_extra_variables_transparent_color16(bg_type) render_scanline_extra_variables_transparent_color() + +#define render_scanline_extra_variables_transparent_color32(bg_type) render_scanline_extra_variables_transparent_color() + +static const u32 map_widths[] = { 256, 512, 256, 512 }; + +static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void *scanline) +{ + render_scanline_extra_variables_base_normal(text); + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; + u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; + + u32 current_pixel; + u32 current_pixels; + u32 partial_tile_run = 0; + u32 partial_tile_offset; + u32 tile_run; + u32 i; + render_scanline_dest_normal *dest_ptr = ((render_scanline_dest_normal *)scanline) + start; + + u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_ptr, *second_ptr; + u8 *tile_ptr; + + end -= start; + + if ((map_size & 0x02) && (vertical_offset >= 256)) { + map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); + } else { + map_base += (((vertical_offset % 256) / 8) * 32); + } + + if (map_size & 0x01) { + if (horizontal_offset >= 256) { + horizontal_offset -= 256; + map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + second_ptr = map_base; + } else { + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base + (32 * 32); + } + } else { + horizontal_offset %= 256; + map_ptr = map_base + 
(horizontal_offset / 8); + second_ptr = map_base; + } + + if (bg_control & 0x80) { + /* color depth: 8bpp + * combine: base + * alpha : normal + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; + s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; + + tile_extra_variables_8bpp(); + + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(base, 8bpp, normal); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(base, 8bpp, normal); + } + } + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(base, 8bpp, normal); + } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map_base_8bpp_normal(); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map_base_8bpp_normal(); + + partial_tile_run = end % 8; + if (partial_tile_run) { + partial_tile_left_map(base, 8bpp, normal); + } + } else { + /* color depth: 4bpp + * combine: base + * alpha : normal + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; + s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; + + tile_extra_variables_4bpp(); + + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + 
partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(base, 4bpp, normal); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(base, 4bpp, normal); + } + } + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(base, 4bpp, normal); + } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map_base_4bpp_normal(); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map_base_4bpp_normal(); + + partial_tile_run = end % 8; + if (partial_tile_run) { + partial_tile_left_map(base, 4bpp, normal); + } + } +} + +static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 end, void *scanline) +{ + render_scanline_extra_variables_transparent_normal(text); + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; + u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; + u32 current_pixel; + u32 current_pixels; + u32 partial_tile_run = 0; + u32 partial_tile_offset; + u32 tile_run; + u32 i; + render_scanline_dest_normal *dest_ptr = ((render_scanline_dest_normal *)scanline) + start; + + u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_ptr, *second_ptr; + u8 *tile_ptr; + + end -= start; + + if ((map_size & 0x02) && (vertical_offset >= 256)) { + map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); + } else { + map_base += (((vertical_offset % 256) / 8) * 32); + } + + if (map_size & 0x01) { + if (horizontal_offset >= 256) { + horizontal_offset -= 256; + map_ptr = map_base + 
(32 * 32) + (horizontal_offset / 8); + second_ptr = map_base; + } else { + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base + (32 * 32); + } + } else { + horizontal_offset %= 256; + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base; + } + + if (bg_control & 0x80) { + /* color depth: 8bpp + * combine: transparent + * alpha : normal + */ + + /* Render a single scanline of text tiles */ + + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; + s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_8bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(transparent, 8bpp, normal); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(transparent, 8bpp, normal); + } + } + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(transparent, 8bpp, normal); + } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map_transparent_8bpp_normal(); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map_transparent_8bpp_normal(); + + partial_tile_run = end % 8; + if (partial_tile_run) { + partial_tile_left_map(transparent, 8bpp, normal); + } + } else { + /* color depth: 4bpp + * combine: transparent + * alpha : normal + */ + + /* Render a single scanline of text tiles */ + + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; + s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - 
vertical_pixel_offset; + tile_extra_variables_4bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(transparent, 4bpp, normal); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(transparent, 4bpp, normal); + } + } + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(transparent, 4bpp, normal); + } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map_transparent_4bpp_normal(); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map_transparent_4bpp_normal(); + + partial_tile_run = end % 8; + if (partial_tile_run) { + partial_tile_left_map(transparent, 4bpp, normal); + } + } +} + +static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, void *scanline) +{ + render_scanline_extra_variables_base_color16(text); + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; + u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; + u32 current_pixel; + u32 current_pixels; + u32 partial_tile_run = 0; + u32 partial_tile_offset; + u32 tile_run; + u32 i; + render_scanline_dest_color16 *dest_ptr = ((render_scanline_dest_color16 *)scanline) + start; + + u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_ptr, *second_ptr; + u8 *tile_ptr; + + end -= start; + + if 
((map_size & 0x02) && (vertical_offset >= 256)) { + map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); + } else { + map_base += (((vertical_offset % 256) / 8) * 32); + } + + if (map_size & 0x01) { + if (horizontal_offset >= 256) { + horizontal_offset -= 256; + map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + second_ptr = map_base; + } else { + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base + (32 * 32); + } + } else { + horizontal_offset %= 256; + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base; + } + + if (bg_control & 0x80) { + /* color depth: 8bpp + * combine: base + * alpha :color16 + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; + s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_8bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(base, 8bpp, color16); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(base, 8bpp, color16); + } + } + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(base, 8bpp, color16); + } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map_base_8bpp_color16(); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map_base_8bpp_color16(); + + partial_tile_run = end % 8; + if (partial_tile_run) { + partial_tile_left_map(base, 8bpp, color16); + } + } else { + /* color depth: 4bpp 
+ * combine: base + * alpha :color16 + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; + s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_4bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(base, 4bpp, color16); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(base, 4bpp, color16); + } + } + + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(base, 4bpp, color16); + } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map_base_4bpp_color16(); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map_base_4bpp_color16(); + + partial_tile_run = end % 8; + + if (partial_tile_run) { + partial_tile_left_map(base, 4bpp, color16); + } + } +} + +static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 end, void *scanline) +{ + render_scanline_extra_variables_transparent_color16(text); + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; + u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; + u32 current_pixel; + u32 current_pixels; + u32 partial_tile_run = 0; + u32 partial_tile_offset; + u32 tile_run; + u32 i; + 
render_scanline_dest_color16 *dest_ptr = ((render_scanline_dest_color16 *)scanline) + start; + + u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_ptr, *second_ptr; + u8 *tile_ptr; + + end -= start; + + if ((map_size & 0x02) && (vertical_offset >= 256)) { + map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); + } else { + map_base += (((vertical_offset % 256) / 8) * 32); + } + + if (map_size & 0x01) { + if (horizontal_offset >= 256) { + horizontal_offset -= 256; + map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + second_ptr = map_base; + } else { + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base + (32 * 32); + } + } else { + horizontal_offset %= 256; + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base; + } + + if (bg_control & 0x80) { + /* color depth: 8bpp + * combine: transparent + * alpha :color16 + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; + s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_8bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(transparent, 8bpp, color16); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(transparent, 8bpp, color16); + } + } + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(transparent, 8bpp, color16); + } + + tile_run = (pixel_run - partial_tile_run) / 8; + 
multiple_tile_map_transparent_8bpp_color16(); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map_transparent_8bpp_color16(); + + partial_tile_run = end % 8; + if (partial_tile_run) { + partial_tile_left_map(transparent, 8bpp, color16); + } + } else { + /* color depth: 4bpp + * combine: transparent + * alpha :color16 + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; + s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_4bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(transparent, 4bpp, color16); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(transparent, 4bpp, color16); + } + } + + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(transparent, 4bpp, color16); + } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map_transparent_4bpp_color16(); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map_transparent_4bpp_color16(); + + partial_tile_run = end % 8; + + if (partial_tile_run) { + partial_tile_left_map(transparent, 4bpp, color16); + } + } +} + +static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, void *scanline) +{ + render_scanline_extra_variables_base_color32(text); + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 
horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; + u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; + u32 current_pixel; + u32 current_pixels; + u32 partial_tile_run = 0; + u32 partial_tile_offset; + u32 tile_run; + u32 i; + render_scanline_dest_color32 *dest_ptr = ((render_scanline_dest_color32 *)scanline) + start; + + u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_ptr, *second_ptr; + u8 *tile_ptr; + + end -= start; + + if ((map_size & 0x02) && (vertical_offset >= 256)) { + map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); + } else { + map_base += (((vertical_offset % 256) / 8) * 32); + } + + if (map_size & 0x01) { + if (horizontal_offset >= 256) { + horizontal_offset -= 256; + map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + second_ptr = map_base; + } else { + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base + (32 * 32); + } + } else { + horizontal_offset %= 256; + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base; + } + + if (bg_control & 0x80) { + /* color depth: 8bpp + * combine: base + * alpha :color32 + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; + s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_8bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(base, 8bpp, color32); + 
return; + } else { + end -= partial_tile_run; + partial_tile_right_map(base, 8bpp, color32); + } + } + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(base, 8bpp, color32); + } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map(base, 8bpp, color32); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map(base, 8bpp, color32); + + partial_tile_run = end % 8; + if (partial_tile_run) { + partial_tile_left_map(base, 8bpp, color32); + } + } else { + /* color depth: 4bpp + * combine: base + * alpha :color32 + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; + s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_4bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(base, 4bpp, color32); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(base, 4bpp, color32); + } + } + + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(base, 4bpp, color32); + } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map(base, 4bpp, color32); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map(base, 4bpp, color32); + + partial_tile_run = end % 8; + + if (partial_tile_run) { + partial_tile_left_map(base, 4bpp, color32); + } + } +} + +static void render_scanline_text_transparent_color32(u32 layer, u32 
start, u32 end, void *scanline) +{ + render_scanline_extra_variables_transparent_color32(text); + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; + u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; + u32 current_pixel; + u32 current_pixels; + u32 partial_tile_run = 0; + u32 partial_tile_offset; + u32 tile_run; + u32 i; + render_scanline_dest_color32 *dest_ptr = ((render_scanline_dest_color32 *)scanline) + start; + + u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_ptr, *second_ptr; + u8 *tile_ptr; + + end -= start; + + if ((map_size & 0x02) && (vertical_offset >= 256)) { + map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); + } else { + map_base += (((vertical_offset % 256) / 8) * 32); + } + + if (map_size & 0x01) { + if (horizontal_offset >= 256) { + horizontal_offset -= 256; + map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + second_ptr = map_base; + } else { + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base + (32 * 32); + } + } else { + horizontal_offset %= 256; + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base; + } + + if (bg_control & 0x80) { + /* color depth: 8bpp + * combine: transparent + * alpha :color32 + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; + s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_8bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + 
partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(transparent, 8bpp, color32); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(transparent, 8bpp, color32); + } + } -#define IsBGEnabled(n) (((REG_DISPCNT >> 8) & 0xF) & (1 << (n))) - -// mosaic -#define MOSAIC_BG_X (REG_MOSAIC & 0xF) -#define MOSAIC_BG_Y ((REG_MOSAIC >> 4) & 0xF) -#define MOSAIC_SPR_X ((REG_MOSAIC >> 8) & 0xF) -#define MOSAIC_SPR_Y ((REG_MOSAIC >> 12) & 0xF) -#define ApplyMosaicBGX(x) ((x) - ((x) % (MOSAIC_BG_X + 1))) -#define ApplyMosaicBGY(y) ((y) - ((y) % (MOSAIC_BG_Y + 1))) -#define ApplyMosaicSprX(x) ((x) - ((x) % (MOSAIC_SPR_X + 1))) -#define ApplyMosaicSprY(y) ((y) - ((y) % (MOSAIC_SPR_Y + 1))) - -// tilemap entry fields -#define TILE_NUM(e) ((e)&0x3FF) -#define TILE_PALETTE(e) (((e) >> 12) & 0xF) -#define TILE_HFLIP(e) ((e) & (1 << 10)) -#define TILE_VFLIP(e) ((e) & (1 << 11)) - -// window mask bits -#define WINMASK_BG0 (1 << 0) -#define WINMASK_BG1 (1 << 1) -#define WINMASK_BG2 (1 << 2) -#define WINMASK_BG3 (1 << 3) -#define WINMASK_OBJ (1 << 4) -#define WINMASK_CLR (1 << 5) -#define WINMASK_WINOUT (1 << 6) - -// layer ids for blend target tracking -#define LAYER_BG0 0 -#define LAYER_BG1 1 -#define LAYER_BG2 2 -#define LAYER_BG3 3 -#define LAYER_OBJ 4 -#define LAYER_BACKDROP 5 - -static const uint16_t bgMapSizes[][2] = { - { 32, 32 }, - { 64, 32 }, - { 32, 64 }, - { 64, 64 }, -}; + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(transparent, 8bpp, color32); + } -// 16-bit fill using 32-bit writes -static inline void Memset16(uint16_t *dst, uint16_t fill, unsigned int count) -{ - uint32_t fill32 = ((uint32_t)fill << 16) | fill; - uint32_t *dst32 = (uint32_t *)dst; - unsigned int pairs = count >> 1; - for (unsigned int i = 0; i < 
pairs; i++) - dst32[i] = fill32; - if (count & 1) - dst[count - 1] = fill; -} + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map(transparent, 8bpp, color32); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map(transparent, 8bpp, color32); -static inline uint32_t GetBgRefX(int bg) { return (bg == 2) ? REG_BG2X : (bg == 3) ? REG_BG3X : 0; } -static inline uint32_t GetBgRefY(int bg) { return (bg == 2) ? REG_BG2Y : (bg == 3) ? REG_BG3Y : 0; } -static inline uint16_t GetBgPA(int bg) { return (bg == 2) ? REG_BG2PA : (bg == 3) ? REG_BG3PA : 0; } -static inline uint16_t GetBgPB(int bg) { return (bg == 2) ? REG_BG2PB : (bg == 3) ? REG_BG3PB : 0; } -static inline uint16_t GetBgPC(int bg) { return (bg == 2) ? REG_BG2PC : (bg == 3) ? REG_BG3PC : 0; } -static inline uint16_t GetBgPD(int bg) { return (bg == 2) ? REG_BG2PD : (bg == 3) ? REG_BG3PD : 0; } + partial_tile_run = end % 8; -// handles the wraparound case where left > right -static inline bool WindowContainsX(u16 left, u16 right, u16 x) -{ - if (left > right) - return (x >= left || x < right); - return (x >= left && x < right); -} + if (partial_tile_run) { + partial_tile_left_map(transparent, 8bpp, color32); + } + } else { + /* color depth: 4bpp + * combine: transparent + * alpha :color32 + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; + s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_4bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = 
end; + partial_tile_mid_map(transparent, 4bpp, color32); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(transparent, 4bpp, color32); + } + } -// check if a layer can be the target-b for alpha blending -static inline bool IsBlendTargetB(uint8_t layerId, unsigned int bldcnt) -{ - if (layerId <= 3) - return (bldcnt & (1 << (8 + layerId))) != 0; - if (layerId == LAYER_OBJ) - return (bldcnt & BLDCNT_TGT2_OBJ) != 0; - if (layerId == LAYER_BACKDROP) - return (bldcnt & BLDCNT_TGT2_BD) != 0; - return false; -} + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(transparent, 4bpp, color32); + } -// sprites with oam mode 1 always try alpha blending regardless of bldcnt -static inline uint16_t BlendSpritePixel(uint16_t color, unsigned int x, uint16_t *output, uint8_t *layerIds, bool isSemiTransparent, - unsigned int blendMode, unsigned int bldcnt, bool windowsEnabled, uint16_t *winMask, - unsigned int eva, unsigned int evb, unsigned int evy) -{ - bool winAllowsBlend = !windowsEnabled || (winMask && (winMask[x] & WINMASK_CLR)); + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map(transparent, 4bpp, color32); + map_ptr = second_ptr; + end -= pixel_run; + } + tile_run = end / 8; + multiple_tile_map(transparent, 4bpp, color32); - bool doAlpha = (blendMode == 1 && (bldcnt & BLDCNT_TGT1_OBJ) && winAllowsBlend) || isSemiTransparent; + partial_tile_run = end % 8; - if (doAlpha) { - if (IsBlendTargetB(layerIds[x], bldcnt)) - return alphaBlendColor(color, output[x], eva, evb); - } else if ((bldcnt & BLDCNT_TGT1_OBJ) && winAllowsBlend) { - if (blendMode == 2) - return alphaBrightnessIncrease(color, evy); - if (blendMode == 3) - return alphaBrightnessDecrease(color, evy); + if (partial_tile_run) { + partial_tile_left_map(transparent, 4bpp, color32); + } } - - return color; } -// write a bg pixel with inline blend resolution -static inline void WriteBGPixelBlended(unsigned int x, uint8_t 
pixel, const uint16_t *palBase, int bgNum, uint16_t *output, - uint8_t *layerIds, unsigned int blendMode, bool bgIsTargetA, bool useWindows, unsigned int winBgBit, - uint16_t *winMask, unsigned int bldcnt, unsigned int eva, unsigned int evb, unsigned int evy) +static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void *scanline) { - uint16_t color = palBase[pixel] | COLOR_OPAQUE; - - if (useWindows && !(winMask[x] & winBgBit)) - return; - - if (bgIsTargetA && (!useWindows || (winMask[x] & WINMASK_CLR))) { - uint16_t src = color; - switch (blendMode) { - case 1: - if (IsBlendTargetB(layerIds[x], bldcnt)) - color = alphaBlendColor(src, output[x], eva, evb); - break; - case 2: - color = alphaBrightnessIncrease(src, evy); - break; - case 3: - color = alphaBrightnessDecrease(src, evy); - break; - } + render_scanline_extra_variables_base_alpha(text); + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; + u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; + u32 current_pixel; + u32 current_pixels; + u32 partial_tile_run = 0; + u32 partial_tile_offset; + u32 tile_run; + u32 i; + render_scanline_dest_alpha *dest_ptr = ((render_scanline_dest_alpha *)scanline) + start; + + u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_ptr, *second_ptr; + u8 *tile_ptr; + + end -= start; + + if ((map_size & 0x02) && (vertical_offset >= 256)) { + map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); + } else { + map_base += (((vertical_offset % 256) / 8) * 32); } - output[x] = color; - layerIds[x] = bgNum; -} + if (map_size & 0x01) { + if (horizontal_offset >= 256) { + horizontal_offset -= 256; + map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + second_ptr = 
map_base; + } else { + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base + (32 * 32); + } + } else { + horizontal_offset %= 256; + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base; + } -static void RenderTextBG(int bgNum, uint16_t control, uint16_t hoffs, uint16_t voffs, int lineNum, uint16_t *output) -{ - unsigned int charBase = (control >> 2) & 3; - unsigned int screenBase = (control & BGCNT_SCREENBASE_MASK) >> 8; - unsigned int is8bpp = (control >> 7) & 1; - - unsigned int mapW = bgMapSizes[control >> 14][0]; - unsigned int mapPxW = mapW << 3; - unsigned int mapPxH = bgMapSizes[control >> 14][1] << 3; - unsigned int wMask = mapPxW - 1; - unsigned int hMask = mapPxH - 1; - - uint8_t *tiles = (uint8_t *)BG_CHAR_ADDR(charBase); - uint16_t *map = (uint16_t *)BG_SCREEN_ADDR(screenBase); - uint16_t *pal = (uint16_t *)PLTT; - - bool hasMosaic = control & BGCNT_MOSAIC; - if (hasMosaic) - lineNum = ApplyMosaicBGY(lineNum); - - hoffs &= 0x1FF; - voffs &= 0x1FF; - - unsigned int yy = (lineNum + voffs) & hMask; - unsigned int mapY = yy >> 3; - unsigned int tileY = yy & 7; - unsigned int rowBase = mapY * mapW; - - // slow path: 8bpp or mosaic, one pixel at a time - if (hasMosaic || is8bpp) { - for (unsigned int x = 0; x < DISPLAY_WIDTH; x++) { - unsigned int xx = hasMosaic ? (ApplyMosaicBGX(x) + hoffs) & wMask : (x + hoffs) & wMask; - - uint16_t entry = map[rowBase + (xx >> 3)]; - unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int tx = xx & 7; - unsigned int ty = tileY; - if (TILE_HFLIP(entry)) - tx = 7 - tx; - if (TILE_VFLIP(entry)) - ty = 7 - ty; - - if (!is8bpp) { - uint8_t pair = tiles[(tileNum << 5) + (ty << 2) + (tx >> 1)]; - uint8_t pixel = (tx & 1) ? 
(pair >> 4) : (pair & 0xF); - if (pixel) - output[x] = pal[(palNum << 4) + pixel] | COLOR_OPAQUE; - } else { - uint8_t pixel = tiles[(tileNum << 6) + (ty << 3) + tx]; - if (pixel) - output[x] = pal[pixel] | COLOR_OPAQUE; + if (bg_control & 0x80) { + /* color depth: 8bpp + * combine: base + * alpha : alpha + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; + s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_8bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(base, 8bpp, alpha); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(base, 8bpp, alpha); + } } - } - return; - } - - // fast path: 4bpp, read one u32 per tile row, unroll 8 pixels - unsigned int x = 0; - - // left edge: partial tile if scroll isn't tile-aligned - { - unsigned int startX = hoffs & wMask; - unsigned int startOff = startX & 7; - - if (startOff != 0) { - uint16_t entry = map[rowBase + (startX >> 3)]; - unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) - ty = 7 - ty; - bool hflip = TILE_HFLIP(entry); - - uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); - - unsigned int partial = 8 - startOff; - if (partial > DISPLAY_WIDTH) - partial = DISPLAY_WIDTH; - - for (unsigned int t = 0; t < partial && x < DISPLAY_WIDTH; t++, x++) { - unsigned int tx = startOff + t; - if (hflip) - tx = 7 - tx; - uint8_t pixel = (row >> (tx << 2)) & 
0xF; - if (pixel) - output[x] = pal[(palNum << 4) + pixel] | COLOR_OPAQUE; + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(base, 8bpp, alpha); } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map(base, 8bpp, alpha); + map_ptr = second_ptr; + end -= pixel_run; } - } + tile_run = end / 8; + multiple_tile_map(base, 8bpp, alpha); - // middle: full tiles, 8 pixels at a time - while (x + 8 <= DISPLAY_WIDTH) { - unsigned int srcX = (x + hoffs) & wMask; - uint16_t entry = map[rowBase + (srcX >> 3)]; - unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) - ty = 7 - ty; - - uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); - uint16_t *palBase = pal + (palNum << 4); - - if (!TILE_HFLIP(entry)) { - uint8_t p; - p = row & 0xF; - if (p) - output[x] = palBase[p] | COLOR_OPAQUE; - p = (row >> 4) & 0xF; - if (p) - output[x + 1] = palBase[p] | COLOR_OPAQUE; - p = (row >> 8) & 0xF; - if (p) - output[x + 2] = palBase[p] | COLOR_OPAQUE; - p = (row >> 12) & 0xF; - if (p) - output[x + 3] = palBase[p] | COLOR_OPAQUE; - p = (row >> 16) & 0xF; - if (p) - output[x + 4] = palBase[p] | COLOR_OPAQUE; - p = (row >> 20) & 0xF; - if (p) - output[x + 5] = palBase[p] | COLOR_OPAQUE; - p = (row >> 24) & 0xF; - if (p) - output[x + 6] = palBase[p] | COLOR_OPAQUE; - p = (row >> 28) & 0xF; - if (p) - output[x + 7] = palBase[p] | COLOR_OPAQUE; + partial_tile_run = end % 8; + if (partial_tile_run) { + partial_tile_left_map(base, 8bpp, alpha); + } + } else { + /* color depth: 4bpp + * combine: base + * alpha : alpha + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; + s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_4bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 
0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(base, 4bpp, alpha); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(base, 4bpp, alpha); + } + } } else { - uint8_t p; - p = (row >> 28) & 0xF; - if (p) - output[x] = palBase[p] | COLOR_OPAQUE; - p = (row >> 24) & 0xF; - if (p) - output[x + 1] = palBase[p] | COLOR_OPAQUE; - p = (row >> 20) & 0xF; - if (p) - output[x + 2] = palBase[p] | COLOR_OPAQUE; - p = (row >> 16) & 0xF; - if (p) - output[x + 3] = palBase[p] | COLOR_OPAQUE; - p = (row >> 12) & 0xF; - if (p) - output[x + 4] = palBase[p] | COLOR_OPAQUE; - p = (row >> 8) & 0xF; - if (p) - output[x + 5] = palBase[p] | COLOR_OPAQUE; - p = (row >> 4) & 0xF; - if (p) - output[x + 6] = palBase[p] | COLOR_OPAQUE; - p = row & 0xF; - if (p) - output[x + 7] = palBase[p] | COLOR_OPAQUE; + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(base, 4bpp, alpha); + } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map(base, 4bpp, alpha); + map_ptr = second_ptr; + end -= pixel_run; } - x += 8; - } - - // right edge: leftover partial tile - if (x < DISPLAY_WIDTH) { - unsigned int srcX = (x + hoffs) & wMask; - uint16_t entry = map[rowBase + (srcX >> 3)]; - unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) - ty = 7 - ty; - bool hflip = TILE_HFLIP(entry); - - uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); - - for (unsigned int t = 0; x < DISPLAY_WIDTH; t++, x++) { - unsigned int tx = hflip ? 
(7 - t) : t; - uint8_t pixel = (row >> (tx << 2)) & 0xF; - if (pixel) - output[x] = pal[(palNum << 4) + pixel] | COLOR_OPAQUE; + tile_run = end / 8; + multiple_tile_map(base, 4bpp, alpha); + + partial_tile_run = end % 8; + if (partial_tile_run) { + partial_tile_left_map(base, 4bpp, alpha); } } } -// same thing but with blend/window tracking baked in -static void RenderTextBGBlend(int bgNum, uint16_t control, uint16_t hoffs, uint16_t voffs, int lineNum, uint16_t *output, uint8_t *layerIds, - unsigned int blendMode, bool windowsEnabled, uint16_t *winMask, unsigned int bldcnt, unsigned int eva, - unsigned int evb, unsigned int evy) +static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end, void *scanline) { - unsigned int charBase = (control >> 2) & 3; - unsigned int screenBase = (control & BGCNT_SCREENBASE_MASK) >> 8; - unsigned int is8bpp = (control >> 7) & 1; - - unsigned int mapW = bgMapSizes[control >> 14][0]; - unsigned int mapPxW = mapW << 3; - unsigned int mapPxH = bgMapSizes[control >> 14][1] << 3; - unsigned int wMask = mapPxW - 1; - unsigned int hMask = mapPxH - 1; - - uint8_t *tiles = (uint8_t *)BG_CHAR_ADDR(charBase); - uint16_t *map = (uint16_t *)BG_SCREEN_ADDR(screenBase); - uint16_t *pal = (uint16_t *)PLTT; - - bool hasMosaic = control & BGCNT_MOSAIC; - if (hasMosaic) - lineNum = ApplyMosaicBGY(lineNum); - - hoffs &= 0x1FF; - voffs &= 0x1FF; - - unsigned int yy = (lineNum + voffs) & hMask; - unsigned int mapY = yy >> 3; - unsigned int tileY = yy & 7; - unsigned int rowBase = mapY * mapW; - - bool bgIsTargetA = (blendMode != 0) && (bldcnt & (1 << bgNum)); - bool useWindows = windowsEnabled && (winMask != NULL); - unsigned int winBgBit = 1 << bgNum; - - // slow path: 8bpp or mosaic - if (hasMosaic || is8bpp) { - for (unsigned int x = 0; x < DISPLAY_WIDTH; x++) { - unsigned int xx = hasMosaic ? 
(ApplyMosaicBGX(x) + hoffs) & wMask : (x + hoffs) & wMask; - - uint16_t entry = map[rowBase + (xx >> 3)]; - unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int tx = xx & 7; - unsigned int ty = tileY; - if (TILE_HFLIP(entry)) - tx = 7 - tx; - if (TILE_VFLIP(entry)) - ty = 7 - ty; - - uint8_t pixel; - if (!is8bpp) { - uint8_t pair = tiles[(tileNum << 5) + (ty << 2) + (tx >> 1)]; - pixel = (tx & 1) ? (pair >> 4) : (pair & 0xF); - } else { - pixel = tiles[(tileNum << 6) + (ty << 3) + tx]; - } + render_scanline_extra_variables_transparent_alpha(text); + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; + u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; + u32 current_pixel; + u32 current_pixels; + u32 partial_tile_run = 0; + u32 partial_tile_offset; + u32 tile_run; + u32 i; + render_scanline_dest_alpha *dest_ptr = ((render_scanline_dest_alpha *)scanline) + start; + + u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_ptr, *second_ptr; + u8 *tile_ptr; + + end -= start; + + if ((map_size & 0x02) && (vertical_offset >= 256)) { + map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); + } else { + map_base += (((vertical_offset % 256) / 8) * 32); + } - if (pixel == 0) - continue; - - uint16_t color = !is8bpp ? 
pal[(palNum << 4) + pixel] | COLOR_OPAQUE : pal[pixel] | COLOR_OPAQUE; - - if (useWindows && !(winMask[x] & winBgBit)) - continue; - - if (bgIsTargetA && (!useWindows || (winMask[x] & WINMASK_CLR))) { - uint16_t src = color; - switch (blendMode) { - case 1: - if (IsBlendTargetB(layerIds[x], bldcnt)) - color = alphaBlendColor(src, output[x], eva, evb); - break; - case 2: - color = alphaBrightnessIncrease(src, evy); - break; - case 3: - color = alphaBrightnessDecrease(src, evy); - break; + if (map_size & 0x01) { + if (horizontal_offset >= 256) { + horizontal_offset -= 256; + map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + second_ptr = map_base; + } else { + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base + (32 * 32); + } + } else { + horizontal_offset %= 256; + map_ptr = map_base + (horizontal_offset / 8); + second_ptr = map_base; + } + + if (bg_control & 0x80) { + /* color depth: 8bpp + * combine: transparent + * alpha : alpha + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; + s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_8bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(transparent, 8bpp, alpha); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(transparent, 8bpp, alpha); } } + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(transparent, 8bpp, alpha); + } - output[x] = color; - 
layerIds[x] = bgNum; + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map(transparent, 8bpp, alpha); + map_ptr = second_ptr; + end -= pixel_run; } - return; - } - - // fast path: 4bpp batched with inline blend - unsigned int x = 0; - - // left edge partial tile - { - unsigned int startX = hoffs & wMask; - unsigned int startOff = startX & 7; - - if (startOff != 0) { - uint16_t entry = map[rowBase + (startX >> 3)]; - unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) - ty = 7 - ty; - bool hflip = TILE_HFLIP(entry); - - uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); - uint16_t *palBase = pal + (palNum << 4); - - unsigned int partial = 8 - startOff; - if (partial > DISPLAY_WIDTH) - partial = DISPLAY_WIDTH; - - for (unsigned int t = 0; t < partial && x < DISPLAY_WIDTH; t++, x++) { - unsigned int tx = startOff + t; - if (hflip) - tx = 7 - tx; - uint8_t pixel = (row >> (tx << 2)) & 0xF; - if (pixel) - WriteBGPixelBlended(x, pixel, palBase, bgNum, output, layerIds, blendMode, bgIsTargetA, useWindows, winBgBit, winMask, - bldcnt, eva, evb, evy); + tile_run = end / 8; + multiple_tile_map(transparent, 8bpp, alpha); + + partial_tile_run = end % 8; + if (partial_tile_run) { + partial_tile_left_map(transparent, 8bpp, alpha); + } + } else { + /* color depth: 4bpp + * combine: transparent + * alpha : alpha + */ + + /* Render a single scanline of text tiles */ + u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; + s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; + tile_extra_variables_4bpp(); + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); + u32 current_tile; + + map_base += ((vertical_offset % 256) / 8) * 32; + partial_tile_offset = (horizontal_offset % 8); + + if (pixel_run >= end) { + 
if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + if (end < partial_tile_run) { + partial_tile_run = end; + partial_tile_mid_map(transparent, 4bpp, alpha); + return; + } else { + end -= partial_tile_run; + partial_tile_right_map(transparent, 4bpp, alpha); + } + } + } else { + if (partial_tile_offset) { + partial_tile_run = 8 - partial_tile_offset; + partial_tile_right_map(transparent, 4bpp, alpha); } + + tile_run = (pixel_run - partial_tile_run) / 8; + multiple_tile_map(transparent, 4bpp, alpha); + map_ptr = second_ptr; + end -= pixel_run; } - } + tile_run = end / 8; + multiple_tile_map(transparent, 4bpp, alpha); - // middle: full tiles - while (x + 8 <= DISPLAY_WIDTH) { - unsigned int srcX = (x + hoffs) & wMask; - uint16_t entry = map[rowBase + (srcX >> 3)]; - unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) - ty = 7 - ty; - - uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); - uint16_t *palBase = pal + (palNum << 4); - -#define BLEND_PX(off, shift) \ - do { \ - uint8_t p = (row >> (shift)) & 0xF; \ - if (p) \ - WriteBGPixelBlended(x + (off), p, palBase, bgNum, output, layerIds, blendMode, bgIsTargetA, useWindows, winBgBit, winMask, \ - bldcnt, eva, evb, evy); \ - } while (0) - - if (!TILE_HFLIP(entry)) { - BLEND_PX(0, 0); - BLEND_PX(1, 4); - BLEND_PX(2, 8); - BLEND_PX(3, 12); - BLEND_PX(4, 16); - BLEND_PX(5, 20); - BLEND_PX(6, 24); - BLEND_PX(7, 28); - } else { - BLEND_PX(0, 28); - BLEND_PX(1, 24); - BLEND_PX(2, 20); - BLEND_PX(3, 16); - BLEND_PX(4, 12); - BLEND_PX(5, 8); - BLEND_PX(6, 4); - BLEND_PX(7, 0); + partial_tile_run = end % 8; + if (partial_tile_run) { + partial_tile_left_map(transparent, 4bpp, alpha); } + } +} + +s32 affine_reference_x[2]; +s32 affine_reference_y[2]; + +static inline s32 signext28(u32 value) +{ + s32 ret = (s32)(value << 4); + return ret >> 4; +} + +void video_reload_counters() +{ + /* This happens every 
Vblank */ + affine_reference_x[0] = signext28(read_ioreg32(REG_ADDR_BG2X_L)); + affine_reference_y[0] = signext28(read_ioreg32(REG_ADDR_BG2Y_L)); + affine_reference_x[1] = signext28(read_ioreg32(REG_ADDR_BG3X_L)); + affine_reference_y[1] = signext28(read_ioreg32(REG_ADDR_BG3Y_L)); +} + +#define affine_render_bg_pixel_normal() current_pixel = PLTT[0] + +#define affine_render_bg_pixel_alpha() current_pixel = bg_combine + +#define affine_render_bg_pixel_color16() affine_render_bg_pixel_alpha() + +#define affine_render_bg_pixel_color32() affine_render_bg_pixel_alpha() + +#define affine_render_bg_pixel_base(alpha_op) affine_render_bg_pixel_##alpha_op() -#undef BLEND_PX +#define affine_render_bg_pixel_transparent(alpha_op) - x += 8; +#define affine_render_bg_pixel_copy(alpha_op) + +#define affine_render_bg_base(alpha_op) dest_ptr[0] = current_pixel + +#define affine_render_bg_transparent(alpha_op) + +#define affine_render_bg_copy(alpha_op) + +#define affine_render_bg_remainder_base(alpha_op) \ + affine_render_bg_pixel_##alpha_op(); \ + for (; i < end; i++) { \ + affine_render_bg_base(alpha_op); \ + advance_dest_ptr_base(1); \ } - // right edge partial tile - if (x < DISPLAY_WIDTH) { - unsigned int srcX = (x + hoffs) & wMask; - uint16_t entry = map[rowBase + (srcX >> 3)]; - unsigned int tileNum = TILE_NUM(entry); - unsigned int palNum = TILE_PALETTE(entry); - unsigned int ty = tileY; - if (TILE_VFLIP(entry)) - ty = 7 - ty; - bool hflip = TILE_HFLIP(entry); +#define affine_render_bg_remainder_transparent(alpha_op) + +#define affine_render_bg_remainder_copy(alpha_op) + +#define affine_render_next(combine_op) \ + source_x += dx; \ + source_y += dy; \ + advance_dest_ptr_##combine_op(1) + +#define affine_render_scale_offset() \ + tile_base += ((pixel_y % 8) * 8); \ + map_base += (pixel_y / 8) << map_pitch + +#define affine_render_scale_pixel(combine_op, alpha_op) \ + map_offset = (pixel_x / 8); \ + if (map_offset != last_map_offset) { \ + tile_ptr = tile_base + 
(map_base[map_offset] * 64); \ + last_map_offset = map_offset; \ + } \ + tile_ptr = tile_base + (map_base[(pixel_x / 8)] * 64); \ + current_pixel = tile_ptr[(pixel_x % 8)]; \ + tile_8bpp_draw_##combine_op(0, none, 0, alpha_op); \ + affine_render_next(combine_op) + +#define affine_render_scale(combine_op, alpha_op) \ + { \ + pixel_y = source_y >> 8; \ + u32 i = 0; \ + affine_render_bg_pixel_##combine_op(alpha_op); \ + if ((u32)pixel_y < (u32)width_height) { \ + affine_render_scale_offset(); \ + for (; i < end; i++) { \ + pixel_x = source_x >> 8; \ + \ + if ((u32)pixel_x < (u32)width_height) { \ + break; \ + } \ + \ + affine_render_bg_##combine_op(alpha_op); \ + affine_render_next(combine_op); \ + } \ + \ + for (; i < end; i++) { \ + pixel_x = source_x >> 8; \ + \ + if ((u32)pixel_x >= (u32)width_height) \ + break; \ + \ + affine_render_scale_pixel(combine_op, alpha_op); \ + } \ + } \ + affine_render_bg_remainder_##combine_op(alpha_op); \ + } - uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); - uint16_t *palBase = pal + (palNum << 4); +#define affine_render_scale_wrap(combine_op, alpha_op) \ + { \ + u32 wrap_mask = width_height - 1; \ + pixel_y = (source_y >> 8) & wrap_mask; \ + if ((u32)pixel_y < (u32)width_height) { \ + affine_render_scale_offset(); \ + for (i = 0; i < end; i++) { \ + pixel_x = (source_x >> 8) & wrap_mask; \ + affine_render_scale_pixel(combine_op, alpha_op); \ + } \ + } \ + } - for (unsigned int t = 0; x < DISPLAY_WIDTH; t++, x++) { - unsigned int tx = hflip ? 
(7 - t) : t; - uint8_t pixel = (row >> (tx << 2)) & 0xF; - if (pixel) - WriteBGPixelBlended(x, pixel, palBase, bgNum, output, layerIds, blendMode, bgIsTargetA, useWindows, winBgBit, winMask, - bldcnt, eva, evb, evy); - } +#define affine_render_rotate_pixel(combine_op, alpha_op) \ + map_offset = (pixel_x / 8) + ((pixel_y / 8) << map_pitch); \ + if (map_offset != last_map_offset) { \ + tile_ptr = tile_base + (map_base[map_offset] * 64); \ + last_map_offset = map_offset; \ + } \ + \ + current_pixel = tile_ptr[(pixel_x % 8) + ((pixel_y % 8) * 8)]; \ + tile_8bpp_draw_##combine_op(0, none, 0, alpha_op); \ + affine_render_next(combine_op) + +#define affine_render_rotate(combine_op, alpha_op) \ + { \ + affine_render_bg_pixel_##combine_op(alpha_op); \ + for (i = 0; i < end; i++) { \ + pixel_x = source_x >> 8; \ + pixel_y = source_y >> 8; \ + \ + if (((u32)pixel_x < (u32)width_height) && ((u32)pixel_y < (u32)width_height)) { \ + break; \ + } \ + affine_render_bg_##combine_op(alpha_op); \ + affine_render_next(combine_op); \ + } \ + \ + for (; i < end; i++) { \ + pixel_x = source_x >> 8; \ + pixel_y = source_y >> 8; \ + \ + if (((u32)pixel_x >= (u32)width_height) || ((u32)pixel_y >= (u32)width_height)) { \ + affine_render_bg_remainder_##combine_op(alpha_op); \ + break; \ + } \ + \ + affine_render_rotate_pixel(combine_op, alpha_op); \ + } \ } -} -static void RenderAffineBG(int bgNum, uint16_t control, int lineNum, uint16_t *output) -{ - vBgCnt *bgcnt = (vBgCnt *)&control; - - uint8_t *tiles = (uint8_t *)(VRAM + bgcnt->charBaseBlock * 0x4000); - uint8_t *map = (uint8_t *)(VRAM + bgcnt->screenBaseBlock * 0x800); - uint16_t *pal = (uint16_t *)PLTT; - - if (control & BGCNT_MOSAIC) - lineNum = ApplyMosaicBGY(lineNum); - - s16 pa = GetBgPA(bgNum); - s16 pb = GetBgPB(bgNum); - s16 pc = GetBgPC(bgNum); - s16 pd = GetBgPD(bgNum); - - // always square: 128/256/512/1024 - int size = 128; - switch (bgcnt->screenSize) { - case 1: - size = 256; - break; - case 2: - size = 512; - break; - 
case 3: - size = 1024; - break; - } - int mask = size - 1; - int yshift = ((control >> 14) & 3) + 4; - - // sign-extend 28-bit reference point, advance by scanline - s32 refX = GetBgRefX(bgNum); - s32 refY = GetBgRefY(bgNum); - refX = (refX & (1 << 27)) ? refX | 0xF0000000 : refX; - refY = (refY & (1 << 27)) ? refY | 0xF0000000 : refY; - refX += lineNum * pb; - refY += lineNum * pd; - - int curX = refX; - int curY = refY; - - if (bgcnt->areaOverflowMode) { - // wraparound - for (int x = 0; x < DISPLAY_WIDTH; x++) { - int tx = (curX >> 8) & mask; - int ty = (curY >> 8) & mask; - int tile = map[(tx >> 3) + ((ty >> 3) << yshift)]; - uint8_t pixel = tiles[(tile << 6) + ((ty & 7) << 3) + (tx & 7)]; - if (pixel) - output[x] = pal[pixel] | COLOR_OPAQUE; - curX += pa; - curY += pc; - } - } else { - // clamp: outside the map = transparent - for (int x = 0; x < DISPLAY_WIDTH; x++) { - int tx = curX >> 8; - int ty = curY >> 8; - if (tx >= 0 && ty >= 0 && tx < size && ty < size) { - int tile = map[(tx >> 3) + ((ty >> 3) << yshift)]; - uint8_t pixel = tiles[(tile << 6) + ((ty & 7) << 3) + (tx & 7)]; - if (pixel) - output[x] = pal[pixel] | COLOR_OPAQUE; - } - curX += pa; - curY += pc; - } +#define affine_render_rotate_wrap(combine_op, alpha_op) \ + { \ + u32 wrap_mask = width_height - 1; \ + for (i = 0; i < end; i++) { \ + pixel_x = (source_x >> 8) & wrap_mask; \ + pixel_y = (source_y >> 8) & wrap_mask; \ + \ + affine_render_rotate_pixel(combine_op, alpha_op); \ + } \ } - // horizontal mosaic as a post-pass - if ((control & BGCNT_MOSAIC) && MOSAIC_BG_X > 0) { - for (int x = 0; x < DISPLAY_WIDTH; x++) - output[x] = output[ApplyMosaicBGX(x)]; +// Build affine background renderers. 
+ +#define render_scanline_affine_builder(combine_op, alpha_op) \ + void render_scanline_affine_##combine_op##_##alpha_op(u32 layer, u32 start, u32 end, void *scanline) \ + { \ + render_scanline_extra_variables_##combine_op##_##alpha_op(affine); \ + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); \ + u32 current_pixel; \ + s32 source_x, source_y; \ + u32 pixel_x, pixel_y; \ + u32 layer_offset = (layer - 2) * 8; \ + s32 dx, dy; \ + u32 map_size = (bg_control >> 14) & 0x03; \ + u32 width_height = 1 << (7 + map_size); \ + u32 map_pitch = map_size + 4; \ + u8 *map_base = VRAM + (((bg_control >> 8) & 0x1F) * (1024 * 2)); \ + u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)); \ + u8 *tile_ptr = NULL; \ + u32 map_offset, last_map_offset = (u32)-1; \ + u32 i; \ + render_scanline_dest_##alpha_op *dest_ptr = ((render_scanline_dest_##alpha_op *)scanline) + start; \ + \ + dx = (s16)read_ioreg(REG_ADDR_BG2PA + (layer_offset * sizeof(u16))); \ + dy = (s16)read_ioreg(REG_ADDR_BG2PC + (layer_offset * sizeof(u16))); \ + source_x = affine_reference_x[layer - 2] + (start * dx); \ + source_y = affine_reference_y[layer - 2] + (start * dy); \ + \ + end -= start; \ + \ + switch (((bg_control >> 12) & 0x02) | (dy != 0)) { \ + case 0x00: \ + affine_render_scale(combine_op, alpha_op); \ + break; \ + \ + case 0x01: \ + affine_render_rotate(combine_op, alpha_op); \ + break; \ + \ + case 0x02: \ + affine_render_scale_wrap(combine_op, alpha_op); \ + break; \ + \ + case 0x03: \ + affine_render_rotate_wrap(combine_op, alpha_op); \ + break; \ + } \ } -} -// same deal with blend/window support -static void RenderAffineBGBlend(int bgNum, uint16_t control, int lineNum, uint16_t *output, uint8_t *layerIds, unsigned int blendMode, - bool windowsEnabled, uint16_t *winMask, unsigned int bldcnt, unsigned int eva, unsigned int evb, - unsigned int evy) -{ - vBgCnt *bgcnt = (vBgCnt *)&control; - - uint8_t *tiles = (uint8_t *)(VRAM + bgcnt->charBaseBlock * 0x4000); - uint8_t *map = 
(uint8_t *)(VRAM + bgcnt->screenBaseBlock * 0x800); - uint16_t *pal = (uint16_t *)PLTT; - - if (control & BGCNT_MOSAIC) - lineNum = ApplyMosaicBGY(lineNum); - - s16 pa = GetBgPA(bgNum); - s16 pb = GetBgPB(bgNum); - s16 pc = GetBgPC(bgNum); - s16 pd = GetBgPD(bgNum); - - int size = 128; - switch (bgcnt->screenSize) { - case 1: - size = 256; - break; - case 2: - size = 512; - break; - case 3: - size = 1024; - break; - } - int mask = size - 1; - int yshift = ((control >> 14) & 3) + 4; - - s32 refX = GetBgRefX(bgNum); - s32 refY = GetBgRefY(bgNum); - refX = (refX & (1 << 27)) ? refX | 0xF0000000 : refX; - refY = (refY & (1 << 27)) ? refY | 0xF0000000 : refY; - refX += lineNum * pb; - refY += lineNum * pd; - - int curX = refX; - int curY = refY; - - bool bgIsTargetA = (blendMode != 0) && (bldcnt & (1 << bgNum)); - - for (int x = 0; x < DISPLAY_WIDTH; x++) { - int tx, ty; - - if (bgcnt->areaOverflowMode) { - tx = (curX >> 8) & mask; - ty = (curY >> 8) & mask; - } else { - tx = curX >> 8; - ty = curY >> 8; - if (tx < 0 || ty < 0 || tx >= size || ty >= size) { - curX += pa; - curY += pc; - continue; - } - } +render_scanline_affine_builder(base, normal); +render_scanline_affine_builder(transparent, normal); +render_scanline_affine_builder(base, color16); +render_scanline_affine_builder(transparent, color16); +render_scanline_affine_builder(base, color32); +render_scanline_affine_builder(transparent, color32); +render_scanline_affine_builder(base, alpha); +render_scanline_affine_builder(transparent, alpha); + +#define bitmap_render_pixel_mode3(alpha_op) \ + current_pixel = convert_palette(current_pixel); \ + *dest_ptr = current_pixel + +#define bitmap_render_pixel_mode4(alpha_op) tile_expand_base_##alpha_op##_mode4(0) + +#define bitmap_render_pixel_mode5(alpha_op) bitmap_render_pixel_mode3(alpha_op) + +#define bitmap_render_scale(type, alpha_op, width, height) \ + pixel_y = (source_y >> 8); \ + if ((u32)pixel_y < (u32)height) { \ + pixel_x = (source_x >> 8); \ + src_ptr += 
(pixel_y * width); \ + if (dx == 0x100) { \ + if (pixel_x < 0) { \ + end += pixel_x; \ + dest_ptr -= pixel_x; \ + pixel_x = 0; \ + } else if (pixel_x > 0) \ + src_ptr += pixel_x; \ + \ + if ((pixel_x + end) >= width) \ + end = (width - pixel_x); \ + \ + for (i = 0; (s32)i < (s32)end; i++) { \ + current_pixel = srcread_##type(*src_ptr); \ + bitmap_render_pixel_##type(alpha_op); \ + src_ptr++; \ + dest_ptr++; \ + } \ + } else { \ + if ((u32)(source_y >> 8) < (u32)height) { \ + for (i = 0; i < end; i++) { \ + pixel_x = (source_x >> 8); \ + \ + if ((u32)pixel_x < (u32)width) \ + break; \ + \ + source_x += dx; \ + dest_ptr++; \ + } \ + \ + for (; i < end; i++) { \ + pixel_x = (source_x >> 8); \ + \ + if ((u32)pixel_x >= (u32)width) \ + break; \ + \ + current_pixel = srcread_##type(src_ptr[pixel_x]); \ + bitmap_render_pixel_##type(alpha_op); \ + \ + source_x += dx; \ + dest_ptr++; \ + } \ + } \ + } \ + } - int tile = map[(tx >> 3) + ((ty >> 3) << yshift)]; - uint8_t pixel = tiles[(tile << 6) + ((ty & 7) << 3) + (tx & 7)]; - - curX += pa; - curY += pc; - - if (pixel == 0) - continue; - - uint16_t color = pal[pixel] | COLOR_OPAQUE; - - if (windowsEnabled && winMask && !(winMask[x] & (1 << bgNum))) - continue; - - bool winAllowsBlend = true; - if (windowsEnabled && winMask) - winAllowsBlend = (winMask[x] & WINMASK_CLR) >> 5; - - if (bgIsTargetA && winAllowsBlend) { - uint16_t src = color; - switch (blendMode) { - case 1: - if (IsBlendTargetB(layerIds[x], bldcnt)) - color = alphaBlendColor(src, output[x], eva, evb); - break; - case 2: - color = alphaBrightnessIncrease(src, evy); - break; - case 3: - color = alphaBrightnessDecrease(src, evy); - break; - } - } +#define bitmap_render_rotate(type, alpha_op, width, height) \ + for (i = 0; i < end; i++) { \ + pixel_x = source_x >> 8; \ + pixel_y = source_y >> 8; \ + \ + if (((u32)pixel_x < (u32)width) && ((u32)pixel_y < (u32)height)) \ + break; \ + \ + source_x += dx; \ + source_y += dy; \ + dest_ptr++; \ + } \ + \ + for (; i < 
end; i++) { \ + pixel_x = (source_x >> 8); \ + pixel_y = (source_y >> 8); \ + \ + if (((u32)pixel_x >= (u32)width) || ((u32)pixel_y >= (u32)height)) \ + break; \ + \ + current_pixel = srcread_##type(src_ptr[pixel_x + (pixel_y * width)]); \ + bitmap_render_pixel_##type(alpha_op); \ + \ + source_x += dx; \ + source_y += dy; \ + dest_ptr++; \ + } - output[x] = color; - layerIds[x] = bgNum; +#define render_scanline_vram_setup_mode3() u16 *src_ptr = (u16 *)VRAM + +#define render_scanline_vram_setup_mode5() \ + u16 *src_ptr = (u16 *)VRAM; \ + if (read_ioreg(REG_ADDR_DISPCNT) & 0x10) \ + src_ptr = (u16 *)(VRAM + 0xA000); + +#define render_scanline_vram_setup_mode4() \ + u16 *palette = PLTT; \ + u8 *src_ptr = VRAM; \ + if (read_ioreg(REG_ADDR_DISPCNT) & 0x10) \ + src_ptr = VRAM + 0xA000; + +#define srcread_mode3(v) eswap16(v) +#define srcread_mode5(v) eswap16(v) +#define srcread_mode4(v) (v) + +// Build bitmap scanline rendering functions. + +#define render_scanline_bitmap_builder(type, alpha_op, width, height) \ + static void render_scanline_bitmap_##type##_##alpha_op(u32 start, u32 end, void *scanline) \ + { \ + u32 current_pixel; \ + s32 source_x, source_y; \ + s32 pixel_x, pixel_y; \ + \ + s32 dx = (s16)read_ioreg(REG_ADDR_BG2PA); \ + s32 dy = (s16)read_ioreg(REG_ADDR_BG2PC); \ + \ + u32 i; \ + \ + render_scanline_dest_##alpha_op *dest_ptr = ((render_scanline_dest_##alpha_op *)scanline) + start; \ + render_scanline_vram_setup_##type(); \ + \ + end -= start; \ + \ + source_x = affine_reference_x[0] + (start * dx); \ + source_y = affine_reference_y[0] + (start * dy); \ + \ + if (dy == 0) { \ + bitmap_render_scale(type, alpha_op, width, height); \ + } else { \ + bitmap_render_rotate(type, alpha_op, width, height); \ + } \ } - if ((control & BGCNT_MOSAIC) && MOSAIC_BG_X > 0) { - for (int x = 0; x < DISPLAY_WIDTH; x++) - output[x] = output[ApplyMosaicBGX(x)]; +render_scanline_bitmap_builder(mode3, normal, DISPLAY_WIDTH, DISPLAY_HEIGHT); 
+render_scanline_bitmap_builder(mode4, normal, DISPLAY_WIDTH, DISPLAY_HEIGHT); /* mode 4 frame is DISPLAY_WIDTH x DISPLAY_HEIGHT, like mode 3; the height bounds the pixel_y check */ +render_scanline_bitmap_builder(mode5, normal, 160, 128); + +// Fill in the renderers for a layer based on the mode type, + +#define tile_layer_render_functions(type) \ + { \ + render_scanline_##type##_base_normal, render_scanline_##type##_transparent_normal, render_scanline_##type##_base_alpha, \ + render_scanline_##type##_transparent_alpha, render_scanline_##type##_base_color16, \ + render_scanline_##type##_transparent_color16, render_scanline_##type##_base_color32, \ + render_scanline_##type##_transparent_color32 \ } -} -#define MAX_SPRITES_PER_PRIORITY 32 +// Use if a layer is unsupported for that mode. -typedef struct { - uint8_t oamIndex; -} ActiveSprite; +#define tile_layer_render_null() \ + { \ + NULL, NULL, NULL, NULL \ + } + +#define bitmap_layer_render_functions(type) \ + { \ + render_scanline_bitmap_##type##_normal \ + } -static ActiveSprite sActiveSprites[4][MAX_SPRITES_PER_PRIORITY]; -static int sActiveSpriteCount[4]; +// Structs containing functions to render the layers for each mode, for +// each render type.
+static const tile_layer_render_struct tile_mode_renderers[3][4] + = { { tile_layer_render_functions(text), tile_layer_render_functions(text), tile_layer_render_functions(text), + tile_layer_render_functions(text) }, + { tile_layer_render_functions(text), tile_layer_render_functions(text), tile_layer_render_functions(affine), + tile_layer_render_functions(text) }, + { tile_layer_render_functions(text), tile_layer_render_functions(text), tile_layer_render_functions(affine), + tile_layer_render_functions(affine) } }; + +static const bitmap_layer_render_struct bitmap_mode_renderers[3] + = { bitmap_layer_render_functions(mode3), bitmap_layer_render_functions(mode4), bitmap_layer_render_functions(mode5) }; + +#define render_scanline_layer_functions_tile() const tile_layer_render_struct *layer_renderers = tile_mode_renderers[dispcnt & 0x07] + +#define render_scanline_layer_functions_bitmap() \ + const bitmap_layer_render_struct *layer_renderers = bitmap_mode_renderers + ((dispcnt & 0x07) - 3) + +// Adjust a flipped obj's starting position + +#define obj_tile_offset_noflip(color_depth) + +#define obj_tile_offset_flip(color_depth) +(tile_size_##color_depth * ((obj_width - 8) / 8)) + +// Adjust the obj's starting point if it goes too far off the left edge of +// the screen. 
+ +#define obj_tile_right_offset_noflip(color_depth) tile_ptr += (partial_tile_offset / 8) * tile_size_##color_depth + +#define obj_tile_right_offset_flip(color_depth) tile_ptr -= (partial_tile_offset / 8) * tile_size_##color_depth + +// Get the current row offset into an obj in 1D map space + +#define obj_tile_offset_1D(color_depth, flip_op) \ + tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32) + ((vertical_offset / 8) * (obj_width / 8) * tile_size_##color_depth) \ + + ((vertical_offset % 8) * tile_width_##color_depth) obj_tile_offset_##flip_op(color_depth) + +// Get the current row offset into an obj in 2D map space + +#define obj_tile_offset_2D(color_depth, flip_op) \ + tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32) + ((vertical_offset / 8) * 1024) \ + + ((vertical_offset % 8) * tile_width_##color_depth) obj_tile_offset_##flip_op(color_depth) + +// Get the palette for 4bpp obj. + +#define obj_get_palette_4bpp() current_palette = (obj_attribute_2 >> 8) & 0xF0 + +#define obj_get_palette_8bpp() + +// Render the current row of an obj. 
+ +#define obj_render(combine_op, color_depth, alpha_op, map_space, flip_op) \ + { \ + obj_get_palette_##color_depth(); \ + obj_tile_offset_##map_space(color_depth, flip_op); \ + \ + if (obj_x < (s32)start) { \ + dest_ptr = scanline + start; \ + pixel_run = obj_width - (start - obj_x); \ + if ((s32)pixel_run > 0) { \ + if ((obj_x + obj_width) >= end) { \ + pixel_run = end - start; \ + partial_tile_offset = start - obj_x; \ + obj_tile_right_offset_##flip_op(color_depth); \ + partial_tile_offset %= 8; \ + \ + if (partial_tile_offset) { \ + partial_tile_run = 8 - partial_tile_offset; \ + if ((s32)pixel_run < (s32)partial_tile_run) { \ + if ((s32)pixel_run > 0) { \ + partial_tile_run = pixel_run; \ + partial_tile_mid_obj(combine_op, color_depth, alpha_op, flip_op); \ + } \ + continue; \ + } else { \ + pixel_run -= partial_tile_run; \ + partial_tile_right_obj(combine_op, color_depth, alpha_op, flip_op); \ + } \ + } \ + tile_run = pixel_run / 8; \ + multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op); \ + partial_tile_run = pixel_run % 8; \ + if (partial_tile_run) { \ + partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op); \ + } \ + } else { \ + partial_tile_offset = start - obj_x; \ + obj_tile_right_offset_##flip_op(color_depth); \ + partial_tile_offset %= 8; \ + if (partial_tile_offset) { \ + partial_tile_run = 8 - partial_tile_offset; \ + partial_tile_right_obj(combine_op, color_depth, alpha_op, flip_op); \ + } \ + tile_run = pixel_run / 8; \ + multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op); \ + } \ + } \ + } else \ + \ + if ((obj_x + obj_width) >= end) { \ + pixel_run = end - obj_x; \ + if ((s32)pixel_run > 0) { \ + dest_ptr = scanline + obj_x; \ + tile_run = pixel_run / 8; \ + multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op); \ + partial_tile_run = pixel_run % 8; \ + if (partial_tile_run) { \ + partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op); \ + } \ + } \ + } else { \ + dest_ptr = scanline + 
obj_x; \ + tile_run = obj_width / 8; \ + multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op); \ + } \ + } -static void PrefilterSprites(uint16_t vcount) -{ - sActiveSpriteCount[0] = 0; - sActiveSpriteCount[1] = 0; - sActiveSpriteCount[2] = 0; - sActiveSpriteCount[3] = 0; +#define obj_scale_offset_1D(color_depth) \ + tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32) + ((vertical_offset / 8) * (max_x / 8) * tile_size_##color_depth) \ + + ((vertical_offset % 8) * tile_width_##color_depth) + +// Get the current row offset into an obj in 2D map space + +#define obj_scale_offset_2D(color_depth) \ + tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32) + ((vertical_offset / 8) * 1024) \ + + ((vertical_offset % 8) * tile_width_##color_depth) + +#define obj_render_scale_pixel_4bpp(combine_op, alpha_op) \ + current_pixel = tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)]; \ + if (tile_x & 0x01) \ + current_pixel >>= 4; \ + else \ + current_pixel &= 0x0F; \ + \ + tile_4bpp_draw_##combine_op(0, none, 0, alpha_op) + +#define obj_render_scale_pixel_8bpp(combine_op, alpha_op) \ + current_pixel = tile_ptr[tile_map_offset + (tile_x & 0x07)]; \ + tile_8bpp_draw_##combine_op(0, none, 0, alpha_op); + +#define obj_render_scale(combine_op, color_depth, alpha_op, map_space) \ + { \ + u32 vertical_offset; \ + source_y += (y_delta * dmy); \ + vertical_offset = (source_y >> 8); \ + if ((u32)vertical_offset < (u32)max_y) { \ + obj_scale_offset_##map_space(color_depth); \ + source_x += (y_delta * dmx) - (middle_x * dx); \ + \ + for (i = 0; i < obj_width; i++) { \ + tile_x = (source_x >> 8); \ + \ + if ((u32)tile_x < (u32)max_x) \ + break; \ + \ + source_x += dx; \ + advance_dest_ptr_##combine_op(1); \ + } \ + \ + for (; i < obj_width; i++) { \ + tile_x = (source_x >> 8); \ + \ + if ((u32)tile_x >= (u32)max_x) \ + break; \ + \ + tile_map_offset = (tile_x >> 3) * tile_size_##color_depth; \ + obj_render_scale_pixel_##color_depth(combine_op, alpha_op); \ + \ + source_x += dx; 
\ + advance_dest_ptr_##combine_op(1); \ + } \ + } \ + } + +#define obj_rotate_offset_1D(color_depth) obj_tile_pitch = (max_x / 8) * tile_size_##color_depth + +#define obj_rotate_offset_2D(color_depth) obj_tile_pitch = 1024 + +#define obj_render_rotate_pixel_4bpp(combine_op, alpha_op) \ + current_pixel = tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)]; \ + if (tile_x & 0x01) \ + current_pixel >>= 4; \ + else \ + current_pixel &= 0x0F; \ + \ + tile_4bpp_draw_##combine_op(0, none, 0, alpha_op) + +#define obj_render_rotate_pixel_8bpp(combine_op, alpha_op) \ + current_pixel = tile_ptr[tile_map_offset + (tile_x & 0x07) + ((tile_y & 0x07) * obj_pitch)]; \ + \ + tile_8bpp_draw_##combine_op(0, none, 0, alpha_op) + +#define obj_render_rotate(combine_op, color_depth, alpha_op, map_space) \ + { \ + tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32); \ + obj_rotate_offset_##map_space(color_depth); \ + \ + source_x += (y_delta * dmx) - (middle_x * dx); \ + source_y += (y_delta * dmy) - (middle_x * dy); \ + \ + for (i = 0; i < obj_width; i++) { \ + tile_x = (source_x >> 8); \ + tile_y = (source_y >> 8); \ + \ + if (((u32)tile_x < (u32)max_x) && ((u32)tile_y < (u32)max_y)) \ + break; \ + \ + source_x += dx; \ + source_y += dy; \ + advance_dest_ptr_##combine_op(1); \ + } \ + \ + for (; i < obj_width; i++) { \ + tile_x = (source_x >> 8); \ + tile_y = (source_y >> 8); \ + \ + if (((u32)tile_x >= (u32)max_x) || ((u32)tile_y >= (u32)max_y)) \ + break; \ + \ + tile_map_offset = ((tile_x >> 3) * tile_size_##color_depth) + ((tile_y >> 3) * obj_tile_pitch); \ + obj_render_rotate_pixel_##color_depth(combine_op, alpha_op); \ + \ + source_x += dx; \ + source_y += dy; \ + advance_dest_ptr_##combine_op(1); \ + } \ + } + +// Render the current row of an affine transformed OBJ. 
+ +#define obj_render_affine(combine_op, color_depth, alpha_op, map_space) \ + { \ + u16 *params = (u16 *)OAM + (((obj_attribute_1 >> 9) & 0x1F) * 16); \ + s32 dx = (s16)eswap16(params[3]); \ + s32 dmx = (s16)eswap16(params[7]); \ + s32 dy = (s16)eswap16(params[11]); \ + s32 dmy = (s16)eswap16(params[15]); \ + s32 source_x, source_y; \ + s32 tile_x, tile_y; \ + u32 tile_map_offset; \ + s32 middle_x; \ + s32 middle_y; \ + s32 max_x = obj_width; \ + s32 max_y = obj_height; \ + s32 y_delta; \ + u32 obj_pitch = tile_width_##color_depth; \ + u32 obj_tile_pitch; \ + \ + middle_x = (obj_width / 2); \ + middle_y = (obj_height / 2); \ + \ + source_x = (middle_x << 8); \ + source_y = (middle_y << 8); \ + \ + if (obj_attribute_0 & 0x200) { \ + obj_width *= 2; \ + obj_height *= 2; \ + middle_x *= 2; \ + middle_y *= 2; \ + } \ + \ + if ((s32)obj_x < (s32)start) { \ + u32 x_delta = start - obj_x; \ + middle_x -= x_delta; \ + obj_width -= x_delta; \ + obj_x = start; \ + \ + if ((s32)obj_width <= 0) \ + continue; \ + } \ + \ + if ((s32)(obj_x + obj_width) >= (s32)end) { \ + obj_width = end - obj_x; \ + \ + if ((s32)obj_width <= 0) \ + continue; \ + } \ + dest_ptr = scanline + obj_x; \ + \ + y_delta = vcount - (obj_y + middle_y); \ + \ + obj_get_palette_##color_depth(); \ + \ + if (dy == 0) { \ + obj_render_scale(combine_op, color_depth, alpha_op, map_space); \ + } else { \ + obj_render_rotate(combine_op, color_depth, alpha_op, map_space); \ + } \ + } + +static const u32 obj_width_table[] = { 8, 16, 32, 64, 16, 32, 32, 64, 8, 8, 16, 32 }; +static const u32 obj_height_table[] = { 8, 16, 32, 64, 8, 8, 16, 32, 16, 32, 32, 64 }; + +static const u8 obj_dim_table[3][4][2] = { { { 8, 8 }, { 16, 16 }, { 32, 32 }, { 64, 64 } }, + { { 16, 8 }, { 32, 8 }, { 32, 16 }, { 64, 32 } }, + { { 8, 16 }, { 8, 32 }, { 16, 32 }, { 32, 64 } } }; + +static u8 obj_priority_list[5][DISPLAY_HEIGHT][128]; +static u8 obj_priority_count[5][DISPLAY_HEIGHT]; +static u8 obj_alpha_count[DISPLAY_HEIGHT]; + +// Build 
obj rendering functions + +#define render_scanline_obj_extra_variables_normal(bg_type) u16 *palette = PLTT + 256 + +#define render_scanline_obj_extra_variables_color() u32 pixel_combine = color_combine_mask(4) | (1 << 8) + +#define render_scanline_obj_extra_variables_alpha_obj(map_space) \ + render_scanline_obj_extra_variables_color(); \ + u32 dest; \ + if ((pixel_combine & 0x00000200) == 0) { \ + render_scanline_obj_color32_##map_space(priority, start, end, scanline); \ + return; \ + } + +#define render_scanline_obj_extra_variables_color16(map_space) render_scanline_obj_extra_variables_color() + +#define render_scanline_obj_extra_variables_color32(map_space) render_scanline_obj_extra_variables_color() + +#define render_scanline_obj_extra_variables_partial_alpha(map_space) \ + render_scanline_obj_extra_variables_color(); \ + u32 base_pixel_combine = pixel_combine; \ + u32 dest + +#define render_scanline_obj_extra_variables_copy(type) \ + u32 bldcnt = read_ioreg(REG_ADDR_BLDCNT); \ + u32 dispcnt = read_ioreg(REG_ADDR_DISPCNT); \ + u32 obj_enable = read_ioreg(REG_ADDR_WINOUT) >> 8; \ + render_scanline_layer_functions_##type(); \ + u32 copy_start, copy_end; \ + u16 copy_buffer[DISPLAY_WIDTH]; \ + u16 *copy_ptr + +#define render_scanline_obj_extra_variables_copy_tile(map_space) render_scanline_obj_extra_variables_copy(tile) + +#define render_scanline_obj_extra_variables_copy_bitmap(map_space) render_scanline_obj_extra_variables_copy(bitmap) + +#define render_scanline_obj_main(combine_op, alpha_op, map_space) \ + if (obj_attribute_0 & 0x100) { \ + if ((obj_attribute_0 >> 13) & 0x01) { \ + obj_render_affine(combine_op, 8bpp, alpha_op, map_space); \ + } else { \ + obj_render_affine(combine_op, 4bpp, alpha_op, map_space); \ + } \ + } else { \ + vertical_offset = vcount - obj_y; \ + \ + if ((obj_attribute_1 >> 13) & 0x01) \ + vertical_offset = obj_height - vertical_offset - 1; \ + \ + switch (((obj_attribute_0 >> 12) & 0x02) | ((obj_attribute_1 >> 12) & 0x01)) { \ + case 
0x0: \ + obj_render(combine_op, 4bpp, alpha_op, map_space, noflip); \ + break; \ + \ + case 0x1: \ + obj_render(combine_op, 4bpp, alpha_op, map_space, flip); \ + break; \ + \ + case 0x2: \ + obj_render(combine_op, 8bpp, alpha_op, map_space, noflip); \ + break; \ + \ + case 0x3: \ + obj_render(combine_op, 8bpp, alpha_op, map_space, flip); \ + break; \ + } \ + } + +#define render_scanline_obj_no_partial_alpha(combine_op, alpha_op, map_space) render_scanline_obj_main(combine_op, alpha_op, map_space) + +#define render_scanline_obj_partial_alpha(combine_op, alpha_op, map_space) \ + if ((obj_attribute_0 >> 10) & 0x03) { \ + pixel_combine = 0x00000300; \ + render_scanline_obj_main(combine_op, alpha_obj, map_space); \ + } else { \ + pixel_combine = base_pixel_combine; \ + render_scanline_obj_main(combine_op, color32, map_space); \ + } - if (!(REG_DISPCNT & DISPCNT_OBJ_ON)) - return; +#define render_scanline_obj_prologue_transparent(alpha_op) + +#define render_scanline_obj_prologue_copy_body(type) \ + copy_start = obj_x; \ + copy_end = obj_x + obj_width; \ + if (obj_attribute_0 & 0x200) \ + copy_end += obj_width; \ + \ + if (copy_start < start) \ + copy_start = start; \ + if (copy_end > end) \ + copy_end = end; \ + \ + if ((copy_start < end) && (copy_end > start)) { \ + render_scanline_conditional_##type(copy_start, copy_end, copy_buffer, obj_enable, dispcnt, bldcnt, layer_renderers); \ + copy_ptr = copy_buffer + copy_start; \ + } else { \ + continue; \ + } - // back-to-front so lower oam indices (higher hw priority) draw last - for (int i = OAM_ENTRY_COUNT - 1; i >= 0; i--) { - OamData *oam = &((OamData *)OAM)[i]; +#define render_scanline_obj_prologue_copy_tile() render_scanline_obj_prologue_copy_body(tile) + +#define render_scanline_obj_prologue_copy_bitmap() render_scanline_obj_prologue_copy_body(bitmap) + +#define render_scanline_obj_prologue_copy(alpha_op) render_scanline_obj_prologue_##alpha_op() + +#define render_scanline_obj_builder(combine_op, alpha_op, map_space, 
partial_alpha_op) \ + static void render_scanline_obj_##alpha_op##_##map_space(u32 priority, u32 start, u32 end, render_scanline_dest_##alpha_op *scanline) \ + { \ + render_scanline_obj_extra_variables_##alpha_op(map_space); \ + u32 obj_num, i; \ + s32 obj_x, obj_y; \ + u32 obj_size; \ + u32 obj_width, obj_height; \ + u32 obj_attribute_0, obj_attribute_1, obj_attribute_2; \ + s32 vcount = read_ioreg(REG_ADDR_VCOUNT); \ + u32 tile_run; \ + u32 current_pixels; \ + u32 current_pixel; \ + u32 current_palette; \ + u32 vertical_offset; \ + u32 partial_tile_run, partial_tile_offset; \ + u32 pixel_run; \ + u16 *oam_ptr; \ + OamData *oam_data; \ + render_scanline_dest_##alpha_op *dest_ptr; \ + u8 *tile_base = VRAM + 0x10000; \ + u8 *tile_ptr; \ + u32 obj_count = obj_priority_count[priority][vcount]; \ + u8 *obj_list = obj_priority_list[priority][vcount]; \ + \ + for (obj_num = 0; obj_num < obj_count; obj_num++) { \ + oam_data = (OamData *)&OAM[obj_list[obj_num] * OAM_DATA_SIZE_AFFINE]; \ + oam_ptr = (u16 *)OAM + (obj_list[obj_num] * 4); \ + obj_attribute_0 = eswap16(oam_ptr[0]); \ + obj_attribute_1 = eswap16(oam_ptr[1]); \ + obj_attribute_2 = eswap16(oam_ptr[2]); \ + obj_size = ((obj_attribute_0 >> 12) & 0x0C) | (obj_attribute_1 >> 14); \ + \ + obj_x = oam_data->split.x; \ + obj_width = obj_width_table[obj_size]; \ + \ + render_scanline_obj_prologue_##combine_op(alpha_op); \ + \ + obj_y = obj_attribute_0 & 0xFF; \ + \ + if (!EXTENDED_OAM) { \ + if (obj_x > DISPLAY_WIDTH) \ + obj_x -= 512; \ + if (obj_y > DISPLAY_HEIGHT) \ + obj_y -= 256; \ + } \ + \ + obj_height = obj_height_table[obj_size]; \ + render_scanline_obj_##partial_alpha_op(combine_op, alpha_op, map_space); \ + } \ + } - bool isAffine = oam->split.affineMode & 1; - bool isDisabled = (oam->split.affineMode >> 1) & 1; +render_scanline_obj_builder(transparent, normal, 1D, no_partial_alpha); +render_scanline_obj_builder(transparent, normal, 2D, no_partial_alpha); +render_scanline_obj_builder(transparent, color16, 1D, 
no_partial_alpha); +render_scanline_obj_builder(transparent, color16, 2D, no_partial_alpha); +render_scanline_obj_builder(transparent, color32, 1D, no_partial_alpha); +render_scanline_obj_builder(transparent, color32, 2D, no_partial_alpha); +render_scanline_obj_builder(transparent, alpha_obj, 1D, no_partial_alpha); +render_scanline_obj_builder(transparent, alpha_obj, 2D, no_partial_alpha); +render_scanline_obj_builder(transparent, partial_alpha, 1D, partial_alpha); +render_scanline_obj_builder(transparent, partial_alpha, 2D, partial_alpha); +render_scanline_obj_builder(copy, copy_tile, 1D, no_partial_alpha); +render_scanline_obj_builder(copy, copy_tile, 2D, no_partial_alpha); +render_scanline_obj_builder(copy, copy_bitmap, 1D, no_partial_alpha); +render_scanline_obj_builder(copy, copy_bitmap, 2D, no_partial_alpha); + +#define OBJ_MOD_NORMAL 0 +#define OBJ_MOD_SEMITRAN 1 +#define OBJ_MOD_WINDOW 2 +#define OBJ_MOD_INVALID 3 + +// Goes through the object list in the OAM (from #127 to #0) and adds objects +// into a sorted list by priority for the current row. +// Invisible objects are discarded. +static void order_obj(u32 video_mode) +{ + s32 obj_num; + u32 row; + t_oam *oam_base = (t_oam *)OAM; + + memset(obj_priority_count, 0, sizeof(obj_priority_count)); + memset(obj_alpha_count, 0, sizeof(obj_alpha_count)); + + for (obj_num = 127; obj_num >= 0; obj_num--) { + OamData *oam_data = (OamData *)&OAM[obj_num * OAM_DATA_SIZE_AFFINE]; + t_oam *oam_ptr = &oam_base[obj_num]; + u16 obj_attr0 = eswap16(oam_ptr->attr0); + // Bit 9 disables regular sprites. Used as double bit for affine ones. 
+ bool visible = oam_data->split.affineMode != 2; + if (visible) { + u16 obj_shape = obj_attr0 >> 14; + u32 obj_mode = (obj_attr0 >> 10) & 0x03; + + // Prohibited shape and mode + bool invalid = (obj_shape == 0x3) || (obj_mode == OBJ_MOD_INVALID); + if (!invalid) { + u16 obj_attr1 = eswap16(oam_ptr->attr1); + u16 obj_attr2 = eswap16(oam_ptr->attr2); + u32 obj_priority = (obj_attr2 >> 10) & 0x03; + + if (((video_mode < 3) || ((obj_attr2 & 0x3FF) >= 512))) { + // Calculate object size (from size and shape attr bits) + u16 obj_size = (obj_attr1 >> 14); + s32 obj_height = obj_dim_table[obj_shape][obj_size][1]; + s32 obj_width = obj_dim_table[obj_shape][obj_size][0]; + s32 obj_y = obj_attr0 & 0xFF; - if (!isAffine && isDisabled) - continue; +#if !EXTENDED_OAM + if (obj_y > DISPLAY_HEIGHT) + obj_y -= 256; /* obj_y is masked to 8 bits, so it wraps at 256 (same as render_scanline_obj_builder); 512 culled every sprite clipped by the top edge */ +#endif - s32 idx = (oam->split.shape << 2) | oam->split.size; - unsigned int width = gOamShapesSizes[idx][0]; - unsigned int height = gOamShapesSizes[idx][1]; - int halfW = width / 2; - int halfH = height / 2; + // Double size for affine sprites with double bit set + if (obj_attr0 & 0x200) { + obj_height *= 2; + obj_width *= 2; + } - int32_t sx = oam->split.x; - int32_t sy = oam->split.y; + if (((obj_y + obj_height) > 0) && (obj_y < DISPLAY_HEIGHT)) { + s32 obj_x = oam_data->split.x; #if !EXTENDED_OAM - if (sx >= DISPLAY_WIDTH) - sx -= 512; - if (sy >= DISPLAY_HEIGHT) - sy -= 256; + if (obj_x > DISPLAY_WIDTH) + obj_x -= 512; #endif - // double-size affine sprites have 2x bounding box - if (isAffine && isDisabled) { - halfW *= 2; - halfH *= 2; + if (((obj_x + obj_width) > 0) && (obj_x < DISPLAY_WIDTH)) { + // Clip Y coord and height to the 0..159 interval + u32 starty = MAX(obj_y, 0); + u32 endy = MIN(obj_y + obj_height, DISPLAY_HEIGHT); + + switch (obj_mode) { + case OBJ_MOD_SEMITRAN: + for (row = starty; row < endy; row++) { + u32 cur_cnt = obj_priority_count[obj_priority][row]; + obj_priority_list[obj_priority][row][cur_cnt] = obj_num; +
obj_priority_count[obj_priority][row] = cur_cnt + 1; + // Mark the row as having semi-transparent objects + obj_alpha_count[row] = 1; + } + break; + case OBJ_MOD_WINDOW: + obj_priority = 4; + /* fallthrough */ + case OBJ_MOD_NORMAL: + // Add the object to the list. + for (row = starty; row < endy; row++) { + u32 cur_cnt = obj_priority_count[obj_priority][row]; + obj_priority_list[obj_priority][row][cur_cnt] = obj_num; + obj_priority_count[obj_priority][row] = cur_cnt + 1; + } + break; + }; + } + } + } + } } + } +} + +u32 layer_order[16]; +u32 layer_count; + +// Sorts active BG/OBJ layers and generates an ordered list of layers. +// Things are drawn back to front, so lowest priority goes first. +static void order_layers(u32 layer_flags, u32 vcnt) +{ + bool obj_enabled = (layer_flags & 0x10); + s32 priority; + + layer_count = 0; - if ((int)vcount < sy || (int)vcount >= sy + halfH * 2) - continue; - if (sx + halfW * 2 < 0 || sx >= DISPLAY_WIDTH) - continue; + for (priority = 3; priority >= 0; priority--) { + bool anyobj = obj_priority_count[priority][vcnt] > 0; + s32 lnum; - int pri = oam->split.priority; - if (sActiveSpriteCount[pri] < MAX_SPRITES_PER_PRIORITY) { - sActiveSprites[pri][sActiveSpriteCount[pri]].oamIndex = i; - sActiveSpriteCount[pri]++; + for (lnum = 3; lnum >= 0; lnum--) { + if (((layer_flags >> lnum) & 1) && ((read_ioreg(REG_ADDR_BGxCNT(lnum)) & 0x03) == priority)) { + layer_order[layer_count++] = lnum; + } } + + if (obj_enabled && anyobj) + layer_order[layer_count++] = priority | 0x04; } } -static void DrawSpritesAtPriority(int priority, uint16_t vcount, uint16_t *output, uint8_t *layerIds, bool windowsEnabled, - uint16_t *winMask, unsigned int blendMode, bool objWinOnly, unsigned int bldcnt, unsigned int eva, - unsigned int evb, unsigned int evy) -{ - uint8_t *tiledata = (uint8_t *)OBJ_VRAM0; - uint16_t *sprpal = (uint16_t *)PLTT + (0x200 / 2); - int16_t matrix[2][2]; +#define fill_line(_start, _end) \ + u32 i; \ + \ + for (i = _start; i < _end; 
i++) \ + dest_ptr[i] = color; - // only 1-D tile mapping supported - if (!(REG_DISPCNT & (1 << 6))) - return; +#define fill_line_color_normal() color = PLTT[color] - for (int s = 0; s < sActiveSpriteCount[priority]; s++) { - int i = sActiveSprites[priority][s].oamIndex; - OamData *oam = &((OamData *)OAM)[i]; +#define fill_line_color_alpha() - bool isAffine = oam->split.affineMode & 1; - bool doubleSize = (oam->split.affineMode >> 1) & 1; +#define fill_line_color_color16() - s32 idx = (oam->split.shape << 2) | oam->split.size; - unsigned int width = gOamShapesSizes[idx][0]; - unsigned int height = gOamShapesSizes[idx][1]; - int halfW = width / 2; - int halfH = height / 2; +#define fill_line_color_color32() + +#define fill_line_builder(type) \ + static void fill_line_##type(u16 color, render_scanline_dest_##type *dest_ptr, u32 start, u32 end) \ + { \ + fill_line_color_##type(); \ + fill_line(start, end); \ + } + +fill_line_builder(normal); +fill_line_builder(alpha); +fill_line_builder(color16); +fill_line_builder(color32); + +// Blending is performed by separating an RGB value into 0G0R0B (32 bit) +// Since blending factors are at most 16, mult/add operations do not overflow +// to the neighbouring color and can be performed much faster than separatedly + +// Here follow the mask value to separate/expand the color to 32 bit, +// the mask to detect overflows in the blend operation and + +#define BLND_MSK (SATR_MSK | SATG_MSK | SATB_MSK) + +#define OVFG_MSK 0x04000000 +#define OVFR_MSK 0x00008000 +#define OVFB_MSK 0x00000020 +#define SATG_MSK 0x03E00000 +#define SATR_MSK 0x00007C00 +#define SATB_MSK 0x0000001F + +// Alpha blend two pixels (pixel_top and pixel_bottom). + +#define blend_pixel() \ + pixel_bottom = PLTT[(pixel_pair >> 16) & 0x1FF]; \ + pixel_bottom = (pixel_bottom | (pixel_bottom << 16)) & BLND_MSK; \ + pixel_top = ((pixel_top * blend_a) + (pixel_bottom * blend_b)) >> 4 + +// Alpha blend two pixels, allowing for saturation (individual channels > 31). 
+// The operation is optimized towards saturation not occuring. + +#define blend_saturate_pixel() \ + pixel_bottom = PLTT[(pixel_pair >> 16) & 0x1FF]; \ + pixel_bottom = (pixel_bottom | (pixel_bottom << 16)) & BLND_MSK; \ + pixel_top = ((pixel_top * blend_a) + (pixel_bottom * blend_b)) >> 4; \ + if (pixel_top & (OVFR_MSK | OVFG_MSK | OVFB_MSK)) { \ + if (pixel_top & OVFG_MSK) \ + pixel_top |= SATG_MSK; \ + \ + if (pixel_top & OVFR_MSK) \ + pixel_top |= SATR_MSK; \ + \ + if (pixel_top & OVFB_MSK) \ + pixel_top |= SATB_MSK; \ + } + +#define brighten_pixel() pixel_top = upper + ((pixel_top * blend) >> 4); + +#define darken_pixel() pixel_top = (pixel_top * blend) >> 4; + +#define effect_condition_alpha ((pixel_pair & 0x04000200) == 0x04000200) + +#define effect_condition_fade(pixel_source) ((pixel_source & 0x00000200) == 0x00000200) + +#define expand_pixel_no_dest(expand_type, pixel_source) \ + pixel_top = (pixel_top | (pixel_top << 16)) & BLND_MSK; \ + expand_type##_pixel(); \ + pixel_top &= BLND_MSK; \ + pixel_top = (pixel_top >> 16) | pixel_top + +#define expand_pixel(expand_type, pixel_source) \ + pixel_top = PLTT[pixel_source & 0x1FF]; \ + expand_pixel_no_dest(expand_type, pixel_source); \ + *screen_dest_ptr = pixel_top + +#define expand_loop(expand_type, effect_condition, pixel_source) \ + screen_src_ptr += start; \ + screen_dest_ptr += start; \ + \ + end -= start; \ + \ + for (i = 0; i < end; i++) { \ + pixel_source = *screen_src_ptr; \ + if (effect_condition) { \ + expand_pixel(expand_type, pixel_source); \ + } else { \ + *screen_dest_ptr = PLTT[pixel_source & 0x1FF]; \ + } \ + \ + screen_src_ptr++; \ + screen_dest_ptr++; \ + } + +#define expand_loop_partial_alpha(alpha_expand, expand_type) \ + screen_src_ptr += start; \ + screen_dest_ptr += start; \ + \ + end -= start; \ + \ + for (i = 0; i < end; i++) { \ + pixel_pair = *screen_src_ptr; \ + if (effect_condition_fade(pixel_pair)) { \ + if (effect_condition_alpha) { \ + expand_pixel(alpha_expand, pixel_pair); \ 
+ } else { \ + expand_pixel(expand_type, pixel_pair); \ + } \ + } else { \ + *screen_dest_ptr = PLTT[pixel_pair & 0x1FF]; \ + } \ + \ + screen_src_ptr++; \ + screen_dest_ptr++; \ + } + +#define expand_partial_alpha(expand_type) \ + if ((blend_a + blend_b) > 16) { \ + expand_loop_partial_alpha(blend_saturate, expand_type); \ + } else { \ + expand_loop_partial_alpha(blend, expand_type); \ + } + +// Blend top two pixels of scanline with each other. + +#define expand_normal(screen_ptr, start, end) + +void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end); + +#ifndef ARM_ARCH_BLENDING_OPTS + +void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end) +{ + u32 pixel_pair; + u32 pixel_top, pixel_bottom; + u32 bldalpha = read_ioreg(REG_ADDR_BLDALPHA); + u32 blend_a = bldalpha & 0x1F; + u32 blend_b = (bldalpha >> 8) & 0x1F; + u32 i; + + if (blend_a > 16) + blend_a = 16; + + if (blend_b > 16) + blend_b = 16; + + // The individual colors can saturate over 31, this should be taken + // care of in an alternate pass as it incurs a huge additional speedhit. 
+ if ((blend_a + blend_b) > 16) { + expand_loop(blend_saturate, effect_condition_alpha, pixel_pair); + } else { + expand_loop(blend, effect_condition_alpha, pixel_pair); + } +} - int32_t x = oam->split.x; - int32_t y = oam->split.y; -#if !EXTENDED_OAM - if (x >= DISPLAY_WIDTH) - x -= 512; - if (y >= DISPLAY_HEIGHT) - y -= 256; #endif - if (isAffine && doubleSize) { - halfW *= 2; - halfH *= 2; - } - bool isSemiTransparent = (oam->split.objMode == 1); - bool isObjWin = (oam->split.objMode == 2); - - if (objWinOnly && !isObjWin) - continue; - if (!objWinOnly && isObjWin) - continue; - - int rectWidth = width; - int rectHeight = height; - - if (isAffine) { - u8 matrixNum = oam->split.matrixNum * 4; - OamData *m0 = &((OamData *)OAM)[matrixNum]; - OamData *m1 = &((OamData *)OAM)[matrixNum + 1]; - OamData *m2 = &((OamData *)OAM)[matrixNum + 2]; - OamData *m3 = &((OamData *)OAM)[matrixNum + 3]; - matrix[0][0] = m0->all.affineParam; - matrix[0][1] = m1->all.affineParam; - matrix[1][0] = m2->all.affineParam; - matrix[1][1] = m3->all.affineParam; - if (doubleSize) { - rectWidth *= 2; - rectHeight *= 2; - } - } else { - matrix[0][0] = 0x100; // identity in 8.8 fixed point - matrix[0][1] = 0; - matrix[1][0] = 0; - matrix[1][1] = 0x100; - } +// Blend scanline with black (BLDY brightness decrease). - x += halfW; - y += halfH; - - int localY = (oam->split.mosaic == 1) ?
ApplyMosaicSprY(vcount) - y : vcount - y; - bool flipX = !isAffine && ((oam->split.matrixNum >> 3) & 1); - bool flipY = !isAffine && ((oam->split.matrixNum >> 4) & 1); - bool is8bpp = oam->split.bpp & 1; - - int startLX = -halfW; - int endLX = halfW; - if (startLX + x < 0) - startLX = -x; - if (endLX + x >= DISPLAY_WIDTH) - endLX = DISPLAY_WIDTH - 1 - x; - - // fast path: non-affine 4bpp, no mosaic -- batched tile row reads - if (!isAffine && !is8bpp && !oam->split.mosaic) { - int texY = localY + halfH; - if (flipY) - texY = height - texY - 1; - if (texY < 0 || texY >= (int)height) - continue; - - int tileRowY = texY & 7; - int blockY = texY >> 3; - int tilesPerRow = (REG_DISPCNT & 0x40) ? ((int)width >> 3) : 16; - int tileBase = blockY * tilesPerRow + oam->split.tileNum; - int rowByteOff = tileRowY << 2; - uint16_t *pixpal = sprpal + (oam->split.paletteNum << 4); - - int lx = startLX; - while (lx <= endLX) { - int rawX = lx + halfW; - int texX = flipX ? ((int)width - 1 - rawX) : rawX; - - if (texX < 0 || texX >= (int)width) { - lx++; - continue; - } +static void expand_darken(u16 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end) +{ + u32 pixel_top; + s32 blend = 16 - (read_ioreg(REG_ADDR_BLDY) & 0x1F); + u32 i; - int blockX = texX >> 3; - int tileXStart = texX & 7; + if (blend < 0) + blend = 0; - uint32_t rowData = *(uint32_t *)(tiledata + ((tileBase + blockX) << 5) + rowByteOff); + expand_loop(darken, effect_condition_fade(pixel_top), pixel_top); +} - int pixelsInTile = !flipX ? (8 - tileXStart) : (tileXStart + 1); - int remain = endLX - lx + 1; - if (pixelsInTile > remain) - pixelsInTile = remain; +// Blend scanline with white (BLDY brightness increase).
- if (!flipX) { - int texRemain = (int)width - texX; - if (pixelsInTile > texRemain) - pixelsInTile = texRemain; - } else { - int texRemain = texX + 1; - if (pixelsInTile > texRemain) - pixelsInTile = texRemain; - } +static void expand_brighten(u16 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end) +{ + u32 pixel_top; + u32 blend = read_ioreg(REG_ADDR_BLDY) & 0x1F; + u32 upper; + u32 i; - for (int p = 0; p < pixelsInTile; p++, lx++) { - int curTX = flipX ? (tileXStart - p) : (tileXStart + p); - uint8_t pixel = (rowData >> (curTX << 2)) & 0xF; - if (pixel == 0) - continue; + if (blend > 16) + blend = 16; - int gx = lx + x; - uint16_t color = pixpal[pixel]; + upper = ((BLND_MSK * blend) >> 4) & BLND_MSK; + blend = 16 - blend; - // obj window sprites modify the window mask, not the framebuffer - if (isObjWin) { - if (windowsEnabled && winMask && (winMask[gx] & WINMASK_WINOUT)) - winMask[gx] = (REG_WINOUT >> 8) & 0x3F; - continue; - } + expand_loop(brighten, effect_condition_fade(pixel_top), pixel_top); +} - if (layerIds && blendMode != 0) - color = BlendSpritePixel(color, gx, output, layerIds, isSemiTransparent, blendMode, bldcnt, windowsEnabled, winMask, - eva, evb, evy); +// Expand scanline such that if both top and bottom pass it's alpha, +// if only top passes it's as specified, and if neither pass it's normal. 
- if (windowsEnabled && winMask && !(winMask[gx] & WINMASK_OBJ)) - continue; +static void expand_darken_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end) +{ + s32 blend = 16 - (read_ioreg(REG_ADDR_BLDY) & 0x1F); + u32 pixel_pair; + u32 pixel_top, pixel_bottom; + u32 bldalpha = read_ioreg(REG_ADDR_BLDALPHA); + u32 blend_a = bldalpha & 0x1F; + u32 blend_b = (bldalpha >> 8) & 0x1F; + u32 i; - output[gx] = color | COLOR_OPAQUE; - if (layerIds) - layerIds[gx] = LAYER_OBJ; - } - } - continue; - } + if (blend < 0) + blend = 0; - // generic path: affine, 8bpp, or mosaic -- per pixel - for (int localX = startLX; localX <= endLX; localX++) { - int gx = localX + x; - int texX, texY; - - if (!isAffine) { - int lmx = localX; - if (oam->split.mosaic == 1) - lmx = ApplyMosaicSprX(gx) - x; - texX = lmx + halfW; - texY = localY + halfH; - if (flipX) - texX = width - texX - 1; - if (flipY) - texY = height - texY - 1; - } else { - int lmx = localX; - int lmy = localY; - if (oam->split.mosaic == 1) { - lmx = ApplyMosaicSprX(gx) - x; - lmy = ApplyMosaicSprY(vcount) - y; - } - // apply 2x2 affine matrix (8.8 fixed point) - texX = ((matrix[0][0] * lmx + matrix[0][1] * lmy) >> 8) + (width / 2); - texY = ((matrix[1][0] * lmx + matrix[1][1] * lmy) >> 8) + (height / 2); - } + if (blend_a > 16) + blend_a = 16; - if (texX < 0 || texY < 0 || texX >= (int)width || texY >= (int)height) - continue; - - int tileX = texX & 7; - int tileY = texY & 7; - int blockX = texX >> 3; - int blockY = texY >> 3; - int blockOffset = blockY * (REG_DISPCNT & 0x40 ? 
((int)width >> 3) : 16) + blockX; - - uint16_t pixel = 0; - uint16_t *pixpal; - - if (!is8bpp) { - int tdi = ((blockOffset + oam->split.tileNum) << 5) + (tileY << 2) + (tileX >> 1); - pixel = tiledata[tdi]; - if (tileX & 1) - pixel >>= 4; - else - pixel &= 0xF; - pixpal = sprpal + (oam->split.paletteNum << 4); - } else { - pixel = tiledata[((blockOffset * 2 + oam->split.tileNum) << 5) + (tileY << 3) + tileX]; - pixpal = sprpal; - } + if (blend_b > 16) + blend_b = 16; - if (pixel == 0) - continue; + expand_partial_alpha(darken); +} + +static void expand_brighten_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end) +{ + s32 blend = read_ioreg(REG_ADDR_BLDY) & 0x1F; + u32 pixel_pair; + u32 pixel_top, pixel_bottom; + u32 bldalpha = read_ioreg(REG_ADDR_BLDALPHA); + u32 blend_a = bldalpha & 0x1F; + u32 blend_b = (bldalpha >> 8) & 0x1F; + u32 upper; + u32 i; - uint16_t color = pixpal[pixel]; + if (blend > 16) + blend = 16; - if (isObjWin) { - if (windowsEnabled && winMask && (winMask[gx] & WINMASK_WINOUT)) - winMask[gx] = (REG_WINOUT >> 8) & 0x3F; - continue; - } + upper = ((BLND_MSK * blend) >> 4) & BLND_MSK; + blend = 16 - blend; - if (layerIds && blendMode != 0) - color = BlendSpritePixel(color, gx, output, layerIds, isSemiTransparent, blendMode, bldcnt, windowsEnabled, winMask, eva, - evb, evy); + if (blend_a > 16) + blend_a = 16; - if (windowsEnabled && winMask && !(winMask[gx] & WINMASK_OBJ)) - continue; + if (blend_b > 16) + blend_b = 16; - output[gx] = color | COLOR_OPAQUE; - if (layerIds) - layerIds[gx] = LAYER_OBJ; + expand_partial_alpha(brighten); +} + +// Render an OBJ layer from start to end, depending on the type (1D or 2D) +// stored in dispcnt. 
+ +#define render_obj_layer(type, dest, _start, _end) \ + current_layer &= ~0x04; \ + if (dispcnt & 0x40) \ + render_scanline_obj_##type##_1D(current_layer, _start, _end, dest); \ + else \ + render_scanline_obj_##type##_2D(current_layer, _start, _end, dest) + +// Render a target all the way with the background color as taken from the +// palette. + +#define fill_line_bg(type, dest, _start, _end) fill_line_##type(0, dest, _start, _end) + +// Render all layers as they appear in the layer order. + +#define render_layers(tile_alpha, obj_alpha, dest) \ + { \ + current_layer = layer_order[0]; \ + if (current_layer & 0x04) { \ + /* If the first one is OBJ render the background then render it. */ \ + fill_line_bg(tile_alpha, dest, 0, DISPLAY_WIDTH); \ + render_obj_layer(obj_alpha, dest, 0, DISPLAY_WIDTH); \ + } else { \ + /* Otherwise render a base layer. */ \ + layer_renderers[current_layer].tile_alpha##_render_base(current_layer, 0, DISPLAY_WIDTH, dest); \ + } \ + \ + /* Render the rest of the layers. */ \ + for (layer_order_pos = 1; layer_order_pos < layer_count; layer_order_pos++) { \ + current_layer = layer_order[layer_order_pos]; \ + if (current_layer & 0x04) { \ + render_obj_layer(obj_alpha, dest, 0, DISPLAY_WIDTH); \ + } else { \ + layer_renderers[current_layer].tile_alpha##_render_transparent(current_layer, 0, DISPLAY_WIDTH, dest); \ + } \ + } \ + } + +#define render_condition_alpha \ + (((read_ioreg(REG_ADDR_BLDALPHA) & 0x1F1F) != 0x001F) && ((read_ioreg(REG_ADDR_BLDCNT) & 0x3F) != 0) \ + && ((read_ioreg(REG_ADDR_BLDCNT) & 0x3F00) != 0)) + +#define render_condition_fade (((read_ioreg(REG_ADDR_BLDY) & 0x1F) != 0) && ((read_ioreg(REG_ADDR_BLDCNT) & 0x3F) != 0)) + +#define render_layers_color_effect(renderer, layer_condition, alpha_condition, fade_condition, _start, _end) \ + { \ + if (layer_condition) { \ + if (obj_alpha_count[read_ioreg(REG_ADDR_VCOUNT)]) { \ + /* Render based on special effects mode. 
*/ \ + u32 screen_buffer[DISPLAY_WIDTH]; \ + switch ((bldcnt >> 6) & 0x03) { \ + /* Alpha blend */ \ + case 0x01: { \ + if (alpha_condition) { \ + renderer(alpha, alpha_obj, screen_buffer); \ + expand_blend(screen_buffer, scanline, _start, _end); \ + return; \ + } \ + break; \ + } \ + \ + /* Fade to white */ \ + case 0x02: { \ + if (fade_condition) { \ + renderer(color32, partial_alpha, screen_buffer); \ + expand_brighten_partial_alpha(screen_buffer, scanline, _start, _end); \ + return; \ + } \ + break; \ + } \ + \ + /* Fade to black */ \ + case 0x03: { \ + if (fade_condition) { \ + renderer(color32, partial_alpha, screen_buffer); \ + expand_darken_partial_alpha(screen_buffer, scanline, _start, _end); \ + return; \ + } \ + break; \ + } \ + } \ + \ + renderer(color32, partial_alpha, screen_buffer); \ + expand_blend(screen_buffer, scanline, _start, _end); \ + } else { \ + /* Render based on special effects mode. */ \ + switch ((bldcnt >> 6) & 0x03) { \ + /* Alpha blend */ \ + case 0x01: { \ + if (alpha_condition) { \ + u32 screen_buffer[DISPLAY_WIDTH]; \ + renderer(alpha, alpha_obj, screen_buffer); \ + expand_blend(screen_buffer, scanline, _start, _end); \ + return; \ + } \ + break; \ + } \ + \ + /* Fade to white */ \ + case 0x02: { \ + if (fade_condition) { \ + renderer(color16, color16, scanline); \ + expand_brighten(scanline, scanline, _start, _end); \ + return; \ + } \ + break; \ + } \ + \ + /* Fade to black */ \ + case 0x03: { \ + if (fade_condition) { \ + renderer(color16, color16, scanline); \ + expand_darken(scanline, scanline, _start, _end); \ + return; \ + } \ + break; \ + } \ + } \ + \ + renderer(normal, normal, scanline); \ + expand_normal(scanline, _start, _end); \ + } \ + } else { \ + u32 pixel_top = PLTT[0]; \ + switch ((bldcnt >> 6) & 0x03) { \ + /* Fade to white */ \ + case 0x02: { \ + if (color_combine_mask_a(5)) { \ + u32 blend = read_ioreg(REG_ADDR_BLDY) & 0x1F; \ + u32 upper; \ + \ + if (blend > 16) \ + blend = 16; \ + \ + upper = ((BLND_MSK * 
blend) >> 4) & BLND_MSK; \ + blend = 16 - blend; \ + \ + expand_pixel_no_dest(brighten, pixel_top); \ + } \ + break; \ + } \ + \ + /* Fade to black */ \ + case 0x03: { \ + if (color_combine_mask_a(5)) { \ + s32 blend = 16 - (read_ioreg(REG_ADDR_BLDY) & 0x1F); \ + \ + if (blend < 0) \ + blend = 0; \ + \ + expand_pixel_no_dest(darken, pixel_top); \ + } \ + break; \ + } \ + } \ + fill_line_color16(pixel_top, scanline, _start, _end); \ + } \ + } + +// Renders an entire scanline from 0 to DISPLAY_WIDTH, based on current color mode. + +static void render_scanline_tile(u16 *scanline, u32 dispcnt) +{ + u32 current_layer; + u32 layer_order_pos; + u32 bldcnt = read_ioreg(REG_ADDR_BLDCNT); + render_scanline_layer_functions_tile(); + + render_layers_color_effect(render_layers, layer_count, render_condition_alpha, render_condition_fade, 0, DISPLAY_WIDTH); +} + +static void render_scanline_bitmap(u16 *scanline, u32 dispcnt) +{ + render_scanline_layer_functions_bitmap(); + u32 current_layer; + u32 layer_order_pos; + + fill_line_bg(normal, scanline, 0, DISPLAY_WIDTH); + + for (layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++) { + current_layer = layer_order[layer_order_pos]; + if (current_layer & 0x04) { + render_obj_layer(normal, scanline, 0, DISPLAY_WIDTH); + } else { + layer_renderers->normal_render(0, DISPLAY_WIDTH, scanline); } } } -static void DrawScanline(uint16_t *pixels, uint16_t vcount) +// Render layers from start to end based on if they're allowed in the +// enable flags. + +#define render_layers_conditional(tile_alpha, obj_alpha, dest) \ + { \ + __label__ skip; \ + current_layer = layer_order[layer_order_pos]; \ + /* If OBJ aren't enabled skip to the first non-OBJ layer */ \ + if (!(enable_flags & 0x10)) { \ + while ((current_layer & 0x04) || !((1 << current_layer) & enable_flags)) { \ + layer_order_pos++; \ + current_layer = layer_order[layer_order_pos]; \ + \ + /* Oops, ran out of layers, render the background. 
*/ \ + if (layer_order_pos == layer_count) { \ + fill_line_bg(tile_alpha, dest, start, end); \ + goto skip; \ + } \ + } \ + \ + /* Render the first valid layer */ \ + layer_renderers[current_layer].tile_alpha##_render_base(current_layer, start, end, dest); \ + \ + layer_order_pos++; \ + \ + /* Render the rest of the layers if active, skipping OBJ ones. */ \ + for (; layer_order_pos < layer_count; layer_order_pos++) { \ + current_layer = layer_order[layer_order_pos]; \ + if (!(current_layer & 0x04) && ((1 << current_layer) & enable_flags)) { \ + layer_renderers[current_layer].tile_alpha##_render_transparent(current_layer, start, end, dest); \ + } \ + } \ + } else { \ + /* Find the first active layer, skip all of the inactive ones */ \ + while (!((current_layer & 0x04) || ((1 << current_layer) & enable_flags))) { \ + layer_order_pos++; \ + current_layer = layer_order[layer_order_pos]; \ + \ + /* Oops, ran out of layers, render the background. */ \ + if (layer_order_pos == layer_count) { \ + fill_line_bg(tile_alpha, dest, start, end); \ + goto skip; \ + } \ + } \ + \ + if (current_layer & 0x04) { \ + /* If the first one is OBJ render the background then render it. */ \ + fill_line_bg(tile_alpha, dest, start, end); \ + render_obj_layer(obj_alpha, dest, start, end); \ + } else { \ + /* Otherwise render a base layer. */ \ + layer_renderers[current_layer].tile_alpha##_render_base(current_layer, start, end, dest); \ + } \ + \ + layer_order_pos++; \ + \ + /* Render the rest of the layers. 
*/ \ + for (; layer_order_pos < layer_count; layer_order_pos++) { \ + current_layer = layer_order[layer_order_pos]; \ + if (current_layer & 0x04) { \ + render_obj_layer(obj_alpha, dest, start, end); \ + } else { \ + if (enable_flags & (1 << current_layer)) { \ + layer_renderers[current_layer].tile_alpha##_render_transparent(current_layer, start, end, dest); \ + } \ + } \ + } \ + } \ + \ + skip:; \ + } + +// Render all of the BG and OBJ in a tiled scanline from start to end ONLY if +// enable_flag allows that layer/OBJ. Also conditionally render color effects. + +static void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline, u32 enable_flags, u32 dispcnt, u32 bldcnt, + const tile_layer_render_struct *layer_renderers) +{ + u32 current_layer; + u32 layer_order_pos = 0; + + render_layers_color_effect(render_layers_conditional, (layer_count && (enable_flags & 0x1F)), + ((enable_flags & 0x20) && render_condition_alpha), ((enable_flags & 0x20) && render_condition_fade), start, + end); +} + +// Render the BG and OBJ in a bitmap scanline from start to end ONLY if +// enable_flag allows that layer/OBJ. Also conditionally render color effects. + +static void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline, u32 enable_flags, u32 dispcnt, u32 bldcnt, + const bitmap_layer_render_struct *layer_renderers) { - unsigned int mode = REG_DISPCNT & 3; - unsigned int numBGs = (mode == 0) ? 
4 : 3; - unsigned int blendMode = (REG_BLDCNT >> 6) & 3; - unsigned int enabledBgs = (REG_DISPCNT >> 8) & 0xF; - - // sort bgs by priority - uint16_t bgcnts[4]; - char bgPriority[4]; - char bgsByPri[4][4]; - char bgsByPriCount[4] = { 0, 0, 0, 0 }; - - for (int bg = 0; bg < (int)numBGs; bg++) { - uint16_t cnt = *(uint16_t *)(REG_ADDR_BG0CNT + bg * 2); - bgcnts[bg] = cnt; - uint16_t pri = cnt & 3; - bgPriority[bg] = pri; - bgsByPri[pri][bgsByPriCount[pri]] = bg; - bgsByPriCount[pri]++; - } - - // window setup - bool windowsEnabled = false; - u16 win0Bot, win0Top, win0Right, win0Left; - u16 win1Bot, win1Top, win1Right, win1Left; - bool win0Active = false, win1Active = false; - static uint16_t winMask[DISPLAY_WIDTH]; - - if (REG_DISPCNT & DISPCNT_WIN0_ON) { - win0Bot = WIN_GET_HIGHER(REG_WIN0V); - win0Top = WIN_GET_LOWER(REG_WIN0V); - win0Right = WIN_GET_HIGHER(REG_WIN0H); - win0Left = WIN_GET_LOWER(REG_WIN0H); - if (win0Top > win0Bot) - win0Active = (vcount >= win0Top || vcount < win0Bot); - else - win0Active = (vcount >= win0Top && vcount < win0Bot); - windowsEnabled = true; - } - if (REG_DISPCNT & DISPCNT_WIN1_ON) { - win1Bot = WIN_GET_HIGHER(REG_WIN1V); - win1Top = WIN_GET_LOWER(REG_WIN1V); - win1Right = WIN_GET_HIGHER(REG_WIN1H); - win1Left = WIN_GET_LOWER(REG_WIN1H); - if (win1Top > win1Bot) - win1Active = (vcount >= win1Top || vcount < win1Bot); - else - win1Active = (vcount >= win1Top && vcount < win1Bot); - windowsEnabled = true; - } - if ((REG_DISPCNT & DISPCNT_OBJWIN_ON) && (REG_DISPCNT & DISPCNT_OBJ_ON)) - windowsEnabled = true; - - // build per-pixel window mask - if (windowsEnabled) { - for (unsigned int xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { - if (win0Active && WindowContainsX(win0Left, win0Right, xpos)) - winMask[xpos] = REG_WININ & 0x3F; - else if (win1Active && WindowContainsX(win1Left, win1Right, xpos)) - winMask[xpos] = (REG_WININ >> 8) & 0x3F; - else - winMask[xpos] = (REG_WINOUT & 0x3F) | WINMASK_WINOUT; + u32 current_layer; + u32 
layer_order_pos; + + fill_line_bg(normal, scanline, start, end); + + for (layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++) { + current_layer = layer_order[layer_order_pos]; + if (current_layer & 0x04) { + if (enable_flags & 0x10) { + render_obj_layer(normal, scanline, start, end); + } + } else { + if (enable_flags & 0x04) + layer_renderers->normal_render(start, end, scanline); } } +} - PrefilterSprites(vcount); - - // layerIds tracks who wrote each pixel so alpha blend can find target-b - static uint8_t layerIds[DISPLAY_WIDTH]; - bool needLayerIds = (blendMode != 0 || windowsEnabled); - uint8_t *lids = needLayerIds ? layerIds : NULL; - uint16_t *wmask = windowsEnabled ? winMask : NULL; - - if (needLayerIds) - memset(layerIds, LAYER_BACKDROP, DISPLAY_WIDTH); - - // grab blend regs once per scanline - unsigned int bldcnt = REG_BLDCNT; - unsigned int bld_eva = REG_BLDALPHA & 0x1F; - unsigned int bld_evb = (REG_BLDALPHA >> 8) & 0x1F; - unsigned int bld_evy = REG_BLDY & 0x1F; - - // obj window pass -- these sprites modify the window mask, not the framebuffer - if (windowsEnabled && (REG_DISPCNT & DISPCNT_OBJWIN_ON) && (REG_DISPCNT & DISPCNT_OBJ_ON)) { - for (int pri = 0; pri < 4; pri++) - DrawSpritesAtPriority(pri, vcount, pixels, lids, windowsEnabled, wmask, blendMode, - /*objWinOnly=*/true, bldcnt, bld_eva, bld_evb, bld_evy); - } - - // back-to-front: priority 3 first, 0 last (0 is topmost) - for (int pri = 3; pri >= 0; pri--) { - for (int sub = bgsByPriCount[pri] - 1; sub >= 0; sub--) { - int bg = bgsByPri[pri][sub]; - if (!IsBGEnabled(bg)) - continue; - - if (!needLayerIds) { - switch (mode) { - case 0: - RenderTextBG(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), - vcount, pixels); - break; - case 1: - if (bg == 2) - RenderAffineBG(bg, bgcnts[bg], vcount, pixels); - else - RenderTextBG(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), - 
vcount, pixels); - break; - } +#define window_x_coords(window_number) \ + window_##window_number##_x1 = read_ioreg(REG_ADDR_WIN##window_number##H) >> 8; \ + window_##window_number##_x2 = read_ioreg(REG_ADDR_WIN##window_number##H) & 0xFF; \ + window_##window_number##_enable = (winin >> (window_number * 8)) & 0x3F; \ + \ + if (window_##window_number##_x1 > DISPLAY_WIDTH) \ + window_##window_number##_x1 = DISPLAY_WIDTH; \ + \ + if (window_##window_number##_x2 > DISPLAY_WIDTH) \ + window_##window_number##_x2 = DISPLAY_WIDTH + +#define window_coords(window_number) \ + u32 window_##window_number##_x1, window_##window_number##_x2; \ + u32 window_##window_number##_y1, window_##window_number##_y2; \ + u32 window_##window_number##_enable = 0; \ + window_##window_number##_y1 = read_ioreg(REG_ADDR_WIN##window_number##V) >> 8; \ + window_##window_number##_y2 = read_ioreg(REG_ADDR_WIN##window_number##V) & 0xFF; \ + \ + if (window_##window_number##_y1 > window_##window_number##_y2) { \ + if ((((vcount <= window_##window_number##_y2) || (vcount > window_##window_number##_y1)) \ + || (window_##window_number##_y2 > (DISPLAY_WIDTH - 13))) \ + && (window_##window_number##_y1 <= (DISPLAY_WIDTH - 13))) { \ + window_x_coords(window_number); \ + } else { \ + window_##window_number##_x1 = DISPLAY_WIDTH; \ + window_##window_number##_x2 = DISPLAY_WIDTH; \ + } \ + } else { \ + if ((((vcount >= window_##window_number##_y1) && (vcount < window_##window_number##_y2)) \ + || (window_##window_number##_y2 > (DISPLAY_WIDTH - 13))) \ + && (window_##window_number##_y1 <= (DISPLAY_WIDTH - 13))) { \ + window_x_coords(window_number); \ + } else { \ + window_##window_number##_x1 = DISPLAY_WIDTH; \ + window_##window_number##_x2 = DISPLAY_WIDTH; \ + } \ + } + +#define render_window_segment(type, start, end, window_type) \ + if (start != end) { \ + render_scanline_conditional_##type(start, end, scanline, window_##window_type##_enable, dispcnt, bldcnt, layer_renderers); \ + } + +#define 
render_window_segment_unequal(type, start, end, window_type) \ + render_scanline_conditional_##type(start, end, scanline, window_##window_type##_enable, dispcnt, bldcnt, layer_renderers) + +#define render_window_segment_clip(type, clip_start, clip_end, start, end, window_type) \ + { \ + if (start != end) { \ + if (start < clip_start) { \ + if (end > clip_start) { \ + if (end > clip_end) { \ + render_window_segment_unequal(type, clip_start, clip_end, window_type); \ + } else { \ + render_window_segment_unequal(type, clip_start, end, window_type); \ + } \ + } \ + } else \ + \ + if (end > clip_end) { \ + if (start < clip_end) \ + render_window_segment_unequal(type, start, clip_end, window_type); \ + } else { \ + render_window_segment_unequal(type, start, end, window_type); \ + } \ + } \ + } + +#define render_window_clip_1(type, start, end) \ + if (window_1_x1 != DISPLAY_WIDTH) { \ + if (window_1_x1 > window_1_x2) { \ + render_window_segment_clip(type, start, end, 0, window_1_x2, 1); \ + render_window_segment_clip(type, start, end, window_1_x2, window_1_x1, out); \ + render_window_segment_clip(type, start, end, window_1_x1, DISPLAY_WIDTH, 1); \ + } else { \ + render_window_segment_clip(type, start, end, 0, window_1_x1, out); \ + render_window_segment_clip(type, start, end, window_1_x1, window_1_x2, 1); \ + render_window_segment_clip(type, start, end, window_1_x2, DISPLAY_WIDTH, out); \ + } \ + } else { \ + render_window_segment(type, start, end, out); \ + } + +#define render_window_clip_obj(type, start, end) \ + ; \ + render_window_segment(type, start, end, out); \ + if (dispcnt & 0x40) \ + render_scanline_obj_copy_##type##_1D(4, start, end, scanline); \ + else \ + render_scanline_obj_copy_##type##_2D(4, start, end, scanline) + +#define render_window_segment_clip_obj(type, clip_start, clip_end, start, end) \ + { \ + if (start != end) { \ + if (start < clip_start) { \ + if (end > clip_start) { \ + if (end > clip_end) { \ + render_window_clip_obj(type, clip_start, 
clip_end); \ + } else { \ + render_window_clip_obj(type, clip_start, end); \ + } \ + } \ + } else \ + \ + if (end > clip_end) { \ + if (start < clip_end) { \ + render_window_clip_obj(type, start, clip_end); \ + } \ + } else { \ + render_window_clip_obj(type, start, end); \ + } \ + } \ + } + +#define render_window_clip_1_obj(type, start, end) \ + if (window_1_x1 != DISPLAY_WIDTH) { \ + if (window_1_x1 > window_1_x2) { \ + render_window_segment_clip(type, start, end, 0, window_1_x2, 1); \ + render_window_segment_clip_obj(type, start, end, window_1_x2, window_1_x1); \ + render_window_segment_clip(type, start, end, window_1_x1, DISPLAY_WIDTH, 1); \ + } else { \ + render_window_segment_clip_obj(type, start, end, 0, window_1_x1); \ + render_window_segment_clip(type, start, end, window_1_x1, window_1_x2, 1); \ + render_window_segment_clip_obj(type, start, end, window_1_x2, DISPLAY_WIDTH); \ + } \ + } else { \ + render_window_clip_obj(type, start, end); \ + } + +#define render_window_single(type, window_number) \ + u32 winin = read_ioreg(REG_ADDR_WININ); \ + window_coords(window_number); \ + if (window_##window_number##_x1 > window_##window_number##_x2) { \ + render_window_segment(type, 0, window_##window_number##_x2, window_number); \ + render_window_segment(type, window_##window_number##_x2, window_##window_number##_x1, out); \ + render_window_segment(type, window_##window_number##_x1, DISPLAY_WIDTH, window_number); \ + } else { \ + render_window_segment(type, 0, window_##window_number##_x1, out); \ + render_window_segment(type, window_##window_number##_x1, window_##window_number##_x2, window_number); \ + render_window_segment(type, window_##window_number##_x2, DISPLAY_WIDTH, out); \ + } + +#define render_window_multi(type, front, back) \ + if (window_##front##_x1 > window_##front##_x2) { \ + render_window_segment(type, 0, window_##front##_x2, front); \ + render_window_clip_##back(type, window_##front##_x2, window_##front##_x1); \ + render_window_segment(type, 
window_##front##_x1, DISPLAY_WIDTH, front); \ + } else { \ + render_window_clip_##back(type, 0, window_##front##_x1); \ + render_window_segment(type, window_##front##_x1, window_##front##_x2, front); \ + render_window_clip_##back(type, window_##front##_x2, DISPLAY_WIDTH); \ + } + +#define render_scanline_window_builder(type) \ + static void render_scanline_window_##type(u16 *scanline, u32 dispcnt) \ + { \ + u32 vcount = read_ioreg(REG_ADDR_VCOUNT); \ + u32 winout = read_ioreg(REG_ADDR_WINOUT); \ + u32 bldcnt = read_ioreg(REG_ADDR_BLDCNT); \ + u32 window_out_enable = winout & 0x3F; \ + \ + render_scanline_layer_functions_##type(); \ + \ + switch (dispcnt >> 13) { \ + /* Just window 0 */ \ + case 0x01: { \ + render_window_single(type, 0); \ + break; \ + } \ + \ + /* Just window 1 */ \ + case 0x02: { \ + render_window_single(type, 1); \ + break; \ + } \ + \ + /* Windows 1 and 2 */ \ + case 0x03: { \ + u32 winin = read_ioreg(REG_ADDR_WININ); \ + window_coords(0); \ + window_coords(1); \ + render_window_multi(type, 0, 1); \ + break; \ + } \ + \ + /* Just OBJ windows */ \ + case 0x04: { \ + render_window_clip_obj(type, 0, DISPLAY_WIDTH); \ + break; \ + } \ + \ + /* Window 0 and OBJ window */ \ + case 0x05: { \ + u32 winin = read_ioreg(REG_ADDR_WININ); \ + window_coords(0); \ + render_window_multi(type, 0, obj); \ + break; \ + } \ + \ + /* Window 1 and OBJ window */ \ + case 0x06: { \ + u32 winin = read_ioreg(REG_ADDR_WININ); \ + window_coords(1); \ + render_window_multi(type, 1, obj); \ + break; \ + } \ + \ + /* Window 0, 1, and OBJ window */ \ + case 0x07: { \ + u32 winin = read_ioreg(REG_ADDR_WININ); \ + window_coords(0); \ + window_coords(1); \ + render_window_multi(type, 0, 1_obj); \ + break; \ + } \ + } \ + } + +render_scanline_window_builder(tile); +render_scanline_window_builder(bitmap); + +static const u8 active_layers[] = { + 0x1F, // Mode 0, Tile BG0-3 and OBJ + 0x17, // Mode 1, Tile BG0-2 and OBJ + 0x1C, // Mode 2, Tile BG2-3 and OBJ + 0x14, // Mode 3, BMP BG2 
and OBJ + 0x14, // Mode 4, BMP BG2 and OBJ + 0x14, // Mode 5, BMP BG2 and OBJ + 0, // Unused + 0, +}; + +void update_scanline(void) +{ + u32 pitch = get_screen_pitch(); + u32 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u32 vcount = read_ioreg(REG_ADDR_VCOUNT); + u16 *screen_offset = get_screen_pixels() + (vcount * pitch); + u32 video_mode = dispcnt & 0x07; + + order_layers((dispcnt >> 8) & active_layers[video_mode], vcount); + + // If the screen is in in forced blank draw pure white. + if (dispcnt & 0x80) { + fill_line_color16(0xFFFF, screen_offset, 0, DISPLAY_WIDTH); + } else { + if (video_mode < 3) { + if (dispcnt >> 13) { + render_scanline_window_tile(screen_offset, dispcnt); } else { - switch (mode) { - case 0: - RenderTextBGBlend(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), - *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), vcount, pixels, lids, blendMode, windowsEnabled, wmask, - bldcnt, bld_eva, bld_evb, bld_evy); - break; - case 1: - if (bg == 2) - RenderAffineBGBlend(bg, bgcnts[bg], vcount, pixels, lids, blendMode, windowsEnabled, wmask, bldcnt, bld_eva, - bld_evb, bld_evy); - else - RenderTextBGBlend(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), - *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), vcount, pixels, lids, blendMode, windowsEnabled, - wmask, bldcnt, bld_eva, bld_evb, bld_evy); - break; - } + render_scanline_tile(screen_offset, dispcnt); } + } else { + if (dispcnt >> 13) + render_scanline_window_bitmap(screen_offset, dispcnt); + else + render_scanline_bitmap(screen_offset, dispcnt); } - - if (REG_DISPCNT & DISPCNT_OBJ_ON) - DrawSpritesAtPriority(pri, vcount, pixels, lids, windowsEnabled, wmask, blendMode, - /*objWinOnly=*/false, bldcnt, bld_eva, bld_evb, bld_evy); } + + affine_reference_x[0] += (s16)read_ioreg(REG_ADDR_BG2PB); + affine_reference_y[0] += (s16)read_ioreg(REG_ADDR_BG2PD); + affine_reference_x[1] += (s16)read_ioreg(REG_ADDR_BG3PB); + affine_reference_y[1] += (s16)read_ioreg(REG_ADDR_BG3PD); } -void DrawFrame_Fast(uint16_t 
*pixels) +void DrawFrame_Fast(u16 *pixels) { - for (int i = 0; i < DISPLAY_HEIGHT; i++) { - uint16_t *scanline = &pixels[i * DISPLAY_WIDTH]; + int i; + + gba_screen_pixels = pixels; + video_reload_counters(); + // convert_whole_palette(); + + // assume that the oam is only updated once before the frame + // starts to be drawn + u32 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u32 video_mode = dispcnt & 0x07; + order_obj(video_mode); + + for (i = 0; i < DISPLAY_HEIGHT; i++) { REG_VCOUNT = i; if (((REG_DISPSTAT >> 8) & 0xFF) == REG_VCOUNT) { @@ -1183,11 +3721,14 @@ void DrawFrame_Fast(uint16_t *pixels) gIntrTable[INTR_INDEX_VCOUNT](); } - Memset16(scanline, *(uint16_t *)PLTT, DISPLAY_WIDTH); - DrawScanline(scanline, i); + // Render the backdrop color before each individual scanline. + // HBlank interrupt code could have changed it in between lines. + update_scanline(); REG_DISPSTAT |= INTR_FLAG_HBLANK; + RunDMAs(DMA_HBLANK); + if (REG_DISPSTAT & DISPSTAT_HBLANK_INTR) gIntrTable[INTR_INDEX_HBLANK](); From 84cbc3e14da547cc0669583d238e1c960fa54fba Mon Sep 17 00:00:00 2001 From: Oliver Bell Date: Wed, 18 Feb 2026 02:09:26 +0000 Subject: [PATCH 09/13] remove common --- src/platform/shared/rendering/common.h | 143 ------------------------- 1 file changed, 143 deletions(-) delete mode 100644 src/platform/shared/rendering/common.h diff --git a/src/platform/shared/rendering/common.h b/src/platform/shared/rendering/common.h deleted file mode 100644 index 7592b4218..000000000 --- a/src/platform/shared/rendering/common.h +++ /dev/null @@ -1,143 +0,0 @@ -/* gameplaySP - * - * Copyright (C) 2006 Exophase - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef COMMON_H -#define COMMON_H - -#define ror(dest, value, shift) dest = ((value) >> (shift)) | ((value) << (32 - (shift))) - -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -#if defined(_WIN32) -#define PATH_SEPARATOR "\\" -#define PATH_SEPARATOR_CHAR '\\' -#else -#define PATH_SEPARATOR "/" -#define PATH_SEPARATOR_CHAR '/' -#endif - -/* On x86 we pass arguments via registers instead of stack */ -#ifdef X86_ARCH -#define function_cc __attribute__((regparm(2))) -#else -#define function_cc -#endif - -#ifdef ARM_ARCH - -#define _BSD_SOURCE // sync -#include -#include -#include -#include -#include -#include -#include -#include - -#endif /* ARM_ARCH */ - -// Huge thanks to pollux for the heads up on using native file I/O -// functions on PSP for vastly improved memstick performance. 
- -#ifdef PSP -#include -#include -#include -#include -#include -#include -#include -#include -#else -typedef unsigned char u8; -typedef signed char s8; -typedef unsigned short int u16; -typedef signed short int s16; -typedef unsigned int u32; -typedef signed int s32; -typedef unsigned long long int u64; -typedef signed long long int s64; -#endif - -#ifdef USE_BGR_FORMAT -#define convert_palette(value) (((value & 0x7FE0) << 1) | (value & 0x1F)) -#elif defined(USE_XBGR1555_FORMAT) -#define convert_palette(value) (value & 0x7FFF) -#else -#define convert_palette(value) (((value & 0x1F) << 11) | ((value & 0x03E0) << 1) | ((value >> 10) & 0x1F)) -#endif - -#define GBA_SCREEN_WIDTH (240) -#define GBA_SCREEN_HEIGHT (160) -#define GBA_SCREEN_PITCH (240) - -typedef u32 fixed16_16; -typedef u32 fixed8_24; - -#define float_to_fp16_16(value) (fixed16_16)((value)*65536.0) - -#define fp16_16_to_float(value) (float)((value) / 65536.0) - -#define u32_to_fp16_16(value) ((value) << 16) - -#define fp16_16_to_u32(value) ((value) >> 16) - -#define fp16_16_fractional_part(value) ((value)&0xFFFF) - -#define float_to_fp8_24(value) (fixed8_24)((value)*16777216.0) - -#define fp8_24_fractional_part(value) ((value)&0xFFFFFF) - -#define fixed_div(numerator, denominator, bits) (((numerator * (1 << bits)) + (denominator / 2)) / denominator) - -#define address8(base, offset) *((u8 *)((u8 *)base + (offset))) - -#define address16(base, offset) *((u16 *)((u8 *)base + (offset))) - -#define address32(base, offset) *((u32 *)((u8 *)base + (offset))) - -#define eswap8(value) (value) -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define eswap16(value) __builtin_bswap16(value) -#define eswap32(value) __builtin_bswap32(value) -#else -#define eswap16(value) (value) -#define eswap32(value) (value) -#endif - -#define readaddress8(base, offset) eswap8(address8(base, offset)) -#define readaddress16(base, offset) eswap16(address16(base, offset)) -#define readaddress32(base, offset) eswap32(address32(base, offset)) 
- -#define read_ioreg(regnum) (eswap16(io_registers[(regnum)])) -#define write_ioreg(regnum, val) io_registers[(regnum)] = eswap16(val) -#define read_ioreg32(regnum) (read_ioreg(regnum) | (read_ioreg((regnum) + 1) << 16)) - -#define read_dmareg(regnum, dmachan) (eswap16(io_registers[(regnum) + (dmachan)*6])) -#define write_dmareg(regnum, dmachan, val) io_registers[(regnum) + (dmachan)*6] = eswap16(val) - -#include -#include -#include -#include -#include - -#endif \ No newline at end of file From 45754d515b10fa5f186e141bad224c64073afd85 Mon Sep 17 00:00:00 2001 From: Oliver Bell Date: Wed, 18 Feb 2026 21:32:54 +0000 Subject: [PATCH 10/13] fix oam and some extended backgrounds --- include/gba/defines.h | 7 +- include/global.h | 2 +- src/platform/pret_sdl/sdl2.c | 2 + .../shared/rendering/sw_renderer_fast.c | 151 +++++++++--------- 4 files changed, 78 insertions(+), 84 deletions(-) diff --git a/include/gba/defines.h b/include/gba/defines.h index 5ec55588c..7c3ccac2e 100644 --- a/include/gba/defines.h +++ b/include/gba/defines.h @@ -48,8 +48,8 @@ #define DISPLAY_WIDTH 240 #define DISPLAY_HEIGHT 160 #else -#define DISPLAY_WIDTH 240 -#define DISPLAY_HEIGHT 160 +#define DISPLAY_WIDTH 426 +#define DISPLAY_HEIGHT 240 #endif // NOTE: We shouldn't consider WIDESCREEN_HACK a permanent thing. 
@@ -58,8 +58,7 @@ #undef VRAM_SIZE #define VRAM_SIZE (0x18000 + (0x800 * (12))) #define WIDESCREEN_HACK TRUE -// TODO: extend oam again once fast renderer supports -#define EXTENDED_OAM FALSE +#define EXTENDED_OAM TRUE #else #define WIDESCREEN_HACK FALSE #define EXTENDED_OAM !TRUE diff --git a/include/global.h b/include/global.h index 946528d0d..777ebc321 100644 --- a/include/global.h +++ b/include/global.h @@ -7,7 +7,7 @@ #if PLATFORM_GBA #define ENABLE_AUDIO TRUE #else -#define ENABLE_AUDIO TRUE +#define ENABLE_AUDIO FALSE #define ENABLE_VRAM_VIEW !TRUE #endif diff --git a/src/platform/pret_sdl/sdl2.c b/src/platform/pret_sdl/sdl2.c index 4e4086ca9..f63b7ba7e 100644 --- a/src/platform/pret_sdl/sdl2.c +++ b/src/platform/pret_sdl/sdl2.c @@ -218,8 +218,10 @@ int main(int argc, char **argv) REG_KEYINPUT = 0x3FF; if (headless) { +#if ENABLE_AUDIO // Required or it makes an infinite loop cgb_audio_init(48000); +#endif AgbMain(); return 1; } diff --git a/src/platform/shared/rendering/sw_renderer_fast.c b/src/platform/shared/rendering/sw_renderer_fast.c index c2a839f0a..a38af7d90 100644 --- a/src/platform/shared/rendering/sw_renderer_fast.c +++ b/src/platform/shared/rendering/sw_renderer_fast.c @@ -804,8 +804,8 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void } if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; + if (horizontal_offset >= 512) { + horizontal_offset -= 512; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -813,7 +813,7 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 256; + horizontal_offset %= 512; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -831,7 +831,7 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void tile_extra_variables_8bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 
0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -880,7 +880,7 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void tile_extra_variables_4bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -948,8 +948,8 @@ static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 en } if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; + if (horizontal_offset >= 512) { + horizontal_offset -= 512; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -957,7 +957,7 @@ static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 en second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 256; + horizontal_offset %= 512; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -974,7 +974,7 @@ static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 en s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1022,7 +1022,7 @@ static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 en s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = VRAM + (((bg_control 
>> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1090,8 +1090,8 @@ static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, voi } if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; + if (horizontal_offset >= 512) { + horizontal_offset -= 512; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1099,7 +1099,7 @@ static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, voi second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 256; + horizontal_offset %= 512; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1115,7 +1115,7 @@ static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, voi s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1162,7 +1162,7 @@ static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, voi s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1232,8 +1232,8 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e } if (map_size & 0x01) { - if (horizontal_offset 
>= 256) { - horizontal_offset -= 256; + if (horizontal_offset >= 512) { + horizontal_offset -= 512; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1241,7 +1241,7 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 256; + horizontal_offset %= 512; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1257,7 +1257,7 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1304,7 +1304,7 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1374,8 +1374,8 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi } if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; + if (horizontal_offset >= 512) { + horizontal_offset -= 512; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1383,7 +1383,7 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 256; + 
horizontal_offset %= 512; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1399,7 +1399,7 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1446,7 +1446,7 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1516,8 +1516,8 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e } if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; + if (horizontal_offset >= 512) { + horizontal_offset -= 512; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1525,7 +1525,7 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 256; + horizontal_offset %= 512; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1541,7 +1541,7 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = VRAM + 
(((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1590,7 +1590,7 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1660,8 +1660,8 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void } if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; + if (horizontal_offset >= 512) { + horizontal_offset -= 512; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1669,7 +1669,7 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 256; + horizontal_offset %= 512; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1685,7 +1685,7 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1732,7 +1732,7 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void s32 vertical_pixel_flip = 
((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1800,8 +1800,8 @@ static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end } if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; + if (horizontal_offset >= 512) { + horizontal_offset -= 512; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1809,7 +1809,7 @@ static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 256; + horizontal_offset %= 512; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1825,7 +1825,7 @@ static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1872,7 +1872,7 @@ static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = 512 - (horizontal_offset % 512); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; 
@@ -2321,18 +2321,18 @@ static const bitmap_layer_render_struct bitmap_mode_renderers[3] // Get the current row offset into an obj in 1D map space #define obj_tile_offset_1D(color_depth, flip_op) \ - tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32) + ((vertical_offset / 8) * (obj_width / 8) * tile_size_##color_depth) \ + tile_ptr = tile_base + (oam_data->split.tileNum * 32) + ((vertical_offset / 8) * (obj_width / 8) * tile_size_##color_depth) \ + ((vertical_offset % 8) * tile_width_##color_depth) obj_tile_offset_##flip_op(color_depth) // Get the current row offset into an obj in 2D map space #define obj_tile_offset_2D(color_depth, flip_op) \ - tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32) + ((vertical_offset / 8) * 1024) \ + tile_ptr = tile_base + (oam_data->split.tileNum * 32) + ((vertical_offset / 8) * 1024) \ + ((vertical_offset % 8) * tile_width_##color_depth) obj_tile_offset_##flip_op(color_depth) // Get the palette for 4bpp obj. -#define obj_get_palette_4bpp() current_palette = (obj_attribute_2 >> 8) & 0xF0 +#define obj_get_palette_4bpp() current_palette = oam_data->split.paletteNum << 4 #define obj_get_palette_8bpp() @@ -2405,14 +2405,14 @@ static const bitmap_layer_render_struct bitmap_mode_renderers[3] } #define obj_scale_offset_1D(color_depth) \ - tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32) + ((vertical_offset / 8) * (max_x / 8) * tile_size_##color_depth) \ + tile_ptr = tile_base + (oam_data->split.tileNum * 32) + ((vertical_offset / 8) * (max_x / 8) * tile_size_##color_depth) \ + ((vertical_offset % 8) * tile_width_##color_depth) // Get the current row offset into an obj in 2D map space #define obj_scale_offset_2D(color_depth) \ - tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32) + ((vertical_offset / 8) * 1024) \ - + ((vertical_offset % 8) * tile_width_##color_depth) + tile_ptr \ + = tile_base + (oam_data->split.tileNum * 32) + ((vertical_offset / 8) * 1024) + ((vertical_offset % 8) * tile_width_##color_depth) #define 
obj_render_scale_pixel_4bpp(combine_op, alpha_op) \ current_pixel = tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)]; \ @@ -2481,7 +2481,7 @@ static const bitmap_layer_render_struct bitmap_mode_renderers[3] #define obj_render_rotate(combine_op, color_depth, alpha_op, map_space) \ { \ - tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32); \ + tile_ptr = tile_base + (oam_data->split.tileNum * 32); \ obj_rotate_offset_##map_space(color_depth); \ \ source_x += (y_delta * dmx) - (middle_x * dx); \ @@ -2519,11 +2519,15 @@ static const bitmap_layer_render_struct bitmap_mode_renderers[3] #define obj_render_affine(combine_op, color_depth, alpha_op, map_space) \ { \ - u16 *params = (u16 *)OAM + (((obj_attribute_1 >> 9) & 0x1F) * 16); \ - s32 dx = (s16)eswap16(params[3]); \ - s32 dmx = (s16)eswap16(params[7]); \ - s32 dy = (s16)eswap16(params[11]); \ - s32 dmy = (s16)eswap16(params[15]); \ + u8 matrix_num = oam_data->split.matrixNum * 4; \ + OamData *oam1 = &((OamData *)OAM)[matrix_num]; \ + OamData *oam2 = &((OamData *)OAM)[matrix_num + 1]; \ + OamData *oam3 = &((OamData *)OAM)[matrix_num + 2]; \ + OamData *oam4 = &((OamData *)OAM)[matrix_num + 3]; \ + s32 dx = (s16)oam1->all.affineParam; \ + s32 dmx = (s16)oam2->all.affineParam; \ + s32 dy = (s16)oam3->all.affineParam; \ + s32 dmy = (s16)oam4->all.affineParam; \ s32 source_x, source_y; \ s32 tile_x, tile_y; \ u32 tile_map_offset; \ @@ -2541,7 +2545,7 @@ static const bitmap_layer_render_struct bitmap_mode_renderers[3] source_x = (middle_x << 8); \ source_y = (middle_y << 8); \ \ - if (obj_attribute_0 & 0x200) { \ + if ((oam_data->split.affineMode >> 1) & 1) { \ obj_width *= 2; \ obj_height *= 2; \ middle_x *= 2; \ @@ -2625,8 +2629,8 @@ static u8 obj_alpha_count[DISPLAY_HEIGHT]; #define render_scanline_obj_extra_variables_copy_bitmap(map_space) render_scanline_obj_extra_variables_copy(bitmap) #define render_scanline_obj_main(combine_op, alpha_op, map_space) \ - if (obj_attribute_0 & 0x100) { \ - if ((obj_attribute_0 >> 
13) & 0x01) { \ + if (oam_data->split.affineMode & 1) { \ + if (oam_data->split.bpp & 1) { \ obj_render_affine(combine_op, 8bpp, alpha_op, map_space); \ } else { \ obj_render_affine(combine_op, 4bpp, alpha_op, map_space); \ @@ -2634,10 +2638,10 @@ static u8 obj_alpha_count[DISPLAY_HEIGHT]; } else { \ vertical_offset = vcount - obj_y; \ \ - if ((obj_attribute_1 >> 13) & 0x01) \ + if ((oam_data->split.matrixNum >> 4) & 1) \ vertical_offset = obj_height - vertical_offset - 1; \ \ - switch (((obj_attribute_0 >> 12) & 0x02) | ((obj_attribute_1 >> 12) & 0x01)) { \ + switch ((oam_data->split.bpp << 1) | ((oam_data->split.matrixNum >> 3) & 1)) { \ case 0x0: \ obj_render(combine_op, 4bpp, alpha_op, map_space, noflip); \ break; \ @@ -2659,7 +2663,7 @@ static u8 obj_alpha_count[DISPLAY_HEIGHT]; #define render_scanline_obj_no_partial_alpha(combine_op, alpha_op, map_space) render_scanline_obj_main(combine_op, alpha_op, map_space) #define render_scanline_obj_partial_alpha(combine_op, alpha_op, map_space) \ - if ((obj_attribute_0 >> 10) & 0x03) { \ + if (oam_data->split.objMode) { \ pixel_combine = 0x00000300; \ render_scanline_obj_main(combine_op, alpha_obj, map_space); \ } else { \ @@ -2672,7 +2676,7 @@ static u8 obj_alpha_count[DISPLAY_HEIGHT]; #define render_scanline_obj_prologue_copy_body(type) \ copy_start = obj_x; \ copy_end = obj_x + obj_width; \ - if (obj_attribute_0 & 0x200) \ + if (oam_data->split.affineMode & 2) \ copy_end += obj_width; \ \ if (copy_start < start) \ @@ -2701,7 +2705,6 @@ static u8 obj_alpha_count[DISPLAY_HEIGHT]; s32 obj_x, obj_y; \ u32 obj_size; \ u32 obj_width, obj_height; \ - u32 obj_attribute_0, obj_attribute_1, obj_attribute_2; \ s32 vcount = read_ioreg(REG_ADDR_VCOUNT); \ u32 tile_run; \ u32 current_pixels; \ @@ -2710,7 +2713,6 @@ static u8 obj_alpha_count[DISPLAY_HEIGHT]; u32 vertical_offset; \ u32 partial_tile_run, partial_tile_offset; \ u32 pixel_run; \ - u16 *oam_ptr; \ OamData *oam_data; \ render_scanline_dest_##alpha_op *dest_ptr; \ u8 
*tile_base = VRAM + 0x10000; \ @@ -2720,18 +2722,14 @@ static u8 obj_alpha_count[DISPLAY_HEIGHT]; \ for (obj_num = 0; obj_num < obj_count; obj_num++) { \ oam_data = (OamData *)&OAM[obj_list[obj_num] * OAM_DATA_SIZE_AFFINE]; \ - oam_ptr = (u16 *)OAM + (obj_list[obj_num] * 4); \ - obj_attribute_0 = eswap16(oam_ptr[0]); \ - obj_attribute_1 = eswap16(oam_ptr[1]); \ - obj_attribute_2 = eswap16(oam_ptr[2]); \ - obj_size = ((obj_attribute_0 >> 12) & 0x0C) | (obj_attribute_1 >> 14); \ + obj_size = (oam_data->split.shape << 2) | oam_data->split.size; \ \ obj_x = oam_data->split.x; \ obj_width = obj_width_table[obj_size]; \ \ render_scanline_obj_prologue_##combine_op(alpha_op); \ \ - obj_y = obj_attribute_0 & 0xFF; \ + obj_y = oam_data->split.y; \ \ if (!EXTENDED_OAM) { \ if (obj_x > DISPLAY_WIDTH) \ @@ -2772,34 +2770,29 @@ static void order_obj(u32 video_mode) { s32 obj_num; u32 row; - t_oam *oam_base = (t_oam *)OAM; memset(obj_priority_count, 0, sizeof(obj_priority_count)); memset(obj_alpha_count, 0, sizeof(obj_alpha_count)); for (obj_num = 127; obj_num >= 0; obj_num--) { OamData *oam_data = (OamData *)&OAM[obj_num * OAM_DATA_SIZE_AFFINE]; - t_oam *oam_ptr = &oam_base[obj_num]; - u16 obj_attr0 = eswap16(oam_ptr->attr0); // Bit 9 disables regular sprites. Used as double bit for affine ones. 
bool visible = oam_data->split.affineMode != 2; if (visible) { - u16 obj_shape = obj_attr0 >> 14; - u32 obj_mode = (obj_attr0 >> 10) & 0x03; + u16 obj_shape = oam_data->split.shape; + u32 obj_mode = oam_data->split.objMode; // Prohibited shape and mode bool invalid = (obj_shape == 0x3) || (obj_mode == OBJ_MOD_INVALID); if (!invalid) { - u16 obj_attr1 = eswap16(oam_ptr->attr1); - u16 obj_attr2 = eswap16(oam_ptr->attr2); - u32 obj_priority = (obj_attr2 >> 10) & 0x03; + u32 obj_priority = oam_data->split.priority; - if (((video_mode < 3) || ((obj_attr2 & 0x3FF) >= 512))) { + if (((video_mode < 3) || (oam_data->split.tileNum >= 512))) { // Calculate object size (from size and shape attr bits) - u16 obj_size = (obj_attr1 >> 14); + u16 obj_size = oam_data->split.size; s32 obj_height = obj_dim_table[obj_shape][obj_size][1]; s32 obj_width = obj_dim_table[obj_shape][obj_size][0]; - s32 obj_y = obj_attr0 & 0xFF; + s32 obj_y = oam_data->split.y; #if !EXTENDED_OAM if (obj_y > DISPLAY_HEIGHT) @@ -2807,7 +2800,7 @@ static void order_obj(u32 video_mode) #endif // Double size for affine sprites with double bit set - if (obj_attr0 & 0x200) { + if ((oam_data->split.affineMode >> 1) & 1) { obj_height *= 2; obj_width *= 2; } From 66f9a093b8ad82ed10a850c2cf9103baaf4341f0 Mon Sep 17 00:00:00 2001 From: Oliver Bell Date: Thu, 19 Feb 2026 02:54:25 +0000 Subject: [PATCH 11/13] fix bg rendering in widescreen hack, 512 rendering still not working --- include/gba/defines.h | 2 +- include/global.h | 2 +- src/platform/ps2/ps2.c | 2 +- .../shared/rendering/sw_renderer_fast.c | 156 +++++++++--------- 4 files changed, 83 insertions(+), 79 deletions(-) diff --git a/include/gba/defines.h b/include/gba/defines.h index 7c3ccac2e..4ef57d98b 100644 --- a/include/gba/defines.h +++ b/include/gba/defines.h @@ -60,7 +60,7 @@ #define WIDESCREEN_HACK TRUE #define EXTENDED_OAM TRUE #else -#define WIDESCREEN_HACK FALSE +#define WIDESCREEN_HACK TRUE #define EXTENDED_OAM !TRUE #endif extern uint8_t 
VRAM[VRAM_SIZE]; diff --git a/include/global.h b/include/global.h index 777ebc321..946528d0d 100644 --- a/include/global.h +++ b/include/global.h @@ -7,7 +7,7 @@ #if PLATFORM_GBA #define ENABLE_AUDIO TRUE #else -#define ENABLE_AUDIO FALSE +#define ENABLE_AUDIO TRUE #define ENABLE_VRAM_VIEW !TRUE #endif diff --git a/src/platform/ps2/ps2.c b/src/platform/ps2/ps2.c index a822b3753..809c0a843 100644 --- a/src/platform/ps2/ps2.c +++ b/src/platform/ps2/ps2.c @@ -161,7 +161,7 @@ static void deinit_drivers() void platform_video_init(void) { if (vid_mode == NULL) { - vid_mode = &vid_modes[1]; // Standard def 480p + vid_mode = &vid_modes[3]; // Standard def 480p } else { if (use_hires) { gsKit_hires_deinit_global(gsGlobal); diff --git a/src/platform/shared/rendering/sw_renderer_fast.c b/src/platform/shared/rendering/sw_renderer_fast.c index a38af7d90..143b7cbb6 100644 --- a/src/platform/shared/rendering/sw_renderer_fast.c +++ b/src/platform/shared/rendering/sw_renderer_fast.c @@ -791,7 +791,7 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void u32 i; render_scanline_dest_normal *dest_ptr = ((render_scanline_dest_normal *)scanline) + start; - u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); u16 *map_ptr, *second_ptr; u8 *tile_ptr; @@ -804,8 +804,8 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void } if (map_size & 0x01) { - if (horizontal_offset >= 512) { - horizontal_offset -= 512; + if (horizontal_offset >= 256) { + horizontal_offset -= 256; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -813,7 +813,7 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 512; + horizontal_offset %= 256; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ 
-830,8 +830,8 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void tile_extra_variables_8bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -879,8 +879,8 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void tile_extra_variables_4bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -909,6 +909,7 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void map_ptr = second_ptr; end -= pixel_run; } + tile_run = end / 8; multiple_tile_map_base_4bpp_normal(); @@ -935,7 +936,7 @@ static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 en u32 i; render_scanline_dest_normal *dest_ptr = ((render_scanline_dest_normal *)scanline) + start; - u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); u16 *map_ptr, *second_ptr; u8 *tile_ptr; @@ -948,8 +949,9 @@ static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 en } if (map_size & 0x01) { - if (horizontal_offset >= 512) { - horizontal_offset -= 512; + printf("%d\n", vertical_offset); + if (horizontal_offset >= 256) { + horizontal_offset -= 256; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -957,7 +959,7 @@ static void render_scanline_text_transparent_normal(u32 
layer, u32 start, u32 en second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 512; + horizontal_offset %= 256; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -973,8 +975,8 @@ static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 en u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1021,8 +1023,8 @@ static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 en u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1077,7 +1079,7 @@ static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, voi u32 i; render_scanline_dest_color16 *dest_ptr = ((render_scanline_dest_color16 *)scanline) + start; - u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); u16 *map_ptr, *second_ptr; u8 *tile_ptr; @@ -1090,8 +1092,8 @@ static void render_scanline_text_base_color16(u32 layer, u32 
start, u32 end, voi } if (map_size & 0x01) { - if (horizontal_offset >= 512) { - horizontal_offset -= 512; + if (horizontal_offset >= 256) { + horizontal_offset -= 256; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1099,7 +1101,7 @@ static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, voi second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 512; + horizontal_offset %= 256; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1114,8 +1116,8 @@ static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, voi u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1161,8 +1163,8 @@ static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, voi u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1219,7 +1221,7 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e u32 i; render_scanline_dest_color16 *dest_ptr = 
((render_scanline_dest_color16 *)scanline) + start; - u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); u16 *map_ptr, *second_ptr; u8 *tile_ptr; @@ -1232,8 +1234,8 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e } if (map_size & 0x01) { - if (horizontal_offset >= 512) { - horizontal_offset -= 512; + if (horizontal_offset >= 256) { + horizontal_offset -= 256; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1241,7 +1243,7 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 512; + horizontal_offset %= 256; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1256,8 +1258,8 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1303,8 +1305,8 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base 
= BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1361,7 +1363,7 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi u32 i; render_scanline_dest_color32 *dest_ptr = ((render_scanline_dest_color32 *)scanline) + start; - u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); u16 *map_ptr, *second_ptr; u8 *tile_ptr; @@ -1374,8 +1376,8 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi } if (map_size & 0x01) { - if (horizontal_offset >= 512) { - horizontal_offset -= 512; + if (horizontal_offset >= 256) { + horizontal_offset -= 256; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1383,7 +1385,7 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 512; + horizontal_offset %= 256; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1398,8 +1400,8 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1445,8 +1447,8 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi u32 vertical_pixel_offset = 
(vertical_offset % 8) * tile_width_4bpp; s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1503,7 +1505,7 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e u32 i; render_scanline_dest_color32 *dest_ptr = ((render_scanline_dest_color32 *)scanline) + start; - u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); u16 *map_ptr, *second_ptr; u8 *tile_ptr; @@ -1516,8 +1518,8 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e } if (map_size & 0x01) { - if (horizontal_offset >= 512) { - horizontal_offset -= 512; + if (horizontal_offset >= 256) { + horizontal_offset -= 256; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1525,7 +1527,7 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 512; + horizontal_offset %= 256; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1540,8 +1542,8 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 
512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1589,8 +1591,8 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1647,7 +1649,7 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void u32 i; render_scanline_dest_alpha *dest_ptr = ((render_scanline_dest_alpha *)scanline) + start; - u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); u16 *map_ptr, *second_ptr; u8 *tile_ptr; @@ -1660,8 +1662,8 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void } if (map_size & 0x01) { - if (horizontal_offset >= 512) { - horizontal_offset -= 512; + if (horizontal_offset >= 256) { + horizontal_offset -= 256; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1669,7 +1671,7 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 512; + horizontal_offset %= 256; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1684,8 +1686,8 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void u32 
vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1731,8 +1733,8 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1787,7 +1789,7 @@ static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end u32 i; render_scanline_dest_alpha *dest_ptr = ((render_scanline_dest_alpha *)scanline) + start; - u16 *map_base = (u16 *)(VRAM + ((bg_control >> 8) & 0x1F) * (1024 * 2)); + u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); u16 *map_ptr, *second_ptr; u8 *tile_ptr; @@ -1800,8 +1802,8 @@ static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end } if (map_size & 0x01) { - if (horizontal_offset >= 512) { - horizontal_offset -= 512; + if (horizontal_offset >= 256) { + horizontal_offset -= 256; map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { @@ -1809,7 +1811,7 @@ static void 
render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end second_ptr = map_base + (32 * 32); } } else { - horizontal_offset %= 512; + horizontal_offset %= 256; map_ptr = map_base + (horizontal_offset / 8); second_ptr = map_base; } @@ -1824,8 +1826,8 @@ static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -1871,8 +1873,8 @@ static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)) + vertical_pixel_offset; - u32 pixel_run = 512 - (horizontal_offset % 512); + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; + u32 pixel_run = 256 - (horizontal_offset % 256); u32 current_tile; map_base += ((vertical_offset % 256) / 8) * 32; @@ -2087,8 +2089,8 @@ void video_reload_counters() u32 map_size = (bg_control >> 14) & 0x03; \ u32 width_height = 1 << (7 + map_size); \ u32 map_pitch = map_size + 4; \ - u8 *map_base = VRAM + (((bg_control >> 8) & 0x1F) * (1024 * 2)); \ - u8 *tile_base = VRAM + (((bg_control >> 2) & 0x03) * (1024 * 16)); \ + u8 *map_base = BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); \ + u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03); 
\ u8 *tile_ptr = NULL; \ u32 map_offset, last_map_offset = (u32)-1; \ u32 i; \ @@ -2618,7 +2620,7 @@ static u8 obj_alpha_count[DISPLAY_HEIGHT]; #define render_scanline_obj_extra_variables_copy(type) \ u32 bldcnt = read_ioreg(REG_ADDR_BLDCNT); \ u32 dispcnt = read_ioreg(REG_ADDR_DISPCNT); \ - u32 obj_enable = read_ioreg(REG_ADDR_WINOUT) >> 8; \ + u32 obj_enable = WIN_GET_LOWER(read_ioreg(REG_ADDR_WINOUT)); \ render_scanline_layer_functions_##type(); \ u32 copy_start, copy_end; \ u16 copy_buffer[DISPLAY_WIDTH]; \ @@ -3426,8 +3428,8 @@ static void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline } #define window_x_coords(window_number) \ - window_##window_number##_x1 = read_ioreg(REG_ADDR_WIN##window_number##H) >> 8; \ - window_##window_number##_x2 = read_ioreg(REG_ADDR_WIN##window_number##H) & 0xFF; \ + window_##window_number##_x1 = WIN_GET_LOWER(read_ioreg(REG_ADDR_WIN##window_number##H)); \ + window_##window_number##_x2 = WIN_GET_HIGHER(read_ioreg(REG_ADDR_WIN##window_number##H)); \ window_##window_number##_enable = (winin >> (window_number * 8)) & 0x3F; \ \ if (window_##window_number##_x1 > DISPLAY_WIDTH) \ @@ -3440,13 +3442,13 @@ static void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline u32 window_##window_number##_x1, window_##window_number##_x2; \ u32 window_##window_number##_y1, window_##window_number##_y2; \ u32 window_##window_number##_enable = 0; \ - window_##window_number##_y1 = read_ioreg(REG_ADDR_WIN##window_number##V) >> 8; \ - window_##window_number##_y2 = read_ioreg(REG_ADDR_WIN##window_number##V) & 0xFF; \ + window_##window_number##_y1 = WIN_GET_LOWER(read_ioreg(REG_ADDR_WIN##window_number##V)); \ + window_##window_number##_y2 = WIN_GET_HIGHER(read_ioreg(REG_ADDR_WIN##window_number##V)); \ \ if (window_##window_number##_y1 > window_##window_number##_y2) { \ if ((((vcount <= window_##window_number##_y2) || (vcount > window_##window_number##_y1)) \ - || (window_##window_number##_y2 > (DISPLAY_WIDTH - 13))) \ - 
&& (window_##window_number##_y1 <= (DISPLAY_WIDTH - 13))) { \ + || (window_##window_number##_y2 > (DISPLAY_HEIGHT + 67))) \ + && (window_##window_number##_y1 <= (DISPLAY_HEIGHT + 67))) { \ window_x_coords(window_number); \ } else { \ window_##window_number##_x1 = DISPLAY_WIDTH; \ @@ -3454,8 +3456,8 @@ static void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline } \ } else { \ if ((((vcount >= window_##window_number##_y1) && (vcount < window_##window_number##_y2)) \ - || (window_##window_number##_y2 > (DISPLAY_WIDTH - 13))) \ - && (window_##window_number##_y1 <= (DISPLAY_WIDTH - 13))) { \ + || (window_##window_number##_y2 > (DISPLAY_HEIGHT + 67))) \ + && (window_##window_number##_y1 <= (DISPLAY_HEIGHT + 67))) { \ window_x_coords(window_number); \ } else { \ window_##window_number##_x1 = DISPLAY_WIDTH; \ @@ -3667,8 +3669,10 @@ void update_scanline(void) order_layers((dispcnt >> 8) & active_layers[video_mode], vcount); + // fill_line_color16(*(uint16_t *)PLTT, screen_offset, 0, DISPLAY_WIDTH); + // If the screen is in in forced blank draw pure white. 
- if (dispcnt & 0x80) { + if (dispcnt & DISPCNT_FORCED_BLANK) { fill_line_color16(0xFFFF, screen_offset, 0, DISPLAY_WIDTH); } else { if (video_mode < 3) { From 5891c7e3755067afb6e44ef03a07a537ff3bb4ce Mon Sep 17 00:00:00 2001 From: Oliver Bell Date: Thu, 19 Feb 2026 17:05:34 +0000 Subject: [PATCH 12/13] fix render for 512 backgrounds --- .../shared/rendering/sw_renderer_fast.c | 113 +++++++++--------- 1 file changed, 56 insertions(+), 57 deletions(-) diff --git a/src/platform/shared/rendering/sw_renderer_fast.c b/src/platform/shared/rendering/sw_renderer_fast.c index 143b7cbb6..814f64009 100644 --- a/src/platform/shared/rendering/sw_renderer_fast.c +++ b/src/platform/shared/rendering/sw_renderer_fast.c @@ -800,17 +800,17 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void if ((map_size & 0x02) && (vertical_offset >= 256)) { map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); } else { - map_base += (((vertical_offset % 256) / 8) * 32); + map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); } if (map_size & 0x01) { if (horizontal_offset >= 256) { horizontal_offset -= 256; - map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + (32 * 32); + second_ptr = map_base + ((map_width / 8) * 32); } } else { horizontal_offset %= 256; @@ -831,10 +831,10 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void tile_extra_variables_8bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if 
(pixel_run >= end) { @@ -880,10 +880,10 @@ static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void tile_extra_variables_4bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -945,18 +945,17 @@ static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 en if ((map_size & 0x02) && (vertical_offset >= 256)) { map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); } else { - map_base += (((vertical_offset % 256) / 8) * 32); + map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); } if (map_size & 0x01) { - printf("%d\n", vertical_offset); if (horizontal_offset >= 256) { horizontal_offset -= 256; - map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + (32 * 32); + second_ptr = map_base + ((map_width / 8) * 32); } } else { horizontal_offset %= 256; @@ -976,10 +975,10 @@ static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 en s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { 
@@ -1024,10 +1023,10 @@ static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 en s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1088,17 +1087,17 @@ static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, voi if ((map_size & 0x02) && (vertical_offset >= 256)) { map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); } else { - map_base += (((vertical_offset % 256) / 8) * 32); + map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); } if (map_size & 0x01) { if (horizontal_offset >= 256) { horizontal_offset -= 256; - map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + (32 * 32); + second_ptr = map_base + ((map_width / 8) * 32); } } else { horizontal_offset %= 256; @@ -1117,10 +1116,10 @@ static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, voi s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); 
partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1164,10 +1163,10 @@ static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, voi s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1230,17 +1229,17 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e if ((map_size & 0x02) && (vertical_offset >= 256)) { map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); } else { - map_base += (((vertical_offset % 256) / 8) * 32); + map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); } if (map_size & 0x01) { if (horizontal_offset >= 256) { horizontal_offset -= 256; - map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + (32 * 32); + second_ptr = map_base + ((map_width / 8) * 32); } } else { horizontal_offset %= 256; @@ -1259,10 +1258,10 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + 
map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1306,10 +1305,10 @@ static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 e s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1372,17 +1371,17 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi if ((map_size & 0x02) && (vertical_offset >= 256)) { map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); } else { - map_base += (((vertical_offset % 256) / 8) * 32); + map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); } if (map_size & 0x01) { if (horizontal_offset >= 256) { horizontal_offset -= 256; - map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + (32 * 32); + second_ptr = map_base + ((map_width / 8) * 32); } } else { horizontal_offset %= 256; @@ -1401,10 +1400,10 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 
current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1448,10 +1447,10 @@ static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, voi s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1514,17 +1513,17 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e if ((map_size & 0x02) && (vertical_offset >= 256)) { map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); } else { - map_base += (((vertical_offset % 256) / 8) * 32); + map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); } if (map_size & 0x01) { if (horizontal_offset >= 256) { horizontal_offset -= 256; - map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + (32 * 32); + second_ptr = map_base + ((map_width / 8) * 32); } } else { horizontal_offset %= 256; @@ -1543,10 +1542,10 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 
pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1592,10 +1591,10 @@ static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 e s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1658,17 +1657,17 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void if ((map_size & 0x02) && (vertical_offset >= 256)) { map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); } else { - map_base += (((vertical_offset % 256) / 8) * 32); + map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); } if (map_size & 0x01) { if (horizontal_offset >= 256) { horizontal_offset -= 256; - map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + (32 * 32); + second_ptr = map_base + ((map_width / 8) * 32); } } else { horizontal_offset %= 256; @@ -1687,10 +1686,10 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_8bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + 
vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1734,10 +1733,10 @@ static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1798,17 +1797,17 @@ static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end if ((map_size & 0x02) && (vertical_offset >= 256)) { map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); } else { - map_base += (((vertical_offset % 256) / 8) * 32); + map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); } if (map_size & 0x01) { if (horizontal_offset >= 256) { horizontal_offset -= 256; - map_ptr = map_base + (32 * 32) + (horizontal_offset / 8); + map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); second_ptr = map_base; } else { map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + (32 * 32); + second_ptr = map_base + ((map_width / 8) * 32); } } else { horizontal_offset %= 256; @@ -1827,10 +1826,10 @@ static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; 
tile_extra_variables_8bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { @@ -1874,10 +1873,10 @@ static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; tile_extra_variables_4bpp(); u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = 256 - (horizontal_offset % 256); + u32 pixel_run = map_width - (horizontal_offset % map_width); u32 current_tile; - map_base += ((vertical_offset % 256) / 8) * 32; + map_base += ((vertical_offset % 256) / 8) * (map_width / 8); partial_tile_offset = (horizontal_offset % 8); if (pixel_run >= end) { From a849adc47109e3f16e37618a5e9c17755cea6c03 Mon Sep 17 00:00:00 2001 From: Oliver Bell Date: Fri, 20 Feb 2026 02:42:59 +0000 Subject: [PATCH 13/13] switch to latest gpsp renderer, allow c++ compilation --- Makefile | 65 +- include/gba/defines.h | 6 +- include/gba/types.h | 6 +- src/platform/ps2/ps2.c | 57 - .../shared/rendering/sw_renderer_fast.c | 3736 ----------------- .../shared/rendering/sw_renderer_fast.cc | 2298 ++++++++++ tools/scaninc/source_file.cpp | 2 +- 7 files changed, 2349 insertions(+), 3821 deletions(-) delete mode 100644 src/platform/shared/rendering/sw_renderer_fast.c create mode 100644 src/platform/shared/rendering/sw_renderer_fast.cc diff --git a/Makefile b/Makefile index e17045f1f..e07a3e3cc 100644 --- a/Makefile +++ b/Makefile @@ -84,6 +84,7 @@ CC1 := tools/agbcc/bin/agbcc$(EXE) CC1_OLD := tools/agbcc/bin/old_agbcc$(EXE) else CC1 := $(PREFIX)gcc$(EXE) +CXX := $(PREFIX)g++$(EXE) CC1_OLD := $(CC1) 
endif @@ -194,6 +195,14 @@ C_SRCS := $(shell find $(C_SUBDIR) -name "*.c") endif C_OBJS := $(patsubst $(C_SUBDIR)/%.c,$(C_BUILDDIR)/%.o,$(C_SRCS)) +ifeq ($(PLATFORM),gba) +CXX_SRCS := $(shell find $(C_SUBDIR) -name "*.cc" -not -path "*/platform/*") +else +CXX_SRCS := $(shell find $(C_SUBDIR) -name "*.cc") +endif + +CXX_OBJS := $(patsubst $(C_SUBDIR)/%.cc,$(C_BUILDDIR)/%.o,$(CXX_SRCS)) + # Platform not included as we only need the headers for decomp scratches C_HEADERS := $(shell find $(INCLUDE_DIRS) -name "*.h" -not -path "*/platform/*") @@ -217,7 +226,7 @@ MID_OBJS := $(patsubst $(MID_SUBDIR)/%.mid,$(MID_BUILDDIR)/%.o,$(MID_SRCS)) SOUND_ASM_SRCS := $(wildcard $(SOUND_ASM_SUBDIR)/*.s) SOUND_ASM_OBJS := $(patsubst $(SOUND_ASM_SUBDIR)/%.s,$(SOUND_ASM_BUILDDIR)/%.o,$(SOUND_ASM_SRCS)) -OBJS := $(C_OBJS) $(ASM_OBJS) $(C_ASM_OBJS) $(DATA_ASM_OBJS) $(SONG_OBJS) $(MID_OBJS) +OBJS := $(C_OBJS) $(CXX_OBJS) $(ASM_OBJS) $(C_ASM_OBJS) $(DATA_ASM_OBJS) $(SONG_OBJS) $(MID_OBJS) OBJS_REL := $(patsubst $(OBJ_DIR)/%,%,$(OBJS)) FORMAT_SRC_PATHS := $(shell find . -name "*.c" ! -path '*/src/data/*' ! -path '*/build/*' ! 
-path '*/ext/*') @@ -278,25 +287,6 @@ else endif endif -ifeq ($(PLATFORM),gba) - ASFLAGS += -mcpu=arm7tdmi -mthumb-interwork - CC1FLAGS += -mthumb-interwork -else - ifeq ($(PLATFORM), sdl) - # for modern we are using a modern compiler - # so instead of CPP we can use gcc -E to "preprocess only" - CPP := $(CC1) -E - else ifeq ($(PLATFORM), psp) - CPP := $(CC1) -E - else ifeq ($(PLATFORM), sdl_ps2) - ASFLAGS += -msingle-float - else ifeq ($(PLATFORM), ps2) - ASFLAGS += -msingle-float - endif - # Allow file input through stdin on modern GCC and set it to "compile only" - CC1FLAGS += -x c -S -endif - ifeq ($(DEBUG),1) CC1FLAGS += -g3 -O0 CPPFLAGS += -D DEBUG=1 @@ -307,7 +297,7 @@ else else ifeq ($(PLATFORM),sdl_ps2) CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer else ifeq ($(PLATFORM),ps2) - CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer + CC1FLAGS += -O3 -fomit-frame-pointer else CC1FLAGS += -O2 endif @@ -337,6 +327,28 @@ else CPPFLAGS += -D ENABLE_DECOMP_CREDITS=1 endif +CXXFLAGS := $(CC1FLAGS) $(CPPFLAGS) -fno-rtti -fno-exceptions -std=c++11 + +ifeq ($(PLATFORM),gba) + ASFLAGS += -mcpu=arm7tdmi -mthumb-interwork + CC1FLAGS += -mthumb-interwork +else + ifeq ($(PLATFORM), sdl) + # for modern we are using a modern compiler + # so instead of CPP we can use gcc -E to "preprocess only" + CPP := $(CC1) -E + else ifeq ($(PLATFORM), psp) + CPP := $(CC1) -E + else ifeq ($(PLATFORM), sdl_ps2) + ASFLAGS += -msingle-float + else ifeq ($(PLATFORM), ps2) + ASFLAGS += -msingle-float + endif + # Allow file input through stdin on modern gcc/g++ and set it to "compile only" + CC1FLAGS += -x c -S + CXXFLAGS += -x c++ -S +endif + ### LINKER FLAGS ### # GBA @@ -584,11 +596,21 @@ ifeq ($(PLATFORM), gba) endif @$(AS) $(ASFLAGS) $(C_BUILDDIR)/$*.s -o $@ +$(C_BUILDDIR)/%.o: $(C_SUBDIR)/%.cc + @echo "$(CXX) -o $@ $<" + @$(shell mkdir -p $(shell dirname '$(C_BUILDDIR)/$*.o')) + @$(CXX) $(CXXFLAGS) -o $(C_BUILDDIR)/$*.s $< + @$(AS) $(ASFLAGS) $(C_BUILDDIR)/$*.s -o $@ + # Scan the src 
dependencies to determine if any dependent files have changed $(C_BUILDDIR)/%.d: $(C_SUBDIR)/%.c @$(shell mkdir -p $(shell dirname '$(C_BUILDDIR)/$*.d')) $(SCANINC) -M $@ $(INCLUDE_SCANINC_ARGS) $< +$(C_BUILDDIR)/%.d: $(C_SUBDIR)/%.cc + @$(shell mkdir -p $(shell dirname '$(C_BUILDDIR)/$*.d')) + $(SCANINC) -M $@ $(INCLUDE_SCANINC_ARGS) $< + # rule for sources from the src dir (parts of libraries) $(C_BUILDDIR)/%.o: $(C_SUBDIR)/%.s @echo "$(AS) -o $@ $<" @@ -608,6 +630,7 @@ $(DATA_ASM_BUILDDIR)/%.d: $(DATA_ASM_SUBDIR)/%.s ifneq ($(NODEP),1) -include $(addprefix $(OBJ_DIR)/,$(C_SRCS:.c=.d)) +-include $(addprefix $(OBJ_DIR)/,$(CXX_SRCS:.cc=.d)) -include $(addprefix $(OBJ_DIR)/,$(DATA_ASM_SRCS:.s=.d)) endif diff --git a/include/gba/defines.h b/include/gba/defines.h index 4ef57d98b..45371a2e1 100644 --- a/include/gba/defines.h +++ b/include/gba/defines.h @@ -45,8 +45,8 @@ #define DISPLAY_HEIGHT 160 #elif defined(__PS2__) // Runs at 60fps with the "fast draw" -#define DISPLAY_WIDTH 240 -#define DISPLAY_HEIGHT 160 +#define DISPLAY_WIDTH 320 +#define DISPLAY_HEIGHT 180 #else #define DISPLAY_WIDTH 426 #define DISPLAY_HEIGHT 240 @@ -60,7 +60,7 @@ #define WIDESCREEN_HACK TRUE #define EXTENDED_OAM TRUE #else -#define WIDESCREEN_HACK TRUE +#define WIDESCREEN_HACK FALSE #define EXTENDED_OAM !TRUE #endif extern uint8_t VRAM[VRAM_SIZE]; diff --git a/include/gba/types.h b/include/gba/types.h index fa3234357..419e1fdda 100644 --- a/include/gba/types.h +++ b/include/gba/types.h @@ -42,12 +42,12 @@ typedef u16 MetatileIndexType; // If the DISPLAY_HEIGHT was >255, scanline effects would break, // so we have to make this variable bigger. // (u16 should be plenty for screen coordinates, right?) -#if !defined(DISPLAY_HEIGHT) -#error DISPLAY_HEIGHT not defined. +#if !defined(WIDESCREEN_HACK) +#error WIDESCREEN_HACK not defined. 
#endif /// TODO: Technically this should only be #if (DISPLAY_HEIGHT > 255), // we should probably replace uses of int_vcount with a different type where a high DISPLAY_WIDTH necessitates u16. -#if ((DISPLAY_WIDTH > 255) || (DISPLAY_HEIGHT > 255)) +#if WIDESCREEN_HACK typedef u16 int_vcount; #else typedef u8 int_vcount; diff --git a/src/platform/ps2/ps2.c b/src/platform/ps2/ps2.c index 809c0a843..b0541e2e6 100644 --- a/src/platform/ps2/ps2.c +++ b/src/platform/ps2/ps2.c @@ -773,63 +773,6 @@ const s16 sineTable[256] (s16)0xE783, (s16)0xE8F8, (s16)0xEA71, (s16)0xEBED, (s16)0xED6C, (s16)0xEEEF, (s16)0xF074, (s16)0xF1FB, (s16)0xF384, (s16)0xF50F, (s16)0xF69C, (s16)0xF82B, (s16)0xF9BB, (s16)0xFB4B, (s16)0xFCDD, (s16)0xFE6E }; -void BgAffineSet(struct BgAffineSrcData *src, struct BgAffineDstData *dest, s32 count) -{ - for (s32 i = 0; i < count; i++) { - s32 cx = src[i].texX; - s32 cy = src[i].texY; - s16 dispx = src[i].scrX; - s16 dispy = src[i].scrY; - s16 rx = src[i].sx; - s16 ry = src[i].sy; - u16 theta = src[i].alpha >> 8; - s32 a = sineTable[(theta + 0x40) & 255]; - s32 b = sineTable[theta]; - - s16 dx = (rx * a) >> 14; - s16 dmx = (rx * b) >> 14; - s16 dy = (ry * b) >> 14; - s16 dmy = (ry * a) >> 14; - - dest[i].pa = dx; - dest[i].pb = -dmx; - dest[i].pc = dy; - dest[i].pd = dmy; - - s32 startx = cx - dx * dispx + dmx * dispy; - s32 starty = cy - dy * dispx - dmy * dispy; - - dest[i].dx = startx; - dest[i].dy = starty; - } -} - -void ObjAffineSet(struct ObjAffineSrcData *src, void *dest, s32 count, s32 offset) -{ - for (s32 i = 0; i < count; i++) { - s16 rx = src[i].xScale; - s16 ry = src[i].yScale; - u16 theta = src[i].rotation >> 8; - - s32 a = (s32)sineTable[(theta + 64) & 255]; - s32 b = (s32)sineTable[theta]; - - s16 dx = ((s32)rx * a) >> 14; - s16 dmx = ((s32)rx * b) >> 14; - s16 dy = ((s32)ry * b) >> 14; - s16 dmy = ((s32)ry * a) >> 14; - - CPUWriteHalfWord(dest, dx); - dest += offset; - CPUWriteHalfWord(dest, -dmx); - dest += offset; - 
CPUWriteHalfWord(dest, dy); - dest += offset; - CPUWriteHalfWord(dest, dmy); - dest += offset; - } -} - void SoftReset(u32 resetFlags) { } void SoftResetExram(u32 resetFlags) { } diff --git a/src/platform/shared/rendering/sw_renderer_fast.c b/src/platform/shared/rendering/sw_renderer_fast.c deleted file mode 100644 index 814f64009..000000000 --- a/src/platform/shared/rendering/sw_renderer_fast.c +++ /dev/null @@ -1,3736 +0,0 @@ -/* gameplaySP - * - * Copyright (C) 2006 Exophase - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "config.h" -#if RENDERER == RENDERER_SOFTWARE_FAST - -#include "global.h" -#include -#include - -#include "global.h" -#include "core.h" -#include "gba/defines.h" -#include "gba/io_reg.h" -#include "gba/types.h" - -#include "platform/shared/dma.h" - -#define eswap16(value) (value) -#define eswap32(value) (value) - -#define GBA_SCREEN_PITCH DISPLAY_WIDTH - -typedef u32 fixed16_16; -typedef u32 fixed8_24; - -#define float_to_fp16_16(value) (fixed16_16)((value)*65536.0) - -#define fp16_16_to_float(value) (float)((value) / 65536.0) - -#define u32_to_fp16_16(value) ((value) << 16) - -#define fp16_16_to_u32(value) ((value) >> 16) - -#define fp16_16_fractional_part(value) ((value)&0xFFFF) - -#define float_to_fp8_24(value) (fixed8_24)((value)*16777216.0) - -#define 
fp8_24_fractional_part(value) ((value)&0xFFFFFF) - -#define fixed_div(numerator, denominator, bits) (((numerator * (1 << bits)) + (denominator / 2)) / denominator) - -#define read_ioreg(regaddr) (eswap16(*(u16 *)(regaddr))) -#define read_ioreg32(regaddr) (read_ioreg(regaddr) | (read_ioreg((regaddr) + sizeof(u16)) << 16)) - -#define REG_ADDR_BGxCNT(n) (REG_ADDR_BG0CNT + ((n) * sizeof(u16))) - -#define convert_palette(value) (value & 0x7FFF) - -u16 *gba_screen_pixels = NULL; - -#define get_screen_pixels() gba_screen_pixels -#define get_screen_pitch() GBA_SCREEN_PITCH - -typedef struct { - u16 attr0, attr1, attr2, attr3; -} t_oam; - -void update_scanline(void); -void video_reload_counters(void); - -extern s32 affine_reference_x[2]; -extern s32 affine_reference_y[2]; - -typedef void (*tile_render_function)(u32 layer_number, u32 start, u32 end, void *dest_ptr); -typedef void (*bitmap_render_function)(u32 start, u32 end, void *dest_ptr); - -typedef struct { - tile_render_function normal_render_base; - tile_render_function normal_render_transparent; - tile_render_function alpha_render_base; - tile_render_function alpha_render_transparent; - tile_render_function color16_render_base; - tile_render_function color16_render_transparent; - tile_render_function color32_render_base; - tile_render_function color32_render_transparent; -} tile_layer_render_struct; - -typedef struct { - bitmap_render_function normal_render; -} bitmap_layer_render_struct; - -typedef enum { filter_nearest, filter_bilinear } video_filter_type; - -static void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline, u32 enable_flags, u32 dispcnt, u32 bldcnt, - const tile_layer_render_struct *layer_renderers); -static void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline, u32 enable_flags, u32 dispcnt, u32 bldcnt, - const bitmap_layer_render_struct *layer_renderers); - -#define tile_expand_base_normal(index) \ - current_pixel = palette[current_pixel]; \ - dest_ptr[index] = 
current_pixel - -#define tile_expand_base_normal_mode4(index) \ - if (current_pixel != 0) { \ - current_pixel = palette[current_pixel]; \ - dest_ptr[index] = current_pixel; \ - } - -#define tile_expand_transparent_normal(index) tile_expand_base_normal(index) - -#define tile_expand_copy(index) dest_ptr[index] = copy_ptr[index] - -#define advance_dest_ptr_base(delta) dest_ptr += delta - -#define advance_dest_ptr_transparent(delta) advance_dest_ptr_base(delta) - -#define advance_dest_ptr_copy(delta) \ - advance_dest_ptr_base(delta); \ - copy_ptr += delta - -#define color_combine_mask_a(layer) ((read_ioreg(REG_ADDR_BLDCNT) >> layer) & 0x01) - -// For color blending operations, will create a mask that has in bit -// 10 if the layer is target B, and bit 9 if the layer is target A. - -#define color_combine_mask(layer) (color_combine_mask_a(layer) | ((read_ioreg(REG_ADDR_BLDCNT) >> (layer + 7)) & 0x02)) << 9 - -// For alpha blending renderers, draw the palette index (9bpp) and -// layer bits rather than the raw RGB. For the base this should write to -// the 32bit location directly. - -#define tile_expand_base_alpha(index) dest_ptr[index] = current_pixel | pixel_combine - -#define tile_expand_base_bg(index) dest_ptr[index] = bg_combine - -// For layered (transparent) writes this should shift the "stack" and write -// to the bottom. This will preserve the topmost pixel and the most recent -// one. - -#define tile_expand_transparent_alpha(index) dest_ptr[index] = (dest_ptr[index] << 16) | current_pixel | pixel_combine - -// OBJ should only shift if the top isn't already OBJ -#define tile_expand_transparent_alpha_obj(index) \ - dest = dest_ptr[index]; \ - if (dest & 0x00000100) \ - dest_ptr[index] = (dest & 0xFFFF0000) | current_pixel | pixel_combine; \ - else \ - dest_ptr[index] = (dest << 16) | current_pixel | pixel_combine; - -// For color effects that don't need to preserve the previous layer. 
-// The color32 version should be used with 32bit wide dest_ptr so as to be -// compatible with alpha combine on top of it. - -#define tile_expand_base_color16(index) dest_ptr[index] = current_pixel | pixel_combine - -#define tile_expand_transparent_color16(index) tile_expand_base_color16(index) - -#define tile_expand_base_color32(index) tile_expand_base_color16(index) - -#define tile_expand_transparent_color32(index) tile_expand_base_color16(index) - -// Operations for isolation 8bpp pixels within 32bpp pixel blocks. - -#define tile_8bpp_pixel_op_mask(op_param) current_pixel = current_pixels & 0xFF - -#define tile_8bpp_pixel_op_shift_mask(shift) current_pixel = (current_pixels >> shift) & 0xFF - -#define tile_8bpp_pixel_op_shift(shift) current_pixel = current_pixels >> shift - -#define tile_8bpp_pixel_op_none(shift) - -// Base should always draw raw in 8bpp mode; color 0 will be drawn where -// color 0 is. - -#define tile_8bpp_draw_base_normal(index) tile_expand_base_normal(index) - -#define tile_8bpp_draw_base_alpha(index) \ - if (current_pixel) { \ - tile_expand_base_alpha(index); \ - } else { \ - tile_expand_base_bg(index); \ - } - -#define tile_8bpp_draw_base_color16(index) tile_8bpp_draw_base_alpha(index) - -#define tile_8bpp_draw_base_color32(index) tile_8bpp_draw_base_alpha(index) - -#define tile_8bpp_draw_base(index, op, op_param, alpha_op) \ - tile_8bpp_pixel_op_##op(op_param); \ - tile_8bpp_draw_base_##alpha_op(index) - -// Transparent (layered) writes should only replace what is there if the -// pixel is not transparent (zero) - -#define tile_8bpp_draw_transparent(index, op, op_param, alpha_op) \ - tile_8bpp_pixel_op_##op(op_param); \ - if (current_pixel) { \ - tile_expand_transparent_##alpha_op(index); \ - } - -#define tile_8bpp_draw_copy(index, op, op_param, alpha_op) \ - tile_8bpp_pixel_op_##op(op_param); \ - if (current_pixel) { \ - tile_expand_copy(index); \ - } - -// Get the current tile from the map in 8bpp mode - -#define get_tile_8bpp() \ - 
current_tile = eswap16(*map_ptr); \ - tile_ptr = tile_base + ((current_tile & 0x3FF) * 64) - -// Draw half of a tile in 8bpp mode, for base renderer - -#define tile_8bpp_draw_four_noflip(index, combine_op, alpha_op) \ - tile_8bpp_draw_##combine_op(index + 0, mask, 0, alpha_op); \ - tile_8bpp_draw_##combine_op(index + 1, shift_mask, 8, alpha_op); \ - tile_8bpp_draw_##combine_op(index + 2, shift_mask, 16, alpha_op); \ - tile_8bpp_draw_##combine_op(index + 3, shift, 24, alpha_op) - -// Like the above, but draws the half-tile horizontally flipped - -#define tile_8bpp_draw_four_flip(index, combine_op, alpha_op) \ - tile_8bpp_draw_##combine_op(index + 3, mask, 0, alpha_op); \ - tile_8bpp_draw_##combine_op(index + 2, shift_mask, 8, alpha_op); \ - tile_8bpp_draw_##combine_op(index + 1, shift_mask, 16, alpha_op); \ - tile_8bpp_draw_##combine_op(index + 0, shift, 24, alpha_op) - -#define tile_8bpp_draw_four_base(index, alpha_op, flip_op) tile_8bpp_draw_four_##flip_op(index, base, alpha_op) - -// Draw half of a tile in 8bpp mode, for transparent renderer; as an -// optimization the entire thing is checked against zero (in transparent -// capable renders it is more likely for the pixels to be transparent than -// opaque) - -#define tile_8bpp_draw_four_transparent(index, alpha_op, flip_op) \ - if (current_pixels != 0) { \ - tile_8bpp_draw_four_##flip_op(index, transparent, alpha_op); \ - } - -#define tile_8bpp_draw_four_copy(index, alpha_op, flip_op) \ - if (current_pixels != 0) { \ - tile_8bpp_draw_four_##flip_op(index, copy, alpha_op); \ - } - -// Helper macro for drawing 8bpp tiles clipped against the edge of the screen - -#define partial_tile_8bpp(combine_op, alpha_op) \ - for (i = 0; i < partial_tile_run; i++) { \ - tile_8bpp_draw_##combine_op(0, mask, 0, alpha_op); \ - current_pixels >>= 8; \ - advance_dest_ptr_##combine_op(1); \ - } - -// Draws 8bpp tiles clipped against the left side of the screen, -// partial_tile_offset indicates how much clipped in it is, 
partial_tile_run -// indicates how much it should draw. - -#define partial_tile_right_noflip_8bpp(combine_op, alpha_op) \ - if (partial_tile_offset >= 4) { \ - current_pixels = eswap32(*((u32 *)(tile_ptr + 4))) >> ((partial_tile_offset - 4) * 8); \ - partial_tile_8bpp(combine_op, alpha_op); \ - } else { \ - partial_tile_run -= 4; \ - current_pixels = eswap32(*((u32 *)tile_ptr)) >> (partial_tile_offset * 8); \ - partial_tile_8bpp(combine_op, alpha_op); \ - current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ - tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip); \ - advance_dest_ptr_##combine_op(4); \ - } - -// Draws 8bpp tiles clipped against both the left and right side of the -// screen, IE, runs of less than 8 - partial_tile_offset. - -#define partial_tile_mid_noflip_8bpp(combine_op, alpha_op) \ - if (partial_tile_offset >= 4) { \ - current_pixels = eswap32(*((u32 *)(tile_ptr + 4))) >> ((partial_tile_offset - 4) * 8); \ - } else { \ - current_pixels = eswap32(*((u32 *)tile_ptr)) >> (partial_tile_offset * 8); \ - if ((partial_tile_offset + partial_tile_run) > 4) { \ - u32 old_run = partial_tile_run; \ - partial_tile_run = 4 - partial_tile_offset; \ - partial_tile_8bpp(combine_op, alpha_op); \ - partial_tile_run = old_run - partial_tile_run; \ - current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ - } \ - } \ - partial_tile_8bpp(combine_op, alpha_op); - -// Draws 8bpp tiles clipped against the right side of the screen, -// partial_tile_run indicates how much there is to draw. - -#define partial_tile_left_noflip_8bpp(combine_op, alpha_op) \ - if (partial_tile_run >= 4) { \ - current_pixels = eswap32(*((u32 *)tile_ptr)); \ - tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip); \ - advance_dest_ptr_##combine_op(4); \ - tile_ptr += 4; \ - partial_tile_run -= 4; \ - } \ - \ - current_pixels = eswap32(*((u32 *)(tile_ptr))); \ - partial_tile_8bpp(combine_op, alpha_op) - -// Draws a non-clipped (complete) 8bpp tile. 
- -#define tile_noflip_8bpp(combine_op, alpha_op) \ - current_pixels = eswap32(*((u32 *)tile_ptr)); \ - tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip); \ - current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ - tile_8bpp_draw_four_##combine_op(4, alpha_op, noflip) - -// Like the above versions but draws flipped tiles. - -#define partial_tile_flip_8bpp(combine_op, alpha_op) \ - for (i = 0; i < partial_tile_run; i++) { \ - tile_8bpp_draw_##combine_op(0, shift, 24, alpha_op); \ - current_pixels <<= 8; \ - advance_dest_ptr_##combine_op(1); \ - } - -#define partial_tile_right_flip_8bpp(combine_op, alpha_op) \ - if (partial_tile_offset >= 4) { \ - current_pixels = eswap32(*((u32 *)tile_ptr)) << ((partial_tile_offset - 4) * 8); \ - partial_tile_flip_8bpp(combine_op, alpha_op); \ - } else { \ - partial_tile_run -= 4; \ - current_pixels = eswap32(*((u32 *)(tile_ptr + 4))) << ((partial_tile_offset - 4) * 8); \ - partial_tile_flip_8bpp(combine_op, alpha_op); \ - current_pixels = eswap32(*((u32 *)tile_ptr)); \ - tile_8bpp_draw_four_##combine_op(0, alpha_op, flip); \ - advance_dest_ptr_##combine_op(4); \ - } - -#define partial_tile_mid_flip_8bpp(combine_op, alpha_op) \ - if (partial_tile_offset >= 4) \ - current_pixels = eswap32(*((u32 *)tile_ptr)) << ((partial_tile_offset - 4) * 8); \ - else { \ - current_pixels = eswap32(*((u32 *)(tile_ptr + 4))) << ((partial_tile_offset - 4) * 8); \ - \ - if ((partial_tile_offset + partial_tile_run) > 4) { \ - u32 old_run = partial_tile_run; \ - partial_tile_run = 4 - partial_tile_offset; \ - partial_tile_flip_8bpp(combine_op, alpha_op); \ - partial_tile_run = old_run - partial_tile_run; \ - current_pixels = eswap32(*((u32 *)(tile_ptr))); \ - } \ - } \ - partial_tile_flip_8bpp(combine_op, alpha_op); - -#define partial_tile_left_flip_8bpp(combine_op, alpha_op) \ - if (partial_tile_run >= 4) { \ - current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ - tile_8bpp_draw_four_##combine_op(0, alpha_op, flip); \ - 
advance_dest_ptr_##combine_op(4); \ - tile_ptr -= 4; \ - partial_tile_run -= 4; \ - } \ - \ - current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ - partial_tile_flip_8bpp(combine_op, alpha_op) - -#define tile_flip_8bpp(combine_op, alpha_op) \ - current_pixels = eswap32(*((u32 *)(tile_ptr + 4))); \ - tile_8bpp_draw_four_##combine_op(0, alpha_op, flip); \ - current_pixels = eswap32(*((u32 *)tile_ptr)); \ - tile_8bpp_draw_four_##combine_op(4, alpha_op, flip) - -// Operations for isolating 4bpp tiles in a 32bit block - -#define tile_4bpp_pixel_op_mask(op_param) current_pixel = current_pixels & 0x0F - -#define tile_4bpp_pixel_op_shift_mask(shift) current_pixel = (current_pixels >> shift) & 0x0F - -#define tile_4bpp_pixel_op_shift(shift) current_pixel = current_pixels >> shift - -#define tile_4bpp_pixel_op_none(op_param) - -// Draws a single 4bpp pixel as base, normal renderer; checks to see if the -// pixel is zero because if so the current palette should not be applied. -// These ifs can be replaced with a lookup table, may or may not be superior -// this way, should be benchmarked. The lookup table would be from 0-255 -// identity map except for multiples of 16, which would map to 0. - -#define tile_4bpp_draw_base_normal(index) \ - if (current_pixel) \ - current_pixel |= current_palette; \ - tile_expand_base_normal(index); - -#define tile_4bpp_draw_base_alpha(index) \ - if (current_pixel) { \ - current_pixel |= current_palette; \ - tile_expand_base_alpha(index); \ - } else { \ - tile_expand_base_bg(index); \ - } - -#define tile_4bpp_draw_base_color16(index) tile_4bpp_draw_base_alpha(index) - -#define tile_4bpp_draw_base_color32(index) tile_4bpp_draw_base_alpha(index) - -#define tile_4bpp_draw_base(index, op, op_param, alpha_op) \ - tile_4bpp_pixel_op_##op(op_param); \ - tile_4bpp_draw_base_##alpha_op(index) - -// Draws a single 4bpp pixel as layered, if not transparent. 
- -#define tile_4bpp_draw_transparent(index, op, op_param, alpha_op) \ - tile_4bpp_pixel_op_##op(op_param); \ - if (current_pixel) { \ - current_pixel |= current_palette; \ - tile_expand_transparent_##alpha_op(index); \ - } - -#define tile_4bpp_draw_copy(index, op, op_param, alpha_op) \ - tile_4bpp_pixel_op_##op(op_param); \ - if (current_pixel) { \ - current_pixel |= current_palette; \ - tile_expand_copy(index); \ - } - -// Draws eight background pixels in transparent mode, for alpha or normal -// renderers. - -#define tile_4bpp_draw_eight_base_zero(value) \ - dest_ptr[0] = value; \ - dest_ptr[1] = value; \ - dest_ptr[2] = value; \ - dest_ptr[3] = value; \ - dest_ptr[4] = value; \ - dest_ptr[5] = value; \ - dest_ptr[6] = value; \ - dest_ptr[7] = value - -// Draws eight background pixels for the alpha renderer, basically color zero -// with the background flag high. - -#define tile_4bpp_draw_eight_base_zero_alpha() tile_4bpp_draw_eight_base_zero(bg_combine) - -#define tile_4bpp_draw_eight_base_zero_color16() tile_4bpp_draw_eight_base_zero_alpha() - -#define tile_4bpp_draw_eight_base_zero_color32() tile_4bpp_draw_eight_base_zero_alpha() - -// Draws eight background pixels for the normal renderer, just a bunch of -// zeros. - -#define tile_4bpp_draw_eight_base_zero_normal() \ - current_pixel = palette[0]; \ - tile_4bpp_draw_eight_base_zero(current_pixel) - -// Draws eight 4bpp pixels. 
- -#define tile_4bpp_draw_eight_noflip(combine_op, alpha_op) \ - tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op); \ - tile_4bpp_draw_##combine_op(1, shift_mask, 4, alpha_op); \ - tile_4bpp_draw_##combine_op(2, shift_mask, 8, alpha_op); \ - tile_4bpp_draw_##combine_op(3, shift_mask, 12, alpha_op); \ - tile_4bpp_draw_##combine_op(4, shift_mask, 16, alpha_op); \ - tile_4bpp_draw_##combine_op(5, shift_mask, 20, alpha_op); \ - tile_4bpp_draw_##combine_op(6, shift_mask, 24, alpha_op); \ - tile_4bpp_draw_##combine_op(7, shift, 28, alpha_op) - -// Draws eight 4bpp pixels in reverse order (for hflip). - -#define tile_4bpp_draw_eight_flip(combine_op, alpha_op) \ - tile_4bpp_draw_##combine_op(7, mask, 0, alpha_op); \ - tile_4bpp_draw_##combine_op(6, shift_mask, 4, alpha_op); \ - tile_4bpp_draw_##combine_op(5, shift_mask, 8, alpha_op); \ - tile_4bpp_draw_##combine_op(4, shift_mask, 12, alpha_op); \ - tile_4bpp_draw_##combine_op(3, shift_mask, 16, alpha_op); \ - tile_4bpp_draw_##combine_op(2, shift_mask, 20, alpha_op); \ - tile_4bpp_draw_##combine_op(1, shift_mask, 24, alpha_op); \ - tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op) - -// Draws eight 4bpp pixels in base mode, checks if all are zero, if so draws -// the appropriate background pixels. - -#define tile_4bpp_draw_eight_base(alpha_op, flip_op) \ - if (current_pixels != 0) { \ - tile_4bpp_draw_eight_##flip_op(base, alpha_op); \ - } else { \ - tile_4bpp_draw_eight_base_zero_##alpha_op(); \ - } - -// Draws eight 4bpp pixels in transparent (layered) mode, checks if all are -// zero and if so draws nothing. - -#define tile_4bpp_draw_eight_transparent(alpha_op, flip_op) \ - if (current_pixels != 0) { \ - tile_4bpp_draw_eight_##flip_op(transparent, alpha_op); \ - } - -#define tile_4bpp_draw_eight_copy(alpha_op, flip_op) \ - if (current_pixels != 0) { \ - tile_4bpp_draw_eight_##flip_op(copy, alpha_op); \ - } - -// Gets the current tile in 4bpp mode, also getting the current palette and -// the pixel block. 
- -#define get_tile_4bpp() \ - current_tile = eswap16(*map_ptr); \ - current_palette = (current_tile >> 12) << 4; \ - tile_ptr = tile_base + ((current_tile & 0x3FF) * 32); - -// Helper macro for drawing clipped 4bpp tiles. - -#define partial_tile_4bpp(combine_op, alpha_op) \ - for (i = 0; i < partial_tile_run; i++) { \ - tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op); \ - current_pixels >>= 4; \ - advance_dest_ptr_##combine_op(1); \ - } - -// Draws a 4bpp tile clipped against the left edge of the screen. -// partial_tile_offset is how far in it's clipped, partial_tile_run is -// how many to draw. - -#define partial_tile_right_noflip_4bpp(combine_op, alpha_op) \ - current_pixels = eswap32(*((u32 *)tile_ptr)) >> (partial_tile_offset * 4); \ - partial_tile_4bpp(combine_op, alpha_op) - -// Draws a 4bpp tile clipped against both edges of the screen, same as right. - -#define partial_tile_mid_noflip_4bpp(combine_op, alpha_op) partial_tile_right_noflip_4bpp(combine_op, alpha_op) - -// Draws a 4bpp tile clipped against the right edge of the screen. -// partial_tile_offset is how many to draw. - -#define partial_tile_left_noflip_4bpp(combine_op, alpha_op) \ - current_pixels = eswap32(*((u32 *)tile_ptr)); \ - partial_tile_4bpp(combine_op, alpha_op) - -// Draws a complete 4bpp tile row (not clipped) -#define tile_noflip_4bpp(combine_op, alpha_op) \ - current_pixels = eswap32(*((u32 *)tile_ptr)); \ - tile_4bpp_draw_eight_##combine_op(alpha_op, noflip) - -// Like the above, but draws flipped tiles. 
- -#define partial_tile_flip_4bpp(combine_op, alpha_op) \ - for (i = 0; i < partial_tile_run; i++) { \ - tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op); \ - current_pixels <<= 4; \ - advance_dest_ptr_##combine_op(1); \ - } - -#define partial_tile_right_flip_4bpp(combine_op, alpha_op) \ - current_pixels = eswap32(*((u32 *)tile_ptr)) << (partial_tile_offset * 4); \ - partial_tile_flip_4bpp(combine_op, alpha_op) - -#define partial_tile_mid_flip_4bpp(combine_op, alpha_op) partial_tile_right_flip_4bpp(combine_op, alpha_op) - -#define partial_tile_left_flip_4bpp(combine_op, alpha_op) \ - current_pixels = eswap32(*((u32 *)tile_ptr)); \ - partial_tile_flip_4bpp(combine_op, alpha_op) - -#define tile_flip_4bpp(combine_op, alpha_op) \ - current_pixels = eswap32(*((u32 *)tile_ptr)); \ - tile_4bpp_draw_eight_##combine_op(alpha_op, flip) - -// Draws a single (partial or complete) tile from the tilemap, flipping -// as necessary. - -#define single_tile_map(tile_type, combine_op, color_depth, alpha_op) \ - get_tile_##color_depth(); \ - if (current_tile & 0x800) \ - tile_ptr += vertical_pixel_flip; \ - \ - if (current_tile & 0x400) { \ - tile_type##_flip_##color_depth(combine_op, alpha_op); \ - } else { \ - tile_type##_noflip_##color_depth(combine_op, alpha_op); \ - } - -#define single_tile_map_base_4bpp_color16(tile_type) \ - get_tile_4bpp(); \ - if (current_tile & 0x800) \ - tile_ptr += vertical_pixel_flip; \ - \ - if (current_tile & 0x400) { \ - tile_type##_flip_4bpp(base, color16); \ - } else { \ - tile_type##_noflip_4bpp(base, color16); \ - } - -// Draws multiple sequential tiles from the tilemap, hflips and vflips as -// necessary. 
- -#define multiple_tile_map(combine_op, color_depth, alpha_op) \ - for (i = 0; i < tile_run; i++) { \ - single_tile_map(tile, combine_op, color_depth, alpha_op); \ - advance_dest_ptr_##combine_op(8); \ - map_ptr++; \ - } - -#define multiple_tile_map_transparent_8bpp_color16() \ - for (i = 0; i < tile_run; i++) { \ - single_tile_map(tile, transparent, 8bpp, color16); \ - advance_dest_ptr_transparent(8); \ - map_ptr++; \ - } - -#define multiple_tile_map_transparent_4bpp_color16() \ - for (i = 0; i < tile_run; i++) { \ - single_tile_map(tile, transparent, 4bpp, color16); \ - advance_dest_ptr_transparent(8); \ - map_ptr++; \ - } - -#define multiple_tile_map_base_8bpp_color16() \ - for (i = 0; i < tile_run; i++) { \ - single_tile_map(tile, base, 8bpp, color16); \ - advance_dest_ptr_base(8); \ - map_ptr++; \ - } - -#define multiple_tile_map_base_4bpp_color16() \ - for (i = 0; i < tile_run; i++) { \ - single_tile_map_base_4bpp_color16(tile); \ - advance_dest_ptr_base(8); \ - map_ptr++; \ - } - -#define multiple_tile_map_transparent_8bpp_normal() \ - for (i = 0; i < tile_run; i++) { \ - single_tile_map(tile, transparent, 8bpp, normal); \ - advance_dest_ptr_transparent(8); \ - map_ptr++; \ - } - -#define multiple_tile_map_transparent_4bpp_normal() \ - for (i = 0; i < tile_run; i++) { \ - single_tile_map(tile, transparent, 4bpp, normal); \ - advance_dest_ptr_transparent(8); \ - map_ptr++; \ - } - -#define multiple_tile_map_base_8bpp_normal() \ - for (i = 0; i < tile_run; i++) { \ - single_tile_map(tile, base, 8bpp, normal); \ - advance_dest_ptr_base(8); \ - map_ptr++; \ - } - -#define multiple_tile_map_base_4bpp_normal() \ - for (i = 0; i < tile_run; i++) { \ - single_tile_map(tile, base, 4bpp, normal); \ - advance_dest_ptr_base(8); \ - map_ptr++; \ - } - -// Draws a partial tile from a tilemap clipped against the left edge of the -// screen. 
- -#define partial_tile_right_map(combine_op, color_depth, alpha_op) \ - single_tile_map(partial_tile_right, combine_op, color_depth, alpha_op); \ - map_ptr++ - -// Draws a partial tile from a tilemap clipped against both edges of the -// screen. - -#define partial_tile_mid_map(combine_op, color_depth, alpha_op) single_tile_map(partial_tile_mid, combine_op, color_depth, alpha_op) - -// Draws a partial tile from a tilemap clipped against the right edge of the -// screen. - -#define partial_tile_left_map(combine_op, color_depth, alpha_op) single_tile_map(partial_tile_left, combine_op, color_depth, alpha_op) - -// Advances a non-flipped 4bpp obj to the next tile. - -#define obj_advance_noflip_4bpp() tile_ptr += 32 - -// Advances a non-flipped 8bpp obj to the next tile. - -#define obj_advance_noflip_8bpp() tile_ptr += 64 - -// Advances a flipped 4bpp obj to the next tile. - -#define obj_advance_flip_4bpp() tile_ptr -= 32 - -// Advances a flipped 8bpp obj to the next tile. - -#define obj_advance_flip_8bpp() tile_ptr -= 64 - -// Draws multiple sequential tiles from an obj, flip_op determines if it should -// be flipped or not (set to flip or noflip) - -#define multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op) \ - for (i = 0; i < tile_run; i++) { \ - tile_##flip_op##_##color_depth(combine_op, alpha_op); \ - obj_advance_##flip_op##_##color_depth(); \ - advance_dest_ptr_##combine_op(8); \ - } - -// Draws an obj's tile clipped against the left side of the screen - -#define partial_tile_right_obj(combine_op, color_depth, alpha_op, flip_op) \ - partial_tile_right_##flip_op##_##color_depth(combine_op, alpha_op); \ - obj_advance_##flip_op##_##color_depth() - -// Draws an obj's tile clipped against both sides of the screen - -#define partial_tile_mid_obj(combine_op, color_depth, alpha_op, flip_op) partial_tile_mid_##flip_op##_##color_depth(combine_op, alpha_op) - -// Draws an obj's tile clipped against the right side of the screen - -#define 
partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op) partial_tile_left_##flip_op##_##color_depth(combine_op, alpha_op) - -// Extra variables specific for 8bpp/4bpp tile renderers. - -#define tile_extra_variables_8bpp() - -#define tile_extra_variables_4bpp() u32 current_palette - -// Byte lengths of complete tiles and tile rows in 4bpp and 8bpp. - -#define tile_width_4bpp 4 -#define tile_size_4bpp 32 -#define tile_width_8bpp 8 -#define tile_size_8bpp 64 - -#define render_scanline_dest_normal u16 -#define render_scanline_dest_alpha u32 -#define render_scanline_dest_alpha_obj u32 -#define render_scanline_dest_color16 u16 -#define render_scanline_dest_color32 u32 -#define render_scanline_dest_partial_alpha u32 -#define render_scanline_dest_copy_tile u16 -#define render_scanline_dest_copy_bitmap u16 - -// If rendering a scanline that is not a target A then there's no point in -// keeping what's underneath it because it can't blend with it. - -#define render_scanline_skip_alpha(bg_type, combine_op) \ - if ((pixel_combine & 0x00000200) == 0) { \ - render_scanline_##bg_type##_##combine_op##_color32(layer, start, end, scanline); \ - return; \ - } - -#define render_scanline_extra_variables_base_normal(bg_type) u16 *palette = PLTT - -#define render_scanline_extra_variables_base_alpha(bg_type) \ - u32 bg_combine = color_combine_mask(5); \ - u32 pixel_combine = color_combine_mask(layer) | (bg_combine << 16); \ - render_scanline_skip_alpha(bg_type, base) - -#define render_scanline_extra_variables_base_color() \ - u32 bg_combine = color_combine_mask(5); \ - u32 pixel_combine = color_combine_mask(layer) - -#define render_scanline_extra_variables_base_color16(bg_type) render_scanline_extra_variables_base_color() - -#define render_scanline_extra_variables_base_color32(bg_type) render_scanline_extra_variables_base_color() - -#define render_scanline_extra_variables_transparent_normal(bg_type) render_scanline_extra_variables_base_normal(bg_type) - -#define 
render_scanline_extra_variables_transparent_alpha(bg_type) \ - u32 pixel_combine = color_combine_mask(layer); \ - render_scanline_skip_alpha(bg_type, transparent) - -#define render_scanline_extra_variables_transparent_color() u32 pixel_combine = color_combine_mask(layer) - -#define render_scanline_extra_variables_transparent_color16(bg_type) render_scanline_extra_variables_transparent_color() - -#define render_scanline_extra_variables_transparent_color32(bg_type) render_scanline_extra_variables_transparent_color() - -static const u32 map_widths[] = { 256, 512, 256, 512 }; - -static void render_scanline_text_base_normal(u32 layer, u32 start, u32 end, void *scanline) -{ - render_scanline_extra_variables_base_normal(text); - u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); - u32 map_size = (bg_control >> 14) & 0x03; - u32 map_width = map_widths[map_size]; - u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; - u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; - - u32 current_pixel; - u32 current_pixels; - u32 partial_tile_run = 0; - u32 partial_tile_offset; - u32 tile_run; - u32 i; - render_scanline_dest_normal *dest_ptr = ((render_scanline_dest_normal *)scanline) + start; - - u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); - u16 *map_ptr, *second_ptr; - u8 *tile_ptr; - - end -= start; - - if ((map_size & 0x02) && (vertical_offset >= 256)) { - map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); - } else { - map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); - } - - if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; - map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); - second_ptr = map_base; - } else { - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + ((map_width / 8) * 32); - } - } else { - 
horizontal_offset %= 256; - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base; - } - - if (bg_control & 0x80) { - /* color depth: 8bpp - * combine: base - * alpha : normal - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; - s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; - - tile_extra_variables_8bpp(); - - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(base, 8bpp, normal); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(base, 8bpp, normal); - } - } - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(base, 8bpp, normal); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map_base_8bpp_normal(); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map_base_8bpp_normal(); - - partial_tile_run = end % 8; - if (partial_tile_run) { - partial_tile_left_map(base, 8bpp, normal); - } - } else { - /* color depth: 4bpp - * combine: base - * alpha : normal - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; - s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; - - tile_extra_variables_4bpp(); - - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 
current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(base, 4bpp, normal); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(base, 4bpp, normal); - } - } - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(base, 4bpp, normal); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map_base_4bpp_normal(); - map_ptr = second_ptr; - end -= pixel_run; - } - - tile_run = end / 8; - multiple_tile_map_base_4bpp_normal(); - - partial_tile_run = end % 8; - if (partial_tile_run) { - partial_tile_left_map(base, 4bpp, normal); - } - } -} - -static void render_scanline_text_transparent_normal(u32 layer, u32 start, u32 end, void *scanline) -{ - render_scanline_extra_variables_transparent_normal(text); - u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); - u32 map_size = (bg_control >> 14) & 0x03; - u32 map_width = map_widths[map_size]; - u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; - u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; - u32 current_pixel; - u32 current_pixels; - u32 partial_tile_run = 0; - u32 partial_tile_offset; - u32 tile_run; - u32 i; - render_scanline_dest_normal *dest_ptr = ((render_scanline_dest_normal *)scanline) + start; - - u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); - u16 *map_ptr, *second_ptr; - u8 *tile_ptr; - - end -= start; - - if ((map_size & 0x02) && (vertical_offset >= 256)) { - map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); - } else { - map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); - } - - if 
(map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; - map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); - second_ptr = map_base; - } else { - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + ((map_width / 8) * 32); - } - } else { - horizontal_offset %= 256; - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base; - } - - if (bg_control & 0x80) { - /* color depth: 8bpp - * combine: transparent - * alpha : normal - */ - - /* Render a single scanline of text tiles */ - - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; - s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_8bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(transparent, 8bpp, normal); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(transparent, 8bpp, normal); - } - } - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(transparent, 8bpp, normal); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map_transparent_8bpp_normal(); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map_transparent_8bpp_normal(); - - partial_tile_run = end % 8; - if (partial_tile_run) { - partial_tile_left_map(transparent, 8bpp, normal); - } - } else { - /* color depth: 4bpp - * combine: transparent - * alpha : normal - */ - - /* Render a single scanline of text tiles */ - - u32 
vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; - s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_4bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(transparent, 4bpp, normal); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(transparent, 4bpp, normal); - } - } - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(transparent, 4bpp, normal); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map_transparent_4bpp_normal(); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map_transparent_4bpp_normal(); - - partial_tile_run = end % 8; - if (partial_tile_run) { - partial_tile_left_map(transparent, 4bpp, normal); - } - } -} - -static void render_scanline_text_base_color16(u32 layer, u32 start, u32 end, void *scanline) -{ - render_scanline_extra_variables_base_color16(text); - u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); - u32 map_size = (bg_control >> 14) & 0x03; - u32 map_width = map_widths[map_size]; - u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; - u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; - u32 current_pixel; - u32 current_pixels; - u32 partial_tile_run = 0; - u32 partial_tile_offset; - u32 tile_run; - u32 i; - render_scanline_dest_color16 *dest_ptr = ((render_scanline_dest_color16 
*)scanline) + start; - - u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); - u16 *map_ptr, *second_ptr; - u8 *tile_ptr; - - end -= start; - - if ((map_size & 0x02) && (vertical_offset >= 256)) { - map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); - } else { - map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); - } - - if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; - map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); - second_ptr = map_base; - } else { - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + ((map_width / 8) * 32); - } - } else { - horizontal_offset %= 256; - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base; - } - - if (bg_control & 0x80) { - /* color depth: 8bpp - * combine: base - * alpha :color16 - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; - s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_8bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(base, 8bpp, color16); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(base, 8bpp, color16); - } - } - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(base, 8bpp, color16); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map_base_8bpp_color16(); - map_ptr = second_ptr; 
- end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map_base_8bpp_color16(); - - partial_tile_run = end % 8; - if (partial_tile_run) { - partial_tile_left_map(base, 8bpp, color16); - } - } else { - /* color depth: 4bpp - * combine: base - * alpha :color16 - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; - s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_4bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(base, 4bpp, color16); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(base, 4bpp, color16); - } - } - - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(base, 4bpp, color16); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map_base_4bpp_color16(); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map_base_4bpp_color16(); - - partial_tile_run = end % 8; - - if (partial_tile_run) { - partial_tile_left_map(base, 4bpp, color16); - } - } -} - -static void render_scanline_text_transparent_color16(u32 layer, u32 start, u32 end, void *scanline) -{ - render_scanline_extra_variables_transparent_color16(text); - u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); - u32 map_size = (bg_control >> 14) & 0x03; - u32 map_width = map_widths[map_size]; - u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; - u32 
vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; - u32 current_pixel; - u32 current_pixels; - u32 partial_tile_run = 0; - u32 partial_tile_offset; - u32 tile_run; - u32 i; - render_scanline_dest_color16 *dest_ptr = ((render_scanline_dest_color16 *)scanline) + start; - - u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); - u16 *map_ptr, *second_ptr; - u8 *tile_ptr; - - end -= start; - - if ((map_size & 0x02) && (vertical_offset >= 256)) { - map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); - } else { - map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); - } - - if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; - map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); - second_ptr = map_base; - } else { - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + ((map_width / 8) * 32); - } - } else { - horizontal_offset %= 256; - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base; - } - - if (bg_control & 0x80) { - /* color depth: 8bpp - * combine: transparent - * alpha :color16 - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; - s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_8bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(transparent, 8bpp, color16); - return; - } else { - end -= 
partial_tile_run; - partial_tile_right_map(transparent, 8bpp, color16); - } - } - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(transparent, 8bpp, color16); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map_transparent_8bpp_color16(); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map_transparent_8bpp_color16(); - - partial_tile_run = end % 8; - if (partial_tile_run) { - partial_tile_left_map(transparent, 8bpp, color16); - } - } else { - /* color depth: 4bpp - * combine: transparent - * alpha :color16 - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; - s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_4bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(transparent, 4bpp, color16); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(transparent, 4bpp, color16); - } - } - - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(transparent, 4bpp, color16); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map_transparent_4bpp_color16(); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map_transparent_4bpp_color16(); - - partial_tile_run = end % 8; - - if (partial_tile_run) { - partial_tile_left_map(transparent, 4bpp, color16); - } - } -} - 
-static void render_scanline_text_base_color32(u32 layer, u32 start, u32 end, void *scanline) -{ - render_scanline_extra_variables_base_color32(text); - u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); - u32 map_size = (bg_control >> 14) & 0x03; - u32 map_width = map_widths[map_size]; - u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; - u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; - u32 current_pixel; - u32 current_pixels; - u32 partial_tile_run = 0; - u32 partial_tile_offset; - u32 tile_run; - u32 i; - render_scanline_dest_color32 *dest_ptr = ((render_scanline_dest_color32 *)scanline) + start; - - u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); - u16 *map_ptr, *second_ptr; - u8 *tile_ptr; - - end -= start; - - if ((map_size & 0x02) && (vertical_offset >= 256)) { - map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); - } else { - map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); - } - - if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; - map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); - second_ptr = map_base; - } else { - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + ((map_width / 8) * 32); - } - } else { - horizontal_offset %= 256; - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base; - } - - if (bg_control & 0x80) { - /* color depth: 8bpp - * combine: base - * alpha :color32 - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; - s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_8bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % 
map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(base, 8bpp, color32); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(base, 8bpp, color32); - } - } - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(base, 8bpp, color32); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map(base, 8bpp, color32); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map(base, 8bpp, color32); - - partial_tile_run = end % 8; - if (partial_tile_run) { - partial_tile_left_map(base, 8bpp, color32); - } - } else { - /* color depth: 4bpp - * combine: base - * alpha :color32 - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; - s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_4bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(base, 4bpp, color32); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(base, 4bpp, color32); - } - } - - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(base, 4bpp, color32); - } - - tile_run = (pixel_run 
- partial_tile_run) / 8; - multiple_tile_map(base, 4bpp, color32); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map(base, 4bpp, color32); - - partial_tile_run = end % 8; - - if (partial_tile_run) { - partial_tile_left_map(base, 4bpp, color32); - } - } -} - -static void render_scanline_text_transparent_color32(u32 layer, u32 start, u32 end, void *scanline) -{ - render_scanline_extra_variables_transparent_color32(text); - u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); - u32 map_size = (bg_control >> 14) & 0x03; - u32 map_width = map_widths[map_size]; - u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; - u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; - u32 current_pixel; - u32 current_pixels; - u32 partial_tile_run = 0; - u32 partial_tile_offset; - u32 tile_run; - u32 i; - render_scanline_dest_color32 *dest_ptr = ((render_scanline_dest_color32 *)scanline) + start; - - u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); - u16 *map_ptr, *second_ptr; - u8 *tile_ptr; - - end -= start; - - if ((map_size & 0x02) && (vertical_offset >= 256)) { - map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); - } else { - map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); - } - - if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; - map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); - second_ptr = map_base; - } else { - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + ((map_width / 8) * 32); - } - } else { - horizontal_offset %= 256; - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base; - } - - if (bg_control & 0x80) { - /* color depth: 8bpp - * combine: transparent - * alpha :color32 - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = 
(vertical_offset % 8) * tile_width_8bpp; - s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_8bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(transparent, 8bpp, color32); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(transparent, 8bpp, color32); - } - } - - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(transparent, 8bpp, color32); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map(transparent, 8bpp, color32); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map(transparent, 8bpp, color32); - - partial_tile_run = end % 8; - - if (partial_tile_run) { - partial_tile_left_map(transparent, 8bpp, color32); - } - } else { - /* color depth: 4bpp - * combine: transparent - * alpha :color32 - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; - s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_4bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; 
- if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(transparent, 4bpp, color32); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(transparent, 4bpp, color32); - } - } - - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(transparent, 4bpp, color32); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map(transparent, 4bpp, color32); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map(transparent, 4bpp, color32); - - partial_tile_run = end % 8; - - if (partial_tile_run) { - partial_tile_left_map(transparent, 4bpp, color32); - } - } -} - -static void render_scanline_text_base_alpha(u32 layer, u32 start, u32 end, void *scanline) -{ - render_scanline_extra_variables_base_alpha(text); - u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); - u32 map_size = (bg_control >> 14) & 0x03; - u32 map_width = map_widths[map_size]; - u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; - u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; - u32 current_pixel; - u32 current_pixels; - u32 partial_tile_run = 0; - u32 partial_tile_offset; - u32 tile_run; - u32 i; - render_scanline_dest_alpha *dest_ptr = ((render_scanline_dest_alpha *)scanline) + start; - - u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); - u16 *map_ptr, *second_ptr; - u8 *tile_ptr; - - end -= start; - - if ((map_size & 0x02) && (vertical_offset >= 256)) { - map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); - } else { - map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); - } - - if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; - map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); - second_ptr = map_base; - } else { - 
map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + ((map_width / 8) * 32); - } - } else { - horizontal_offset %= 256; - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base; - } - - if (bg_control & 0x80) { - /* color depth: 8bpp - * combine: base - * alpha : alpha - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; - s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_8bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(base, 8bpp, alpha); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(base, 8bpp, alpha); - } - } - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(base, 8bpp, alpha); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map(base, 8bpp, alpha); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map(base, 8bpp, alpha); - - partial_tile_run = end % 8; - if (partial_tile_run) { - partial_tile_left_map(base, 8bpp, alpha); - } - } else { - /* color depth: 4bpp - * combine: base - * alpha : alpha - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; - s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_4bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + 
vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(base, 4bpp, alpha); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(base, 4bpp, alpha); - } - } - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(base, 4bpp, alpha); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map(base, 4bpp, alpha); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map(base, 4bpp, alpha); - - partial_tile_run = end % 8; - if (partial_tile_run) { - partial_tile_left_map(base, 4bpp, alpha); - } - } -} - -static void render_scanline_text_transparent_alpha(u32 layer, u32 start, u32 end, void *scanline) -{ - render_scanline_extra_variables_transparent_alpha(text); - u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); - u32 map_size = (bg_control >> 14) & 0x03; - u32 map_width = map_widths[map_size]; - u32 horizontal_offset = (read_ioreg(REG_ADDR_BG0HOFS + (layer * 2) * sizeof(u16)) + start) % 512; - u32 vertical_offset = (read_ioreg(REG_ADDR_VCOUNT) + read_ioreg(REG_ADDR_BG0VOFS + (layer * 2) * sizeof(u16))) % 512; - u32 current_pixel; - u32 current_pixels; - u32 partial_tile_run = 0; - u32 partial_tile_offset; - u32 tile_run; - u32 i; - render_scanline_dest_alpha *dest_ptr = ((render_scanline_dest_alpha *)scanline) + start; - - u16 *map_base = (u16 *)BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); - u16 *map_ptr, *second_ptr; - u8 *tile_ptr; - - end -= start; - - if ((map_size & 0x02) && (vertical_offset >= 256)) { - map_base += ((map_width / 8) * 32) + (((vertical_offset - 256) / 8) * 32); - } 
else { - map_base += (((vertical_offset % 256) / 8) * (map_width / 8)); - } - - if (map_size & 0x01) { - if (horizontal_offset >= 256) { - horizontal_offset -= 256; - map_ptr = map_base + ((map_width / 8) * 32) + (horizontal_offset / 8); - second_ptr = map_base; - } else { - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base + ((map_width / 8) * 32); - } - } else { - horizontal_offset %= 256; - map_ptr = map_base + (horizontal_offset / 8); - second_ptr = map_base; - } - - if (bg_control & 0x80) { - /* color depth: 8bpp - * combine: transparent - * alpha : alpha - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_8bpp; - s32 vertical_pixel_flip = ((tile_size_8bpp - tile_width_8bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_8bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(transparent, 8bpp, alpha); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(transparent, 8bpp, alpha); - } - } - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(transparent, 8bpp, alpha); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map(transparent, 8bpp, alpha); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map(transparent, 8bpp, alpha); - - partial_tile_run = end % 8; - if (partial_tile_run) { - partial_tile_left_map(transparent, 8bpp, alpha); - } - } else { - /* color depth: 4bpp - * combine: transparent - * alpha : 
alpha - */ - - /* Render a single scanline of text tiles */ - u32 vertical_pixel_offset = (vertical_offset % 8) * tile_width_4bpp; - s32 vertical_pixel_flip = ((tile_size_4bpp - tile_width_4bpp) - vertical_pixel_offset) - vertical_pixel_offset; - tile_extra_variables_4bpp(); - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03) + vertical_pixel_offset; - u32 pixel_run = map_width - (horizontal_offset % map_width); - u32 current_tile; - - map_base += ((vertical_offset % 256) / 8) * (map_width / 8); - partial_tile_offset = (horizontal_offset % 8); - - if (pixel_run >= end) { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - if (end < partial_tile_run) { - partial_tile_run = end; - partial_tile_mid_map(transparent, 4bpp, alpha); - return; - } else { - end -= partial_tile_run; - partial_tile_right_map(transparent, 4bpp, alpha); - } - } - } else { - if (partial_tile_offset) { - partial_tile_run = 8 - partial_tile_offset; - partial_tile_right_map(transparent, 4bpp, alpha); - } - - tile_run = (pixel_run - partial_tile_run) / 8; - multiple_tile_map(transparent, 4bpp, alpha); - map_ptr = second_ptr; - end -= pixel_run; - } - tile_run = end / 8; - multiple_tile_map(transparent, 4bpp, alpha); - - partial_tile_run = end % 8; - if (partial_tile_run) { - partial_tile_left_map(transparent, 4bpp, alpha); - } - } -} - -s32 affine_reference_x[2]; -s32 affine_reference_y[2]; - -static inline s32 signext28(u32 value) -{ - s32 ret = (s32)(value << 4); - return ret >> 4; -} - -void video_reload_counters() -{ - /* This happens every Vblank */ - affine_reference_x[0] = signext28(read_ioreg32(REG_ADDR_BG2X_L)); - affine_reference_y[0] = signext28(read_ioreg32(REG_ADDR_BG2Y_L)); - affine_reference_x[1] = signext28(read_ioreg32(REG_ADDR_BG3X_L)); - affine_reference_y[1] = signext28(read_ioreg32(REG_ADDR_BG3Y_L)); -} - -#define affine_render_bg_pixel_normal() current_pixel = PLTT[0] - -#define affine_render_bg_pixel_alpha() current_pixel = bg_combine - -#define 
affine_render_bg_pixel_color16() affine_render_bg_pixel_alpha() - -#define affine_render_bg_pixel_color32() affine_render_bg_pixel_alpha() - -#define affine_render_bg_pixel_base(alpha_op) affine_render_bg_pixel_##alpha_op() - -#define affine_render_bg_pixel_transparent(alpha_op) - -#define affine_render_bg_pixel_copy(alpha_op) - -#define affine_render_bg_base(alpha_op) dest_ptr[0] = current_pixel - -#define affine_render_bg_transparent(alpha_op) - -#define affine_render_bg_copy(alpha_op) - -#define affine_render_bg_remainder_base(alpha_op) \ - affine_render_bg_pixel_##alpha_op(); \ - for (; i < end; i++) { \ - affine_render_bg_base(alpha_op); \ - advance_dest_ptr_base(1); \ - } - -#define affine_render_bg_remainder_transparent(alpha_op) - -#define affine_render_bg_remainder_copy(alpha_op) - -#define affine_render_next(combine_op) \ - source_x += dx; \ - source_y += dy; \ - advance_dest_ptr_##combine_op(1) - -#define affine_render_scale_offset() \ - tile_base += ((pixel_y % 8) * 8); \ - map_base += (pixel_y / 8) << map_pitch - -#define affine_render_scale_pixel(combine_op, alpha_op) \ - map_offset = (pixel_x / 8); \ - if (map_offset != last_map_offset) { \ - tile_ptr = tile_base + (map_base[map_offset] * 64); \ - last_map_offset = map_offset; \ - } \ - tile_ptr = tile_base + (map_base[(pixel_x / 8)] * 64); \ - current_pixel = tile_ptr[(pixel_x % 8)]; \ - tile_8bpp_draw_##combine_op(0, none, 0, alpha_op); \ - affine_render_next(combine_op) - -#define affine_render_scale(combine_op, alpha_op) \ - { \ - pixel_y = source_y >> 8; \ - u32 i = 0; \ - affine_render_bg_pixel_##combine_op(alpha_op); \ - if ((u32)pixel_y < (u32)width_height) { \ - affine_render_scale_offset(); \ - for (; i < end; i++) { \ - pixel_x = source_x >> 8; \ - \ - if ((u32)pixel_x < (u32)width_height) { \ - break; \ - } \ - \ - affine_render_bg_##combine_op(alpha_op); \ - affine_render_next(combine_op); \ - } \ - \ - for (; i < end; i++) { \ - pixel_x = source_x >> 8; \ - \ - if ((u32)pixel_x >= 
(u32)width_height) \ - break; \ - \ - affine_render_scale_pixel(combine_op, alpha_op); \ - } \ - } \ - affine_render_bg_remainder_##combine_op(alpha_op); \ - } - -#define affine_render_scale_wrap(combine_op, alpha_op) \ - { \ - u32 wrap_mask = width_height - 1; \ - pixel_y = (source_y >> 8) & wrap_mask; \ - if ((u32)pixel_y < (u32)width_height) { \ - affine_render_scale_offset(); \ - for (i = 0; i < end; i++) { \ - pixel_x = (source_x >> 8) & wrap_mask; \ - affine_render_scale_pixel(combine_op, alpha_op); \ - } \ - } \ - } - -#define affine_render_rotate_pixel(combine_op, alpha_op) \ - map_offset = (pixel_x / 8) + ((pixel_y / 8) << map_pitch); \ - if (map_offset != last_map_offset) { \ - tile_ptr = tile_base + (map_base[map_offset] * 64); \ - last_map_offset = map_offset; \ - } \ - \ - current_pixel = tile_ptr[(pixel_x % 8) + ((pixel_y % 8) * 8)]; \ - tile_8bpp_draw_##combine_op(0, none, 0, alpha_op); \ - affine_render_next(combine_op) - -#define affine_render_rotate(combine_op, alpha_op) \ - { \ - affine_render_bg_pixel_##combine_op(alpha_op); \ - for (i = 0; i < end; i++) { \ - pixel_x = source_x >> 8; \ - pixel_y = source_y >> 8; \ - \ - if (((u32)pixel_x < (u32)width_height) && ((u32)pixel_y < (u32)width_height)) { \ - break; \ - } \ - affine_render_bg_##combine_op(alpha_op); \ - affine_render_next(combine_op); \ - } \ - \ - for (; i < end; i++) { \ - pixel_x = source_x >> 8; \ - pixel_y = source_y >> 8; \ - \ - if (((u32)pixel_x >= (u32)width_height) || ((u32)pixel_y >= (u32)width_height)) { \ - affine_render_bg_remainder_##combine_op(alpha_op); \ - break; \ - } \ - \ - affine_render_rotate_pixel(combine_op, alpha_op); \ - } \ - } - -#define affine_render_rotate_wrap(combine_op, alpha_op) \ - { \ - u32 wrap_mask = width_height - 1; \ - for (i = 0; i < end; i++) { \ - pixel_x = (source_x >> 8) & wrap_mask; \ - pixel_y = (source_y >> 8) & wrap_mask; \ - \ - affine_render_rotate_pixel(combine_op, alpha_op); \ - } \ - } - -// Build affine background renderers. 
- -#define render_scanline_affine_builder(combine_op, alpha_op) \ - void render_scanline_affine_##combine_op##_##alpha_op(u32 layer, u32 start, u32 end, void *scanline) \ - { \ - render_scanline_extra_variables_##combine_op##_##alpha_op(affine); \ - u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); \ - u32 current_pixel; \ - s32 source_x, source_y; \ - u32 pixel_x, pixel_y; \ - u32 layer_offset = (layer - 2) * 8; \ - s32 dx, dy; \ - u32 map_size = (bg_control >> 14) & 0x03; \ - u32 width_height = 1 << (7 + map_size); \ - u32 map_pitch = map_size + 4; \ - u8 *map_base = BG_SCREEN_ADDR((bg_control & BGCNT_SCREENBASE_MASK) >> 8); \ - u8 *tile_base = BG_CHAR_ADDR((bg_control >> 2) & 0x03); \ - u8 *tile_ptr = NULL; \ - u32 map_offset, last_map_offset = (u32)-1; \ - u32 i; \ - render_scanline_dest_##alpha_op *dest_ptr = ((render_scanline_dest_##alpha_op *)scanline) + start; \ - \ - dx = (s16)read_ioreg(REG_ADDR_BG2PA + (layer_offset * sizeof(u16))); \ - dy = (s16)read_ioreg(REG_ADDR_BG2PC + (layer_offset * sizeof(u16))); \ - source_x = affine_reference_x[layer - 2] + (start * dx); \ - source_y = affine_reference_y[layer - 2] + (start * dy); \ - \ - end -= start; \ - \ - switch (((bg_control >> 12) & 0x02) | (dy != 0)) { \ - case 0x00: \ - affine_render_scale(combine_op, alpha_op); \ - break; \ - \ - case 0x01: \ - affine_render_rotate(combine_op, alpha_op); \ - break; \ - \ - case 0x02: \ - affine_render_scale_wrap(combine_op, alpha_op); \ - break; \ - \ - case 0x03: \ - affine_render_rotate_wrap(combine_op, alpha_op); \ - break; \ - } \ - } - -render_scanline_affine_builder(base, normal); -render_scanline_affine_builder(transparent, normal); -render_scanline_affine_builder(base, color16); -render_scanline_affine_builder(transparent, color16); -render_scanline_affine_builder(base, color32); -render_scanline_affine_builder(transparent, color32); -render_scanline_affine_builder(base, alpha); -render_scanline_affine_builder(transparent, alpha); - -#define 
bitmap_render_pixel_mode3(alpha_op) \ - current_pixel = convert_palette(current_pixel); \ - *dest_ptr = current_pixel - -#define bitmap_render_pixel_mode4(alpha_op) tile_expand_base_##alpha_op##_mode4(0) - -#define bitmap_render_pixel_mode5(alpha_op) bitmap_render_pixel_mode3(alpha_op) - -#define bitmap_render_scale(type, alpha_op, width, height) \ - pixel_y = (source_y >> 8); \ - if ((u32)pixel_y < (u32)height) { \ - pixel_x = (source_x >> 8); \ - src_ptr += (pixel_y * width); \ - if (dx == 0x100) { \ - if (pixel_x < 0) { \ - end += pixel_x; \ - dest_ptr -= pixel_x; \ - pixel_x = 0; \ - } else if (pixel_x > 0) \ - src_ptr += pixel_x; \ - \ - if ((pixel_x + end) >= width) \ - end = (width - pixel_x); \ - \ - for (i = 0; (s32)i < (s32)end; i++) { \ - current_pixel = srcread_##type(*src_ptr); \ - bitmap_render_pixel_##type(alpha_op); \ - src_ptr++; \ - dest_ptr++; \ - } \ - } else { \ - if ((u32)(source_y >> 8) < (u32)height) { \ - for (i = 0; i < end; i++) { \ - pixel_x = (source_x >> 8); \ - \ - if ((u32)pixel_x < (u32)width) \ - break; \ - \ - source_x += dx; \ - dest_ptr++; \ - } \ - \ - for (; i < end; i++) { \ - pixel_x = (source_x >> 8); \ - \ - if ((u32)pixel_x >= (u32)width) \ - break; \ - \ - current_pixel = srcread_##type(src_ptr[pixel_x]); \ - bitmap_render_pixel_##type(alpha_op); \ - \ - source_x += dx; \ - dest_ptr++; \ - } \ - } \ - } \ - } - -#define bitmap_render_rotate(type, alpha_op, width, height) \ - for (i = 0; i < end; i++) { \ - pixel_x = source_x >> 8; \ - pixel_y = source_y >> 8; \ - \ - if (((u32)pixel_x < (u32)width) && ((u32)pixel_y < (u32)height)) \ - break; \ - \ - source_x += dx; \ - source_y += dy; \ - dest_ptr++; \ - } \ - \ - for (; i < end; i++) { \ - pixel_x = (source_x >> 8); \ - pixel_y = (source_y >> 8); \ - \ - if (((u32)pixel_x >= (u32)width) || ((u32)pixel_y >= (u32)height)) \ - break; \ - \ - current_pixel = srcread_##type(src_ptr[pixel_x + (pixel_y * width)]); \ - bitmap_render_pixel_##type(alpha_op); \ - \ - source_x += 
dx; \ - source_y += dy; \ - dest_ptr++; \ - } - -#define render_scanline_vram_setup_mode3() u16 *src_ptr = (u16 *)VRAM - -#define render_scanline_vram_setup_mode5() \ - u16 *src_ptr = (u16 *)VRAM; \ - if (read_ioreg(REG_ADDR_DISPCNT) & 0x10) \ - src_ptr = (u16 *)(VRAM + 0xA000); - -#define render_scanline_vram_setup_mode4() \ - u16 *palette = PLTT; \ - u8 *src_ptr = VRAM; \ - if (read_ioreg(REG_ADDR_DISPCNT) & 0x10) \ - src_ptr = VRAM + 0xA000; - -#define srcread_mode3(v) eswap16(v) -#define srcread_mode5(v) eswap16(v) -#define srcread_mode4(v) (v) - -// Build bitmap scanline rendering functions. - -#define render_scanline_bitmap_builder(type, alpha_op, width, height) \ - static void render_scanline_bitmap_##type##_##alpha_op(u32 start, u32 end, void *scanline) \ - { \ - u32 current_pixel; \ - s32 source_x, source_y; \ - s32 pixel_x, pixel_y; \ - \ - s32 dx = (s16)read_ioreg(REG_ADDR_BG2PA); \ - s32 dy = (s16)read_ioreg(REG_ADDR_BG2PC); \ - \ - u32 i; \ - \ - render_scanline_dest_##alpha_op *dest_ptr = ((render_scanline_dest_##alpha_op *)scanline) + start; \ - render_scanline_vram_setup_##type(); \ - \ - end -= start; \ - \ - source_x = affine_reference_x[0] + (start * dx); \ - source_y = affine_reference_y[0] + (start * dy); \ - \ - if (dy == 0) { \ - bitmap_render_scale(type, alpha_op, width, height); \ - } else { \ - bitmap_render_rotate(type, alpha_op, width, height); \ - } \ - } - -render_scanline_bitmap_builder(mode3, normal, DISPLAY_WIDTH, DISPLAY_HEIGHT); -render_scanline_bitmap_builder(mode4, normal, DISPLAY_WIDTH, DISPLAY_WIDTH); -render_scanline_bitmap_builder(mode5, normal, 160, 128); - -// Fill in the renderers for a layer based on the mode type, - -#define tile_layer_render_functions(type) \ - { \ - render_scanline_##type##_base_normal, render_scanline_##type##_transparent_normal, render_scanline_##type##_base_alpha, \ - render_scanline_##type##_transparent_alpha, render_scanline_##type##_base_color16, \ - render_scanline_##type##_transparent_color16, 
render_scanline_##type##_base_color32, \ - render_scanline_##type##_transparent_color32 \ - } - -// Use if a layer is unsupported for that mode. - -#define tile_layer_render_null() \ - { \ - NULL, NULL, NULL, NULL \ - } - -#define bitmap_layer_render_functions(type) \ - { \ - render_scanline_bitmap_##type##_normal \ - } - -// Structs containing functions to render the layers for each mode, for -// each render type. -static const tile_layer_render_struct tile_mode_renderers[3][4] - = { { tile_layer_render_functions(text), tile_layer_render_functions(text), tile_layer_render_functions(text), - tile_layer_render_functions(text) }, - { tile_layer_render_functions(text), tile_layer_render_functions(text), tile_layer_render_functions(affine), - tile_layer_render_functions(text) }, - { tile_layer_render_functions(text), tile_layer_render_functions(text), tile_layer_render_functions(affine), - tile_layer_render_functions(affine) } }; - -static const bitmap_layer_render_struct bitmap_mode_renderers[3] - = { bitmap_layer_render_functions(mode3), bitmap_layer_render_functions(mode4), bitmap_layer_render_functions(mode5) }; - -#define render_scanline_layer_functions_tile() const tile_layer_render_struct *layer_renderers = tile_mode_renderers[dispcnt & 0x07] - -#define render_scanline_layer_functions_bitmap() \ - const bitmap_layer_render_struct *layer_renderers = bitmap_mode_renderers + ((dispcnt & 0x07) - 3) - -// Adjust a flipped obj's starting position - -#define obj_tile_offset_noflip(color_depth) - -#define obj_tile_offset_flip(color_depth) +(tile_size_##color_depth * ((obj_width - 8) / 8)) - -// Adjust the obj's starting point if it goes too far off the left edge of -// the screen. 
- -#define obj_tile_right_offset_noflip(color_depth) tile_ptr += (partial_tile_offset / 8) * tile_size_##color_depth - -#define obj_tile_right_offset_flip(color_depth) tile_ptr -= (partial_tile_offset / 8) * tile_size_##color_depth - -// Get the current row offset into an obj in 1D map space - -#define obj_tile_offset_1D(color_depth, flip_op) \ - tile_ptr = tile_base + (oam_data->split.tileNum * 32) + ((vertical_offset / 8) * (obj_width / 8) * tile_size_##color_depth) \ - + ((vertical_offset % 8) * tile_width_##color_depth) obj_tile_offset_##flip_op(color_depth) - -// Get the current row offset into an obj in 2D map space - -#define obj_tile_offset_2D(color_depth, flip_op) \ - tile_ptr = tile_base + (oam_data->split.tileNum * 32) + ((vertical_offset / 8) * 1024) \ - + ((vertical_offset % 8) * tile_width_##color_depth) obj_tile_offset_##flip_op(color_depth) - -// Get the palette for 4bpp obj. - -#define obj_get_palette_4bpp() current_palette = oam_data->split.paletteNum << 4 - -#define obj_get_palette_8bpp() - -// Render the current row of an obj. 
- -#define obj_render(combine_op, color_depth, alpha_op, map_space, flip_op) \ - { \ - obj_get_palette_##color_depth(); \ - obj_tile_offset_##map_space(color_depth, flip_op); \ - \ - if (obj_x < (s32)start) { \ - dest_ptr = scanline + start; \ - pixel_run = obj_width - (start - obj_x); \ - if ((s32)pixel_run > 0) { \ - if ((obj_x + obj_width) >= end) { \ - pixel_run = end - start; \ - partial_tile_offset = start - obj_x; \ - obj_tile_right_offset_##flip_op(color_depth); \ - partial_tile_offset %= 8; \ - \ - if (partial_tile_offset) { \ - partial_tile_run = 8 - partial_tile_offset; \ - if ((s32)pixel_run < (s32)partial_tile_run) { \ - if ((s32)pixel_run > 0) { \ - partial_tile_run = pixel_run; \ - partial_tile_mid_obj(combine_op, color_depth, alpha_op, flip_op); \ - } \ - continue; \ - } else { \ - pixel_run -= partial_tile_run; \ - partial_tile_right_obj(combine_op, color_depth, alpha_op, flip_op); \ - } \ - } \ - tile_run = pixel_run / 8; \ - multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op); \ - partial_tile_run = pixel_run % 8; \ - if (partial_tile_run) { \ - partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op); \ - } \ - } else { \ - partial_tile_offset = start - obj_x; \ - obj_tile_right_offset_##flip_op(color_depth); \ - partial_tile_offset %= 8; \ - if (partial_tile_offset) { \ - partial_tile_run = 8 - partial_tile_offset; \ - partial_tile_right_obj(combine_op, color_depth, alpha_op, flip_op); \ - } \ - tile_run = pixel_run / 8; \ - multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op); \ - } \ - } \ - } else \ - \ - if ((obj_x + obj_width) >= end) { \ - pixel_run = end - obj_x; \ - if ((s32)pixel_run > 0) { \ - dest_ptr = scanline + obj_x; \ - tile_run = pixel_run / 8; \ - multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op); \ - partial_tile_run = pixel_run % 8; \ - if (partial_tile_run) { \ - partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op); \ - } \ - } \ - } else { \ - dest_ptr = scanline + 
obj_x; \ - tile_run = obj_width / 8; \ - multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op); \ - } \ - } - -#define obj_scale_offset_1D(color_depth) \ - tile_ptr = tile_base + (oam_data->split.tileNum * 32) + ((vertical_offset / 8) * (max_x / 8) * tile_size_##color_depth) \ - + ((vertical_offset % 8) * tile_width_##color_depth) - -// Get the current row offset into an obj in 2D map space - -#define obj_scale_offset_2D(color_depth) \ - tile_ptr \ - = tile_base + (oam_data->split.tileNum * 32) + ((vertical_offset / 8) * 1024) + ((vertical_offset % 8) * tile_width_##color_depth) - -#define obj_render_scale_pixel_4bpp(combine_op, alpha_op) \ - current_pixel = tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)]; \ - if (tile_x & 0x01) \ - current_pixel >>= 4; \ - else \ - current_pixel &= 0x0F; \ - \ - tile_4bpp_draw_##combine_op(0, none, 0, alpha_op) - -#define obj_render_scale_pixel_8bpp(combine_op, alpha_op) \ - current_pixel = tile_ptr[tile_map_offset + (tile_x & 0x07)]; \ - tile_8bpp_draw_##combine_op(0, none, 0, alpha_op); - -#define obj_render_scale(combine_op, color_depth, alpha_op, map_space) \ - { \ - u32 vertical_offset; \ - source_y += (y_delta * dmy); \ - vertical_offset = (source_y >> 8); \ - if ((u32)vertical_offset < (u32)max_y) { \ - obj_scale_offset_##map_space(color_depth); \ - source_x += (y_delta * dmx) - (middle_x * dx); \ - \ - for (i = 0; i < obj_width; i++) { \ - tile_x = (source_x >> 8); \ - \ - if ((u32)tile_x < (u32)max_x) \ - break; \ - \ - source_x += dx; \ - advance_dest_ptr_##combine_op(1); \ - } \ - \ - for (; i < obj_width; i++) { \ - tile_x = (source_x >> 8); \ - \ - if ((u32)tile_x >= (u32)max_x) \ - break; \ - \ - tile_map_offset = (tile_x >> 3) * tile_size_##color_depth; \ - obj_render_scale_pixel_##color_depth(combine_op, alpha_op); \ - \ - source_x += dx; \ - advance_dest_ptr_##combine_op(1); \ - } \ - } \ - } - -#define obj_rotate_offset_1D(color_depth) obj_tile_pitch = (max_x / 8) * tile_size_##color_depth - -#define 
obj_rotate_offset_2D(color_depth) obj_tile_pitch = 1024 - -#define obj_render_rotate_pixel_4bpp(combine_op, alpha_op) \ - current_pixel = tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)]; \ - if (tile_x & 0x01) \ - current_pixel >>= 4; \ - else \ - current_pixel &= 0x0F; \ - \ - tile_4bpp_draw_##combine_op(0, none, 0, alpha_op) - -#define obj_render_rotate_pixel_8bpp(combine_op, alpha_op) \ - current_pixel = tile_ptr[tile_map_offset + (tile_x & 0x07) + ((tile_y & 0x07) * obj_pitch)]; \ - \ - tile_8bpp_draw_##combine_op(0, none, 0, alpha_op) - -#define obj_render_rotate(combine_op, color_depth, alpha_op, map_space) \ - { \ - tile_ptr = tile_base + (oam_data->split.tileNum * 32); \ - obj_rotate_offset_##map_space(color_depth); \ - \ - source_x += (y_delta * dmx) - (middle_x * dx); \ - source_y += (y_delta * dmy) - (middle_x * dy); \ - \ - for (i = 0; i < obj_width; i++) { \ - tile_x = (source_x >> 8); \ - tile_y = (source_y >> 8); \ - \ - if (((u32)tile_x < (u32)max_x) && ((u32)tile_y < (u32)max_y)) \ - break; \ - \ - source_x += dx; \ - source_y += dy; \ - advance_dest_ptr_##combine_op(1); \ - } \ - \ - for (; i < obj_width; i++) { \ - tile_x = (source_x >> 8); \ - tile_y = (source_y >> 8); \ - \ - if (((u32)tile_x >= (u32)max_x) || ((u32)tile_y >= (u32)max_y)) \ - break; \ - \ - tile_map_offset = ((tile_x >> 3) * tile_size_##color_depth) + ((tile_y >> 3) * obj_tile_pitch); \ - obj_render_rotate_pixel_##color_depth(combine_op, alpha_op); \ - \ - source_x += dx; \ - source_y += dy; \ - advance_dest_ptr_##combine_op(1); \ - } \ - } - -// Render the current row of an affine transformed OBJ. 
- -#define obj_render_affine(combine_op, color_depth, alpha_op, map_space) \ - { \ - u8 matrix_num = oam_data->split.matrixNum * 4; \ - OamData *oam1 = &((OamData *)OAM)[matrix_num]; \ - OamData *oam2 = &((OamData *)OAM)[matrix_num + 1]; \ - OamData *oam3 = &((OamData *)OAM)[matrix_num + 2]; \ - OamData *oam4 = &((OamData *)OAM)[matrix_num + 3]; \ - s32 dx = (s16)oam1->all.affineParam; \ - s32 dmx = (s16)oam2->all.affineParam; \ - s32 dy = (s16)oam3->all.affineParam; \ - s32 dmy = (s16)oam4->all.affineParam; \ - s32 source_x, source_y; \ - s32 tile_x, tile_y; \ - u32 tile_map_offset; \ - s32 middle_x; \ - s32 middle_y; \ - s32 max_x = obj_width; \ - s32 max_y = obj_height; \ - s32 y_delta; \ - u32 obj_pitch = tile_width_##color_depth; \ - u32 obj_tile_pitch; \ - \ - middle_x = (obj_width / 2); \ - middle_y = (obj_height / 2); \ - \ - source_x = (middle_x << 8); \ - source_y = (middle_y << 8); \ - \ - if ((oam_data->split.affineMode >> 1) & 1) { \ - obj_width *= 2; \ - obj_height *= 2; \ - middle_x *= 2; \ - middle_y *= 2; \ - } \ - \ - if ((s32)obj_x < (s32)start) { \ - u32 x_delta = start - obj_x; \ - middle_x -= x_delta; \ - obj_width -= x_delta; \ - obj_x = start; \ - \ - if ((s32)obj_width <= 0) \ - continue; \ - } \ - \ - if ((s32)(obj_x + obj_width) >= (s32)end) { \ - obj_width = end - obj_x; \ - \ - if ((s32)obj_width <= 0) \ - continue; \ - } \ - dest_ptr = scanline + obj_x; \ - \ - y_delta = vcount - (obj_y + middle_y); \ - \ - obj_get_palette_##color_depth(); \ - \ - if (dy == 0) { \ - obj_render_scale(combine_op, color_depth, alpha_op, map_space); \ - } else { \ - obj_render_rotate(combine_op, color_depth, alpha_op, map_space); \ - } \ - } - -static const u32 obj_width_table[] = { 8, 16, 32, 64, 16, 32, 32, 64, 8, 8, 16, 32 }; -static const u32 obj_height_table[] = { 8, 16, 32, 64, 8, 8, 16, 32, 16, 32, 32, 64 }; - -static const u8 obj_dim_table[3][4][2] = { { { 8, 8 }, { 16, 16 }, { 32, 32 }, { 64, 64 } }, - { { 16, 8 }, { 32, 8 }, { 32, 16 }, { 64, 32 
} }, - { { 8, 16 }, { 8, 32 }, { 16, 32 }, { 32, 64 } } }; - -static u8 obj_priority_list[5][DISPLAY_HEIGHT][128]; -static u8 obj_priority_count[5][DISPLAY_HEIGHT]; -static u8 obj_alpha_count[DISPLAY_HEIGHT]; - -// Build obj rendering functions - -#define render_scanline_obj_extra_variables_normal(bg_type) u16 *palette = PLTT + 256 - -#define render_scanline_obj_extra_variables_color() u32 pixel_combine = color_combine_mask(4) | (1 << 8) - -#define render_scanline_obj_extra_variables_alpha_obj(map_space) \ - render_scanline_obj_extra_variables_color(); \ - u32 dest; \ - if ((pixel_combine & 0x00000200) == 0) { \ - render_scanline_obj_color32_##map_space(priority, start, end, scanline); \ - return; \ - } - -#define render_scanline_obj_extra_variables_color16(map_space) render_scanline_obj_extra_variables_color() - -#define render_scanline_obj_extra_variables_color32(map_space) render_scanline_obj_extra_variables_color() - -#define render_scanline_obj_extra_variables_partial_alpha(map_space) \ - render_scanline_obj_extra_variables_color(); \ - u32 base_pixel_combine = pixel_combine; \ - u32 dest - -#define render_scanline_obj_extra_variables_copy(type) \ - u32 bldcnt = read_ioreg(REG_ADDR_BLDCNT); \ - u32 dispcnt = read_ioreg(REG_ADDR_DISPCNT); \ - u32 obj_enable = WIN_GET_LOWER(read_ioreg(REG_ADDR_WINOUT)); \ - render_scanline_layer_functions_##type(); \ - u32 copy_start, copy_end; \ - u16 copy_buffer[DISPLAY_WIDTH]; \ - u16 *copy_ptr - -#define render_scanline_obj_extra_variables_copy_tile(map_space) render_scanline_obj_extra_variables_copy(tile) - -#define render_scanline_obj_extra_variables_copy_bitmap(map_space) render_scanline_obj_extra_variables_copy(bitmap) - -#define render_scanline_obj_main(combine_op, alpha_op, map_space) \ - if (oam_data->split.affineMode & 1) { \ - if (oam_data->split.bpp & 1) { \ - obj_render_affine(combine_op, 8bpp, alpha_op, map_space); \ - } else { \ - obj_render_affine(combine_op, 4bpp, alpha_op, map_space); \ - } \ - } else { \ - 
vertical_offset = vcount - obj_y; \ - \ - if ((oam_data->split.matrixNum >> 4) & 1) \ - vertical_offset = obj_height - vertical_offset - 1; \ - \ - switch ((oam_data->split.bpp << 1) | ((oam_data->split.matrixNum >> 3) & 1)) { \ - case 0x0: \ - obj_render(combine_op, 4bpp, alpha_op, map_space, noflip); \ - break; \ - \ - case 0x1: \ - obj_render(combine_op, 4bpp, alpha_op, map_space, flip); \ - break; \ - \ - case 0x2: \ - obj_render(combine_op, 8bpp, alpha_op, map_space, noflip); \ - break; \ - \ - case 0x3: \ - obj_render(combine_op, 8bpp, alpha_op, map_space, flip); \ - break; \ - } \ - } - -#define render_scanline_obj_no_partial_alpha(combine_op, alpha_op, map_space) render_scanline_obj_main(combine_op, alpha_op, map_space) - -#define render_scanline_obj_partial_alpha(combine_op, alpha_op, map_space) \ - if (oam_data->split.objMode) { \ - pixel_combine = 0x00000300; \ - render_scanline_obj_main(combine_op, alpha_obj, map_space); \ - } else { \ - pixel_combine = base_pixel_combine; \ - render_scanline_obj_main(combine_op, color32, map_space); \ - } - -#define render_scanline_obj_prologue_transparent(alpha_op) - -#define render_scanline_obj_prologue_copy_body(type) \ - copy_start = obj_x; \ - copy_end = obj_x + obj_width; \ - if (oam_data->split.affineMode & 2) \ - copy_end += obj_width; \ - \ - if (copy_start < start) \ - copy_start = start; \ - if (copy_end > end) \ - copy_end = end; \ - \ - if ((copy_start < end) && (copy_end > start)) { \ - render_scanline_conditional_##type(copy_start, copy_end, copy_buffer, obj_enable, dispcnt, bldcnt, layer_renderers); \ - copy_ptr = copy_buffer + copy_start; \ - } else { \ - continue; \ - } - -#define render_scanline_obj_prologue_copy_tile() render_scanline_obj_prologue_copy_body(tile) - -#define render_scanline_obj_prologue_copy_bitmap() render_scanline_obj_prologue_copy_body(bitmap) - -#define render_scanline_obj_prologue_copy(alpha_op) render_scanline_obj_prologue_##alpha_op() - -#define 
render_scanline_obj_builder(combine_op, alpha_op, map_space, partial_alpha_op) \ - static void render_scanline_obj_##alpha_op##_##map_space(u32 priority, u32 start, u32 end, render_scanline_dest_##alpha_op *scanline) \ - { \ - render_scanline_obj_extra_variables_##alpha_op(map_space); \ - u32 obj_num, i; \ - s32 obj_x, obj_y; \ - u32 obj_size; \ - u32 obj_width, obj_height; \ - s32 vcount = read_ioreg(REG_ADDR_VCOUNT); \ - u32 tile_run; \ - u32 current_pixels; \ - u32 current_pixel; \ - u32 current_palette; \ - u32 vertical_offset; \ - u32 partial_tile_run, partial_tile_offset; \ - u32 pixel_run; \ - OamData *oam_data; \ - render_scanline_dest_##alpha_op *dest_ptr; \ - u8 *tile_base = VRAM + 0x10000; \ - u8 *tile_ptr; \ - u32 obj_count = obj_priority_count[priority][vcount]; \ - u8 *obj_list = obj_priority_list[priority][vcount]; \ - \ - for (obj_num = 0; obj_num < obj_count; obj_num++) { \ - oam_data = (OamData *)&OAM[obj_list[obj_num] * OAM_DATA_SIZE_AFFINE]; \ - obj_size = (oam_data->split.shape << 2) | oam_data->split.size; \ - \ - obj_x = oam_data->split.x; \ - obj_width = obj_width_table[obj_size]; \ - \ - render_scanline_obj_prologue_##combine_op(alpha_op); \ - \ - obj_y = oam_data->split.y; \ - \ - if (!EXTENDED_OAM) { \ - if (obj_x > DISPLAY_WIDTH) \ - obj_x -= 512; \ - if (obj_y > DISPLAY_HEIGHT) \ - obj_y -= 256; \ - } \ - \ - obj_height = obj_height_table[obj_size]; \ - render_scanline_obj_##partial_alpha_op(combine_op, alpha_op, map_space); \ - } \ - } - -render_scanline_obj_builder(transparent, normal, 1D, no_partial_alpha); -render_scanline_obj_builder(transparent, normal, 2D, no_partial_alpha); -render_scanline_obj_builder(transparent, color16, 1D, no_partial_alpha); -render_scanline_obj_builder(transparent, color16, 2D, no_partial_alpha); -render_scanline_obj_builder(transparent, color32, 1D, no_partial_alpha); -render_scanline_obj_builder(transparent, color32, 2D, no_partial_alpha); -render_scanline_obj_builder(transparent, alpha_obj, 1D, 
no_partial_alpha); -render_scanline_obj_builder(transparent, alpha_obj, 2D, no_partial_alpha); -render_scanline_obj_builder(transparent, partial_alpha, 1D, partial_alpha); -render_scanline_obj_builder(transparent, partial_alpha, 2D, partial_alpha); -render_scanline_obj_builder(copy, copy_tile, 1D, no_partial_alpha); -render_scanline_obj_builder(copy, copy_tile, 2D, no_partial_alpha); -render_scanline_obj_builder(copy, copy_bitmap, 1D, no_partial_alpha); -render_scanline_obj_builder(copy, copy_bitmap, 2D, no_partial_alpha); - -#define OBJ_MOD_NORMAL 0 -#define OBJ_MOD_SEMITRAN 1 -#define OBJ_MOD_WINDOW 2 -#define OBJ_MOD_INVALID 3 - -// Goes through the object list in the OAM (from #127 to #0) and adds objects -// into a sorted list by priority for the current row. -// Invisible objects are discarded. -static void order_obj(u32 video_mode) -{ - s32 obj_num; - u32 row; - - memset(obj_priority_count, 0, sizeof(obj_priority_count)); - memset(obj_alpha_count, 0, sizeof(obj_alpha_count)); - - for (obj_num = 127; obj_num >= 0; obj_num--) { - OamData *oam_data = (OamData *)&OAM[obj_num * OAM_DATA_SIZE_AFFINE]; - // Bit 9 disables regular sprites. Used as double bit for affine ones. 
- bool visible = oam_data->split.affineMode != 2; - if (visible) { - u16 obj_shape = oam_data->split.shape; - u32 obj_mode = oam_data->split.objMode; - - // Prohibited shape and mode - bool invalid = (obj_shape == 0x3) || (obj_mode == OBJ_MOD_INVALID); - if (!invalid) { - u32 obj_priority = oam_data->split.priority; - - if (((video_mode < 3) || (oam_data->split.tileNum >= 512))) { - // Calculate object size (from size and shape attr bits) - u16 obj_size = oam_data->split.size; - s32 obj_height = obj_dim_table[obj_shape][obj_size][1]; - s32 obj_width = obj_dim_table[obj_shape][obj_size][0]; - s32 obj_y = oam_data->split.y; - -#if !EXTENDED_OAM - if (obj_y > DISPLAY_HEIGHT) - obj_y -= 512; -#endif - - // Double size for affine sprites with double bit set - if ((oam_data->split.affineMode >> 1) & 1) { - obj_height *= 2; - obj_width *= 2; - } - - if (((obj_y + obj_height) > 0) && (obj_y < DISPLAY_HEIGHT)) { - s32 obj_x = oam_data->split.x; -#if !EXTENDED_OAM - if (obj_x > DISPLAY_WIDTH) - obj_x -= 512; -#endif - - if (((obj_x + obj_width) > 0) && (obj_x < DISPLAY_WIDTH)) { - // Clip Y coord and height to the 0..159 interval - u32 starty = MAX(obj_y, 0); - u32 endy = MIN(obj_y + obj_height, DISPLAY_HEIGHT); - - switch (obj_mode) { - case OBJ_MOD_SEMITRAN: - for (row = starty; row < endy; row++) { - u32 cur_cnt = obj_priority_count[obj_priority][row]; - obj_priority_list[obj_priority][row][cur_cnt] = obj_num; - obj_priority_count[obj_priority][row] = cur_cnt + 1; - // Mark the row as having semi-transparent objects - obj_alpha_count[row] = 1; - } - break; - case OBJ_MOD_WINDOW: - obj_priority = 4; - /* fallthrough */ - case OBJ_MOD_NORMAL: - // Add the object to the list. 
- for (row = starty; row < endy; row++) { - u32 cur_cnt = obj_priority_count[obj_priority][row]; - obj_priority_list[obj_priority][row][cur_cnt] = obj_num; - obj_priority_count[obj_priority][row] = cur_cnt + 1; - } - break; - }; - } - } - } - } - } - } -} - -u32 layer_order[16]; -u32 layer_count; - -// Sorts active BG/OBJ layers and generates an ordered list of layers. -// Things are drawn back to front, so lowest priority goes first. -static void order_layers(u32 layer_flags, u32 vcnt) -{ - bool obj_enabled = (layer_flags & 0x10); - s32 priority; - - layer_count = 0; - - for (priority = 3; priority >= 0; priority--) { - bool anyobj = obj_priority_count[priority][vcnt] > 0; - s32 lnum; - - for (lnum = 3; lnum >= 0; lnum--) { - if (((layer_flags >> lnum) & 1) && ((read_ioreg(REG_ADDR_BGxCNT(lnum)) & 0x03) == priority)) { - layer_order[layer_count++] = lnum; - } - } - - if (obj_enabled && anyobj) - layer_order[layer_count++] = priority | 0x04; - } -} - -#define fill_line(_start, _end) \ - u32 i; \ - \ - for (i = _start; i < _end; i++) \ - dest_ptr[i] = color; - -#define fill_line_color_normal() color = PLTT[color] - -#define fill_line_color_alpha() - -#define fill_line_color_color16() - -#define fill_line_color_color32() - -#define fill_line_builder(type) \ - static void fill_line_##type(u16 color, render_scanline_dest_##type *dest_ptr, u32 start, u32 end) \ - { \ - fill_line_color_##type(); \ - fill_line(start, end); \ - } - -fill_line_builder(normal); -fill_line_builder(alpha); -fill_line_builder(color16); -fill_line_builder(color32); - -// Blending is performed by separating an RGB value into 0G0R0B (32 bit) -// Since blending factors are at most 16, mult/add operations do not overflow -// to the neighbouring color and can be performed much faster than separatedly - -// Here follow the mask value to separate/expand the color to 32 bit, -// the mask to detect overflows in the blend operation and - -#define BLND_MSK (SATR_MSK | SATG_MSK | SATB_MSK) - -#define 
OVFG_MSK 0x04000000 -#define OVFR_MSK 0x00008000 -#define OVFB_MSK 0x00000020 -#define SATG_MSK 0x03E00000 -#define SATR_MSK 0x00007C00 -#define SATB_MSK 0x0000001F - -// Alpha blend two pixels (pixel_top and pixel_bottom). - -#define blend_pixel() \ - pixel_bottom = PLTT[(pixel_pair >> 16) & 0x1FF]; \ - pixel_bottom = (pixel_bottom | (pixel_bottom << 16)) & BLND_MSK; \ - pixel_top = ((pixel_top * blend_a) + (pixel_bottom * blend_b)) >> 4 - -// Alpha blend two pixels, allowing for saturation (individual channels > 31). -// The operation is optimized towards saturation not occuring. - -#define blend_saturate_pixel() \ - pixel_bottom = PLTT[(pixel_pair >> 16) & 0x1FF]; \ - pixel_bottom = (pixel_bottom | (pixel_bottom << 16)) & BLND_MSK; \ - pixel_top = ((pixel_top * blend_a) + (pixel_bottom * blend_b)) >> 4; \ - if (pixel_top & (OVFR_MSK | OVFG_MSK | OVFB_MSK)) { \ - if (pixel_top & OVFG_MSK) \ - pixel_top |= SATG_MSK; \ - \ - if (pixel_top & OVFR_MSK) \ - pixel_top |= SATR_MSK; \ - \ - if (pixel_top & OVFB_MSK) \ - pixel_top |= SATB_MSK; \ - } - -#define brighten_pixel() pixel_top = upper + ((pixel_top * blend) >> 4); - -#define darken_pixel() pixel_top = (pixel_top * blend) >> 4; - -#define effect_condition_alpha ((pixel_pair & 0x04000200) == 0x04000200) - -#define effect_condition_fade(pixel_source) ((pixel_source & 0x00000200) == 0x00000200) - -#define expand_pixel_no_dest(expand_type, pixel_source) \ - pixel_top = (pixel_top | (pixel_top << 16)) & BLND_MSK; \ - expand_type##_pixel(); \ - pixel_top &= BLND_MSK; \ - pixel_top = (pixel_top >> 16) | pixel_top - -#define expand_pixel(expand_type, pixel_source) \ - pixel_top = PLTT[pixel_source & 0x1FF]; \ - expand_pixel_no_dest(expand_type, pixel_source); \ - *screen_dest_ptr = pixel_top - -#define expand_loop(expand_type, effect_condition, pixel_source) \ - screen_src_ptr += start; \ - screen_dest_ptr += start; \ - \ - end -= start; \ - \ - for (i = 0; i < end; i++) { \ - pixel_source = *screen_src_ptr; \ - if 
(effect_condition) { \ - expand_pixel(expand_type, pixel_source); \ - } else { \ - *screen_dest_ptr = PLTT[pixel_source & 0x1FF]; \ - } \ - \ - screen_src_ptr++; \ - screen_dest_ptr++; \ - } - -#define expand_loop_partial_alpha(alpha_expand, expand_type) \ - screen_src_ptr += start; \ - screen_dest_ptr += start; \ - \ - end -= start; \ - \ - for (i = 0; i < end; i++) { \ - pixel_pair = *screen_src_ptr; \ - if (effect_condition_fade(pixel_pair)) { \ - if (effect_condition_alpha) { \ - expand_pixel(alpha_expand, pixel_pair); \ - } else { \ - expand_pixel(expand_type, pixel_pair); \ - } \ - } else { \ - *screen_dest_ptr = PLTT[pixel_pair & 0x1FF]; \ - } \ - \ - screen_src_ptr++; \ - screen_dest_ptr++; \ - } - -#define expand_partial_alpha(expand_type) \ - if ((blend_a + blend_b) > 16) { \ - expand_loop_partial_alpha(blend_saturate, expand_type); \ - } else { \ - expand_loop_partial_alpha(blend, expand_type); \ - } - -// Blend top two pixels of scanline with each other. - -#define expand_normal(screen_ptr, start, end) - -void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end); - -#ifndef ARM_ARCH_BLENDING_OPTS - -void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end) -{ - u32 pixel_pair; - u32 pixel_top, pixel_bottom; - u32 bldalpha = read_ioreg(REG_ADDR_BLDALPHA); - u32 blend_a = bldalpha & 0x1F; - u32 blend_b = (bldalpha >> 8) & 0x1F; - u32 i; - - if (blend_a > 16) - blend_a = 16; - - if (blend_b > 16) - blend_b = 16; - - // The individual colors can saturate over 31, this should be taken - // care of in an alternate pass as it incurs a huge additional speedhit. - if ((blend_a + blend_b) > 16) { - expand_loop(blend_saturate, effect_condition_alpha, pixel_pair); - } else { - expand_loop(blend, effect_condition_alpha, pixel_pair); - } -} - -#endif - -// Blend scanline with white. 
- -static void expand_darken(u16 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end) -{ - u32 pixel_top; - s32 blend = 16 - (read_ioreg(REG_ADDR_BLDY) & 0x1F); - u32 i; - - if (blend < 0) - blend = 0; - - expand_loop(darken, effect_condition_fade(pixel_top), pixel_top); -} - -// Blend scanline with black. - -static void expand_brighten(u16 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end) -{ - u32 pixel_top; - u32 blend = read_ioreg(REG_ADDR_BLDY) & 0x1F; - u32 upper; - u32 i; - - if (blend > 16) - blend = 16; - - upper = ((BLND_MSK * blend) >> 4) & BLND_MSK; - blend = 16 - blend; - - expand_loop(brighten, effect_condition_fade(pixel_top), pixel_top); -} - -// Expand scanline such that if both top and bottom pass it's alpha, -// if only top passes it's as specified, and if neither pass it's normal. - -static void expand_darken_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end) -{ - s32 blend = 16 - (read_ioreg(REG_ADDR_BLDY) & 0x1F); - u32 pixel_pair; - u32 pixel_top, pixel_bottom; - u32 bldalpha = read_ioreg(REG_ADDR_BLDALPHA); - u32 blend_a = bldalpha & 0x1F; - u32 blend_b = (bldalpha >> 8) & 0x1F; - u32 i; - - if (blend < 0) - blend = 0; - - if (blend_a > 16) - blend_a = 16; - - if (blend_b > 16) - blend_b = 16; - - expand_partial_alpha(darken); -} - -static void expand_brighten_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr, u32 start, u32 end) -{ - s32 blend = read_ioreg(REG_ADDR_BLDY) & 0x1F; - u32 pixel_pair; - u32 pixel_top, pixel_bottom; - u32 bldalpha = read_ioreg(REG_ADDR_BLDALPHA); - u32 blend_a = bldalpha & 0x1F; - u32 blend_b = (bldalpha >> 8) & 0x1F; - u32 upper; - u32 i; - - if (blend > 16) - blend = 16; - - upper = ((BLND_MSK * blend) >> 4) & BLND_MSK; - blend = 16 - blend; - - if (blend_a > 16) - blend_a = 16; - - if (blend_b > 16) - blend_b = 16; - - expand_partial_alpha(brighten); -} - -// Render an OBJ layer from start to end, depending on the type (1D or 2D) -// stored in dispcnt. 
- -#define render_obj_layer(type, dest, _start, _end) \ - current_layer &= ~0x04; \ - if (dispcnt & 0x40) \ - render_scanline_obj_##type##_1D(current_layer, _start, _end, dest); \ - else \ - render_scanline_obj_##type##_2D(current_layer, _start, _end, dest) - -// Render a target all the way with the background color as taken from the -// palette. - -#define fill_line_bg(type, dest, _start, _end) fill_line_##type(0, dest, _start, _end) - -// Render all layers as they appear in the layer order. - -#define render_layers(tile_alpha, obj_alpha, dest) \ - { \ - current_layer = layer_order[0]; \ - if (current_layer & 0x04) { \ - /* If the first one is OBJ render the background then render it. */ \ - fill_line_bg(tile_alpha, dest, 0, DISPLAY_WIDTH); \ - render_obj_layer(obj_alpha, dest, 0, DISPLAY_WIDTH); \ - } else { \ - /* Otherwise render a base layer. */ \ - layer_renderers[current_layer].tile_alpha##_render_base(current_layer, 0, DISPLAY_WIDTH, dest); \ - } \ - \ - /* Render the rest of the layers. */ \ - for (layer_order_pos = 1; layer_order_pos < layer_count; layer_order_pos++) { \ - current_layer = layer_order[layer_order_pos]; \ - if (current_layer & 0x04) { \ - render_obj_layer(obj_alpha, dest, 0, DISPLAY_WIDTH); \ - } else { \ - layer_renderers[current_layer].tile_alpha##_render_transparent(current_layer, 0, DISPLAY_WIDTH, dest); \ - } \ - } \ - } - -#define render_condition_alpha \ - (((read_ioreg(REG_ADDR_BLDALPHA) & 0x1F1F) != 0x001F) && ((read_ioreg(REG_ADDR_BLDCNT) & 0x3F) != 0) \ - && ((read_ioreg(REG_ADDR_BLDCNT) & 0x3F00) != 0)) - -#define render_condition_fade (((read_ioreg(REG_ADDR_BLDY) & 0x1F) != 0) && ((read_ioreg(REG_ADDR_BLDCNT) & 0x3F) != 0)) - -#define render_layers_color_effect(renderer, layer_condition, alpha_condition, fade_condition, _start, _end) \ - { \ - if (layer_condition) { \ - if (obj_alpha_count[read_ioreg(REG_ADDR_VCOUNT)]) { \ - /* Render based on special effects mode. 
*/ \ - u32 screen_buffer[DISPLAY_WIDTH]; \ - switch ((bldcnt >> 6) & 0x03) { \ - /* Alpha blend */ \ - case 0x01: { \ - if (alpha_condition) { \ - renderer(alpha, alpha_obj, screen_buffer); \ - expand_blend(screen_buffer, scanline, _start, _end); \ - return; \ - } \ - break; \ - } \ - \ - /* Fade to white */ \ - case 0x02: { \ - if (fade_condition) { \ - renderer(color32, partial_alpha, screen_buffer); \ - expand_brighten_partial_alpha(screen_buffer, scanline, _start, _end); \ - return; \ - } \ - break; \ - } \ - \ - /* Fade to black */ \ - case 0x03: { \ - if (fade_condition) { \ - renderer(color32, partial_alpha, screen_buffer); \ - expand_darken_partial_alpha(screen_buffer, scanline, _start, _end); \ - return; \ - } \ - break; \ - } \ - } \ - \ - renderer(color32, partial_alpha, screen_buffer); \ - expand_blend(screen_buffer, scanline, _start, _end); \ - } else { \ - /* Render based on special effects mode. */ \ - switch ((bldcnt >> 6) & 0x03) { \ - /* Alpha blend */ \ - case 0x01: { \ - if (alpha_condition) { \ - u32 screen_buffer[DISPLAY_WIDTH]; \ - renderer(alpha, alpha_obj, screen_buffer); \ - expand_blend(screen_buffer, scanline, _start, _end); \ - return; \ - } \ - break; \ - } \ - \ - /* Fade to white */ \ - case 0x02: { \ - if (fade_condition) { \ - renderer(color16, color16, scanline); \ - expand_brighten(scanline, scanline, _start, _end); \ - return; \ - } \ - break; \ - } \ - \ - /* Fade to black */ \ - case 0x03: { \ - if (fade_condition) { \ - renderer(color16, color16, scanline); \ - expand_darken(scanline, scanline, _start, _end); \ - return; \ - } \ - break; \ - } \ - } \ - \ - renderer(normal, normal, scanline); \ - expand_normal(scanline, _start, _end); \ - } \ - } else { \ - u32 pixel_top = PLTT[0]; \ - switch ((bldcnt >> 6) & 0x03) { \ - /* Fade to white */ \ - case 0x02: { \ - if (color_combine_mask_a(5)) { \ - u32 blend = read_ioreg(REG_ADDR_BLDY) & 0x1F; \ - u32 upper; \ - \ - if (blend > 16) \ - blend = 16; \ - \ - upper = ((BLND_MSK * 
blend) >> 4) & BLND_MSK; \ - blend = 16 - blend; \ - \ - expand_pixel_no_dest(brighten, pixel_top); \ - } \ - break; \ - } \ - \ - /* Fade to black */ \ - case 0x03: { \ - if (color_combine_mask_a(5)) { \ - s32 blend = 16 - (read_ioreg(REG_ADDR_BLDY) & 0x1F); \ - \ - if (blend < 0) \ - blend = 0; \ - \ - expand_pixel_no_dest(darken, pixel_top); \ - } \ - break; \ - } \ - } \ - fill_line_color16(pixel_top, scanline, _start, _end); \ - } \ - } - -// Renders an entire scanline from 0 to DISPLAY_WIDTH, based on current color mode. - -static void render_scanline_tile(u16 *scanline, u32 dispcnt) -{ - u32 current_layer; - u32 layer_order_pos; - u32 bldcnt = read_ioreg(REG_ADDR_BLDCNT); - render_scanline_layer_functions_tile(); - - render_layers_color_effect(render_layers, layer_count, render_condition_alpha, render_condition_fade, 0, DISPLAY_WIDTH); -} - -static void render_scanline_bitmap(u16 *scanline, u32 dispcnt) -{ - render_scanline_layer_functions_bitmap(); - u32 current_layer; - u32 layer_order_pos; - - fill_line_bg(normal, scanline, 0, DISPLAY_WIDTH); - - for (layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++) { - current_layer = layer_order[layer_order_pos]; - if (current_layer & 0x04) { - render_obj_layer(normal, scanline, 0, DISPLAY_WIDTH); - } else { - layer_renderers->normal_render(0, DISPLAY_WIDTH, scanline); - } - } -} - -// Render layers from start to end based on if they're allowed in the -// enable flags. - -#define render_layers_conditional(tile_alpha, obj_alpha, dest) \ - { \ - __label__ skip; \ - current_layer = layer_order[layer_order_pos]; \ - /* If OBJ aren't enabled skip to the first non-OBJ layer */ \ - if (!(enable_flags & 0x10)) { \ - while ((current_layer & 0x04) || !((1 << current_layer) & enable_flags)) { \ - layer_order_pos++; \ - current_layer = layer_order[layer_order_pos]; \ - \ - /* Oops, ran out of layers, render the background. 
*/ \ - if (layer_order_pos == layer_count) { \ - fill_line_bg(tile_alpha, dest, start, end); \ - goto skip; \ - } \ - } \ - \ - /* Render the first valid layer */ \ - layer_renderers[current_layer].tile_alpha##_render_base(current_layer, start, end, dest); \ - \ - layer_order_pos++; \ - \ - /* Render the rest of the layers if active, skipping OBJ ones. */ \ - for (; layer_order_pos < layer_count; layer_order_pos++) { \ - current_layer = layer_order[layer_order_pos]; \ - if (!(current_layer & 0x04) && ((1 << current_layer) & enable_flags)) { \ - layer_renderers[current_layer].tile_alpha##_render_transparent(current_layer, start, end, dest); \ - } \ - } \ - } else { \ - /* Find the first active layer, skip all of the inactive ones */ \ - while (!((current_layer & 0x04) || ((1 << current_layer) & enable_flags))) { \ - layer_order_pos++; \ - current_layer = layer_order[layer_order_pos]; \ - \ - /* Oops, ran out of layers, render the background. */ \ - if (layer_order_pos == layer_count) { \ - fill_line_bg(tile_alpha, dest, start, end); \ - goto skip; \ - } \ - } \ - \ - if (current_layer & 0x04) { \ - /* If the first one is OBJ render the background then render it. */ \ - fill_line_bg(tile_alpha, dest, start, end); \ - render_obj_layer(obj_alpha, dest, start, end); \ - } else { \ - /* Otherwise render a base layer. */ \ - layer_renderers[current_layer].tile_alpha##_render_base(current_layer, start, end, dest); \ - } \ - \ - layer_order_pos++; \ - \ - /* Render the rest of the layers. 
*/ \ - for (; layer_order_pos < layer_count; layer_order_pos++) { \ - current_layer = layer_order[layer_order_pos]; \ - if (current_layer & 0x04) { \ - render_obj_layer(obj_alpha, dest, start, end); \ - } else { \ - if (enable_flags & (1 << current_layer)) { \ - layer_renderers[current_layer].tile_alpha##_render_transparent(current_layer, start, end, dest); \ - } \ - } \ - } \ - } \ - \ - skip:; \ - } - -// Render all of the BG and OBJ in a tiled scanline from start to end ONLY if -// enable_flag allows that layer/OBJ. Also conditionally render color effects. - -static void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline, u32 enable_flags, u32 dispcnt, u32 bldcnt, - const tile_layer_render_struct *layer_renderers) -{ - u32 current_layer; - u32 layer_order_pos = 0; - - render_layers_color_effect(render_layers_conditional, (layer_count && (enable_flags & 0x1F)), - ((enable_flags & 0x20) && render_condition_alpha), ((enable_flags & 0x20) && render_condition_fade), start, - end); -} - -// Render the BG and OBJ in a bitmap scanline from start to end ONLY if -// enable_flag allows that layer/OBJ. Also conditionally render color effects. 
- -static void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline, u32 enable_flags, u32 dispcnt, u32 bldcnt, - const bitmap_layer_render_struct *layer_renderers) -{ - u32 current_layer; - u32 layer_order_pos; - - fill_line_bg(normal, scanline, start, end); - - for (layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++) { - current_layer = layer_order[layer_order_pos]; - if (current_layer & 0x04) { - if (enable_flags & 0x10) { - render_obj_layer(normal, scanline, start, end); - } - } else { - if (enable_flags & 0x04) - layer_renderers->normal_render(start, end, scanline); - } - } -} - -#define window_x_coords(window_number) \ - window_##window_number##_x1 = WIN_GET_LOWER(read_ioreg(REG_ADDR_WIN##window_number##H)); \ - window_##window_number##_x2 = WIN_GET_HIGHER(read_ioreg(REG_ADDR_WIN##window_number##H)); \ - window_##window_number##_enable = (winin >> (window_number * 8)) & 0x3F; \ - \ - if (window_##window_number##_x1 > DISPLAY_WIDTH) \ - window_##window_number##_x1 = DISPLAY_WIDTH; \ - \ - if (window_##window_number##_x2 > DISPLAY_WIDTH) \ - window_##window_number##_x2 = DISPLAY_WIDTH - -#define window_coords(window_number) \ - u32 window_##window_number##_x1, window_##window_number##_x2; \ - u32 window_##window_number##_y1, window_##window_number##_y2; \ - u32 window_##window_number##_enable = 0; \ - window_##window_number##_y1 = WIN_GET_LOWER(read_ioreg(REG_ADDR_WIN##window_number##V)); \ - window_##window_number##_y2 = WIN_GET_HIGHER(read_ioreg(REG_ADDR_WIN##window_number##V)); \ - \ - if (window_##window_number##_y1 > window_##window_number##_y2) { \ - if ((((vcount <= window_##window_number##_y2) || (vcount > window_##window_number##_y1)) \ - || (window_##window_number##_y2 > (DISPLAY_HEIGHT + 67))) \ - && (window_##window_number##_y1 <= (DISPLAY_HEIGHT + 67))) { \ - window_x_coords(window_number); \ - } else { \ - window_##window_number##_x1 = DISPLAY_WIDTH; \ - window_##window_number##_x2 = DISPLAY_WIDTH; \ - } \ - } 
else { \ - if ((((vcount >= window_##window_number##_y1) && (vcount < window_##window_number##_y2)) \ - || (window_##window_number##_y2 > (DISPLAY_HEIGHT + 67))) \ - && (window_##window_number##_y1 <= (DISPLAY_HEIGHT + 67))) { \ - window_x_coords(window_number); \ - } else { \ - window_##window_number##_x1 = DISPLAY_WIDTH; \ - window_##window_number##_x2 = DISPLAY_WIDTH; \ - } \ - } - -#define render_window_segment(type, start, end, window_type) \ - if (start != end) { \ - render_scanline_conditional_##type(start, end, scanline, window_##window_type##_enable, dispcnt, bldcnt, layer_renderers); \ - } - -#define render_window_segment_unequal(type, start, end, window_type) \ - render_scanline_conditional_##type(start, end, scanline, window_##window_type##_enable, dispcnt, bldcnt, layer_renderers) - -#define render_window_segment_clip(type, clip_start, clip_end, start, end, window_type) \ - { \ - if (start != end) { \ - if (start < clip_start) { \ - if (end > clip_start) { \ - if (end > clip_end) { \ - render_window_segment_unequal(type, clip_start, clip_end, window_type); \ - } else { \ - render_window_segment_unequal(type, clip_start, end, window_type); \ - } \ - } \ - } else \ - \ - if (end > clip_end) { \ - if (start < clip_end) \ - render_window_segment_unequal(type, start, clip_end, window_type); \ - } else { \ - render_window_segment_unequal(type, start, end, window_type); \ - } \ - } \ - } - -#define render_window_clip_1(type, start, end) \ - if (window_1_x1 != DISPLAY_WIDTH) { \ - if (window_1_x1 > window_1_x2) { \ - render_window_segment_clip(type, start, end, 0, window_1_x2, 1); \ - render_window_segment_clip(type, start, end, window_1_x2, window_1_x1, out); \ - render_window_segment_clip(type, start, end, window_1_x1, DISPLAY_WIDTH, 1); \ - } else { \ - render_window_segment_clip(type, start, end, 0, window_1_x1, out); \ - render_window_segment_clip(type, start, end, window_1_x1, window_1_x2, 1); \ - render_window_segment_clip(type, start, end, window_1_x2, 
DISPLAY_WIDTH, out); \ - } \ - } else { \ - render_window_segment(type, start, end, out); \ - } - -#define render_window_clip_obj(type, start, end) \ - ; \ - render_window_segment(type, start, end, out); \ - if (dispcnt & 0x40) \ - render_scanline_obj_copy_##type##_1D(4, start, end, scanline); \ - else \ - render_scanline_obj_copy_##type##_2D(4, start, end, scanline) - -#define render_window_segment_clip_obj(type, clip_start, clip_end, start, end) \ - { \ - if (start != end) { \ - if (start < clip_start) { \ - if (end > clip_start) { \ - if (end > clip_end) { \ - render_window_clip_obj(type, clip_start, clip_end); \ - } else { \ - render_window_clip_obj(type, clip_start, end); \ - } \ - } \ - } else \ - \ - if (end > clip_end) { \ - if (start < clip_end) { \ - render_window_clip_obj(type, start, clip_end); \ - } \ - } else { \ - render_window_clip_obj(type, start, end); \ - } \ - } \ - } - -#define render_window_clip_1_obj(type, start, end) \ - if (window_1_x1 != DISPLAY_WIDTH) { \ - if (window_1_x1 > window_1_x2) { \ - render_window_segment_clip(type, start, end, 0, window_1_x2, 1); \ - render_window_segment_clip_obj(type, start, end, window_1_x2, window_1_x1); \ - render_window_segment_clip(type, start, end, window_1_x1, DISPLAY_WIDTH, 1); \ - } else { \ - render_window_segment_clip_obj(type, start, end, 0, window_1_x1); \ - render_window_segment_clip(type, start, end, window_1_x1, window_1_x2, 1); \ - render_window_segment_clip_obj(type, start, end, window_1_x2, DISPLAY_WIDTH); \ - } \ - } else { \ - render_window_clip_obj(type, start, end); \ - } - -#define render_window_single(type, window_number) \ - u32 winin = read_ioreg(REG_ADDR_WININ); \ - window_coords(window_number); \ - if (window_##window_number##_x1 > window_##window_number##_x2) { \ - render_window_segment(type, 0, window_##window_number##_x2, window_number); \ - render_window_segment(type, window_##window_number##_x2, window_##window_number##_x1, out); \ - render_window_segment(type, 
window_##window_number##_x1, DISPLAY_WIDTH, window_number); \ - } else { \ - render_window_segment(type, 0, window_##window_number##_x1, out); \ - render_window_segment(type, window_##window_number##_x1, window_##window_number##_x2, window_number); \ - render_window_segment(type, window_##window_number##_x2, DISPLAY_WIDTH, out); \ - } - -#define render_window_multi(type, front, back) \ - if (window_##front##_x1 > window_##front##_x2) { \ - render_window_segment(type, 0, window_##front##_x2, front); \ - render_window_clip_##back(type, window_##front##_x2, window_##front##_x1); \ - render_window_segment(type, window_##front##_x1, DISPLAY_WIDTH, front); \ - } else { \ - render_window_clip_##back(type, 0, window_##front##_x1); \ - render_window_segment(type, window_##front##_x1, window_##front##_x2, front); \ - render_window_clip_##back(type, window_##front##_x2, DISPLAY_WIDTH); \ - } - -#define render_scanline_window_builder(type) \ - static void render_scanline_window_##type(u16 *scanline, u32 dispcnt) \ - { \ - u32 vcount = read_ioreg(REG_ADDR_VCOUNT); \ - u32 winout = read_ioreg(REG_ADDR_WINOUT); \ - u32 bldcnt = read_ioreg(REG_ADDR_BLDCNT); \ - u32 window_out_enable = winout & 0x3F; \ - \ - render_scanline_layer_functions_##type(); \ - \ - switch (dispcnt >> 13) { \ - /* Just window 0 */ \ - case 0x01: { \ - render_window_single(type, 0); \ - break; \ - } \ - \ - /* Just window 1 */ \ - case 0x02: { \ - render_window_single(type, 1); \ - break; \ - } \ - \ - /* Windows 1 and 2 */ \ - case 0x03: { \ - u32 winin = read_ioreg(REG_ADDR_WININ); \ - window_coords(0); \ - window_coords(1); \ - render_window_multi(type, 0, 1); \ - break; \ - } \ - \ - /* Just OBJ windows */ \ - case 0x04: { \ - render_window_clip_obj(type, 0, DISPLAY_WIDTH); \ - break; \ - } \ - \ - /* Window 0 and OBJ window */ \ - case 0x05: { \ - u32 winin = read_ioreg(REG_ADDR_WININ); \ - window_coords(0); \ - render_window_multi(type, 0, obj); \ - break; \ - } \ - \ - /* Window 1 and OBJ window */ \ 
- case 0x06: { \ - u32 winin = read_ioreg(REG_ADDR_WININ); \ - window_coords(1); \ - render_window_multi(type, 1, obj); \ - break; \ - } \ - \ - /* Window 0, 1, and OBJ window */ \ - case 0x07: { \ - u32 winin = read_ioreg(REG_ADDR_WININ); \ - window_coords(0); \ - window_coords(1); \ - render_window_multi(type, 0, 1_obj); \ - break; \ - } \ - } \ - } - -render_scanline_window_builder(tile); -render_scanline_window_builder(bitmap); - -static const u8 active_layers[] = { - 0x1F, // Mode 0, Tile BG0-3 and OBJ - 0x17, // Mode 1, Tile BG0-2 and OBJ - 0x1C, // Mode 2, Tile BG2-3 and OBJ - 0x14, // Mode 3, BMP BG2 and OBJ - 0x14, // Mode 4, BMP BG2 and OBJ - 0x14, // Mode 5, BMP BG2 and OBJ - 0, // Unused - 0, -}; - -void update_scanline(void) -{ - u32 pitch = get_screen_pitch(); - u32 dispcnt = read_ioreg(REG_ADDR_DISPCNT); - u32 vcount = read_ioreg(REG_ADDR_VCOUNT); - u16 *screen_offset = get_screen_pixels() + (vcount * pitch); - u32 video_mode = dispcnt & 0x07; - - order_layers((dispcnt >> 8) & active_layers[video_mode], vcount); - - // fill_line_color16(*(uint16_t *)PLTT, screen_offset, 0, DISPLAY_WIDTH); - - // If the screen is in in forced blank draw pure white. 
- if (dispcnt & DISPCNT_FORCED_BLANK) { - fill_line_color16(0xFFFF, screen_offset, 0, DISPLAY_WIDTH); - } else { - if (video_mode < 3) { - if (dispcnt >> 13) { - render_scanline_window_tile(screen_offset, dispcnt); - } else { - render_scanline_tile(screen_offset, dispcnt); - } - } else { - if (dispcnt >> 13) - render_scanline_window_bitmap(screen_offset, dispcnt); - else - render_scanline_bitmap(screen_offset, dispcnt); - } - } - - affine_reference_x[0] += (s16)read_ioreg(REG_ADDR_BG2PB); - affine_reference_y[0] += (s16)read_ioreg(REG_ADDR_BG2PD); - affine_reference_x[1] += (s16)read_ioreg(REG_ADDR_BG3PB); - affine_reference_y[1] += (s16)read_ioreg(REG_ADDR_BG3PD); -} - -void DrawFrame_Fast(u16 *pixels) -{ - int i; - - gba_screen_pixels = pixels; - video_reload_counters(); - // convert_whole_palette(); - - // assume that the oam is only updated once before the frame - // starts to be drawn - u32 dispcnt = read_ioreg(REG_ADDR_DISPCNT); - u32 video_mode = dispcnt & 0x07; - order_obj(video_mode); - - for (i = 0; i < DISPLAY_HEIGHT; i++) { - - REG_VCOUNT = i; - if (((REG_DISPSTAT >> 8) & 0xFF) == REG_VCOUNT) { - REG_DISPSTAT |= INTR_FLAG_VCOUNT; - if (REG_DISPSTAT & DISPSTAT_VCOUNT_INTR) - gIntrTable[INTR_INDEX_VCOUNT](); - } - - // Render the backdrop color before each individual scanline. - // HBlank interrupt code could have changed it in between lines. 
- update_scanline(); - - REG_DISPSTAT |= INTR_FLAG_HBLANK; - - RunDMAs(DMA_HBLANK); - - if (REG_DISPSTAT & DISPSTAT_HBLANK_INTR) - gIntrTable[INTR_INDEX_HBLANK](); - - REG_DISPSTAT &= ~INTR_FLAG_HBLANK; - REG_DISPSTAT &= ~INTR_FLAG_VCOUNT; - } -} - -#endif diff --git a/src/platform/shared/rendering/sw_renderer_fast.cc b/src/platform/shared/rendering/sw_renderer_fast.cc new file mode 100644 index 000000000..10a505865 --- /dev/null +++ b/src/platform/shared/rendering/sw_renderer_fast.cc @@ -0,0 +1,2298 @@ +/* gameplaySP - Modified to fit the SA2 codebase (FreshOllie - 2026) + * + * Copyright (C) 2006 Exophase + * Copyright (C) 2023 David Guillen Fandos + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +extern "C" { +#include "config.h" +} + +#if RENDERER == RENDERER_SOFTWARE_FAST + +#include +#include + +extern "C" { +#include "global.h" +#include "core.h" +#include "gba/defines.h" +#include "gba/io_reg.h" +#include "gba/types.h" + +#include "platform/shared/dma.h" +} + +#define eswap16(value) (value) +#define eswap32(value) (value) + +#define GBA_SCREEN_PITCH DISPLAY_WIDTH + +typedef u32 fixed16_16; +typedef u32 fixed8_24; + +#define float_to_fp16_16(value) (fixed16_16)((value)*65536.0) + +#define fp16_16_to_float(value) (float)((value) / 65536.0) + +#define u32_to_fp16_16(value) ((value) << 16) + +#define fp16_16_to_u32(value) ((value) >> 16) + +#define fp16_16_fractional_part(value) ((value)&0xFFFF) + +#define float_to_fp8_24(value) (fixed8_24)((value)*16777216.0) + +#define fp8_24_fractional_part(value) ((value)&0xFFFFFF) + +#define fixed_div(numerator, denominator, bits) (((numerator * (1 << bits)) + (denominator / 2)) / denominator) + +#define read_ioreg(regaddr) (eswap16(*(u16 *)(regaddr))) +#define read_ioreg32(regaddr) (read_ioreg(regaddr) | (read_ioreg((regaddr) + sizeof(u16)) << 16)) + +#define convert_palette(value) (value & 0x7FFF) + +u16 *gba_screen_pixels = NULL; + +#define get_screen_pixels() gba_screen_pixels +#define get_screen_pitch() GBA_SCREEN_PITCH + +#define REG_ADDR_BGxCNT(n) (REG_ADDR_BG0CNT + (n) * sizeof(u16)) +#define REG_ADDR_WINxH(n) (REG_ADDR_WIN0H + (n) * sizeof(winreg_t)) +#define REG_ADDR_WINxV(n) (REG_ADDR_WIN0V + (n) * sizeof(winreg_t)) +#define REG_ADDR_BGxHOFS(n) (REG_ADDR_BG0HOFS + ((n)*2) * sizeof(u16)) +#define REG_ADDR_BGxVOFS(n) (REG_ADDR_BG0VOFS + ((n)*2) * sizeof(u16)) +#define REG_ADDR_BGxPA(n) (REG_ADDR_BG2PA + ((n)-2) * 8 * sizeof(u16)) +#define REG_ADDR_BGxPB(n) (REG_ADDR_BG2PB + ((n)-2) * 8 
* sizeof(u16)) +#define REG_ADDR_BGxPC(n) (REG_ADDR_BG2PC + ((n)-2) * 8 * sizeof(u16)) +#define REG_ADDR_BGxPD(n) (REG_ADDR_BG2PD + ((n)-2) * 8 * sizeof(u16)) + +typedef struct { + u16 pad0[OAM_DATA_COUNT_AFFINE - 1]; + u16 dx; + u16 pad1[OAM_DATA_COUNT_AFFINE - 1]; + u16 dmx; + u16 pad2[OAM_DATA_COUNT_AFFINE - 1]; + u16 dy; + u16 pad3[OAM_DATA_COUNT_AFFINE - 1]; + u16 dmy; +} t_affp; + +typedef void (*bitmap_render_function)(u32 start, u32 end, void *dest_ptr, const u16 *pal); +typedef void (*tile_render_function)(u32 layer, u32 start, u32 end, void *dest_ptr, const u16 *pal); + +typedef void (*render_function_u16)(u32 start, u32 end, u16 *scanline, u32 enable_flags); +typedef void (*render_function_u32)(u32 start, u32 end, u32 *scanline, u32 enable_flags); + +typedef void (*window_render_function)(u16 *scanline, u32 start, u32 end); + +static void render_scanline_conditional(u32 start, u32 end, u16 *scanline, u32 enable_flags = 0x3F); + +typedef struct { + bitmap_render_function blit_render; + bitmap_render_function scale_render; + bitmap_render_function affine_render; +} bitmap_layer_render_struct; + +typedef struct { + render_function_u16 fullcolor; + render_function_u16 indexed_u16; + render_function_u32 indexed_u32; + render_function_u32 stacked; +} layer_render_struct; + +// Object blending modes +#define OBJ_MOD_NORMAL 0 +#define OBJ_MOD_SEMITRAN 1 +#define OBJ_MOD_WINDOW 2 +#define OBJ_MOD_INVALID 3 + +// BLDCNT color effect modes +#define COL_EFFECT_NONE 0x0 +#define COL_EFFECT_BLEND 0x1 +#define COL_EFFECT_BRIGHT 0x2 +#define COL_EFFECT_DARK 0x3 + +// Background render modes +#define RENDER_NORMAL 0 +#define RENDER_COL16 1 +#define RENDER_COL32 2 +#define RENDER_ALPHA 3 + +// Byte lengths of complete tiles and tile rows in 4bpp and 8bpp. 
+ +#define tile_width_4bpp 4 +#define tile_size_4bpp 32 +#define tile_width_8bpp 8 +#define tile_size_8bpp 64 + +// Sprite rendering cycles +#define REND_CYC_MAX 32768 /* Theoretical max is 17920 */ +#define REND_CYC_SCANLINE 1210 +#define REND_CYC_REDUCED 954 + +// Generate bit mask (bits 9th and 10th) with information about the pixel +// status (1st and/or 2nd target) for later blending. +static inline u16 color_flags(u32 layer) +{ + u32 bldcnt = read_ioreg(REG_ADDR_BLDCNT); + return (((bldcnt >> layer) & 0x01) | // 1st target + ((bldcnt >> (layer + 7)) & 0x02) // 2nd target + ) + << 9; +} + +static const u32 map_widths[] = { 256, 512, 256, 512 }; + +typedef enum { + FULLCOLOR, // Regular rendering, output a 16 bit color + INDXCOLOR, // Rendering to indexed color, so we can later apply dark/bright + STCKCOLOR, // Stacks two indexed pixels (+flags) to apply blending + PIXCOPY // Special mode used for sprites, to allow for obj-window drawing +} rendtype; + +s32 affine_reference_x[2]; +s32 affine_reference_y[2]; + +static inline s32 signext28(u32 value) +{ + s32 ret = (s32)(value << 4); + return ret >> 4; +} + +void video_reload_counters() +{ + /* This happens every Vblank */ + affine_reference_x[0] = signext28(read_ioreg32(REG_ADDR_BG2X_L)); + affine_reference_y[0] = signext28(read_ioreg32(REG_ADDR_BG2Y_L)); + affine_reference_x[1] = signext28(read_ioreg32(REG_ADDR_BG3X_L)); + affine_reference_y[1] = signext28(read_ioreg32(REG_ADDR_BG3Y_L)); +} + +// Renders non-affine tiled background layer. +// Will process a full or partial tile (start and end within 0..8) and draw +// it in either 8 or 4 bpp mode. Honors vertical and horizontal flip. + +// tile contains the tile info (contains tile index, flip bits, pal info) +// hflip causes the tile pixels lookup to be reversed (from MSB to LSB) +// If isbase is not set, color 0 is interpreted as transparent, otherwise +// we are drawing the base layer, so palette[0] is used (backdrop). 
+ +template +static inline void rend_part_tile_Nbpp(u32 bg_comb, u32 px_comb, dtype *dest_ptr, u32 start, u32 end, u16 tile, const u8 *tile_base, + int vertical_pixel_flip, const u16 *paltbl) +{ + // Seek to the specified tile, using the tile number and size. + // tile_base already points to the right tile-line vertical offset + const u8 *tile_ptr = &tile_base[(tile & 0x3FF) * (is8bpp ? 64 : 32)]; + u16 bgcolor = paltbl[0]; + + // On vertical flip, apply the mirror offset + if (tile & 0x800) + tile_ptr += vertical_pixel_flip; + + if (is8bpp) { + // Each byte is a color, mapped to a palette. 8 bytes can be read as 64bit + for (u32 i = start; i < end; i++, dest_ptr++) { + // Honor hflip by selecting bytes in the correct order + u32 sel = hflip ? (7 - i) : i; + u8 pval = tile_ptr[sel]; + // Alpha mode stacks previous value (unless rendering the first layer) + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = paltbl[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_comb; // Add combine flags + else if (rdtype == STCKCOLOR) + // Stack pixels on top of the pixel value and combine flags + *dest_ptr = pval | px_comb | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + if (rdtype == FULLCOLOR) + *dest_ptr = bgcolor; + else + *dest_ptr = 0 | bg_comb; // Add combine flags + } + } + } else { + // In 4bpp mode, the tile[15..12] bits contain the sub-palette number. + u16 tilepal = (tile >> 12) << 4; + u16 pxflg = px_comb | tilepal; + const u16 *subpal = &paltbl[tilepal]; + // Read packed pixel data, skip start pixels + u32 tilepix = eswap32(*(u32 *)tile_ptr); + if (hflip) + tilepix <<= (start * 4); + else + tilepix >>= (start * 4); + // Only 32 bits (8 pixels * 4 bits) + for (u32 i = start; i < end; i++, dest_ptr++) { + u8 pval = hflip ? 
tilepix >> 28 : tilepix & 0xF; + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = subpal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pxflg | pval; + else if (rdtype == STCKCOLOR) // Stack pixels + *dest_ptr = pxflg | pval | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + if (rdtype == FULLCOLOR) + *dest_ptr = bgcolor; + else + *dest_ptr = 0 | bg_comb; + } + // Advance to next packed data + if (hflip) + tilepix <<= 4; + else + tilepix >>= 4; + } + } +} + +// Same as above, but optimized for full tiles. Skip comments here. +template +static inline void render_tile_Nbpp(u32 bg_comb, u32 px_comb, dtype *dest_ptr, u16 tile, const u8 *tile_base, int vertical_pixel_flip, + const u16 *paltbl) +{ + const u8 *tile_ptr = &tile_base[(tile & 0x3FF) * (is8bpp ? 64 : 32)]; + u16 bgcolor = paltbl[0]; + + if (tile & 0x800) + tile_ptr += vertical_pixel_flip; + + if (is8bpp) { + for (u32 j = 0; j < 2; j++) { + u32 tilepix = eswap32(((u32 *)tile_ptr)[hflip ? 1 - j : j]); + if (tilepix) { + for (u32 i = 0; i < 4; i++, dest_ptr++) { + u8 pval = hflip ? (tilepix >> (24 - i * 8)) : (tilepix >> (i * 8)); + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = paltbl[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_comb; // Add combine flags + else if (rdtype == STCKCOLOR) + *dest_ptr = pval | px_comb | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } + } else { + for (u32 i = 0; i < 4; i++, dest_ptr++) + if (isbase) + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } + } else { + u32 tilepix = eswap32(*(u32 *)tile_ptr); + if (tilepix) { // We can skip it all if the row is transparent + u16 tilepal = (tile >> 12) << 4; + u16 pxflg = px_comb | tilepal; + const u16 *subpal = &paltbl[tilepal]; + for (u32 i = 0; i < 8; i++, dest_ptr++) { + u8 pval = (hflip ? 
(tilepix >> ((7 - i) * 4)) : (tilepix >> (i * 4))) & 0xF; + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = subpal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pxflg | pval; + else if (rdtype == STCKCOLOR) + *dest_ptr = pxflg | pval | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } + } else if (isbase) { + // In this case we simply fill the pixels with background pixels + for (u32 i = 0; i < 8; i++, dest_ptr++) + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } +} + +template +static void render_scanline_text_fast(u32 layer, u32 start, u32 end, void *scanline, const u16 *paltbl) +{ + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + u16 vcount = read_ioreg(REG_ADDR_VCOUNT); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 hoffset = (start + read_ioreg(REG_ADDR_BGxHOFS(layer))) % 512; + u32 voffset = (vcount + read_ioreg(REG_ADDR_BGxVOFS(layer))) % 512; + stype *dest_ptr = ((stype *)scanline) + start; + + // Calculate combine masks. These store 2 bits of info: 1st and 2nd target. + // If set, the current pixel belongs to a layer that is 1st or 2nd target. + u32 bg_comb = color_flags(5), px_comb = color_flags(layer); + + // Background map data is in VRAM, at an offset specified in 2K blocks. + // (each map data block is 32x32 tiles, at 16bpp, so 2KB) + u32 base_block = (bg_control & BGCNT_SCREENBASE_MASK) >> 8; + u16 *map_base = (u16 *)BG_SCREEN_ADDR(base_block); + u16 *map_ptr, *second_ptr; + + end -= start; + + // Skip the top one/two block(s) if using the bottom half + if ((map_size & 0x02) && (voffset >= 256)) + map_base += ((map_width / 8) * 32); + + // Skip the top tiles within the block + map_base += (((voffset % 256) / 8) * (map_width / 8)); + + // we might need to render from two charblocks, store a second pointer. 
+ second_ptr = map_ptr = map_base; + + if (map_size & 0x01) { // If background is 512 pixels wide + if (hoffset >= 256) { + // If we are rendering the right block, skip a whole charblock + hoffset -= 256; + map_ptr += ((map_width / 8) * 32); + } else { + // If we are rendering the left block, we might overrun into the right + second_ptr += ((map_width / 8) * 32); + } + } else { + hoffset %= 256; // Background is 256 pixels wide + } + + // Skip the left blocks within the block + map_ptr += hoffset / 8; + + // Render a single scanline of text tiles + u32 tilewidth = is8bpp ? tile_width_8bpp : tile_width_4bpp; + u32 vert_pix_offset = (voffset % 8) * tilewidth; + // Calculate the pixel offset between a line and its "flipped" mirror. + // The values can be {56, 40, 24, 8, -8, -24, -40, -56} + s32 vflip_off + = is8bpp ? tile_size_8bpp - 2 * vert_pix_offset - tile_width_8bpp : tile_size_4bpp - 2 * vert_pix_offset - tile_width_4bpp; + + // The tilemap base is selected via bgcnt (16KiB chunks) + u32 tilecntrl = (bg_control >> 2) & 0x03; + // Account for the base offset plus the tile vertical offset + u8 *tile_base = BG_CHAR_ADDR(tilecntrl) + vert_pix_offset; + // Number of pixels available until the end of the tile block + u32 pixel_run = map_width - hoffset; + + u32 tile_hoff = hoffset % 8; + u32 partial_hcnt = 8 - tile_hoff; + + if (tile_hoff) { + // First partial tile, only right side is visible. + u32 todraw = MIN(end, partial_hcnt); // [1..7] + u32 stop = tile_hoff + todraw; // Usually 8, unless short run. 
+ + u16 tile = eswap16(*map_ptr++); + if (tile & 0x400) // Tile horizontal flip + rend_part_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile_hoff, stop, tile, tile_base, + vflip_off, paltbl); + else + rend_part_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile_hoff, stop, tile, tile_base, + vflip_off, paltbl); + + dest_ptr += todraw; + end -= todraw; + pixel_run -= todraw; + } + + if (!end) + return; + + // Now render full tiles + u32 todraw = MIN(end, pixel_run) / 8; + + for (u32 i = 0; i < todraw; i++, dest_ptr += 8) { + u16 tile = eswap16(*map_ptr++); + if (tile & 0x400) // Tile horizontal flip + render_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile, tile_base, vflip_off, paltbl); + else + render_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile, tile_base, vflip_off, paltbl); + } + + end -= todraw * 8; + pixel_run -= todraw * 8; + + if (!end) + return; + + // Switch to the next char block if we ran out of tiles + if (!pixel_run) + map_ptr = second_ptr; + + todraw = end / 8; + for (u32 i = 0; i < todraw; i++, dest_ptr += 8) { + u16 tile = eswap16(*map_ptr++); + if (tile & 0x400) // Tile horizontal flip + render_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile, tile_base, vflip_off, paltbl); + else + render_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile, tile_base, vflip_off, paltbl); + } + + end -= todraw * 8; + + // Finalize the tile rendering the left side of it (from 0 up to "end"). 
+ if (end) { + u16 tile = eswap16(*map_ptr++); + if (tile & 0x400) // Tile horizontal flip + rend_part_tile_Nbpp(bg_comb, px_comb, dest_ptr, 0, end, tile, tile_base, vflip_off, + paltbl); + else + rend_part_tile_Nbpp(bg_comb, px_comb, dest_ptr, 0, end, tile, tile_base, vflip_off, + paltbl); + } +} + +// A slow version of the above function that allows for mosaic effects +template +static void render_scanline_text_mosaic(u32 layer, u32 start, u32 end, void *scanline, const u16 *paltbl) +{ + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + const u32 mosh = (read_ioreg(REG_ADDR_MOSAIC) & 0xF) + 1; + const u32 mosv = ((read_ioreg(REG_ADDR_MOSAIC) >> 4) & 0xF) + 1; + u16 vcount = read_ioreg(REG_ADDR_VCOUNT); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 hoffset = (start + read_ioreg(REG_ADDR_BGxHOFS(layer))) % 512; + u16 vmosoff = vcount - vcount % mosv; + u32 voffset = (vmosoff + read_ioreg(REG_ADDR_BGxVOFS(layer))) % 512; + stype *dest_ptr = ((stype *)scanline) + start; + + u32 bg_comb = color_flags(5), px_comb = color_flags(layer); + + u32 base_block = (bg_control & BGCNT_SCREENBASE_MASK) >> 8; + u16 *map_base = (u16 *)BG_SCREEN_ADDR(base_block); + u16 *map_ptr, *second_ptr; + + if ((map_size & 0x02) && (voffset >= 256)) + map_base += ((map_width / 8) * 32); + + map_base += (((voffset % 256) / 8) * (map_width / 8)); + + second_ptr = map_ptr = map_base; + + if (map_size & 0x01) { // If background is 512 pixels wide + if (hoffset >= 256) { + // If we are rendering the right block, skip a whole charblock + hoffset -= 256; + map_ptr += ((map_width / 8) * 32); + } else { + // If we are rendering the left block, we might overrun into the right + second_ptr += ((map_width / 8) * 32); + } + } else { + hoffset %= 256; // Background is 256 pixels wide + } + + // Skip the left blocks within the block + map_ptr += hoffset / 8; + + // Render a single scanline of text tiles + u32 tilewidth = is8bpp ? 
tile_width_8bpp : tile_width_4bpp; + u32 vert_pix_offset = (voffset % 8) * tilewidth; + // Calculate the pixel offset between a line and its "flipped" mirror. + // The values can be {56, 40, 24, 8, -8, -24, -40, -56} + s32 vflip_off + = is8bpp ? tile_size_8bpp - 2 * vert_pix_offset - tile_width_8bpp : tile_size_4bpp - 2 * vert_pix_offset - tile_width_4bpp; + + // The tilemap base is selected via bgcnt (16KiB chunks) + u32 tilecntrl = (bg_control >> 2) & 0x03; + // Account for the base offset plus the tile vertical offset + u8 *tile_base = BG_CHAR_ADDR(tilecntrl) + vert_pix_offset; + + u16 bgcolor = paltbl[0]; + + // Iterate pixel by pixel, loading data every N pixels to honor mosaic effect + u8 pval = 0; + for (u32 i = 0; start < end; start++, i++, dest_ptr++) { + u16 tile = eswap16(*map_ptr); + + if (!(i % mosh)) { + const u8 *tile_ptr = &tile_base[(tile & 0x3FF) * (is8bpp ? 64 : 32)]; + + bool hflip = (tile & 0x400); + if (tile & 0x800) + tile_ptr += vflip_off; + + // Load byte or nibble with pixel data. + if (is8bpp) { + if (hflip) + pval = tile_ptr[7 - hoffset % 8]; + else + pval = tile_ptr[hoffset % 8]; + } else { + if (hflip) + pval = (tile_ptr[(7 - hoffset % 8) >> 1] >> (((hoffset & 1) ^ 1) * 4)) & 0xF; + else + pval = (tile_ptr[(hoffset % 8) >> 1] >> ((hoffset & 1) * 4)) & 0xF; + } + } + + if (is8bpp) { + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = paltbl[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_comb; // Add combine flags + else if (rdtype == STCKCOLOR) + *dest_ptr = pval | px_comb | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } else { + u16 tilepal = (tile >> 12) << 4; + u16 pxflg = px_comb | tilepal; + const u16 *subpal = &paltbl[tilepal]; + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = subpal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pxflg | pval; + else if (rdtype == STCKCOLOR) + *dest_ptr = pxflg | pval | ((isbase ? 
bg_comb : *dest_ptr) << 16); + } else if (isbase) { + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } + + // Need to continue from the next charblock + hoffset++; + if (hoffset % 8 == 0) + map_ptr++; + if (hoffset >= map_width) { + hoffset = 0; + map_ptr = second_ptr; + } + } +} + +template +static void render_scanline_text(u32 layer, u32 start, u32 end, void *scanline, const u16 *paltbl) +{ + // Tile mode has 4 and 8 bpp modes. + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + bool is8bpp = (read_ioreg(REG_ADDR_BGxCNT(layer)) & 0x80); + const u32 mosamount = read_ioreg(REG_ADDR_MOSAIC) & 0xFF; + bool has_mosaic = (bg_control & 0x40) && (mosamount != 0); + + if (has_mosaic) { + if (is8bpp) + render_scanline_text_mosaic(layer, start, end, scanline, paltbl); + else + render_scanline_text_mosaic(layer, start, end, scanline, paltbl); + } else { + if (is8bpp) + render_scanline_text_fast(layer, start, end, scanline, paltbl); + else + render_scanline_text_fast(layer, start, end, scanline, paltbl); + } +} + +static inline u8 lookup_pix_8bpp(u32 px, u32 py, const u8 *tile_base, const u8 *map_base, u32 map_size) +{ + // Pitch represents the log2(number of tiles per row) (from 16 to 128) + u32 map_pitch = map_size + 4; + // Given coords (px,py) in the background space, find the tile. + u32 mapoff = (px / 8) + ((py / 8) << map_pitch); + // Each tile is 8x8, so 64 bytes each. + const u8 *tile_ptr = &tile_base[map_base[mapoff] * tile_size_8bpp]; + // Read the 8bit color within the tile. + return tile_ptr[(px % 8) + ((py % 8) * 8)]; +} + +template +static inline void rend_pix_8bpp(dsttype *dest_ptr, u8 pval, u32 bg_comb, u32 px_comb, const u16 *pal) +{ + // Alhpa mode stacks previous value (unless rendering the first layer) + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = pal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_comb; // Add combine flags + else if (rdtype == STCKCOLOR) + // Stack pixels. If base, stack the base pixel. 
+ *dest_ptr = pval | px_comb | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + // Transparent pixel, but we are base layer, so render background. + if (rdtype == FULLCOLOR) + *dest_ptr = pal[0]; + else + *dest_ptr = 0 | bg_comb; // Just backdrop color and combine flags + } +} + +template static inline void render_bdrop_pixel_8bpp(dsttype *dest_ptr, u32 bg_comb, u16 bgcol) +{ + // Alhpa mode stacks previous value (unless rendering the first layer) + if (rdtype == FULLCOLOR) + *dest_ptr = bgcol; + else + *dest_ptr = 0 | bg_comb; +} + +typedef void (*affine_render_function)(u32 layer, u32 start, u32 cnt, const u8 *map_base, u32 map_size, const u8 *tile_base, void *dst_ptr, + const u16 *pal); + +// Affine background rendering logic. +// wrap extends the background infinitely, otherwise transparent/backdrop fill +// rotate indicates if there's any rotation (optimized version for no-rotation) +// mosaic applies to horizontal mosaic (vertical is adjusted via affine ref) +template +static inline void render_affine_background(u32 layer, u32 start, u32 cnt, const u8 *map_base, u32 map_size, const u8 *tile_base, + void *dst_ptr_raw, const u16 *pal) +{ + + dtype *dst_ptr = (dtype *)dst_ptr_raw; + // Backdrop and current layer combine bits. + u32 bg_comb = color_flags(5); + u32 px_comb = color_flags(layer); + + s32 dx = (s16)read_ioreg(REG_ADDR_BGxPA(layer)); + s32 dy = (s16)read_ioreg(REG_ADDR_BGxPC(layer)); + + s32 source_x = affine_reference_x[layer - 2] + (start * dx); + s32 source_y = affine_reference_y[layer - 2] + (start * dy); + + // Maps are squared, four sizes available (128x128 to 1024x1024) + u32 width_height = 128 << map_size; + + // Horizontal mosaic effect. + const u32 mosh = (mosaic ? 
(read_ioreg(REG_ADDR_MOSAIC)) & 0xF : 0) + 1; + + if (wrap) { + // In wrap mode the entire space is covered, since it "wraps" at the edges + u8 pval = 0; + if (rotate) { + for (u32 i = 0; cnt; i++, cnt--) { + u32 pix_x = (u32)(source_x >> 8) & (width_height - 1); + u32 pix_y = (u32)(source_y >> 8) & (width_height - 1); + + // Lookup pixel and draw it (only every Nth if mosaic is on) + if (!mosaic || !(i % mosh)) + pval = lookup_pix_8bpp(pix_x, pix_y, tile_base, map_base, map_size); + rend_pix_8bpp(dst_ptr++, pval, bg_comb, px_comb, pal); + + source_x += dx; + source_y += dy; // Move to the next pixel + } + } else { + // Y coordinate stays contant across the walk. + const u32 pix_y = (u32)(source_y >> 8) & (width_height - 1); + for (u32 i = 0; cnt; i++, cnt--) { + u32 pix_x = (u32)(source_x >> 8) & (width_height - 1); + if (!mosaic || !(i % mosh)) + pval = lookup_pix_8bpp(pix_x, pix_y, tile_base, map_base, map_size); + rend_pix_8bpp(dst_ptr++, pval, bg_comb, px_comb, pal); + source_x += dx; // Only moving in the X direction. + } + } + } else { + u16 bgcol = pal[0]; + if (rotate) { + // Draw backdrop pixels if necessary until we reach the background edge. + while (cnt) { + // Draw backdrop pixels if they lie outside of the background. + u32 pix_x = (u32)(source_x >> 8), pix_y = (u32)(source_y >> 8); + + // Stop once we find a pixel that is actually *inside* the map. + if (pix_x < width_height && pix_y < width_height) + break; + + // Draw a backdrop pixel if we are the base layer. + if (isbase) + render_bdrop_pixel_8bpp(dst_ptr, bg_comb, bgcol); + + dst_ptr++; + source_x += dx; + source_y += dy; + cnt--; + } + + // Draw background pixels by looking them up in the map + u8 pval = 0; + for (u32 i = 0; cnt; i++, cnt--) { + u32 pix_x = (u32)(source_x >> 8), pix_y = (u32)(source_y >> 8); + + // Check if we run out of background pixels, stop drawing. + if (pix_x >= width_height || pix_y >= width_height) + break; + + // Lookup pixel and draw it. 
+ if (!mosaic || !(i % mosh)) + pval = lookup_pix_8bpp(pix_x, pix_y, tile_base, map_base, map_size); + rend_pix_8bpp(dst_ptr++, pval, bg_comb, px_comb, pal); + + // Move to the next pixel, update coords accordingly + source_x += dx; + source_y += dy; + } + } else { + // Specialized version for scaled-only backgrounds + u8 pval = 0; + const u32 pix_y = (u32)(source_y >> 8); + if (pix_y < width_height) { // Check if within Y-coord range + // Draw/find till left edge + while (cnt) { + u32 pix_x = (u32)(source_x >> 8); + if (pix_x < width_height) + break; + + if (isbase) + render_bdrop_pixel_8bpp(dst_ptr, bg_comb, bgcol); + + dst_ptr++; + source_x += dx; + cnt--; + } + // Draw actual background + for (u32 i = 0; cnt; i++, cnt--) { + u32 pix_x = (u32)(source_x >> 8); + if (pix_x >= width_height) + break; + + if (!mosaic || !(i % mosh)) + pval = lookup_pix_8bpp(pix_x, pix_y, tile_base, map_base, map_size); + rend_pix_8bpp(dst_ptr++, pval, bg_comb, px_comb, pal); + + source_x += dx; + } + } + } + + // Complete the line on the right, if we ran out over the bg edge. + // Only necessary for the base layer, otherwise we can safely finish. + if (isbase) + while (cnt--) + render_bdrop_pixel_8bpp(dst_ptr++, bg_comb, bgcol); + } +} + +// Renders affine backgrounds. These differ substantially from non-affine +// ones. Tile maps are byte arrays (instead of 16 bit), limiting the map to +// 256 different tiles (with no flip bits and just one single 256 color pal). +// Optimize for common cases: wrap/non-wrap, scaling/rotation. 
+template +static void render_scanline_affine(u32 layer, u32 start, u32 end, void *scanline, const u16 *pal) +{ + + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + u32 map_size = (bg_control >> 14) & 0x03; + + // Screen (map) base block pointer + u32 base_block = (bg_control & BGCNT_SCREENBASE_MASK) >> 8; + u8 *map_base = BG_SCREEN_ADDR(base_block); + // The tile (character) data base is selected via bgcnt (16KiB chunks) + u32 tilecntrl = (bg_control >> 2) & 0x03; + u8 *tile_base = BG_CHAR_ADDR(tilecntrl); + + dsttype *dest_ptr = ((dsttype *)scanline) + start; + const u32 mosamount = read_ioreg(REG_ADDR_MOSAIC) & 0xFF; + + bool has_mosaic = (bg_control & 0x40) && (mosamount != 0); + bool has_rotation = read_ioreg(REG_ADDR_BGxPC(layer)) != 0; + bool has_wrap = (bg_control >> 13) & 1; + + // Number of pixels to render + u32 cnt = end - start; + + // Eight specialized versions (wrap x rotation x mosaic) for faster rendering on specific cases like + // scaling only or non-wrapped backgrounds. + u32 fidx = (has_wrap ? 0x4 : 0) | (has_rotation ? 0x2 : 0) | (has_mosaic ? 
0x1 : 0); + + static const affine_render_function rdfns[8] = { + render_affine_background, + render_affine_background, + render_affine_background, + render_affine_background, + render_affine_background, + render_affine_background, + render_affine_background, + render_affine_background, + }; + + rdfns[fidx](layer, start, cnt, map_base, map_size, tile_base, dest_ptr, pal); +} + +template +static inline void bitmap_pixel_write(buftype *dst_ptr, pixfmt val, const u16 *palptr, u16 px_attr) +{ + if (mode != 4) + *dst_ptr = convert_palette(val); // Direct color, u16 bitmap + else if (val) { + if (rdmode == FULLCOLOR) + *dst_ptr = palptr[val]; + else if (rdmode == INDXCOLOR) + *dst_ptr = val | px_attr; // Add combine flags + else if (rdmode == STCKCOLOR) + *dst_ptr = val | px_attr | ((*dst_ptr) << 16); // Stack pixels + } +} + +typedef enum { + BLIT, // The bitmap has no scaling nor rotation on the X axis + SCALED, // The bitmap features some scaling (on the X axis) but no rotation + ROTATED // Bitmap has rotation (and perhaps scaling too) +} bm_rendmode; + +// Renders a bitmap honoring the pixel mode and any affine transformations. +// There are optimized versions for bitmaps without scaling / rotation. + +template // Whether mosaic effect is used. +static inline void render_scanline_bitmap(u32 start, u32 end, void *scanline, const u16 *palptr) +{ + s32 dx = (s16)read_ioreg(REG_ADDR_BG2PA); + s32 dy = (s16)read_ioreg(REG_ADDR_BG2PC); + s32 source_x = affine_reference_x[0] + (start * dx); // Always BG2 + s32 source_y = affine_reference_y[0] + (start * dy); + + // Premature abort render optimization if bitmap out of Y coordinate. + if ((rdmode != ROTATED) && ((u32)(source_y >> 8)) >= height) + return; + + // Modes 4 and 5 feature double buffering. + bool second_frame = (mode >= 4) && (read_ioreg(REG_ADDR_DISPCNT) & 0x10); + pixfmt *src_ptr = (pixfmt *)&VRAM[second_frame ? 
0xA000 : 0x0000]; + dsttype *dst_ptr = ((dsttype *)scanline) + start; + u16 px_attr = color_flags(2); // Always BG2 + + const u32 mosh = (mosaic ? (read_ioreg(REG_ADDR_MOSAIC)) & 0xF : 0) + 1; + + if (rdmode == BLIT) { + // We just blit pixels (copy) from buffer to buffer. + const u32 pixel_y = (u32)(source_y >> 8); + if (source_x < 0) { + // The bitmap starts somewhere after "start", skip those pixels. + u32 delta = (-source_x + 255) >> 8; + dst_ptr += delta; + start += delta; + source_x = 0; + } + + u32 pixel_x = (u32)(source_x >> 8); + u32 pixcnt = (start < end && pixel_x < width) ? MIN(end - start, width - pixel_x) : 0; // Nothing to copy if the span is empty or starts past the right edge (avoids unsigned wrap-around) + pixfmt *valptr = &src_ptr[pixel_x + (pixel_y * width)]; + pixfmt val = 0; + for (u32 i = 0; pixcnt; i++, pixcnt--, valptr++) { + // Pretty much pixel copier + if (!mosaic || !(i % mosh)) + val = sizeof(pixfmt) == 2 ? eswap16(*valptr) : *valptr; + bitmap_pixel_write(dst_ptr++, val, palptr, px_attr); + } + } else if (rdmode == SCALED) { + // Similarly to above, but now we need to sample pixels instead. + const u32 pixel_y = (u32)(source_y >> 8); + + // Find the "inside" of the bitmap + while (start < end) { + u32 pixel_x = (u32)(source_x >> 8); + if (pixel_x < width) + break; + source_x += dx; + start++; + dst_ptr++; + } + + u32 cnt = end - start; + pixfmt val = 0; + for (u32 i = 0; cnt; i++, cnt--) { + u32 pixel_x = (u32)(source_x >> 8); + if (pixel_x >= width) + break; // We reached the end of the bitmap + + if (!mosaic || !(i % mosh)) { + pixfmt *valptr = &src_ptr[pixel_x + (pixel_y * width)]; + val = sizeof(pixfmt) == 2 ? eswap16(*valptr) : *valptr; + } + + bitmap_pixel_write(dst_ptr++, val, palptr, px_attr); + source_x += dx; + } + } else { + // Look for the first pixel to be drawn. 
+ while (start < end) { + u32 pixel_x = (u32)(source_x >> 8), pixel_y = (u32)(source_y >> 8); + if (pixel_x < width && pixel_y < height) + break; + start++; + dst_ptr++; + source_x += dx; + source_y += dy; + } + + pixfmt val = 0; + for (u32 i = 0; start < end; start++, i++) { + u32 pixel_x = (u32)(source_x >> 8), pixel_y = (u32)(source_y >> 8); + + // Check if we run out of background pixels, stop drawing. + if (pixel_x >= width || pixel_y >= height) + break; + + // Lookup pixel and draw it (with mosaic only every mosh-th drawn pixel is sampled; i counts drawn pixels). + if (!mosaic || !(i % mosh)) { + pixfmt *valptr = &src_ptr[pixel_x + (pixel_y * width)]; + val = sizeof(pixfmt) == 2 ? eswap16(*valptr) : *valptr; + } + + bitmap_pixel_write(dst_ptr++, val, palptr, px_attr); + + // Move to the next pixel, update coords accordingly + source_x += dx; + source_y += dy; + } + } +} + +// Object/Sprite rendering logic + +static const u8 obj_dim_table[3][4][2] = { { { 8, 8 }, { 16, 16 }, { 32, 32 }, { 64, 64 } }, + { { 16, 8 }, { 32, 8 }, { 32, 16 }, { 64, 32 } }, + { { 8, 16 }, { 8, 32 }, { 16, 32 }, { 32, 64 } } }; + +static u8 obj_priority_list[5][DISPLAY_HEIGHT][128]; +static u8 obj_priority_count[5][DISPLAY_HEIGHT]; +static u8 obj_alpha_count[DISPLAY_HEIGHT]; + +typedef struct { + s32 obj_x, obj_y; + s32 obj_w, obj_h; + const OamData *oam_data; + bool is_double; +} t_sprite; + +// Renders part of a tile row (pixels [start, end) of its 8 pixels) for a regular (non-affine) object/sprite. +// tile_offset points to the VRAM offset where the data lives. +template +static inline void render_obj_part_tile_Nbpp(u32 px_comb, dsttype *dest_ptr, u32 start, u32 end, u32 tile_offset, u16 palette, + const u16 *pal) +{ + // Note that the last VRAM bank wrap around, hence the offset aliasing + const u8 *tile_ptr = OBJ_VRAM0 + (tile_offset & 0x7FFF); + u32 px_attr = px_comb | palette | 0x100; // Combine flags + high palette bit + + if (is8bpp) { + // Each byte is a color, mapped to a palette. 
+ for (u32 i = start; i < end; i++, dest_ptr++) { + // Honor hflip by selecting bytes in the correct order + u32 sel = hflip ? (7 - i) : i; + u8 pval = tile_ptr[sel]; + // Alpha mode stacks previous value + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = pal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_attr; // Add combine flags + else if (rdtype == STCKCOLOR) { + // Stack pixels on top of the pixel value and combine flags + // We do not stack OBJ on OBJ, rather overwrite the previous object + if (*dest_ptr & 0x100) + *dest_ptr = pval | px_attr | ((*dest_ptr) & 0xFFFF0000); + else + *dest_ptr = pval | px_attr | ((*dest_ptr) << 16); + } else if (rdtype == PIXCOPY) + *dest_ptr = dest_ptr[DISPLAY_WIDTH]; + } + } + } else { + // Only 32 bits (8 pixels * 4 bits) + for (u32 i = start; i < end; i++, dest_ptr++) { + u32 selb = hflip ? (3 - i / 2) : i / 2; + u32 seln = hflip ? ((i & 1) ^ 1) : (i & 1); + u8 pval = (tile_ptr[selb] >> (seln * 4)) & 0xF; + const u16 *subpal = &pal[palette]; + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = subpal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_attr; + else if (rdtype == STCKCOLOR) { + if (*dest_ptr & 0x100) + *dest_ptr = pval | px_attr | ((*dest_ptr) & 0xFFFF0000); + else + *dest_ptr = pval | px_attr | ((*dest_ptr) << 16); // Stack pixels + } else if (rdtype == PIXCOPY) + *dest_ptr = dest_ptr[DISPLAY_WIDTH]; + } + } + } +} + +// Same as render_obj_part_tile_Nbpp but optimized for full tiles +template +static inline void render_obj_tile_Nbpp(u32 px_comb, dsttype *dest_ptr, u32 tile_offset, u16 palette, const u16 *pal) +{ + const u8 *tile_ptr = &VRAM[0x10000 + (tile_offset & 0x7FFF)]; + u32 px_attr = px_comb | palette | 0x100; // Combine flags + high palette bit + + if (is8bpp) { + for (u32 j = 0; j < 2; j++) { + u32 tilepix = eswap32(((u32 *)tile_ptr)[hflip ? 1 - j : j]); + if (tilepix) { + for (u32 i = 0; i < 4; i++, dest_ptr++) { + u8 pval = hflip ? 
(tilepix >> (24 - i * 8)) : (tilepix >> (i * 8)); + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = pal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_attr; // Add combine flags + else if (rdtype == STCKCOLOR) { + if (*dest_ptr & 0x100) + *dest_ptr = pval | px_attr | ((*dest_ptr) & 0xFFFF0000); + else + *dest_ptr = pval | px_attr | ((*dest_ptr) << 16); + } else if (rdtype == PIXCOPY) + *dest_ptr = dest_ptr[DISPLAY_WIDTH]; + } + } + } else + dest_ptr += 4; + } + } else { + u32 tilepix = eswap32(*(u32 *)tile_ptr); + if (tilepix) { // Can skip all pixels if the row is just transparent + for (u32 i = 0; i < 8; i++, dest_ptr++) { + u8 pval = (hflip ? (tilepix >> ((7 - i) * 4)) : (tilepix >> (i * 4))) & 0xF; + const u16 *subpal = &pal[palette]; + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = subpal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_attr; + else if (rdtype == STCKCOLOR) { // Stack background, replace sprite + if (*dest_ptr & 0x100) + *dest_ptr = pval | px_attr | ((*dest_ptr) & 0xFFFF0000); + else + *dest_ptr = pval | px_attr | ((*dest_ptr) << 16); + } else if (rdtype == PIXCOPY) + *dest_ptr = dest_ptr[DISPLAY_WIDTH]; + } + } + } + } +} + +// Renders a regular sprite (non-affine) row to screen. +// delta_x is the object X coordinate referenced from the window start. +// cnt is the maximum number of pixels to draw, honoring window, obj width, etc. +template +static void render_object(s32 delta_x, u32 cnt, stype *dst_ptr, u32 tile_offset, u32 px_comb, u16 palette, const u16 *palptr) +{ + // Tile size in bytes for each mode + const u32 tile_bsize = is8bpp ? tile_size_8bpp : tile_size_4bpp; + // Number of bytes to advance (or rewind) on the tile map + const s32 tile_size_off = hflip ? -tile_bsize : tile_bsize; + + if (delta_x < 0) { // Left part is outside of the screen/window. + u32 offx = -delta_x; // How many pixels did we skip from the object? 
+ s32 block_off = offx / 8; + u32 tile_off = offx % 8; + + // Skip the first object tiles (skips in the flip direction) + tile_offset += block_off * tile_size_off; + + // Render a partial tile to the left + if (tile_off) { + u32 residual = 8 - tile_off; // Pixel count to complete the first tile + u32 maxpix = MIN(residual, cnt); + render_obj_part_tile_Nbpp(px_comb, dst_ptr, tile_off, tile_off + maxpix, tile_offset, palette, + palptr); + + // Move to the next tile + tile_offset += tile_size_off; + // Account for drawn pixels + cnt -= maxpix; + dst_ptr += maxpix; + } + } else { + // Render object completely from the left. Skip the empty space to the left + dst_ptr += delta_x; + } + + // Render full tiles to the scan line. + s32 num_tiles = cnt / 8; + while (num_tiles--) { + // Render full tiles + render_obj_tile_Nbpp(px_comb, dst_ptr, tile_offset, palette, palptr); + tile_offset += tile_size_off; + dst_ptr += 8; + } + + // Render any partial tile on the end + cnt = cnt % 8; + if (cnt) + render_obj_part_tile_Nbpp(px_comb, dst_ptr, 0, cnt, tile_offset, palette, palptr); +} + +// A slower version of render_object, that renders objects pixel by pixel. +// This allows proper mosaic effects whenever necessary. +template +static void render_object_mosaic(s32 delta_x, u32 cnt, stype *dst_ptr, u32 base_tile_offset, u32 mosh, u32 px_comb, u16 palette, + const u16 *pal) +{ + const u32 tile_bsize = is8bpp ? tile_size_8bpp : tile_size_4bpp; + const s32 tile_size_off = hflip ? -tile_bsize : tile_bsize; + + u32 offx = 0; + if (delta_x < 0) { // Left part is outside of the screen/window. + offx = -delta_x; // Number of skipped pixels + } else { + dst_ptr += delta_x; + } + + u32 px_attr = px_comb | palette | 0x100; // Combine flags + high palette bit + + u8 pval = 0; + for (u32 i = 0; i < cnt; i++, offx++, dst_ptr++) { + if (!(i % mosh)) { + // Load tile pixel color. 
+ u32 tile_offset = base_tile_offset + (offx / 8) * tile_size_off; + const u8 *tile_ptr = &VRAM[0x10000 + (tile_offset & 0x7FFF)]; + + // Lookup for each mode and flip value. + if (is8bpp) { + if (hflip) + pval = tile_ptr[7 - offx % 8]; + else + pval = tile_ptr[offx % 8]; + } else { + if (hflip) + pval = (tile_ptr[(7 - offx % 8) >> 1] >> (((offx & 1) ^ 1) * 4)) & 0xF; + else + pval = (tile_ptr[(offx % 8) >> 1] >> ((offx & 1) * 4)) & 0xF; + } + } + + // Write the pixel value as required + const u16 *subpal = &pal[palette]; + if (pval) { + if (rdtype == FULLCOLOR) + *dst_ptr = is8bpp ? pal[pval] : subpal[pval]; + else if (rdtype == INDXCOLOR) + *dst_ptr = pval | px_attr; // Add combine flags + else if (rdtype == STCKCOLOR) { + if (*dst_ptr & 0x100) + *dst_ptr = pval | px_attr | ((*dst_ptr) & 0xFFFF0000); + else + *dst_ptr = pval | px_attr | ((*dst_ptr) << 16); + } else if (rdtype == PIXCOPY) + *dst_ptr = dst_ptr[DISPLAY_WIDTH]; + } + } +} + +// Renders an affine sprite row to screen. +// They support 4bpp and 8bpp modes. 1D and 2D tile mapping modes. +// Their render area is limited to their size (and optionally double size) +template +static void render_affine_object(const t_sprite *obji, const t_affp *affp, bool is_double, u32 start, u32 end, stype *dst_ptr, u32 mosv, + u32 mosh, u32 base_tile, u32 pxcomb, u16 palette, const u16 *palptr) +{ + // Tile size in bytes for each mode + const u32 tile_bsize = is8bpp ? tile_size_8bpp : tile_size_4bpp; + const u32 tile_bwidth = is8bpp ? tile_width_8bpp : tile_width_4bpp; + + // Affine params + s32 dx = (s16)eswap16(affp->dx); + s32 dy = (s16)eswap16(affp->dy); + s32 dmx = (s16)eswap16(affp->dmx); + s32 dmy = (s16)eswap16(affp->dmy); + + // Object dimensions and boundaries + u32 obj_dimw = obji->obj_w; + u32 obj_dimh = obji->obj_h; + s32 middle_x = is_double ? obji->obj_w : (obji->obj_w / 2); + s32 middle_y = is_double ? obji->obj_h : (obji->obj_h / 2); + s32 obj_width = is_double ? 
obji->obj_w * 2 : obji->obj_w; + s32 obj_height = is_double ? obji->obj_h * 2 : obji->obj_h; + + s32 vcount = read_ioreg(REG_ADDR_VCOUNT); + if (mosaic) + vcount -= vcount % mosv; + s32 y_delta = vcount - (obji->obj_y + middle_y); + + if (obji->obj_x < (signed)start) + middle_x -= (start - obji->obj_x); + s32 source_x = (obj_dimw << 7) + (y_delta * dmx) - (middle_x * dx); + s32 source_y = (obj_dimh << 7) + (y_delta * dmy) - (middle_x * dy); + + // Early optimization if Y-coord is out completely for this line. + // (if there's no rotation Y coord remains identical throughout the line). + if (!rotate && ((u32)(source_y >> 8)) >= (u32)obj_height) + return; + + u32 d_start = MAX((signed)start, obji->obj_x); + u32 d_end = MIN((signed)end, obji->obj_x + obj_width); + u32 cnt = ((s32)d_end > (s32)d_start) ? d_end - d_start : 0; // Empty span (object fully outside [start, end)) draws nothing instead of wrapping around + dst_ptr += d_start; + + bool obj1dmap = read_ioreg(REG_ADDR_DISPCNT) & 0x40; + const u32 tile_pitch = obj1dmap ? (obj_dimw / 8) * tile_bsize : 1024; + u32 px_attr = pxcomb | palette | 0x100; // Combine flags + high palette bit + + // Skip pixels outside of the sprite area, until we reach the sprite "inside" + while (cnt) { + u32 pixel_x = (u32)(source_x >> 8), pixel_y = (u32)(source_y >> 8); + + // Stop once we find a pixel that is actually *inside* the map. + if (pixel_x < obj_dimw && pixel_y < obj_dimh) + break; + + dst_ptr++; + source_x += dx; + if (rotate) + source_y += dy; + cnt--; + } + + // Draw sprite pixels by looking them up first. Lookup address is tricky! + u8 pixval = 0; + for (u32 i = 0; i < cnt; i++) { + u32 pixel_x = (u32)(source_x >> 8), pixel_y = (u32)(source_y >> 8); + + // Check if we run out of the sprite, then we can safely abort. + if (pixel_x >= obj_dimw || pixel_y >= obj_dimh) + return; + + // For mosaic, we "remember" the last looked up pixel. + if (!mosaic || !(i % mosh)) { + // Lookup pixel and draw it. 
+ if (is8bpp) { + // We lookup the byte directly and render it. 
+ const u32 tile_off = base_tile + // Character base + ((pixel_y >> 3) * tile_pitch) + // Skip vertical blocks + ((pixel_x >> 3) * tile_bsize) + // Skip horizontal blocks + ((pixel_y & 0x7) * tile_bwidth) + // Skip vertical rows to the pixel + (pixel_x & 0x7); // Skip the horizontal offset + + pixval = *(OBJ_VRAM0 + (tile_off & 0x7FFF)); // Read pixel value! + } else { + const u32 tile_off = base_tile + // Character base + ((pixel_y >> 3) * tile_pitch) + // Skip vertical blocks + ((pixel_x >> 3) * tile_bsize) + // Skip horizontal blocks + ((pixel_y & 0x7) * tile_bwidth) + // Skip vertical rows to the pixel + ((pixel_x >> 1) & 0x3); // Skip the horizontal offset + + u8 pixpair = *(OBJ_VRAM0 + (tile_off & 0x7FFF)); // Read 2 pixels @4bpp + pixval = ((pixel_x & 1) ? pixpair >> 4 : pixpair & 0xF); + } + } + + // Render the pixel value + if (pixval) { + if (rdtype == FULLCOLOR) + *dst_ptr = palptr[pixval | palette]; + else if (rdtype == INDXCOLOR) + *dst_ptr = pixval | px_attr; // Add combine flags + else if (rdtype == STCKCOLOR) { + // Stack pixels on top of the pixel value and combine flags + if (*dst_ptr & 0x100) + *dst_ptr = pixval | px_attr | ((*dst_ptr) & 0xFFFF0000); + else + *dst_ptr = pixval | px_attr | ((*dst_ptr) << 16); // Stack pixels + } else if (rdtype == PIXCOPY) + *dst_ptr = dst_ptr[DISPLAY_WIDTH]; + } + + // Move to the next pixel, update coords accordingly + dst_ptr++; + source_x += dx; + if (rotate) + source_y += dy; + } +} + +// Renders a single sprite on the current scanline. +// This function calls the affine or regular renderer depending on the sprite. +// Will calculate whether sprite has certain effects (flip, rotation ...) to +// use an optimized renderer function. 
+template +inline static void render_sprite(const t_sprite *obji, bool is_affine, u32 start, u32 end, stype *scanline, u32 pxcomb, const u16 *palptr) +{ + s32 vcount = read_ioreg(REG_ADDR_VCOUNT); + bool obj1dmap = read_ioreg(REG_ADDR_DISPCNT) & 0x40; + u32 tile = obji->oam_data->split.tileNum; + if (is8bpp && !obj1dmap) { + tile &= ~1; + } + u32 base_tile = tile * 32; + + const u32 mosv = (mosaic ? (read_ioreg(REG_ADDR_MOSAIC) >> 12) & 0xF : 0) + 1; + const u32 mosh = (mosaic ? (read_ioreg(REG_ADDR_MOSAIC) >> 8) & 0xF : 0) + 1; + + // Render the object scanline using the correct mode. + // (in 4bpp mode calculate the palette number) + // Objects use the higher palette part + u16 pal = (is8bpp ? 0 : (obji->oam_data->split.paletteNum << 4)); + + if (is_affine) { + u32 pnum = obji->oam_data->split.matrixNum; + const t_affp *affp_base = (t_affp *)OAM; + const t_affp *affp = &affp_base[pnum]; + + if (affp->dy == 0) // No rotation happening (just scale) + render_affine_object(obji, affp, obji->is_double, start, end, scanline, mosv, mosh, + base_tile, pxcomb, pal, palptr); + else // Full rotation and scaling + render_affine_object(obji, affp, obji->is_double, start, end, scanline, mosv, mosh, + base_tile, pxcomb, pal, palptr); + } else { + // The object could be out of the window, check and skip. + if (obji->obj_x >= (signed)end || obji->obj_x + obji->obj_w <= (signed)start) + return; + + // Non-affine objects can be flipped on both edges. + bool hflip = (obji->oam_data->split.matrixNum >> 3) & 1; + bool vflip = (obji->oam_data->split.matrixNum >> 4) & 1; + + // Calculate the vertical offset (row) to be displayed. Account for vflip. + u32 voffset = vflip ? obji->obj_y + obji->obj_h - vcount - 1 : vcount - obji->obj_y; + if (mosaic) + voffset -= voffset % mosv; + + // Calculate base tile for the object (points to the row to be drawn). + u32 tile_bsize = is8bpp ? tile_size_8bpp : tile_size_4bpp; + u32 tile_bwidth = is8bpp ?
tile_width_8bpp : tile_width_4bpp; + u32 obj_pitch = obj1dmap ? (obji->obj_w / 8) * tile_bsize : 1024; + u32 hflip_off = hflip ? ((obji->obj_w / 8) - 1) * tile_bsize : 0; + + // Calculate the pointer to the tile. + const u32 tile_offset = base_tile + // Char offset + (voffset / 8) * obj_pitch + // Select tile row offset + (voffset % 8) * tile_bwidth + // Skip tile rows + hflip_off; // Account for horizontal flip + + // Make everything relative to start + s32 obj_x_offset = obji->obj_x - start; + u32 clipped_width = obj_x_offset >= 0 ? obji->obj_w : obji->obj_w + obj_x_offset; + u32 max_range = obj_x_offset >= 0 ? end - obji->obj_x : end - start; + u32 max_draw = MIN(max_range, clipped_width); + + if (mosaic && mosh > 1) { + if (hflip) + render_object_mosaic(obj_x_offset, max_draw, &scanline[start], tile_offset, mosh, pxcomb, pal, + palptr); + else + render_object_mosaic(obj_x_offset, max_draw, &scanline[start], tile_offset, mosh, pxcomb, pal, + palptr); + } else { + if (hflip) + render_object(obj_x_offset, max_draw, &scanline[start], tile_offset, pxcomb, pal, palptr); + else + render_object(obj_x_offset, max_draw, &scanline[start], tile_offset, pxcomb, pal, palptr); + } + } +} + +// Renders objects on a scanline for a given priority. +// This function assumes that order_obj has been called to prepare the objects. 
+template void render_scanline_objs(u32 priority, u32 start, u32 end, void *raw_ptr, const u16 *palptr) +{ + stype *scanline = (stype *)raw_ptr; + s32 vcount = read_ioreg(REG_ADDR_VCOUNT); + s32 objn; + u32 objcnt = obj_priority_count[priority][vcount]; + u8 *objlist = obj_priority_list[priority][vcount]; + + // Render all the visible objects for this priority (back to front) + for (objn = objcnt - 1; objn >= 0; objn--) { + // Objects in the list are pre-filtered and sorted in the appropriate order + u32 objoff = objlist[objn]; + const OamData *oam_data = (OamData *)&OAM[objoff * OAM_DATA_SIZE_AFFINE]; + + u16 obj_shape = oam_data->split.shape; + u16 obj_size = oam_data->split.size; + bool is_affine = oam_data->split.affineMode & 1; + bool is_trans = oam_data->split.objMode == OBJ_MOD_SEMITRAN; + s32 obj_x = oam_data->split.x; + s32 obj_y = oam_data->split.y; +#if !EXTENDED_OAM + if (obj_x > DISPLAY_WIDTH) + obj_x -= 512; +#endif + + t_sprite obji = { + .obj_x = obj_x, + .obj_y = obj_y, + .obj_w = obj_dim_table[obj_shape][obj_size][0], + .obj_h = obj_dim_table[obj_shape][obj_size][1], + .oam_data = oam_data, + .is_double = !!((oam_data->split.affineMode >> 1) & 1), + }; + + s32 obj_maxw = (is_affine && obji.is_double) ? obji.obj_w * 2 : obji.obj_w; + + // The object could be out of the window, check and skip. + if (obji.obj_x >= (signed)end || obji.obj_x + obj_maxw <= (signed)start) + continue; + + // ST-OBJs force 1st target bit (forced blending) + bool forcebld = is_trans && rdtype != FULLCOLOR; +#if !EXTENDED_OAM + if (obji.obj_y > DISPLAY_HEIGHT) + obji.obj_y -= 256; +#endif + // In PIXCOPY mode, we have already some stuff rendered (winout) and now + // we render the "win-in" area for this object. The PIXCOPY function will + // copy (merge) the two pixels depending on the result of the sprite render + // The temporary buffer is rendered on the next scanline area. 
+ if (rdtype == PIXCOPY) { + u32 sec_start = MAX((signed)start, obji.obj_x); + u32 sec_end = MIN((signed)end, obji.obj_x + obj_maxw); + u32 obj_enable = read_ioreg(REG_ADDR_WINOUT) >> 8; + + // Render at the next scanline! + u16 *tmp_ptr = (u16 *)&scanline[GBA_SCREEN_PITCH]; + render_scanline_conditional(sec_start, sec_end, tmp_ptr, obj_enable); + } + + // Calculate combine masks. These store 2 bits of info: 1st and 2nd target. + // If set, the current pixel belongs to a layer that is 1st or 2nd target. + // For ST-objs, we set an extra bit, for later blending. + u32 pxcomb = (forcebld ? 0x800 : 0) | color_flags(4); + + bool emosaic = oam_data->split.mosaic; + bool is_8bpp = oam_data->split.bpp; + + // Some games enable mosaic but set it to size 0 (1), so ignore. + const u32 mosreg = read_ioreg(REG_ADDR_MOSAIC) & 0xFF00; + + if (emosaic && mosreg) { + if (is_8bpp) + render_sprite(&obji, is_affine, start, end, scanline, pxcomb, palptr); + else + render_sprite(&obji, is_affine, start, end, scanline, pxcomb, palptr); + } else { + if (is_8bpp) + render_sprite(&obji, is_affine, start, end, scanline, pxcomb, palptr); + else + render_sprite(&obji, is_affine, start, end, scanline, pxcomb, palptr); + } + } +} + +int sprite_limit = 0; + +// Goes through the object list in the OAM (from #0 to #127) and adds objects +// into a per-priority list for every row they cover. +// Invisible objects are discarded. ST-objects are flagged. Cycle counting is +// performed to discard excessive objects (to match HW capabilities). +static void order_obj(u32 video_mode) +{ + u32 obj_num; + u32 row; + u16 rend_cycles[DISPLAY_HEIGHT]; + + bool hblank_free = read_ioreg(REG_ADDR_DISPCNT) & 0x20; + u16 max_rend_cycles = !sprite_limit ? REND_CYC_MAX : hblank_free ?
REND_CYC_REDUCED : REND_CYC_SCANLINE; + + memset(obj_priority_count, 0, sizeof(obj_priority_count)); + memset(obj_alpha_count, 0, sizeof(obj_alpha_count)); + memset(rend_cycles, 0, sizeof(rend_cycles)); + + for (obj_num = 0; obj_num < 128; obj_num++) { + const OamData *oam_data = (OamData *)&OAM[obj_num * OAM_DATA_SIZE_AFFINE]; + + // Bit 9 disables regular sprites (that is, non-affine ones). + if (oam_data->split.affineMode == 2) + continue; + + u16 obj_shape = oam_data->split.shape; + u32 obj_mode = oam_data->split.objMode; + + // Prohibited shape and mode + if ((obj_shape == 0x3) || (obj_mode == OBJ_MOD_INVALID)) + continue; + + // On bitmap modes, OBJ tiles 0-511 are not usable, ignore sprites using them. + if ((video_mode >= 3) && (!(oam_data->split.tileNum & 0x200))) + continue; + + // Calculate object size (from size and shape attr bits) + u16 obj_size = oam_data->split.size; + s32 obj_height = obj_dim_table[obj_shape][obj_size][1]; + s32 obj_width = obj_dim_table[obj_shape][obj_size][0]; + s32 obj_y = oam_data->split.y; + +#if !EXTENDED_OAM + if (obj_y > DISPLAY_HEIGHT) + obj_y -= 256; +#endif + // Double size for affine sprites with double bit set + if ((oam_data->split.affineMode >> 1) & 1) { + obj_height *= 2; + obj_width *= 2; + } + + if (((obj_y + obj_height) > 0) && (obj_y < DISPLAY_HEIGHT)) { + s32 obj_x = oam_data->split.x; + +#if !EXTENDED_OAM + if (obj_x > DISPLAY_WIDTH) + obj_x -= 512; +#endif + if (((obj_x + obj_width) > 0) && (obj_x < DISPLAY_WIDTH)) { + u32 obj_priority = oam_data->split.priority; + bool is_affine = oam_data->split.affineMode & 1; + // Clip Y coord and height to the 0..159 interval + u32 starty = MAX(obj_y, 0); + u32 endy = MIN(obj_y + obj_height, DISPLAY_HEIGHT); + + // Calculate needed cycles to render the sprite + u16 cyccnt = is_affine ?
(10 + obj_width * 2) : obj_width; + + switch (obj_mode) { + case OBJ_MOD_SEMITRAN: + for (row = starty; row < endy; row++) { + if (rend_cycles[row] < max_rend_cycles) { + u32 cur_cnt = obj_priority_count[obj_priority][row]; + obj_priority_list[obj_priority][row][cur_cnt] = obj_num; + obj_priority_count[obj_priority][row] = cur_cnt + 1; + rend_cycles[row] += cyccnt; + // Mark the row as having semi-transparent objects + obj_alpha_count[row] = 1; + } + } + break; + case OBJ_MOD_WINDOW: + obj_priority = 4; + /* fallthrough */ + case OBJ_MOD_NORMAL: + // Add the object to the list. + for (row = starty; row < endy; row++) { + if (rend_cycles[row] < max_rend_cycles) { + u32 cur_cnt = obj_priority_count[obj_priority][row]; + obj_priority_list[obj_priority][row][cur_cnt] = obj_num; + obj_priority_count[obj_priority][row] = cur_cnt + 1; + rend_cycles[row] += cyccnt; + } + } + break; + }; + } + } + } +} + +u32 layer_order[16]; +u32 layer_count; + +// Sorts active BG/OBJ layers and generates an ordered list of layers. +// Things are drawn back to front, so lowest priority goes first. 
+static void order_layers(u32 layer_flags, u32 vcnt) +{ + bool obj_enabled = (layer_flags & 0x10); + s32 priority; + + layer_count = 0; + + for (priority = 3; priority >= 0; priority--) { + bool anyobj = obj_priority_count[priority][vcnt] > 0; + s32 lnum; + + for (lnum = 3; lnum >= 0; lnum--) { + if (((layer_flags >> lnum) & 1) && ((read_ioreg(REG_ADDR_BGxCNT(lnum)) & 0x03) == priority)) { + layer_order[layer_count++] = lnum; + } + } + + if (obj_enabled && anyobj) + layer_order[layer_count++] = priority | 0x04; + } +} + +// Blending is performed by separating an RGB value into 0G0R0B (32 bit) +// Since blending factors are at most 16, mult/add operations do not overflow +// to the neighbouring color and can be performed much faster than separately + +// Here follow the mask used to separate/expand the color to 32 bit (BLND_MSK), +// the masks to detect overflows in the blend operation (OVFx_MSK) and the per-channel masks (SATx_MSK). + +#define BLND_MSK (SATR_MSK | SATG_MSK | SATB_MSK) + +#define OVFG_MSK 0x04000000 +#define OVFR_MSK 0x00008000 +#define OVFB_MSK 0x00000020 +#define SATG_MSK 0x03E00000 +#define SATR_MSK 0x00007C00 +#define SATB_MSK 0x0000001F + +typedef enum { + OBJ_BLEND, // No effects, just blend forced-blend pixels (ie. ST objects) + BLEND_ONLY, // Just alpha blending (if the pixels are 1st and 2nd target) + BLEND_BRIGHT, // Perform alpha blending if appropriate, and brighten otherwise + BLEND_DARK, // Same but with darken effect +} blendtype; + +// Applies blending (and optional brighten/darken) effect to a bunch of +// color-indexed pixel pairs. Depending on the mode and the pixel target +// number, blending, darken/brighten or no effect will be applied.
+// Bits 0-8 encode the color index (paletted colors) +// Bit 9 is set if the pixel belongs to a 1st target layer +// Bit 10 is set if the pixel belongs to a 2nd target layer +// Bit 11 is set if the pixel belongs to a ST-object +template static void merge_blend(u32 start, u32 end, u16 *dst, u32 *src) +{ + u32 bldalpha = read_ioreg(REG_ADDR_BLDALPHA); + u32 brightf = MIN(16, read_ioreg(REG_ADDR_BLDY) & 0x1F); + u32 blend_a = MIN(16, (bldalpha >> 0) & 0x1F); + u32 blend_b = MIN(16, (bldalpha >> 8) & 0x1F); + + bool can_saturate = blend_a + blend_b > 16; + + if (can_saturate) { + // If blending can result in saturation, we need to clamp output values. + while (start < end) { + u32 pixpair = src[start]; + // If ST-OBJ, force blending mode (has priority over other effects). + // If regular blending mode, blend if 1st/2nd bits are set respectively. + // Otherwise, apply other color effects if 1st bit is set. + bool force_blend = (pixpair & 0x04000800) == 0x04000800; + bool do_blend = (pixpair & 0x04000200) == 0x04000200; + if ((st_objs && force_blend) || (do_blend && bldtype == BLEND_ONLY)) { + // Top pixel is 1st target, pixel below is 2nd target. Blend! + u16 p1 = PLTT[(pixpair >> 0) & 0x1FF]; + u16 p2 = PLTT[(pixpair >> 16) & 0x1FF]; + u32 p1e = (p1 | (p1 << 16)) & BLND_MSK; + u32 p2e = (p2 | (p2 << 16)) & BLND_MSK; + u32 pfe = (((p1e * blend_a) + (p2e * blend_b)) >> 4); + + // If the overflow bit is set, saturate (set) all bits to one. + if (pfe & (OVFR_MSK | OVFG_MSK | OVFB_MSK)) { + if (pfe & OVFG_MSK) + pfe |= SATG_MSK; + if (pfe & OVFR_MSK) + pfe |= SATR_MSK; + if (pfe & OVFB_MSK) + pfe |= SATB_MSK; + } + pfe &= BLND_MSK; + dst[start++] = (pfe >> 16) | pfe; + } else if ((bldtype == BLEND_DARK || bldtype == BLEND_BRIGHT) && (pixpair & 0x200) == 0x200) { + // Top pixel is 1st-target, can still apply bright/dark effect. + u16 pidx = PLTT[pixpair & 0x1FF]; + u32 epixel = (pidx | (pidx << 16)) & BLND_MSK; + u32 pa = bldtype == BLEND_DARK ? 
0 : ((BLND_MSK * brightf) >> 4) & BLND_MSK; + u32 pb = ((epixel * (16 - brightf)) >> 4) & BLND_MSK; + epixel = (pa + pb) & BLND_MSK; + dst[start++] = (epixel >> 16) | epixel; + } else { + dst[start++] = PLTT[pixpair & 0x1FF]; // No effects + } + } + } else { + while (start < end) { + u32 pixpair = src[start]; + bool do_blend = (pixpair & 0x04000200) == 0x04000200; + bool force_blend = (pixpair & 0x04000800) == 0x04000800; + if ((st_objs && force_blend) || (do_blend && bldtype == BLEND_ONLY)) { + // Top pixel is 1st target, pixel below is 2nd target. Blend! + u16 p1 = PLTT[(pixpair >> 0) & 0x1FF]; + u16 p2 = PLTT[(pixpair >> 16) & 0x1FF]; + u32 p1e = (p1 | (p1 << 16)) & BLND_MSK; + u32 p2e = (p2 | (p2 << 16)) & BLND_MSK; + u32 pfe = (((p1e * blend_a) + (p2e * blend_b)) >> 4) & BLND_MSK; + dst[start++] = (pfe >> 16) | pfe; + } else if ((bldtype == BLEND_DARK || bldtype == BLEND_BRIGHT) && (pixpair & 0x200) == 0x200) { + // Top pixel is 1st-target, can still apply bright/dark effect. + u16 pidx = PLTT[pixpair & 0x1FF]; + u32 epixel = (pidx | (pidx << 16)) & BLND_MSK; + u32 pa = bldtype == BLEND_DARK ? 0 : ((BLND_MSK * brightf) >> 4) & BLND_MSK; + u32 pb = ((epixel * (16 - brightf)) >> 4) & BLND_MSK; + epixel = (pa + pb) & BLND_MSK; + dst[start++] = (epixel >> 16) | epixel; + } else { + dst[start++] = PLTT[pixpair & 0x1FF]; // No effects + } + } + } +} + +// Applies brighten/darken effect to a bunch of color-indexed pixels. +template static void merge_brightness(u32 start, u32 end, u16 *srcdst) +{ + u32 brightness = MIN(16, read_ioreg(REG_ADDR_BLDY) & 0x1F); + + while (start < end) { + u16 spix = srcdst[start]; + u16 pixcol = PLTT[spix & 0x1FF]; + + if ((spix & 0x200) == 0x200) { + // Pixel is 1st target, can apply color effect. + u32 epixel = (pixcol | (pixcol << 16)) & BLND_MSK; + u32 pa = bldtype == BLEND_DARK ? 
0 : ((BLND_MSK * brightness) >> 4) & BLND_MSK; // B/W + u32 pb = ((epixel * (16 - brightness)) >> 4) & BLND_MSK; // Pixel color + epixel = (pa + pb) & BLND_MSK; + pixcol = (epixel >> 16) | epixel; + } + + srcdst[start++] = pixcol; + } +} + +// Fills a segment using the backdrop color (in the right mode). +template void fill_line_background(u32 start, u32 end, dsttype *scanline) +{ + dsttype bgcol = PLTT[0]; + u16 bg_comb = color_flags(5); + while (start < end) + if (rdmode == FULLCOLOR) + scanline[start++] = bgcol; + else + scanline[start++] = 0 | bg_comb; +} + +// Renders the backdrop color (ie. whenever no layer is active) applying +// any effects that might still apply (usually darken/brighten). +static void render_backdrop(u32 start, u32 end, u16 *scanline) +{ + u16 bldcnt = read_ioreg(REG_ADDR_BLDCNT); + u16 pixcol = PLTT[0]; + u32 effect = (bldcnt >> 6) & 0x03; + u32 bd_1st_target = ((bldcnt >> 0x5) & 0x01); + + if (bd_1st_target && effect == COL_EFFECT_BRIGHT) { + u32 brightness = MIN(16, read_ioreg(REG_ADDR_BLDY) & 0x1F); + + // Unpack 16 bit pixel for fast blending operation + u32 epixel = (pixcol | (pixcol << 16)) & BLND_MSK; + u32 pa = ((BLND_MSK * brightness) >> 4) & BLND_MSK; // White color + u32 pb = ((epixel * (16 - brightness)) >> 4) & BLND_MSK; // Pixel color + epixel = (pa + pb) & BLND_MSK; + pixcol = (epixel >> 16) | epixel; + } else if (bd_1st_target && effect == COL_EFFECT_DARK) { + u32 brightness = MIN(16, read_ioreg(REG_ADDR_BLDY) & 0x1F); + u32 epixel = (pixcol | (pixcol << 16)) & BLND_MSK; + epixel = ((epixel * (16 - brightness)) >> 4) & BLND_MSK; // Pixel color + pixcol = (epixel >> 16) | epixel; + } + + // Fill the line with that color + while (start < end) + scanline[start++] = pixcol; +} + +// Renders all the available and enabled layers (in tiled mode). +// Walks the list of layers in visibility order and renders them in the +// specified mode (taking into consideration the first layer, etc). 
+template +void tile_render_layers(u32 start, u32 end, dsttype *dst_ptr, u32 enabled_layers) +{ + u32 lnum; + u32 base_done = 0; + u16 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u16 video_mode = dispcnt & 0x07; + bool obj_enabled = (enabled_layers & 0x10); // Objects are visible + + bool objlayer_is_1st_tgt = ((read_ioreg(REG_ADDR_BLDCNT) >> 4) & 1) != 0; + bool has_trans_obj = obj_alpha_count[read_ioreg(REG_ADDR_VCOUNT)]; + + for (lnum = 0; lnum < layer_count; lnum++) { + u32 layer = layer_order[lnum]; + bool is_obj = layer & 0x4; + if (is_obj && obj_enabled) { + bool can_skip_blend = !has_trans_obj && !objlayer_is_1st_tgt; + + // If it's the first layer, make sure to fill with backdrop color. + if (!base_done) + fill_line_background(start, end, dst_ptr); + + // Optimization: skip blending mode if no blending can happen to this layer + if (objmode == STCKCOLOR && can_skip_blend) + render_scanline_objs(layer & 0x3, start, end, dst_ptr, &PLTT[0x100]); + else + render_scanline_objs(layer & 0x3, start, end, dst_ptr, &PLTT[0x100]); + + base_done = 1; + } else if (!is_obj && ((1 << layer) & enabled_layers)) { + bool layer_is_1st_tgt = ((read_ioreg(REG_ADDR_BLDCNT) >> layer) & 1) != 0; + bool can_skip_blend = !has_trans_obj && !layer_is_1st_tgt; + + bool is_affine = (video_mode >= 1) && (layer >= 2); + u32 fnidx = (base_done) | (is_affine ? 2 : 0); + + // Can optimize rendering if no blending can really happen. 
+ // If stack mode, no blending and not base layer, we might speed up a bit + if (bgmode == STCKCOLOR && can_skip_blend) { + static const tile_render_function rdfns[4] = { + render_scanline_text, + render_scanline_text, + render_scanline_affine, + render_scanline_affine, + }; + rdfns[fnidx](layer, start, end, dst_ptr, PLTT); + } else { + static const tile_render_function rdfns[4] = { + render_scanline_text, + render_scanline_text, + render_scanline_affine, + render_scanline_affine, + }; + rdfns[fnidx](layer, start, end, dst_ptr, PLTT); + } + + base_done = 1; + } + } + + // Render background if we did not render any active layer. + if (!base_done) + fill_line_background(start, end, dst_ptr); +} + +// Renders all layers honoring color effects (blending, brighten/darken). +// It uses different rendering routines depending on the coloring effect +// requirements, speeding up common cases where no effects are used. + +// No effects use NORMAL mode (RBB565 color is written on the buffer). +// For blending, we use BLEND mode to record the two top-most pixels. +// For other effects we use COLOR16, which records an indexed color in the +// buffer (used for darken/brighten effects at later passes) or COLOR32, +// which similarly uses an indexed color for rendering but recording one +// color for the background and another one for the object layer. + +static void render_w_effects(u32 start, u32 end, u16 *scanline, u32 enable_flags, const layer_render_struct *renderers) +{ + bool effects_enabled = enable_flags & 0x20; // Window bit for effects. + bool obj_blend = obj_alpha_count[read_ioreg(REG_ADDR_VCOUNT)] > 0; + u16 bldcnt = read_ioreg(REG_ADDR_BLDCNT); + + // If the window bits disable effects, default to NONE + u32 effect_type = effects_enabled ? ((bldcnt >> 6) & 0x03) : COL_EFFECT_NONE; + + switch (effect_type) { + case COL_EFFECT_BRIGHT: { + // If no layers are 1st target, no effect will really happen. 
+ bool some_1st_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x3F) != 0; + // If the factor is zero, it's the same as "regular" rendering. + bool non_zero_blend = (read_ioreg(REG_ADDR_BLDY) & 0x1F) != 0; + if (some_1st_tgt && non_zero_blend) { + if (obj_blend) { + u32 tmp_buf[DISPLAY_WIDTH]; + renderers->indexed_u32(start, end, tmp_buf, enable_flags); + merge_blend(start, end, scanline, tmp_buf); + } else { + renderers->indexed_u16(start, end, scanline, enable_flags); + merge_brightness(start, end, scanline); + } + return; + } + } break; + + case COL_EFFECT_DARK: { + // If no layers are 1st target, no effect will really happen. + bool some_1st_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x3F) != 0; + // If the factor is zero, it's the same as "regular" rendering. + bool non_zero_blend = (read_ioreg(REG_ADDR_BLDY) & 0x1F) != 0; + if (some_1st_tgt && non_zero_blend) { + if (obj_blend) { + u32 tmp_buf[DISPLAY_WIDTH]; + renderers->indexed_u32(start, end, tmp_buf, enable_flags); + merge_blend(start, end, scanline, tmp_buf); + } else { + renderers->indexed_u16(start, end, scanline, enable_flags); + merge_brightness(start, end, scanline); + } + return; + } + } break; + + case COL_EFFECT_BLEND: { + // If no layers are 1st or 2nd target, no effect will really happen. + bool some_1st_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x003F) != 0; + bool some_2nd_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x3F00) != 0; + // If 1st target is 100% opacity and 2nd is 0%, just render regularly. + bool non_trns_tgt = (read_ioreg(REG_ADDR_BLDALPHA) & 0x1F1F) != 0x001F; + if (some_1st_tgt && some_2nd_tgt && non_trns_tgt) { + u32 tmp_buf[DISPLAY_WIDTH]; + renderers->stacked(start, end, tmp_buf, enable_flags); + if (obj_blend) + merge_blend(start, end, scanline, tmp_buf); + else + merge_blend(start, end, scanline, tmp_buf); + return; + } + } break; + + case COL_EFFECT_NONE: + // Default case, see below. + break; + }; + + // Default rendering mode, without layer effects (except perhaps sprites). 
+ if (obj_blend) { + u32 tmp_buf[DISPLAY_WIDTH]; + renderers->stacked(start, end, tmp_buf, enable_flags); + merge_blend(start, end, scanline, tmp_buf); + } else { + renderers->fullcolor(start, end, scanline, enable_flags); + } +} + +#define bitmap_layer_render_functions(rdmode, dsttype, mode, ttype, w, h) \ + { \ + { \ + render_scanline_bitmap, \ + render_scanline_bitmap, \ + render_scanline_bitmap, \ + }, \ + { \ + render_scanline_bitmap, \ + render_scanline_bitmap, \ + render_scanline_bitmap, \ + } \ + } + +static const bitmap_layer_render_struct idx32_bmrend[3][2] + = { bitmap_layer_render_functions(INDXCOLOR, u32, 3, u16, DISPLAY_WIDTH, DISPLAY_HEIGHT), + bitmap_layer_render_functions(INDXCOLOR, u32, 4, u8, DISPLAY_WIDTH, DISPLAY_HEIGHT), + bitmap_layer_render_functions(INDXCOLOR, u32, 5, u16, DISPLAY_HEIGHT, 128) }; + +// Render the BG and OBJ in a bitmap scanline from start to end ONLY if +// enable_flag allows that layer/OBJ. + +template +static void bitmap_render_layers(u32 start, u32 end, dsttype *scanline, u32 enable_flags) +{ + u16 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + bool has_trans_obj = obj_alpha_count[read_ioreg(REG_ADDR_VCOUNT)]; + bool objlayer_is_1st_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x10) != 0; + bool bg2_is_1st_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x4) != 0; + + // Fill in the renderers for a layer based on the mode type, + static const bitmap_layer_render_struct renderers[3][2] + = { bitmap_layer_render_functions(bgmode, dsttype, 3, u16, DISPLAY_WIDTH, DISPLAY_HEIGHT), + bitmap_layer_render_functions(bgmode, dsttype, 4, u8, DISPLAY_WIDTH, DISPLAY_HEIGHT), + bitmap_layer_render_functions(bgmode, dsttype, 5, u16, DISPLAY_HEIGHT, 128) }; + + const u32 mosamount = read_ioreg(REG_ADDR_MOSAIC) & 0xFF; + u32 bg_control = read_ioreg(REG_ADDR_BG2CNT); + u32 mmode = ((bg_control & 0x40) && (mosamount != 0)) ? 
1 : 0; + + unsigned modeidx = (dispcnt & 0x07) - 3; + const bitmap_layer_render_struct *mode_rend = &renderers[modeidx][mmode]; + const bitmap_layer_render_struct *idxm_rend = &idx32_bmrend[modeidx][mmode]; + + u32 current_layer; + u32 layer_order_pos; + + fill_line_background(start, end, scanline); + + for (layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++) { + current_layer = layer_order[layer_order_pos]; + if (current_layer & 0x04) { + if (enable_flags & 0x10) { + bool can_skip_blend = !has_trans_obj && !objlayer_is_1st_tgt; + + // Optimization: skip blending mode if no blending can happen to this layer + if (objmode == STCKCOLOR && can_skip_blend) + render_scanline_objs(current_layer & 3, start, end, scanline, &PLTT[0x100]); + else + render_scanline_objs(current_layer & 3, start, end, scanline, &PLTT[0x100]); + } + } else { + if (enable_flags & 0x04) { + s32 dx = (s16)read_ioreg(REG_ADDR_BG2PA); + s32 dy = (s16)read_ioreg(REG_ADDR_BG2PC); + + // Optimization: Skip stack mode if there's no blending happening. + bool can_skip_blend = !has_trans_obj && !bg2_is_1st_tgt; + const bitmap_layer_render_struct *rd = (bgmode == STCKCOLOR && can_skip_blend) ? idxm_rend : mode_rend; + + if (dy) + rd->affine_render(start, end, scanline, PLTT); + else if (dx == 256) + rd->blit_render(start, end, scanline, PLTT); + else + rd->scale_render(start, end, scanline, PLTT); + } + } + } +} + +static const layer_render_struct tile_mode_renderers = { + .fullcolor = tile_render_layers, + .indexed_u16 = tile_render_layers, + .indexed_u32 = tile_render_layers, + .stacked = tile_render_layers, +}; + +static const layer_render_struct bitmap_mode_renderers = { + .fullcolor = bitmap_render_layers, + .indexed_u16 = bitmap_render_layers, + .indexed_u32 = bitmap_render_layers, + .stacked = bitmap_render_layers, +}; + +// Renders a full scanline, given an enable_flags mask (for which layers and +// effects are enabled). 
+static void render_scanline_conditional(u32 start, u32 end, u16 *scanline, u32 enable_flags) +{ + u16 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u32 video_mode = dispcnt & 0x07; + + // Check if any layer is actually active. + if (layer_count && (enable_flags & 0x1F)) { + // Color effects currently only supported in indexed-color modes (tiled and mode 4) + if (video_mode < 3) + render_w_effects(start, end, scanline, enable_flags, &tile_mode_renderers); + else if (video_mode == 4) + render_w_effects(start, end, scanline, enable_flags, &bitmap_mode_renderers); + else + // TODO: Implement mode 3 & 5 color effects (at least partially, ie. ST objs) + bitmap_mode_renderers.fullcolor(start, end, scanline, enable_flags); + } else + // Render the backdrop color, since no layers are enabled/visible. + render_backdrop(start, end, scanline); +} + +// Renders the area outside of all active windows +static void render_windowout_pass(u16 *scanline, u32 start, u32 end) +{ + u32 winout = read_ioreg(REG_ADDR_WINOUT); + u32 wndout_enable = winout & 0x3F; + + render_scanline_conditional(start, end, scanline, wndout_enable); +} + +// Renders window-obj. This is a pixel-level windowing effect, based on sprites +// (objects) with a special rendering mode (the sprites are not themselves +// visible but rather "enable" other pixels to be rendered conditionally). +static void render_windowobj_pass(u16 *scanline, u32 start, u32 end) +{ + u32 winout = read_ioreg(REG_ADDR_WINOUT); + u32 wndout_enable = winout & 0x3F; + + // First we render the "window-out" segment. + render_scanline_conditional(start, end, scanline, wndout_enable); + + // Now we render the objects in "copy" mode. This renders the scanline in + // WinObj-mode to a temporary buffer and performs a "copy-mode" render. + // In this mode, we copy pixels from the temp buffer to the final buffer + // whenever an object pixel is rendered.
+ render_scanline_objs(4, start, end, scanline, NULL); + + // TODO: Evaluate whether it's better to render the whole line and copy, + // or render subsegments and copy as we go (depends on the pixel/obj count) +} + +// If the window Y coordinates are out of the window range we can skip +// rendering the inside of the window. +inline bool in_window_y(u32 vcount, u32 top, u32 bottom) +{ + // TODO: check if these are reversed when top-bottom are also reversed. + if (top > DISPLAY_HEIGHT + 67) // This causes the window to be invisible + return false; + if (bottom > DISPLAY_HEIGHT + 67) // This makes it all visible + return true; + + if (top > bottom) /* Reversed: if not in the "band" */ + return vcount > top || vcount <= bottom; + + return vcount >= top && vcount < bottom; +} + +// Renders window 0/1. Checks boundaries and divides the segment into +// subsegments (if necessary) rendering each one in their right mode. +// outfn is called for "out-of-window" rendering. +template static void render_window_n_pass(u16 *scanline, u32 start, u32 end) +{ + u32 vcount = read_ioreg(REG_ADDR_VCOUNT); + // Check the Y coordinates to check if they fall in the right row + u32 win_top = WIN_GET_LOWER(*(winreg_t *)(REG_ADDR_WINxV(winnum))); + u32 win_bot = WIN_GET_HIGHER(*(winreg_t *)(REG_ADDR_WINxV(winnum))); + // Check the X coordinates and generate up to three segments + // Clip the coordinates to the [start, end) range. + u32 win_lraw = WIN_GET_LOWER(*(winreg_t *)(REG_ADDR_WINxH(winnum))); + u32 win_rraw = WIN_GET_HIGHER(*(winreg_t *)(REG_ADDR_WINxH(winnum))); + u32 win_l = MAX(start, MIN(end, win_lraw)); + u32 win_r = MAX(start, MIN(end, win_rraw)); + + bool goodwin = win_lraw < win_rraw; + + if (!in_window_y(vcount, win_top, win_bot) || (win_lraw == win_rraw)) + // WindowN is completely out, just render all out. 
+ outfn(scanline, start, end); + else { + // Render window within the clipped range + // Enable bits for stuff inside the window (and outside) + u32 winin = (*(winreg_t *)REG_ADDR_WININ) & 0xFFFF; + u32 wndn_enable = (winin >> (8 * winnum)) & 0x3F; + + // If the window's left/right edges are reversed, the areas are inverted. + if (goodwin) { + // Render [start, win_l) range (which is outside the window) + if (win_l != start) + outfn(scanline, start, win_l); + // Render the actual windowN pixels + render_scanline_conditional(win_l, win_r, scanline, wndn_enable); + // Render the [win_r, end) range (outside) + if (win_r != end) + outfn(scanline, win_r, end); + } else { + // Render [start, win_r) range (which is "inside" windowN) + if (win_r != start) + render_scanline_conditional(start, win_r, scanline, wndn_enable); + // The actual window is now outside, render recursively + outfn(scanline, win_r, win_l); + // Render the [win_l, end) range ("inside") + if (win_l != end) + render_scanline_conditional(win_l, end, scanline, wndn_enable); + } + } +} + +// Renders a full scanline, taking into consideration windowing effects. +// Breaks the rendering step into N steps, for each windowed region. +static void render_scanline_window(u16 *scanline) +{ + u16 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u32 win_ctrl = (dispcnt >> 13); + + // Priority decoding for windows + switch (win_ctrl) { + case 0x0: // No windows are active.
+ render_scanline_conditional(0, DISPLAY_WIDTH, scanline); + break; + + case 0x1: // Window 0 + render_window_n_pass(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x2: // Window 1 + render_window_n_pass(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x3: // Window 0 & 1 + render_window_n_pass, 0>(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x4: // Window Obj + render_windowobj_pass(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x5: // Window 0 & Obj + render_window_n_pass(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x6: // Window 1 & Obj + render_window_n_pass(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x7: // Window 0, 1 & Obj + render_window_n_pass, 0>(scanline, 0, DISPLAY_WIDTH); + break; + } +} + +static const u8 active_layers[] = { + 0x1F, // Mode 0, Tile BG0-3 and OBJ + 0x17, // Mode 1, Tile BG0-2 and OBJ + 0x1C, // Mode 2, Tile BG2-3 and OBJ + 0x14, // Mode 3, BMP BG2 and OBJ + 0x14, // Mode 4, BMP BG2 and OBJ + 0x14, // Mode 5, BMP BG2 and OBJ + 0, // Unused + 0, +}; + +void update_scanline(void) +{ + u32 pitch = get_screen_pitch(); + u16 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u32 vcount = read_ioreg(REG_ADDR_VCOUNT); + u16 *screen_offset = get_screen_pixels() + (vcount * pitch); + u32 video_mode = dispcnt & 0x07; + + order_layers((dispcnt >> 8) & active_layers[video_mode], vcount); + + // If the screen is in forced blank draw pure white. + if (dispcnt & 0x80) + memset(screen_offset, 0xff, DISPLAY_WIDTH * sizeof(u16)); + else + render_scanline_window(screen_offset); + + // Mode 0 does not use any affine params at all. + if (video_mode) { + // Account for vertical mosaic effect, by correcting affine references.
+ const u32 bgmosv = ((read_ioreg(REG_ADDR_MOSAIC) >> 4) & 0xF) + 1; + + if (read_ioreg(REG_ADDR_BG2CNT) & 0x40) { // Mosaic enabled for this BG + if ((vcount % bgmosv) == bgmosv - 1) { // Correct after the last line + affine_reference_x[0] += (s16)read_ioreg(REG_ADDR_BG2PB) * bgmosv; + affine_reference_y[0] += (s16)read_ioreg(REG_ADDR_BG2PD) * bgmosv; + } + } else { + affine_reference_x[0] += (s16)read_ioreg(REG_ADDR_BG2PB); + affine_reference_y[0] += (s16)read_ioreg(REG_ADDR_BG2PD); + } + + if (read_ioreg(REG_ADDR_BG3CNT) & 0x40) { + if ((vcount % bgmosv) == bgmosv - 1) { + affine_reference_x[1] += (s16)read_ioreg(REG_ADDR_BG3PB) * bgmosv; + affine_reference_y[1] += (s16)read_ioreg(REG_ADDR_BG3PD) * bgmosv; + } + } else { + affine_reference_x[1] += (s16)read_ioreg(REG_ADDR_BG3PB); + affine_reference_y[1] += (s16)read_ioreg(REG_ADDR_BG3PD); + } + } +} + +extern "C" void DrawFrame_Fast(u16 *pixels) +{ + int i; + + gba_screen_pixels = pixels; + // convert_whole_palette(); + + // assume that the oam is only updated once before the frame + // starts to be drawn + u32 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u32 video_mode = dispcnt & 0x07; + order_obj(video_mode); + + for (i = 0; i < DISPLAY_HEIGHT; i++) { + + REG_VCOUNT = i; + if (((REG_DISPSTAT >> 8) & 0xFF) == REG_VCOUNT) { + REG_DISPSTAT |= INTR_FLAG_VCOUNT; + if (REG_DISPSTAT & DISPSTAT_VCOUNT_INTR) + gIntrTable[INTR_INDEX_VCOUNT](); + } + + // Render the backdrop color before each individual scanline. + // HBlank interrupt code could have changed it in between lines. 
+ update_scanline(); + + REG_DISPSTAT |= INTR_FLAG_HBLANK; + + RunDMAs(DMA_HBLANK); + + if (REG_DISPSTAT & DISPSTAT_HBLANK_INTR) + gIntrTable[INTR_INDEX_HBLANK](); + + REG_DISPSTAT &= ~INTR_FLAG_HBLANK; + REG_DISPSTAT &= ~INTR_FLAG_VCOUNT; + } + + video_reload_counters(); +} + +#endif diff --git a/tools/scaninc/source_file.cpp b/tools/scaninc/source_file.cpp index 9d188eb73..53e258d95 100644 --- a/tools/scaninc/source_file.cpp +++ b/tools/scaninc/source_file.cpp @@ -31,7 +31,7 @@ SourceFileType GetFileType(std::string& path) std::string extension = path.substr(pos + 1); - if (extension == "c") + if (extension == "c" || extension == "cc") return SourceFileType::Cpp; else if (extension == "s") return SourceFileType::Asm;