diff --git a/.gitignore b/.gitignore index 60cc4e932..cf418a746 100644 --- a/.gitignore +++ b/.gitignore @@ -85,6 +85,12 @@ libagbsyscall/*.s *.exe *.dll *.sdl +*.iso + +# PSP build outputs +EBOOT.PBP +PARAM.SFO +sa2_debug.log # third party deps /ext diff --git a/Makefile b/Makefile index b972750e0..c7979cd8f 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,14 @@ else ifeq ($(CPU_ARCH),i386) TOOLCHAIN := /usr/x86_64-w64-mingw32/ PREFIX := x86_64-w64-mingw32- endif +# PSP +else ifeq ($(PLATFORM),psp) + PSPDEV ?= $(HOME)/pspdev + PSPSDK := $(PSPDEV)/psp/sdk + export PATH := $(PSPDEV)/bin:$(PATH) + PREFIX := psp- +else ifeq ($(PLATFORM),ps2) + PREFIX := mips64r5900el-ps2-elf- else # Native ifneq ($(PLATFORM),sdl) @@ -120,6 +128,14 @@ else ifeq ($(PLATFORM),sdl) ROM := $(BUILD_NAME).sdl ELF := $(ROM).elf MAP := $(ROM).map +else ifeq ($(PLATFORM),psp) +ROM := EBOOT.PBP +ELF := $(BUILD_NAME).psp.elf +MAP := $(BUILD_NAME).psp.map +else ifeq ($(PLATFORM),ps2) +ROM := $(BUILD_NAME).$(PLATFORM).iso +ELF := $(ROM:.iso=.elf) +MAP := $(ROM:.iso=.map) else ROM := $(BUILD_NAME).$(PLATFORM).exe ELF := $(ROM:.exe=.elf) @@ -156,11 +172,15 @@ TILESETS_SUBDIR = graphics/tilesets/ ifeq ($(PLATFORM),gba) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/*") else ifeq ($(PLATFORM),sdl) +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") +else ifeq ($(PLATFORM),psp) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*") +else ifeq ($(PLATFORM),ps2) +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") else ifeq ($(PLATFORM),sdl_win32) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*") else ifeq ($(PLATFORM),win32) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path 
"*/platform/pret_sdl/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/pret_sdl/*" -not -path "*/platform/psp/*") else C_SRCS := $(shell find $(C_SUBDIR) -name "*.c") endif @@ -225,6 +245,12 @@ else ifeq ($(PLATFORM),sdl) CC1FLAGS += -Wno-parentheses-equality -Wno-unused-value CPPFLAGS += -D TITLE_BAR=$(BUILD_NAME).$(PLATFORM) -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 $(shell sdl2-config --cflags) + else ifeq ($(PLATFORM),psp) + CC1FLAGS += -G0 + CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 -D SDL_MAIN_HANDLED -I$(PSPDEV)/psp/include/SDL2 -I$(PSPDEV)/psp/include -I$(PSPSDK)/include -D_PSP_FW_VERSION=600 + else ifeq ($(PLATFORM),ps2) + CC1FLAGS += -G0 -Wno-parentheses-equality -Wno-unused-value -ffast-math + CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 -D SDL_MAIN_HANDLED -D_EE -D__PS2__ -I$(PS2SDK)/common/include -I$(PS2SDK)/ee/include -I$(PS2SDK)/ports/include $(shell $(PS2SDK)/ports/bin/sdl2-config --cflags) else ifeq ($(PLATFORM),sdl_win32) CPPFLAGS += -D TITLE_BAR=$(BUILD_NAME).$(PLATFORM) -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 $(SDL_MINGW_FLAGS) else ifeq ($(PLATFORM),win32) @@ -249,6 +275,10 @@ else # for modern we are using a modern compiler # so instead of CPP we can use gcc -E to "preprocess only" CPP := $(CC1) -E + else ifeq ($(PLATFORM), psp) + CPP := $(CC1) -E + else ifeq ($(PLATFORM), ps2) + ASFLAGS += -msingle-float endif # Allow file input through stdin on modern GCC and set it to "compile only" CC1FLAGS += -x c -S @@ -258,7 +288,14 @@ ifeq ($(DEBUG),1) CC1FLAGS += -g3 -O0 CPPFLAGS += -D DEBUG=1 else - CC1FLAGS += -O2 + ifeq ($(PLATFORM),psp) + # -O3 for PSP (Allegrex MIPS, small D-cache) + CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer + else ifeq ($(PLATFORM),ps2) + CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer + else + CC1FLAGS += -O2 + endif CPPFLAGS += -D DEBUG=0 endif @@ -297,6 +334,11 @@ else ifeq ($(PLATFORM),sdl) else MAP_FLAG := 
-Xlinker -Map= endif +# PSP +else ifeq ($(PLATFORM),psp) + MAP_FLAG := -Xlinker -Map= +else ifeq ($(PLATFORM),ps2) + MAP_FLAG := -Xlinker -Map= # Win32 else MAP_FLAG := -Xlinker -Map= @@ -307,6 +349,10 @@ ifeq ($(PLATFORM),gba) LIBS := $(ROOT_DIR)/tools/agbcc/lib/libgcc.a $(ROOT_DIR)/tools/agbcc/lib/libc.a $(LIBABGSYSCALL_LIBS) else ifeq ($(PLATFORM),sdl) LIBS := $(shell sdl2-config --cflags --libs) +else ifeq ($(PLATFORM),psp) + LIBS := -L$(PSPDEV)/psp/lib -L$(PSPSDK)/lib -lSDL2 -lm -lGL -lpspvram -lpspaudio -lpspvfpu -lpspdisplay -lpspgu -lpspge -lpsphprm -lpspctrl -lpsppower -lpspdebug -lpspnet -lpspnet_apctl -Wl,-zmax-page-size=128 +else ifeq ($(PLATFORM),ps2) + LIBS := -lSDL2 $(shell $(PS2SDK)/ports/bin/sdl2-config --libs) -T$(PS2SDK)/ee/startup/linkfile -L$(PS2SDK)/common/lib -L$(PS2SDK)/ee/lib -L$(PS2DEV)/gsKit/lib -Wl,-zmax-page-size=128 else ifeq ($(PLATFORM),sdl_win32) LIBS := -mwin32 -lkernel32 -lwinmm -lmingw32 -lxinput $(SDL_MINGW_LIBS) else ifeq ($(PLATFORM), win32) @@ -316,7 +362,7 @@ endif #### MAIN TARGETS #### # these commands will run regardless of deps being completed -.PHONY: clean tools tidy clean-tools $(TOOLDIRS) libagbsyscall +.PHONY: clean tools tidy clean-tools $(TOOLDIRS) libagbsyscall psp ps2 # Ensure required directories exist $(shell mkdir -p $(C_BUILDDIR) $(ASM_BUILDDIR) $(DATA_ASM_BUILDDIR) $(SOUND_ASM_BUILDDIR) $(SONG_BUILDDIR) $(MID_BUILDDIR)) @@ -397,7 +443,8 @@ clean-tools: tidy: $(RM) -r build/* $(RM) SDL2.dll - $(RM) $(BUILD_NAME)*.exe $(BUILD_NAME)*.elf $(BUILD_NAME)*.map $(BUILD_NAME)*.sdl $(BUILD_NAME)*.gba + $(RM) $(BUILD_NAME)*.exe $(BUILD_NAME)*.elf $(BUILD_NAME)*.map $(BUILD_NAME)*.sdl $(BUILD_NAME)*.gba $(BUILD_NAME)*.iso + $(RM) EBOOT.PBP PARAM.SFO usa_beta: ; @$(MAKE) GAME_REGION=USA GAME_VARIANT=BETA @@ -409,6 +456,10 @@ europe: ; @$(MAKE) GAME_REGION=EUROPE sdl: ; @$(MAKE) PLATFORM=sdl +psp: ; @$(MAKE) PLATFORM=psp + +ps2: ; @$(MAKE) PLATFORM=ps2 + tas_sdl: ; @$(MAKE) sdl TAS_TESTING=1 sdl_win32: @@ -459,7 +510,7 @@ 
data/mb_chao_garden_japan.gba.lz: data/mb_chao_garden_japan.gba %.bin: %.aif ; $(AIF) $< $@ -$(ELF): $(OBJS) libagbsyscall +$(ELF): $(OBJS) ifeq ($(PLATFORM),gba) @echo "$(LD) -T $(LDSCRIPT) $(MAP_FLAG) $(MAP) -o $@" @$(CPP) -P $(CPPFLAGS) $(LDSCRIPT) > $(OBJ_DIR)/$(LDSCRIPT) @@ -476,6 +527,18 @@ ifeq ($(PLATFORM),gba) $(FIX) $@ -p -t"$(TITLE)" -c$(GAME_CODE) -m$(MAKER_CODE) -r$(GAME_REVISION) --silent else ifeq ($(PLATFORM),sdl) cp $< $@ +else ifeq ($(PLATFORM),psp) + psp-fixup-imports $< + mksfoex 'Sonic Advance 2' PARAM.SFO + psp-strip $< -o $(BUILD_NAME).psp_strip.elf + pack-pbp $@ PARAM.SFO NULL NULL NULL NULL NULL $(BUILD_NAME).psp_strip.elf NULL + $(RM) $(BUILD_NAME).psp_strip.elf +else ifeq ($(PLATFORM),ps2) +# Stage into a fresh directory: cp -r into an already existing iso/ would nest ntsc/ inside it on rebuilds + @echo Creating $(ROM) from $(ELF) + @$(RM) -r $(OBJ_DIR)/iso && cp -r ps2/ntsc $(OBJ_DIR)/iso + @cp $< $(OBJ_DIR)/iso/$(PS2_GAME_CODE) + @mkisofs -o $@ $(OBJ_DIR)/iso/ else $(OBJCOPY) -O pei-x86-64 $< $@ endif diff --git a/README.md b/README.md index 382027ade..1dd07df9f 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ It can also build: * **sa2.sdl** `make sdl` (Linux/MacOS SDL 64bit port) * **sa2.sdl_win32.exe** `make sdl_win32` (Windows SDL 64bit port) * :construction: **sa2.win32.exe** `make win32` (Win32 native port, not functional) +* **EBOOT.PBP** `make psp` (PlayStation Portable homebrew port, requires [PSPDEV](https://github.com/pspdev/pspdev)) ## Current state diff --git a/asm/macros/portable.inc b/asm/macros/portable.inc index b389fb26a..fd66638b8 100644 --- a/asm/macros/portable.inc +++ b/asm/macros/portable.inc @@ -10,6 +10,8 @@ .macro mPtr value #if defined(__aarch64__) || defined(__x86_64__) .quad \value +#elif defined(__mips__) + .4byte \value #else .int \value #endif diff --git a/config.mk b/config.mk index 5f687443a..15affde18 100644 --- a/config.mk +++ b/config.mk @@ -63,6 +63,7 @@ MAKER_CODE := 78 BUILD_NAME := sa2 TITLE := SONICADVANC2 GAME_CODE := A2N +PS2_GAME_CODE := SLUS_054.02 # Revision diff --git a/include/config.h b/include/config.h 
index 15b9df4ec..845e61104 100644 --- a/include/config.h +++ b/include/config.h @@ -39,14 +39,20 @@ #define TAS_TESTING_WIDESCREEN_HACK 1 -#define RENDERER_SOFTWARE 0 -#define RENDERER_OPENGL 1 -#define RENDERER_COUNT 2 -#if PLATFORM_WIN32 && !PLATFORM_SDL +#define RENDERER_SOFTWARE 0 +#define RENDERER_OPENGL 1 +#define RENDERER_SOFTWARE_FAST 2 +#define RENDERER_COUNT 3 + +#ifndef RENDERER +#if defined(__PSP__) || defined(__PS2__) +#define RENDERER RENDERER_SOFTWARE_FAST +#elif PLATFORM_WIN32 && !PLATFORM_SDL // TODO: Only win32 for now #define RENDERER RENDERER_OPENGL #else #define RENDERER RENDERER_SOFTWARE #endif +#endif #endif // GUARD_SA2_CONFIG_H diff --git a/include/gba/defines.h b/include/gba/defines.h index b904ee74d..6b9d09afc 100644 --- a/include/gba/defines.h +++ b/include/gba/defines.h @@ -39,8 +39,14 @@ #define OAM_ENTRY_COUNT 128 #if PORTABLE // NOTE: Used in gba/types.h, so they have to be defined before the #include +#if defined(__PSP__) +// PSP: Use GBA-native resolution, SDL scales to 480x272 +#define DISPLAY_WIDTH 240 +#define DISPLAY_HEIGHT 160 +#else #define DISPLAY_WIDTH 426 #define DISPLAY_HEIGHT 240 +#endif // NOTE: We shouldn't consider WIDESCREEN_HACK a permanent thing. // This hack should best be removed once there's a "native" platform layer. 
diff --git a/include/gba/types.h b/include/gba/types.h index 72e721ad7..fa3234357 100644 --- a/include/gba/types.h +++ b/include/gba/types.h @@ -20,6 +20,9 @@ typedef struct __attribute__((packed)) name struct_body name; #endif +#ifdef __PS2__ +#include <tamtypes.h> // NOTE(review): header name was lost in extraction; presumably PS2SDK's own u8..s64 typedefs - confirm +#else typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; @@ -28,6 +31,7 @@ typedef int8_t s8; typedef int16_t s16; typedef int32_t s32; typedef int64_t s64; +#endif #if (GAME == GAME_SA1) typedef u8 MetatileIndexType; diff --git a/include/platform/shared/rendering/sw_renderer_common.h b/include/platform/shared/rendering/sw_renderer_common.h new file mode 100644 index 000000000..ddb85d7ff --- /dev/null +++ b/include/platform/shared/rendering/sw_renderer_common.h @@ -0,0 +1,65 @@ +#ifndef GUARD_SW_RENDERER_COMMON_H +#define GUARD_SW_RENDERER_COMMON_H + +// shared color math for the gba ppu blend unit +// used by both the normal (multi-pass) and fast (single-pass) software renderers + +#include <stdint.h> + +// bgr555 channel extraction +#define getAlphaBit(x) (((x) >> 15) & 1) +#define getRedChannel(x) (((x) >> 0) & 0x1F) +#define getGreenChannel(x) (((x) >> 5) & 0x1F) +#define getBlueChannel(x) (((x) >> 10) & 0x1F) +#define COLOR_OPAQUE 0x8000 + +// eva/evb/evy are in 1/16 units; the ppu treats register values 17..31 as 16 +static inline unsigned int clampBlendCoeff(unsigned int coeff) { return (coeff > 16) ? 16 : coeff; } + +// alpha blend: per channel (targetA * eva + targetB * evb) / 16, saturated at 31 +static inline uint16_t alphaBlendColor(uint16_t targetA, uint16_t targetB, unsigned int eva, unsigned int evb) +{ + eva = clampBlendCoeff(eva); + evb = clampBlendCoeff(evb); + + unsigned int r = ((getRedChannel(targetA) * eva) + (getRedChannel(targetB) * evb)) >> 4; + unsigned int g = ((getGreenChannel(targetA) * eva) + (getGreenChannel(targetB) * evb)) >> 4; + unsigned int b = ((getBlueChannel(targetA) * eva) + (getBlueChannel(targetB) * evb)) >> 4; + + if (r > 31) + r = 31; + if (g > 31) + g = 31; + if (b > 31) + b = 31; + + return r | (g << 5) | (b << 10) | COLOR_OPAQUE; +} + +// brightness increase: each channel moves evy/16 of the way towards 31 +static inline uint16_t alphaBrightnessIncrease(uint16_t targetA, unsigned int evy) +{ + evy = clampBlendCoeff(evy); + + unsigned int r = getRedChannel(targetA) + (31 - getRedChannel(targetA)) * evy / 16; + unsigned int g = getGreenChannel(targetA) + (31 - getGreenChannel(targetA)) * evy / 16; + unsigned int b = getBlueChannel(targetA) + (31 - getBlueChannel(targetA)) * evy / 16; + + // evy <= 16 keeps every channel within 0..31, so no saturation is needed + return r | (g << 5) | (b << 10) | COLOR_OPAQUE; +} + +// brightness decrease: each channel moves evy/16 of the way towards 0 +static inline uint16_t alphaBrightnessDecrease(uint16_t targetA, unsigned int evy) +{ + evy = clampBlendCoeff(evy); + + unsigned int r = getRedChannel(targetA) - getRedChannel(targetA) * evy / 16; + unsigned int g = getGreenChannel(targetA) - getGreenChannel(targetA) * evy / 16; + unsigned int b = getBlueChannel(targetA) - getBlueChannel(targetA) * evy / 16; + + // evy <= 16 means the unsigned subtraction can no longer wrap below zero + return r | (g << 5) | (b << 10) | COLOR_OPAQUE; +} + +#endif // GUARD_SW_RENDERER_COMMON_H 
diff --git a/libagbsyscall/Makefile b/libagbsyscall/Makefile index 654a44e4b..7f6c55693 100644 --- a/libagbsyscall/Makefile +++ b/libagbsyscall/Makefile @@ -34,6 +34,10 @@ else ifeq ($(CPU_ARCH),i386) TOOLCHAIN := /usr/x86_64-w64-mingw32/ PREFIX := x86_64-w64-mingw32- endif +else ifeq ($(PLATFORM),psp) + PSPDEV ?= $(HOME)/pspdev + export PATH := $(PSPDEV)/bin:$(PATH) + PREFIX := psp- else ifneq ($(PLATFORM),sdl) $(error Unknown CPU architecture $(CPU_ARCH)) endif # (PLATFORM == gba) diff --git a/ps2/ntsc/SYSTEM.CNF b/ps2/ntsc/SYSTEM.CNF new file mode 100644 index 000000000..9c440527d --- /dev/null +++ b/ps2/ntsc/SYSTEM.CNF @@ -0,0 +1,3 @@ +BOOT2 = cdrom0:\SLUS_054.02;1 +VER = 1.00 +VMODE = NTSC \ No newline at end of file diff --git a/src/background.c b/src/background.c index 7fba66626..190160224 100644 --- a/src/background.c +++ b/src/background.c @@ -650,7 +650,7 @@ END_NONMATCH void UpdateBgAnimationTiles(Background *bg) { -#if (RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != RENDERER_OPENGL) Tilemap *tilemap = gTilemapsRef[bg->tilemapId]; if (tilemap->animFrameCount > 0) { if (tilemap->animDelay <= ++bg->animDelayCounter) { @@ -872,7 +872,7 @@ NONMATCH("asm/non_matching/engine/sub_80039E4.inc", bool32 sub_80039E4(void)) return TRUE; #endif -#if 
(RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != RENDERER_OPENGL) if (gBgSpritesCount != 0) { OamDataShort oam; s32 r5; diff --git a/src/core.c b/src/core.c index 4bd4c0782..e1914f57c 100644 --- a/src/core.c +++ b/src/core.c @@ -924,7 +924,7 @@ bool32 ProcessVramGraphicsCopyQueue(void) if ((graphics->src != 0) && (graphics->dest != 0)) #endif { -#if (RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != RENDERER_OPENGL) DmaCopy16(3, (void *)(graphics->src + offset), (void *)(graphics->dest + offset), COPY_CHUNK_SIZE); #endif graphics->size -= COPY_CHUNK_SIZE; @@ -939,7 +939,7 @@ bool32 ProcessVramGraphicsCopyQueue(void) if ((graphics->src != 0) && (graphics->dest != 0)) #endif { -#if (RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != RENDERER_OPENGL) DmaCopy16(3, (void *)(graphics->src + offset), (void *)(graphics->dest + offset), graphics->size); #endif } diff --git a/src/lib/m4a/m4a.c b/src/lib/m4a/m4a.c index db161254b..7b90fe3a9 100644 --- a/src/lib/m4a/m4a.c +++ b/src/lib/m4a/m4a.c @@ -1377,9 +1377,10 @@ cond_true : { return; } -cond_false: +cond_false : { track->cmdPtr += 4; } +} void MP2K_event_xcmd(struct MP2KPlayerState *mplayInfo, struct MP2KTrack *track) { diff --git a/src/platform/pret_sdl/sdl2.c b/src/platform/pret_sdl/sdl2.c index f4ceaf79a..1a77546ea 100644 --- a/src/platform/pret_sdl/sdl2.c +++ b/src/platform/pret_sdl/sdl2.c @@ -10,6 +10,11 @@ #include #endif +#ifdef __PSP__ +#include +extern int setupPspCallbacks(void); +#endif + #include #include "global.h" @@ -21,6 +26,7 @@ #include "lib/agb_flash/flash_internal.h" #include "platform/shared/dma.h" #include "platform/shared/input.h" +#include "platform/shared/rendering/sw_renderer_common.h" #if ENABLE_AUDIO #include "platform/shared/audio/cgb_audio.h" @@ -92,6 +98,16 @@ bool paused = false; bool stepOneFrame = false; bool headless = false; +#if defined(__PSP__) || defined(__PS2__) +static SDL_Joystick *joystick = NULL; +#endif + +#ifdef __PSP__ +#define PSP_SCREEN_W 480 +#define PSP_SCREEN_H 272 
+static SDL_Rect pspDestRect; +#endif + double lastGameTime = 0; double curGameTime = 0; double fixedTimestep = 1.0 / 60.0; // 16.666667ms @@ -120,8 +136,57 @@ void *Platform_malloc(size_t numBytes) { return HeapAlloc(GetProcessHeap(), HEAP void Platform_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); } #endif +#ifdef __PS2__ +// TODO: clean these for what is needed +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +void reset_IOP() +{ + SifInitRpc(0); + while (!SifIopReset(NULL, 0)) { } // Comment this line if you want to "debug" through ps2link + while (!SifIopSync()) { } +} + +static void prepare_IOP() +{ + reset_IOP(); + SifInitRpc(0); + sbv_patch_enable_lmb(); + sbv_patch_disable_prefix_check(); +} + +static void init_drivers() +{ + init_only_boot_ps2_filesystem_driver(); + init_memcard_driver(true); +} + +static void deinit_drivers() +{ + deinit_memcard_driver(true); + deinit_only_boot_ps2_filesystem_driver(); +} +#endif + int main(int argc, char **argv) { +#ifdef __PSP__ + setupPspCallbacks(); +#endif + +#ifdef __PS2__ + prepare_IOP(); +#endif + const char *headlessEnv = getenv("HEADLESS"); if (headlessEnv && strcmp(headlessEnv, "true") == 0) { @@ -144,7 +209,9 @@ int main(int argc, char **argv) freopen("CON", "w", stdout); #endif +#ifndef __PS2__ ReadSaveFile("sa2.sav"); +#endif // Prevent the multiplayer screen from being drawn ( see core.c:EngineInit() ) REG_RCNT = 0x8000; @@ -162,14 +229,26 @@ int main(int argc, char **argv) return 1; } +#if defined(__PSP__) || defined(__PS2__) + if (SDL_NumJoysticks() > 0) { + joystick = SDL_JoystickOpen(0); + } +#endif + #ifdef TITLE_BAR const char *title = STR(TITLE_BAR); #else const char *title = "SAT-R sa2"; #endif +#ifdef __PSP__ + sdlWindow = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 480, 272, SDL_WINDOW_SHOWN); +#elif defined(__PS2__) + sdlWindow = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 640, 448, 
SDL_WINDOW_SHOWN); +#else sdlWindow = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, DISPLAY_WIDTH * videoScale, DISPLAY_HEIGHT * videoScale, SDL_WINDOW_SHOWN | SDL_WINDOW_RESIZABLE); +#endif if (sdlWindow == NULL) { fprintf(stderr, "Window could not be created! SDL_Error: %s\n", SDL_GetError()); return 1; @@ -191,7 +270,17 @@ int main(int argc, char **argv) } #endif +#ifdef __PSP__ + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC); + if (sdlRenderer == NULL) + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_ACCELERATED); + if (sdlRenderer == NULL) + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, 0); +#elif defined(__PS2__) + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_ACCELERATED); +#else sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_PRESENTVSYNC); +#endif if (sdlRenderer == NULL) { fprintf(stderr, "Renderer could not be created! SDL_Error: %s\n", SDL_GetError()); return 1; @@ -208,7 +297,12 @@ int main(int argc, char **argv) SDL_SetRenderDrawColor(sdlRenderer, 0, 0, 0, 255); SDL_RenderClear(sdlRenderer); SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "0"); +#ifdef __PSP__ + // SDL_RenderSetLogicalSize is broken on PSP, stretch to fill manually + pspDestRect = (SDL_Rect) { 0, 0, PSP_SCREEN_W, PSP_SCREEN_H }; +#else SDL_RenderSetLogicalSize(sdlRenderer, DISPLAY_WIDTH, DISPLAY_HEIGHT); +#endif #if ENABLE_VRAM_VIEW SDL_SetRenderDrawColor(vramRenderer, 0, 0, 0, 255); SDL_RenderClear(vramRenderer); @@ -229,6 +323,12 @@ int main(int argc, char **argv) } #endif +#ifdef __PS2__ + SDL_SetTextureScaleMode(sdlTexture, SDL_ScaleModeLinear); + // For some reason we are WAY blown out on the PS2 + SDL_SetTextureColorMod(sdlTexture, 140, 140, 140); +#endif + #if ENABLE_AUDIO SDL_AudioSpec want; @@ -239,9 +339,9 @@ int main(int argc, char **argv) want.samples = (want.freq / 60); cgb_audio_init(want.freq); - if (SDL_OpenAudio(&want, 0) < 0) + if 
(SDL_OpenAudio(&want, 0) < 0) { SDL_Log("Failed to open audio: %s", SDL_GetError()); - else { + } else { if (want.format != AUDIO_F32) /* we let this one thing change. */ SDL_Log("We didn't get Float32 audio format."); SDL_PauseAudio(0); @@ -259,12 +359,10 @@ int main(int argc, char **argv) bool newFrameRequested = FALSE; -// Every GBA frame we process the SDL events and render the number of times -// SDL requires us to for vsync. When we need another frame we break out of -// the loop via a return +// called every gba frame. we process sdl events and render as many times +// as vsync needs, then return when a new game frame is needed. void VBlankIntrWait(void) { - // ((struct MultiSioPacket *)gMultiSioArea.nextSendBufp) #define HANDLE_VBLANK_INTRS() \ ({ \ REG_DISPSTAT |= INTR_FLAG_VBLANK; \ @@ -281,16 +379,22 @@ void VBlankIntrWait(void) } bool frameAvailable = TRUE; + bool frameDrawn = false; +#if defined(__PSP__) || defined(__PS2__) + static int frames_skipped = 0; +#define MAX_FRAME_SKIP 2 +#endif while (isRunning) { +#if !defined(__PS2__) && !defined(__PSP__) ProcessSDLEvents(); +#endif if (!paused || stepOneFrame) { double dt = fixedTimestep / timeScale; // TODO: Fix speedup - // Hack to emulate the behaviour of threaded sdl - // it will not add any new values to the accumulator - // when a new frame was requested within a frame cycle + // don't accumulate time if we already requested a new frame + // this frame cycle (emulates threaded sdl behavior) if (!newFrameRequested) { double deltaTime = 0; @@ -312,8 +416,21 @@ void VBlankIntrWait(void) while (accumulator >= dt) { REG_KEYINPUT = KEYS_MASK ^ Platform_GetKeyInput(); if (frameAvailable) { +#if defined(__PSP__) || defined(__PS2__) + // frame skip: let game logic catch up when behind + if (accumulator >= dt * 2.0 && frames_skipped < MAX_FRAME_SKIP) { + frames_skipped++; + frameAvailable = FALSE; + HANDLE_VBLANK_INTRS(); + accumulator -= dt; + newFrameRequested = TRUE; + return; + } + frames_skipped = 0; 
+#endif VDraw(sdlTexture); frameAvailable = FALSE; + frameDrawn = true; HANDLE_VBLANK_INTRS(); @@ -329,6 +446,21 @@ void VBlankIntrWait(void) } } + // present +#ifdef __PSP__ + // manual blit since SDL_RenderSetLogicalSize doesn't work on psp + if (frameDrawn) { + SDL_RenderCopy(sdlRenderer, sdlTexture, NULL, &pspDestRect); + SDL_RenderPresent(sdlRenderer); + frameDrawn = false; + } else { + SDL_Delay(1); + } +#else +#ifdef __PS2__ + // Allow audio to play + DelayThread(800); +#endif SDL_RenderClear(sdlRenderer); SDL_RenderCopy(sdlRenderer, sdlTexture, NULL, NULL); @@ -345,6 +477,7 @@ void VBlankIntrWait(void) SDL_RenderPresent(sdlRenderer); #if ENABLE_VRAM_VIEW SDL_RenderPresent(vramRenderer); +#endif #endif } @@ -352,8 +485,11 @@ void VBlankIntrWait(void) SDL_DestroyWindow(sdlWindow); SDL_Quit(); +#ifdef __PSP__ + sceKernelExitGame(); +#endif exit(0); -#undef RUN_VBLANK_INTRS +#undef HANDLE_VBLANK_INTRS } static void ReadSaveFile(char *path) @@ -421,6 +557,71 @@ static void CloseSaveFile() static u16 keys; +#if defined(__PSP__) || defined(__PS2__) + +#ifdef __PS2__ +#define BTN_TRIANGLE 12 +#define BTN_CIRCLE 13 +#define BTN_CROSS 14 +#define BTN_SQUARE 15 +#define BTN_LTRIGGER 10 +#define BTN_RTRIGGER 11 +#define BTN_DOWN 6 +#define BTN_LEFT 7 +#define BTN_UP 4 +#define BTN_RIGHT 5 +#define BTN_SELECT 0 +#define BTN_START 3 +#else +#define BTN_TRIANGLE 0 +#define BTN_CIRCLE 1 +#define BTN_CROSS 2 +#define BTN_SQUARE 3 +#define BTN_LTRIGGER 4 +#define BTN_RTRIGGER 5 +#define BTN_DOWN 6 +#define BTN_LEFT 7 +#define BTN_UP 8 +#define BTN_RIGHT 9 +#define BTN_SELECT 10 +#define BTN_START 11 +#endif + +static u16 PollJoystickButtons(void) +{ + u16 keys = 0; + if (joystick == NULL) + return keys; + + SDL_JoystickUpdate(); + + if (SDL_JoystickGetButton(joystick, BTN_CROSS)) + keys |= A_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_CIRCLE)) + keys |= B_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_SQUARE)) + keys |= B_BUTTON; // Square also B + if 
(SDL_JoystickGetButton(joystick, BTN_START)) + keys |= START_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_SELECT)) + keys |= SELECT_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_LTRIGGER)) + keys |= L_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_RTRIGGER)) + keys |= R_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_UP)) + keys |= DPAD_UP; + if (SDL_JoystickGetButton(joystick, BTN_DOWN)) + keys |= DPAD_DOWN; + if (SDL_JoystickGetButton(joystick, BTN_LEFT)) + keys |= DPAD_LEFT; + if (SDL_JoystickGetButton(joystick, BTN_RIGHT)) + keys |= DPAD_RIGHT; + + return keys; +} +#endif + u32 fullScreenFlags = 0; static SDL_DisplayMode sdlDispMode = { 0 }; @@ -561,18 +762,33 @@ u16 Platform_GetKeyInput(void) return (gamepadKeys != 0) ? gamepadKeys : keys; #endif +#if defined(__PSP__) || defined(__PS2__) + return keys | PollJoystickButtons(); +#endif + return keys; } // BIOS function implementations are based on the VBA-M source code. -static uint32_t CPUReadMemory(const void *src) { return *(uint32_t *)src; } +// safe unaligned access for MIPS +static uint32_t CPUReadMemory(const void *src) +{ + uint32_t val; + memcpy(&val, src, sizeof(val)); + return val; +} -static void CPUWriteMemory(void *dest, uint32_t val) { *(uint32_t *)dest = val; } +static void CPUWriteMemory(void *dest, uint32_t val) { memcpy(dest, &val, sizeof(val)); } -static uint16_t CPUReadHalfWord(const void *src) { return *(uint16_t *)src; } +static uint16_t CPUReadHalfWord(const void *src) +{ + uint16_t val; + memcpy(&val, src, sizeof(val)); + return val; +} -static void CPUWriteHalfWord(void *dest, uint16_t val) { *(uint16_t *)dest = val; } +static void CPUWriteHalfWord(void *dest, uint16_t val) { memcpy(dest, &val, sizeof(val)); } static uint8_t CPUReadByte(const void *src) { return *(uint8_t *)src; } @@ -968,25 +1184,26 @@ static const uint16_t bgMapSizes[][2] = { #define applySpriteHorizontalMosaicEffect(x) (x - (x % (mosaicSpriteEffectX + 1))) #define applySpriteVerticalMosaicEffect(y) (y - 
(y % (mosaicSpriteEffectY + 1))) -// NOTE: This is the corrected function. static void RenderBGScanline(int bgNum, uint16_t control, uint16_t hoffs, uint16_t voffs, int lineNum, uint16_t *line) { unsigned int charBaseBlock = (control >> 2) & 3; unsigned int screenBaseBlock = (control & BGCNT_SCREENBASE_MASK) >> 8; - unsigned int bitsPerPixel = ((control >> 7) & 1) ? 8 : 4; + unsigned int is8bpp = (control >> 7) & 1; // Determine background dimensions from the control register unsigned int mapWidth = bgMapSizes[control >> 14][0]; // in tiles - unsigned int mapHeight = bgMapSizes[control >> 14][1]; // in tiles - unsigned int mapPixelWidth = mapWidth * TILE_WIDTH; - unsigned int mapPixelHeight = mapHeight * TILE_WIDTH; + unsigned int mapPixelWidth = mapWidth << 3; + unsigned int mapPixelHeight = bgMapSizes[control >> 14][1] << 3; + unsigned int pixelWidthMask = mapPixelWidth - 1; + unsigned int pixelHeightMask = mapPixelHeight - 1; uint8_t *bgtiles = (uint8_t *)BG_CHAR_ADDR(charBaseBlock); uint16_t *bgmap = (uint16_t *)BG_SCREEN_ADDR(screenBaseBlock); uint16_t *pal = (uint16_t *)PLTT; // Apply vertical mosaic effect to the entire scanline if enabled - if (control & BGCNT_MOSAIC) { + bool hasMosaic = control & BGCNT_MOSAIC; + if (hasMosaic) { lineNum = applyBGVerticalMosaicEffect(lineNum); } @@ -994,29 +1211,22 @@ static void RenderBGScanline(int bgNum, uint16_t control, uint16_t hoffs, uint16 hoffs &= 0x1FF; voffs &= 0x1FF; + unsigned int yy = (lineNum + voffs) & pixelHeightMask; + unsigned int mapY = yy >> 3; + unsigned int tileY = yy & 7; + unsigned int mapRowBase = mapY * mapWidth; + for (unsigned int x = 0; x < DISPLAY_WIDTH; x++) { - unsigned int xx, yy; + unsigned int xx; - // Calculate the source coordinate in the background map, applying scroll and mosaic - if (control & BGCNT_MOSAIC) { - xx = applyBGHorizontalMosaicEffect(x) + hoffs; + if (hasMosaic) { + xx = (applyBGHorizontalMosaicEffect(x) + hoffs) & pixelWidthMask; } else { - xx = x + hoffs; + xx = (x + 
hoffs) & pixelWidthMask; } - yy = lineNum + voffs; - - // Wrap the coordinates based on the background's actual pixel dimensions. - // This fixes issues with backgrounds that are not 256x256. - xx &= (mapPixelWidth - 1); - yy &= (mapPixelHeight - 1); - // Convert pixel coordinates to tile coordinates - unsigned int mapX = xx / TILE_WIDTH; - unsigned int mapY = yy / TILE_WIDTH; - - // Calculate the 1D index into the tilemap. This was the primary source of bugs, - // as the original code used a hardcoded map width of 32 tiles. - unsigned int mapIndex = mapY * mapWidth + mapX; + unsigned int mapX = xx >> 3; + unsigned int mapIndex = mapRowBase + mapX; uint16_t entry = bgmap[mapIndex]; unsigned int tileNum = entry & 0x3FF; @@ -1026,40 +1236,30 @@ static void RenderBGScanline(int bgNum, uint16_t control, uint16_t hoffs, uint16 vramPalIdBuffer[tileNum] = paletteNum; #endif - // Get the coordinate within the specific tile - unsigned int tileX = xx % TILE_WIDTH; - unsigned int tileY = yy % TILE_WIDTH; + unsigned int tx = xx & 7; + unsigned int ty = tileY; - // Handle horizontal and vertical tile flipping if (entry & (1 << 10)) - tileX = (TILE_WIDTH - 1) - tileX; // H-flip + tx = 7 - tx; if (entry & (1 << 11)) - tileY = (TILE_WIDTH - 1) - tileY; // V-flip + ty = 7 - ty; - // Calculate address of the pixel data and extract the color - if (bitsPerPixel == 4) { - uint32_t tileDataOffset = tileNum * TILE_SIZE_4BPP; - uint32_t pixelByteOffset = (tileY * TILE_WIDTH + tileX) / 2; + if (!is8bpp) { + uint32_t tileDataOffset = tileNum << 5; + uint32_t pixelByteOffset = (ty << 2) + (tx >> 1); uint8_t pixelPair = bgtiles[tileDataOffset + pixelByteOffset]; - uint8_t pixel; - if (tileX & 1) { - pixel = pixelPair >> 4; - } else { - pixel = pixelPair & 0xF; - } + uint8_t pixel = (tx & 1) ? 
(pixelPair >> 4) : (pixelPair & 0xF); if (pixel != 0) { - line[x] = pal[16 * paletteNum + pixel] | 0x8000; + line[x] = pal[(paletteNum << 4) + pixel] | 0x8000; } } else { // 8 bits per pixel - uint32_t tileDataOffset = tileNum * TILE_SIZE_8BPP; - uint32_t pixelByteOffset = tileY * TILE_WIDTH + tileX; + uint32_t tileDataOffset = tileNum << 6; + uint32_t pixelByteOffset = (ty << 3) + tx; uint8_t pixel = bgtiles[tileDataOffset + pixelByteOffset]; if (pixel != 0) { - // For 8bpp tiles, the palette number in the tile entry is ignored. - // The pixel value is a direct index into the 256-color palette. line[x] = pal[pixel] | 0x8000; } } @@ -1257,64 +1457,7 @@ const u8 spriteSizes[][2] = { { 32, 64 }, }; -#define getAlphaBit(x) ((x >> 15) & 1) -#define getRedChannel(x) ((x >> 0) & 0x1F) -#define getGreenChannel(x) ((x >> 5) & 0x1F) -#define getBlueChannel(x) ((x >> 10) & 0x1F) -#define isbgEnabled(x) ((REG_DISPCNT >> 8) & 0xF) & (1 << x) - -static uint16_t alphaBlendColor(uint16_t targetA, uint16_t targetB) -{ - unsigned int eva = REG_BLDALPHA & 0x1F; - unsigned int evb = (REG_BLDALPHA >> 8) & 0x1F; - // shift right by 4 = division by 16 - unsigned int r = ((getRedChannel(targetA) * eva) + (getRedChannel(targetB) * evb)) >> 4; - unsigned int g = ((getGreenChannel(targetA) * eva) + (getGreenChannel(targetB) * evb)) >> 4; - unsigned int b = ((getBlueChannel(targetA) * eva) + (getBlueChannel(targetB) * evb)) >> 4; - - if (r > 31) - r = 31; - if (g > 31) - g = 31; - if (b > 31) - b = 31; - - return r | (g << 5) | (b << 10) | (1 << 15); -} - -static uint16_t alphaBrightnessIncrease(uint16_t targetA) -{ - unsigned int evy = (REG_BLDY & 0x1F); - unsigned int r = getRedChannel(targetA) + (31 - getRedChannel(targetA)) * evy / 16; - unsigned int g = getGreenChannel(targetA) + (31 - getGreenChannel(targetA)) * evy / 16; - unsigned int b = getBlueChannel(targetA) + (31 - getBlueChannel(targetA)) * evy / 16; - - if (r > 31) - r = 31; - if (g > 31) - g = 31; - if (b > 31) - b = 31; - - 
return r | (g << 5) | (b << 10) | (1 << 15); -} - -static uint16_t alphaBrightnessDecrease(uint16_t targetA) -{ - unsigned int evy = (REG_BLDY & 0x1F); - unsigned int r = getRedChannel(targetA) - getRedChannel(targetA) * evy / 16; - unsigned int g = getGreenChannel(targetA) - getGreenChannel(targetA) * evy / 16; - unsigned int b = getBlueChannel(targetA) - getBlueChannel(targetA) * evy / 16; - - if (r > 31) - r = 31; - if (g > 31) - g = 31; - if (b > 31) - b = 31; - - return r | (g << 5) | (b << 10) | (1 << 15); -} +#define isbgEnabled(x) ((REG_DISPCNT >> 8) & 0xF) & (1 << x) // outputs the blended pixel in colorOutput, the prxxx are the bg priority and // subpriority, pixelpos is pixel offset in scanline @@ -1396,8 +1539,6 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool bool isAffine = oam->split.affineMode & 1; bool doubleSizeOrDisabled = (oam->split.affineMode >> 1) & 1; - bool isSemiTransparent = (oam->split.objMode == 1); - bool isObjWin = (oam->split.objMode == 2); if (!(isAffine) && doubleSizeOrDisabled) // disable for non-affine { @@ -1408,31 +1549,41 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool width = gOamShapesSizes[index][0]; height = gOamShapesSizes[index][1]; - int rect_width = width; - int rect_height = height; - int half_width = width / 2; int half_height = height / 2; - pixels = scanline->spriteLayers[oam->split.priority]; - int32_t x = oam->split.x; int32_t y = oam->split.y; #if !EXTENDED_OAM - // The regular, unextended values are 9 and 8 unsigned bits for x and y respectively. - // Once they have exceeded the screen's right or bottom, they get treated as signed values on original hardware. - // This is done so that, for example, a sprite at 0 on either axis that moves left or up will not suddenly disappear. - // - // With EXTENDED_OAM we are using signed 16 bit values, so we don't want to change the raw value. 
if (x >= DISPLAY_WIDTH) x -= 512; if (y >= DISPLAY_HEIGHT) y -= 256; #endif + if (isAffine && doubleSizeOrDisabled) { + half_width *= 2; + half_height *= 2; + } + + int spriteTop = y; + int spriteBottom = y + (half_height * 2); + if ((int)vcount < spriteTop || (int)vcount >= spriteBottom) + continue; + + int spriteLeft = x; + int spriteRight = x + (half_width * 2); + if (spriteRight < 0 || spriteLeft >= DISPLAY_WIDTH) + continue; + + bool isSemiTransparent = (oam->split.objMode == 1); + bool isObjWin = (oam->split.objMode == 2); + + int rect_width = width; + int rect_height = height; + if (isAffine) { - // TODO: there is probably a better way to do this u8 matrixNum = oam->split.matrixNum * 4; OamData *oam1 = &((OamData *)OAM)[matrixNum]; @@ -1445,26 +1596,22 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool matrix[1][0] = oam3->all.affineParam; matrix[1][1] = oam4->all.affineParam; - if (doubleSizeOrDisabled) // double size for affine - { + if (doubleSizeOrDisabled) { rect_width *= 2; rect_height *= 2; - half_width *= 2; - half_height *= 2; } } else { - // Identity matrix[0][0] = 0x100; matrix[0][1] = 0; matrix[1][0] = 0; matrix[1][1] = 0x100; } + pixels = scanline->spriteLayers[oam->split.priority]; x += half_width; y += half_height; - // Does this sprite actually draw on this scanline? - if (vcount >= (y - half_height) && vcount < (y + half_height)) { + { int local_y = (oam->split.mosaic == 1) ? 
applySpriteVerticalMosaicEffect(vcount) - y : vcount - y; int number = oam->split.tileNum; int palette = oam->split.paletteNum; @@ -1472,96 +1619,100 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool bool flipY = !isAffine && ((oam->split.matrixNum >> 4) & 1); bool is8BPP = oam->split.bpp & 1; - for (int local_x = -half_width; local_x <= half_width; local_x++) { + { uint8_t *tiledata = (uint8_t *)objtiles; - uint16_t *palette = (uint16_t *)(PLTT + (0x200 / 2)); - int local_mosaicX; - int tex_x; - int tex_y; - - unsigned int global_x = local_x + x; - - if (global_x < 0 || global_x >= DISPLAY_WIDTH) - continue; - - if (oam->split.mosaic == 1) { - // mosaic effect has to be applied to global coordinates otherwise - // the mosaic will scroll - local_mosaicX = applySpriteHorizontalMosaicEffect(global_x) - x; - tex_x = ((matrix[0][0] * local_mosaicX + matrix[0][1] * local_y) >> 8) + (width / 2); - tex_y = ((matrix[1][0] * local_mosaicX + matrix[1][1] * local_y) >> 8) + (height / 2); - } else { - tex_x = ((matrix[0][0] * local_x + matrix[0][1] * local_y) >> 8) + (width / 2); - tex_y = ((matrix[1][0] * local_x + matrix[1][1] * local_y) >> 8) + (height / 2); - } + uint16_t *sprpal = (uint16_t *)(PLTT + (0x200 / 2)); + for (int local_x = -half_width; local_x <= half_width; local_x++) { + int local_mosaicX; + int tex_x; + int tex_y; - /* Check if transformed coordinates are inside bounds. */ - - if (tex_x >= width || tex_y >= height || tex_x < 0 || tex_y < 0) - continue; - - if (flipX) - tex_x = width - tex_x - 1; - if (flipY) - tex_y = height - tex_y - 1; - - int tile_x = tex_x % 8; - int tile_y = tex_y % 8; - int block_x = tex_x / 8; - int block_y = tex_y / 8; - int block_offset = ((block_y * (REG_DISPCNT & 0x40 ? 
(width / 8) : 16)) + block_x); - uint16_t pixel = 0; - - if (!is8BPP) { - int tileDataIndex = (block_offset + oam->split.tileNum) * 32 + (tile_y * 4) + (tile_x / 2); - pixel = tiledata[tileDataIndex]; - if (tile_x & 1) - pixel >>= 4; - else - pixel &= 0xF; - palette += oam->split.paletteNum * 16; -#if ENABLE_VRAM_VIEW - vramPalIdBuffer[0x800 + (tileDataIndex / 32)] = 16 + oam->split.paletteNum; -#endif - } else { - pixel = tiledata[(block_offset * 2 + oam->split.tileNum) * 32 + (tile_y * 8) + tile_x]; - } + unsigned int global_x = local_x + x; - if (pixel != 0) { - uint16_t color = palette[pixel]; + if (global_x < 0 || global_x >= DISPLAY_WIDTH) + continue; + + if (oam->split.mosaic == 1) { + // mosaic effect has to be applied to global coordinates otherwise + // the mosaic will scroll + local_mosaicX = applySpriteHorizontalMosaicEffect(global_x) - x; + tex_x = ((matrix[0][0] * local_mosaicX + matrix[0][1] * local_y) >> 8) + (width / 2); + tex_y = ((matrix[1][0] * local_mosaicX + matrix[1][1] * local_y) >> 8) + (height / 2); + } else { + tex_x = ((matrix[0][0] * local_x + matrix[0][1] * local_y) >> 8) + (width / 2); + tex_y = ((matrix[1][0] * local_x + matrix[1][1] * local_y) >> 8) + (height / 2); + } + + /* Check if transformed coordinates are inside bounds. */ - // if sprite mode is 2 then write to the window mask instead - if (isObjWin) { - if (scanline->winMask[global_x] & WINMASK_WINOUT) - scanline->winMask[global_x] = (REG_WINOUT >> 8) & 0x3F; + if (tex_x >= width || tex_y >= height || tex_x < 0 || tex_y < 0) continue; + + if (flipX) + tex_x = width - tex_x - 1; + if (flipY) + tex_y = height - tex_y - 1; + + int tile_x = tex_x & 7; + int tile_y = tex_y & 7; + int block_x = tex_x >> 3; + int block_y = tex_y >> 3; + int block_offset = ((block_y * (REG_DISPCNT & 0x40 ? 
(width >> 3) : 16)) + block_x); + uint16_t pixel = 0; + + uint16_t *pixpal; + if (!is8BPP) { + int tileDataIndex = ((block_offset + oam->split.tileNum) << 5) + (tile_y << 2) + (tile_x >> 1); + pixel = tiledata[tileDataIndex]; + if (tile_x & 1) + pixel >>= 4; + else + pixel &= 0xF; + pixpal = sprpal + (oam->split.paletteNum << 4); +#if ENABLE_VRAM_VIEW + vramPalIdBuffer[0x800 + (tileDataIndex >> 5)] = 16 + oam->split.paletteNum; +#endif + } else { + pixel = tiledata[((block_offset * 2 + oam->split.tileNum) << 5) + (tile_y << 3) + tile_x]; + pixpal = sprpal; } - // this code runs if pixel is to be drawn - if (global_x < DISPLAY_WIDTH && global_x >= 0) { - // check if its enabled in the window (if window is enabled) - winShouldBlendPixel = (windowsEnabled == false || scanline->winMask[global_x] & WINMASK_CLR); - - // has to be separated from the blend mode switch statement - // because of OBJ semi transparancy feature - if ((blendMode == 1 && REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) || isSemiTransparent) { - uint16_t targetA = color; - uint16_t targetB = 0; - if (alphaBlendSelectTargetB(scanline, &targetB, oam->split.priority, 0, global_x, false)) { - color = alphaBlendColor(targetA, targetB); - } - } else if (REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) { - switch (blendMode) { - case 2: - color = alphaBrightnessIncrease(color); - break; - case 3: - color = alphaBrightnessDecrease(color); - break; - } + + if (pixel != 0) { + uint16_t color = pixpal[pixel]; + + // if sprite mode is 2 then write to the window mask instead + if (isObjWin) { + if (scanline->winMask[global_x] & WINMASK_WINOUT) + scanline->winMask[global_x] = (REG_WINOUT >> 8) & 0x3F; + continue; } + // this code runs if pixel is to be drawn + if (global_x < DISPLAY_WIDTH && global_x >= 0) { + // check if its enabled in the window (if window is enabled) + winShouldBlendPixel = (windowsEnabled == false || scanline->winMask[global_x] & WINMASK_CLR); + + // has to be separated from the 
blend mode switch statement + // because of OBJ semi transparancy feature + if ((blendMode == 1 && REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) || isSemiTransparent) { + uint16_t targetA = color; + uint16_t targetB = 0; + if (alphaBlendSelectTargetB(scanline, &targetB, oam->split.priority, 0, global_x, false)) { + color = alphaBlendColor(targetA, targetB, REG_BLDALPHA & 0x1F, (REG_BLDALPHA >> 8) & 0x1F); + } + } else if (REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) { + switch (blendMode) { + case 2: + color = alphaBrightnessIncrease(color, REG_BLDY & 0x1F); + break; + case 3: + color = alphaBrightnessDecrease(color, REG_BLDY & 0x1F); + break; + } + } - // write pixel to pixel framebuffer - pixels[global_x] = color | (1 << 15); + // write pixel to pixel framebuffer + pixels[global_x] = color | (1 << 15); + } } } } @@ -1574,14 +1725,19 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) unsigned int mode = REG_DISPCNT & 3; unsigned char numOfBgs = (mode == 0 ? 4 : 3); int bgnum, prnum; - struct scanlineData scanline; + static struct scanlineData scanline; unsigned int blendMode = (REG_BLDCNT >> 6) & 3; unsigned int xpos; + unsigned int enabledBgs = (REG_DISPCNT >> 8) & 0xF; - // initialize all priority bookkeeping data - memset(scanline.layers, 0, sizeof(scanline.layers)); - memset(scanline.winMask, 0, sizeof(scanline.winMask)); - memset(scanline.spriteLayers, 0, sizeof(scanline.spriteLayers)); + // Only zero the layers that are actually enabled, + // instead of blindly zeroing all 4+4 layers (~8KB total) every scanline. 
+ for (bgnum = 0; bgnum < numOfBgs; bgnum++) { + if (enabledBgs & (1 << bgnum)) + memset(scanline.layers[bgnum], 0, sizeof(scanline.layers[bgnum])); + } + if (REG_DISPCNT & DISPCNT_OBJ_ON) + memset(scanline.spriteLayers, 0, sizeof(scanline.spriteLayers)); memset(scanline.prioritySortedBgsCount, 0, sizeof(scanline.prioritySortedBgsCount)); for (bgnum = 0; bgnum < numOfBgs; bgnum++) { @@ -1696,63 +1852,88 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) if (REG_DISPCNT & DISPCNT_OBJ_ON) DrawOamSprites(&scanline, vcount, windowsEnabled); - // iterate trough every priority in order - for (prnum = 3; prnum >= 0; prnum--) { - for (char prsub = scanline.prioritySortedBgsCount[prnum] - 1; prsub >= 0; prsub--) { - char bgnum = scanline.prioritySortedBgs[prnum][prsub]; - // if background is enabled then draw it - if (isbgEnabled(bgnum)) { - uint16_t *src = scanline.layers[bgnum]; - // copy all pixels to framebuffer + // iterate through every priority in order + if (blendMode == 0 && !windowsEnabled) { + for (prnum = 3; prnum >= 0; prnum--) { + for (char prsub = scanline.prioritySortedBgsCount[prnum] - 1; prsub >= 0; prsub--) { + char bgnum = scanline.prioritySortedBgs[prnum][prsub]; + if (isbgEnabled(bgnum)) { + uint16_t *src = scanline.layers[bgnum]; + for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { + uint16_t color = src[xpos]; + if (color & 0x8000) // alpha bit set = opaque + pixels[xpos] = color; + } + } + } + // draw sprites on current priority + if (REG_DISPCNT & DISPCNT_OBJ_ON) { + uint16_t *src = scanline.spriteLayers[prnum]; for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { - uint16_t color = src[xpos]; - bool winEffectEnable = true; - - if (!getAlphaBit(color)) - continue; // do nothing if alpha bit is not set + if (src[xpos] & 0x8000) + pixels[xpos] = src[xpos]; + } + } + } + } else { + // FULL PATH: blending and/or windows are active + for (prnum = 3; prnum >= 0; prnum--) { + for (char prsub = scanline.prioritySortedBgsCount[prnum] - 1; prsub >= 0; 
prsub--) { + char bgnum = scanline.prioritySortedBgs[prnum][prsub]; + // if background is enabled then draw it + if (isbgEnabled(bgnum)) { + uint16_t *src = scanline.layers[bgnum]; + // copy all pixels to framebuffer + for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { + uint16_t color = src[xpos]; + bool winEffectEnable = true; + + if (!getAlphaBit(color)) + continue; // do nothing if alpha bit is not set + + if (windowsEnabled) { + winEffectEnable = ((scanline.winMask[xpos] & WINMASK_CLR) >> 5); + // if bg is disabled inside the window then do not draw the pixel + if (!(scanline.winMask[xpos] & 1 << bgnum)) + continue; + } - if (windowsEnabled) { - winEffectEnable = ((scanline.winMask[xpos] & WINMASK_CLR) >> 5); - // if bg is disabled inside the window then do not draw the pixel - if (!(scanline.winMask[xpos] & 1 << bgnum)) - continue; - } + // blending code + if (blendMode != 0 && REG_BLDCNT & (1 << bgnum) && winEffectEnable) { + uint16_t targetA = color; + uint16_t targetB = 0; - // blending code - if (blendMode != 0 && REG_BLDCNT & (1 << bgnum) && winEffectEnable) { - uint16_t targetA = color; - uint16_t targetB = 0; - - switch (blendMode) { - case 1: { - char isSpriteBlendingEnabled = REG_BLDCNT & BLDCNT_TGT2_OBJ ? 1 : 0; - // find targetB and blend it - if (alphaBlendSelectTargetB(&scanline, &targetB, prnum, prsub + 1, xpos, isSpriteBlendingEnabled)) { - color = alphaBlendColor(targetA, targetB); - } - } break; - case 2: - color = alphaBrightnessIncrease(targetA); - break; - case 3: - color = alphaBrightnessDecrease(targetA); - break; + switch (blendMode) { + case 1: { + char isSpriteBlendingEnabled = REG_BLDCNT & BLDCNT_TGT2_OBJ ? 
1 : 0; + // find targetB and blend it + if (alphaBlendSelectTargetB(&scanline, &targetB, prnum, prsub + 1, xpos, isSpriteBlendingEnabled)) { + color = alphaBlendColor(targetA, targetB, REG_BLDALPHA & 0x1F, (REG_BLDALPHA >> 8) & 0x1F); + } + } break; + case 2: + color = alphaBrightnessIncrease(targetA, REG_BLDY & 0x1F); + break; + case 3: + color = alphaBrightnessDecrease(targetA, REG_BLDY & 0x1F); + break; + } } + // write the pixel to scanline buffer output + pixels[xpos] = color; } - // write the pixel to scanline buffer output - pixels[xpos] = color; } } - } - // draw sprites on current priority - uint16_t *src = scanline.spriteLayers[prnum]; - for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { - if (getAlphaBit(src[xpos])) { - // check if sprite pixel draws inside window - if (windowsEnabled && !(scanline.winMask[xpos] & WINMASK_OBJ)) - continue; - // draw the pixel - pixels[xpos] = src[xpos]; + // draw sprites on current priority + uint16_t *src = scanline.spriteLayers[prnum]; + for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { + if (getAlphaBit(src[xpos])) { + // check if sprite pixel draws inside window + if (windowsEnabled && !(scanline.winMask[xpos] & WINMASK_OBJ)) + continue; + // draw the pixel + pixels[xpos] = src[xpos]; + } } } } @@ -1760,21 +1941,25 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) uint16_t *memsetu16(uint16_t *dst, uint16_t fill, size_t count) { - for (int i = 0; i < count; i++) { - *dst++ = fill; + uint32_t fill32 = ((uint32_t)fill << 16) | fill; + uint32_t *dst32 = (uint32_t *)dst; + size_t pairs = count >> 1; + for (size_t i = 0; i < pairs; i++) { + dst32[i] = fill32; } - - return 0; + if (count & 1) { + dst[count - 1] = fill; + } + return dst; } static void DrawFrame(uint16_t *pixels) { int i; - int j; - static uint16_t scanlines[DISPLAY_HEIGHT][DISPLAY_WIDTH]; - unsigned int blendMode = (REG_BLDCNT >> 6) & 3; for (i = 0; i < DISPLAY_HEIGHT; i++) { + uint16_t *scanline = &pixels[i * DISPLAY_WIDTH]; + REG_VCOUNT = i; if 
(((REG_DISPSTAT >> 8) & 0xFF) == REG_VCOUNT) { REG_DISPSTAT |= INTR_FLAG_VCOUNT; @@ -1782,10 +1967,10 @@ static void DrawFrame(uint16_t *pixels) gIntrTable[INTR_INDEX_VCOUNT](); } - // Render the backdrop color before the each individual scanline. - // HBlank interrupt code could have changed it inbetween lines. - memsetu16(scanlines[i], *(uint16_t *)PLTT, DISPLAY_WIDTH); - DrawScanline(scanlines[i], i); + // Render the backdrop color before each individual scanline. + // HBlank interrupt code could have changed it in between lines. + memsetu16(scanline, *(uint16_t *)PLTT, DISPLAY_WIDTH); + DrawScanline(scanline, i); REG_DISPSTAT |= INTR_FLAG_HBLANK; @@ -1797,14 +1982,6 @@ static void DrawFrame(uint16_t *pixels) REG_DISPSTAT &= ~INTR_FLAG_HBLANK; REG_DISPSTAT &= ~INTR_FLAG_VCOUNT; } - - // Copy to screen - for (i = 0; i < DISPLAY_HEIGHT; i++) { - uint16_t *src = scanlines[i]; - for (j = 0; j < DISPLAY_WIDTH; j++) { - pixels[i * DISPLAY_WIDTH + j] = src[j]; - } - } } #if ENABLE_VRAM_VIEW @@ -1845,8 +2022,14 @@ void VramDraw(SDL_Texture *texture) void VDraw(SDL_Texture *texture) { - memset(gameImage, 0, sizeof(gameImage)); +#if RENDERER == RENDERER_SOFTWARE_FAST + { + extern void DrawFrame_Fast(uint16_t * pixels); + DrawFrame_Fast(gameImage); + } +#else DrawFrame(gameImage); +#endif SDL_UpdateTexture(texture, NULL, gameImage, DISPLAY_WIDTH * sizeof(Uint16)); REG_VCOUNT = DISPLAY_HEIGHT + 1; // prep for being in VBlank period } diff --git a/src/platform/psp/psp_module.c b/src/platform/psp/psp_module.c new file mode 100644 index 000000000..18040b1e1 --- /dev/null +++ b/src/platform/psp/psp_module.c @@ -0,0 +1,40 @@ +#include +#include +#include +#include + +PSP_MODULE_INFO("SonicAdvance2", 0, 1, 0); +PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER | THREAD_ATTR_VFPU); +PSP_HEAP_SIZE_KB(-1024); + +unsigned int sce_newlib_stack_size = 512 * 1024; + +extern bool isRunning; + +int exitCallback(int arg1, int arg2, void *common) +{ + (void)arg1; + (void)arg2; + (void)common; + 
isRunning = false; + return 0; +} + +int callbackThread(SceSize args, void *argp) +{ + (void)args; + (void)argp; + int cbid = sceKernelCreateCallback("Exit Callback", exitCallback, NULL); + sceKernelRegisterExitCallback(cbid); + sceKernelSleepThreadCB(); + return 0; +} + +int setupPspCallbacks(void) +{ + int thid = sceKernelCreateThread("update_thread", callbackThread, 0x11, 0xFA0, 0, 0); + if (thid >= 0) { + sceKernelStartThread(thid, 0, 0); + } + return thid; +} diff --git a/src/platform/shared/audio/m4a_sound_mixer.c b/src/platform/shared/audio/m4a_sound_mixer.c index 49b0d65bc..67e276fb1 100644 --- a/src/platform/shared/audio/m4a_sound_mixer.c +++ b/src/platform/shared/audio/m4a_sound_mixer.c @@ -419,7 +419,7 @@ void MP2K_event_rept(struct MP2KPlayerState *unused, struct MP2KTrack *track) MP2K_event_goto(unused, track); } else { track->repeatCount = 0; - track->cmdPtr += sizeof(u8) + sizeof(u8 *); + track->cmdPtr = track->cmdPtr + sizeof(u8 *); } } } diff --git a/src/platform/shared/dma.c b/src/platform/shared/dma.c index 60ad6144f..d2f55e045 100644 --- a/src/platform/shared/dma.c +++ b/src/platform/shared/dma.c @@ -1,10 +1,26 @@ #include +#include #include "global.h" #include "platform/shared/dma.h" +// safe unaligned access for MIPS +static inline void dma_copy32(void *dst, const void *src) +{ + u32 tmp; + memcpy(&tmp, src, 4); + memcpy(dst, &tmp, 4); +} + +static inline void dma_copy16(void *dst, const void *src) +{ + u16 tmp; + memcpy(&tmp, src, 2); + memcpy(dst, &tmp, 2); +} + struct DMATransfer DMAList[DMA_COUNT] = { 0 }; -void RunDMAs(u32 type) +void RunDMAs(DmaStartTypes type) { for (int dmaNum = 0; dmaNum < DMA_COUNT; dmaNum++) { struct DMATransfer *dma = &DMAList[dmaNum]; @@ -23,9 +39,9 @@ void RunDMAs(u32 type) // printf("DMA%d src=%p, dest=%p, control=%d\n", dmaNum, dma->src, dma->dst, dma->control); for (int i = 0; i < dma->size; i++) { if ((dma->control) & DMA_32BIT) - *dma->dst32 = *dma->src32; + dma_copy32(dma->dst, dma->src); else - *dma->dst16 
= *dma->src16; + dma_copy16(dma->dst, dma->src); // process destination pointer changes if (((dma->control) & DMA_DEST_MASK) == DMA_DEST_INC) { diff --git a/src/platform/shared/rendering/sw_renderer_fast.c b/src/platform/shared/rendering/sw_renderer_fast.c new file mode 100644 index 000000000..9a1283d2e --- /dev/null +++ b/src/platform/shared/rendering/sw_renderer_fast.c @@ -0,0 +1,1199 @@ +// sw_renderer_fast.c -- single-pass back-to-front gba ppu renderer +// +// the default renderer does multiple passes per scanline which thrashes +// the data cache on older platforms with tiny L1 and no L2 + +// this one composites everything in one pass per scanline, painting +// layers directly into the output buffer from back to front. a +// layerIds[] side-buffer tracks what wrote each pixel so alpha +// blending can find its target-b inline. +// +// 4bpp text bgs get a batched path that reads one u32 per 8 pixels. +// 8bpp and mosaic bgs fall back to per-pixel. sprites are pre-filtered +// per scanline so we only touch the ones that actually matter. 
+ +#include "config.h" + +#if RENDERER == RENDERER_SOFTWARE_FAST + +#include +#include +#include +#include + +#include "global.h" +#include "core.h" +#include "gba/defines.h" +#include "gba/io_reg.h" +#include "gba/types.h" +#include "platform/shared/dma.h" +#include "platform/shared/rendering/sw_renderer_common.h" + +extern IntrFunc gIntrTable[16]; +extern uint8_t REG_BASE[IO_SIZE]; +extern uint16_t PLTT[PLTT_SIZE / sizeof(uint16_t)]; +extern uint8_t VRAM[VRAM_SIZE]; +extern uint8_t OAM[OAM_SIZE]; +extern const u8 gOamShapesSizes[12][2]; + +#ifndef TILE_WIDTH +#define TILE_WIDTH 8 +#endif + +#define IsBGEnabled(n) (((REG_DISPCNT >> 8) & 0xF) & (1 << (n))) + +// mosaic +#define MOSAIC_BG_X (REG_MOSAIC & 0xF) +#define MOSAIC_BG_Y ((REG_MOSAIC >> 4) & 0xF) +#define MOSAIC_SPR_X ((REG_MOSAIC >> 8) & 0xF) +#define MOSAIC_SPR_Y ((REG_MOSAIC >> 12) & 0xF) +#define ApplyMosaicBGX(x) ((x) - ((x) % (MOSAIC_BG_X + 1))) +#define ApplyMosaicBGY(y) ((y) - ((y) % (MOSAIC_BG_Y + 1))) +#define ApplyMosaicSprX(x) ((x) - ((x) % (MOSAIC_SPR_X + 1))) +#define ApplyMosaicSprY(y) ((y) - ((y) % (MOSAIC_SPR_Y + 1))) + +// tilemap entry fields +#define TILE_NUM(e) ((e)&0x3FF) +#define TILE_PALETTE(e) (((e) >> 12) & 0xF) +#define TILE_HFLIP(e) ((e) & (1 << 10)) +#define TILE_VFLIP(e) ((e) & (1 << 11)) + +// window mask bits +#define WINMASK_BG0 (1 << 0) +#define WINMASK_BG1 (1 << 1) +#define WINMASK_BG2 (1 << 2) +#define WINMASK_BG3 (1 << 3) +#define WINMASK_OBJ (1 << 4) +#define WINMASK_CLR (1 << 5) +#define WINMASK_WINOUT (1 << 6) + +// layer ids for blend target tracking +#define LAYER_BG0 0 +#define LAYER_BG1 1 +#define LAYER_BG2 2 +#define LAYER_BG3 3 +#define LAYER_OBJ 4 +#define LAYER_BACKDROP 5 + +static const uint16_t bgMapSizes[][2] = { + { 32, 32 }, + { 64, 32 }, + { 32, 64 }, + { 64, 64 }, +}; + +// 16-bit fill using 32-bit writes +static inline void Memset16(uint16_t *dst, uint16_t fill, unsigned int count) +{ + uint32_t fill32 = ((uint32_t)fill << 16) | fill; + uint32_t *dst32 
= (uint32_t *)dst; + unsigned int pairs = count >> 1; + for (unsigned int i = 0; i < pairs; i++) + dst32[i] = fill32; + if (count & 1) + dst[count - 1] = fill; +} + +static inline uint32_t GetBgRefX(int bg) { return (bg == 2) ? REG_BG2X : (bg == 3) ? REG_BG3X : 0; } +static inline uint32_t GetBgRefY(int bg) { return (bg == 2) ? REG_BG2Y : (bg == 3) ? REG_BG3Y : 0; } +static inline uint16_t GetBgPA(int bg) { return (bg == 2) ? REG_BG2PA : (bg == 3) ? REG_BG3PA : 0; } +static inline uint16_t GetBgPB(int bg) { return (bg == 2) ? REG_BG2PB : (bg == 3) ? REG_BG3PB : 0; } +static inline uint16_t GetBgPC(int bg) { return (bg == 2) ? REG_BG2PC : (bg == 3) ? REG_BG3PC : 0; } +static inline uint16_t GetBgPD(int bg) { return (bg == 2) ? REG_BG2PD : (bg == 3) ? REG_BG3PD : 0; } + +// handles the wraparound case where left > right +static inline bool WindowContainsX(u16 left, u16 right, u16 x) +{ + if (left > right) + return (x >= left || x < right); + return (x >= left && x < right); +} + +// check if a layer can be the target-b for alpha blending +static inline bool IsBlendTargetB(uint8_t layerId, unsigned int bldcnt) +{ + if (layerId <= 3) + return (bldcnt & (1 << (8 + layerId))) != 0; + if (layerId == LAYER_OBJ) + return (bldcnt & BLDCNT_TGT2_OBJ) != 0; + if (layerId == LAYER_BACKDROP) + return (bldcnt & BLDCNT_TGT2_BD) != 0; + return false; +} + +// sprites with oam mode 1 always try alpha blending regardless of bldcnt +static inline uint16_t BlendSpritePixel(uint16_t color, unsigned int x, uint16_t *output, uint8_t *layerIds, bool isSemiTransparent, + unsigned int blendMode, unsigned int bldcnt, bool windowsEnabled, uint16_t *winMask, + unsigned int eva, unsigned int evb, unsigned int evy) +{ + bool winAllowsBlend = !windowsEnabled || (winMask && (winMask[x] & WINMASK_CLR)); + + bool doAlpha = (blendMode == 1 && (bldcnt & BLDCNT_TGT1_OBJ) && winAllowsBlend) || isSemiTransparent; + + if (doAlpha) { + if (IsBlendTargetB(layerIds[x], bldcnt)) + return alphaBlendColor(color, 
output[x], eva, evb); + } else if ((bldcnt & BLDCNT_TGT1_OBJ) && winAllowsBlend) { + if (blendMode == 2) + return alphaBrightnessIncrease(color, evy); + if (blendMode == 3) + return alphaBrightnessDecrease(color, evy); + } + + return color; +} + +// write a bg pixel with inline blend resolution +static inline void WriteBGPixelBlended(unsigned int x, uint8_t pixel, const uint16_t *palBase, int bgNum, uint16_t *output, + uint8_t *layerIds, unsigned int blendMode, bool bgIsTargetA, bool useWindows, unsigned int winBgBit, + uint16_t *winMask, unsigned int bldcnt, unsigned int eva, unsigned int evb, unsigned int evy) +{ + uint16_t color = palBase[pixel] | COLOR_OPAQUE; + + if (useWindows && !(winMask[x] & winBgBit)) + return; + + if (bgIsTargetA && (!useWindows || (winMask[x] & WINMASK_CLR))) { + uint16_t src = color; + switch (blendMode) { + case 1: + if (IsBlendTargetB(layerIds[x], bldcnt)) + color = alphaBlendColor(src, output[x], eva, evb); + break; + case 2: + color = alphaBrightnessIncrease(src, evy); + break; + case 3: + color = alphaBrightnessDecrease(src, evy); + break; + } + } + + output[x] = color; + layerIds[x] = bgNum; +} + +static void RenderTextBG(int bgNum, uint16_t control, uint16_t hoffs, uint16_t voffs, int lineNum, uint16_t *output) +{ + unsigned int charBase = (control >> 2) & 3; + unsigned int screenBase = (control & BGCNT_SCREENBASE_MASK) >> 8; + unsigned int is8bpp = (control >> 7) & 1; + + unsigned int mapW = bgMapSizes[control >> 14][0]; + unsigned int mapPxW = mapW << 3; + unsigned int mapPxH = bgMapSizes[control >> 14][1] << 3; + unsigned int wMask = mapPxW - 1; + unsigned int hMask = mapPxH - 1; + + uint8_t *tiles = (uint8_t *)BG_CHAR_ADDR(charBase); + uint16_t *map = (uint16_t *)BG_SCREEN_ADDR(screenBase); + uint16_t *pal = (uint16_t *)PLTT; + + bool hasMosaic = control & BGCNT_MOSAIC; + if (hasMosaic) + lineNum = ApplyMosaicBGY(lineNum); + + hoffs &= 0x1FF; + voffs &= 0x1FF; + + unsigned int yy = (lineNum + voffs) & hMask; + unsigned int 
mapY = yy >> 3; + unsigned int tileY = yy & 7; + unsigned int rowBase = mapY * mapW; + + // slow path: 8bpp or mosaic, one pixel at a time + if (hasMosaic || is8bpp) { + for (unsigned int x = 0; x < DISPLAY_WIDTH; x++) { + unsigned int xx = hasMosaic ? (ApplyMosaicBGX(x) + hoffs) & wMask : (x + hoffs) & wMask; + + uint16_t entry = map[rowBase + (xx >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int tx = xx & 7; + unsigned int ty = tileY; + if (TILE_HFLIP(entry)) + tx = 7 - tx; + if (TILE_VFLIP(entry)) + ty = 7 - ty; + + if (!is8bpp) { + uint8_t pair = tiles[(tileNum << 5) + (ty << 2) + (tx >> 1)]; + uint8_t pixel = (tx & 1) ? (pair >> 4) : (pair & 0xF); + if (pixel) + output[x] = pal[(palNum << 4) + pixel] | COLOR_OPAQUE; + } else { + uint8_t pixel = tiles[(tileNum << 6) + (ty << 3) + tx]; + if (pixel) + output[x] = pal[pixel] | COLOR_OPAQUE; + } + } + return; + } + + // fast path: 4bpp, read one u32 per tile row, unroll 8 pixels + unsigned int x = 0; + + // left edge: partial tile if scroll isn't tile-aligned + { + unsigned int startX = hoffs & wMask; + unsigned int startOff = startX & 7; + + if (startOff != 0) { + uint16_t entry = map[rowBase + (startX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; + bool hflip = TILE_HFLIP(entry); + + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + + unsigned int partial = 8 - startOff; + if (partial > DISPLAY_WIDTH) + partial = DISPLAY_WIDTH; + + for (unsigned int t = 0; t < partial && x < DISPLAY_WIDTH; t++, x++) { + unsigned int tx = startOff + t; + if (hflip) + tx = 7 - tx; + uint8_t pixel = (row >> (tx << 2)) & 0xF; + if (pixel) + output[x] = pal[(palNum << 4) + pixel] | COLOR_OPAQUE; + } + } + } + + // middle: full tiles, 8 pixels at a time + while (x + 8 <= DISPLAY_WIDTH) { + unsigned int srcX = (x + hoffs) & wMask; + uint16_t 
entry = map[rowBase + (srcX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; + + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint16_t *palBase = pal + (palNum << 4); + + if (!TILE_HFLIP(entry)) { + uint8_t p; + p = row & 0xF; + if (p) + output[x] = palBase[p] | COLOR_OPAQUE; + p = (row >> 4) & 0xF; + if (p) + output[x + 1] = palBase[p] | COLOR_OPAQUE; + p = (row >> 8) & 0xF; + if (p) + output[x + 2] = palBase[p] | COLOR_OPAQUE; + p = (row >> 12) & 0xF; + if (p) + output[x + 3] = palBase[p] | COLOR_OPAQUE; + p = (row >> 16) & 0xF; + if (p) + output[x + 4] = palBase[p] | COLOR_OPAQUE; + p = (row >> 20) & 0xF; + if (p) + output[x + 5] = palBase[p] | COLOR_OPAQUE; + p = (row >> 24) & 0xF; + if (p) + output[x + 6] = palBase[p] | COLOR_OPAQUE; + p = (row >> 28) & 0xF; + if (p) + output[x + 7] = palBase[p] | COLOR_OPAQUE; + } else { + uint8_t p; + p = (row >> 28) & 0xF; + if (p) + output[x] = palBase[p] | COLOR_OPAQUE; + p = (row >> 24) & 0xF; + if (p) + output[x + 1] = palBase[p] | COLOR_OPAQUE; + p = (row >> 20) & 0xF; + if (p) + output[x + 2] = palBase[p] | COLOR_OPAQUE; + p = (row >> 16) & 0xF; + if (p) + output[x + 3] = palBase[p] | COLOR_OPAQUE; + p = (row >> 12) & 0xF; + if (p) + output[x + 4] = palBase[p] | COLOR_OPAQUE; + p = (row >> 8) & 0xF; + if (p) + output[x + 5] = palBase[p] | COLOR_OPAQUE; + p = (row >> 4) & 0xF; + if (p) + output[x + 6] = palBase[p] | COLOR_OPAQUE; + p = row & 0xF; + if (p) + output[x + 7] = palBase[p] | COLOR_OPAQUE; + } + x += 8; + } + + // right edge: leftover partial tile + if (x < DISPLAY_WIDTH) { + unsigned int srcX = (x + hoffs) & wMask; + uint16_t entry = map[rowBase + (srcX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; + bool hflip = TILE_HFLIP(entry); + + uint32_t row = *(uint32_t 
*)(tiles + (tileNum << 5) + (ty << 2)); + + for (unsigned int t = 0; x < DISPLAY_WIDTH; t++, x++) { + unsigned int tx = hflip ? (7 - t) : t; + uint8_t pixel = (row >> (tx << 2)) & 0xF; + if (pixel) + output[x] = pal[(palNum << 4) + pixel] | COLOR_OPAQUE; + } + } +} + +// same thing but with blend/window tracking baked in +static void RenderTextBGBlend(int bgNum, uint16_t control, uint16_t hoffs, uint16_t voffs, int lineNum, uint16_t *output, uint8_t *layerIds, + unsigned int blendMode, bool windowsEnabled, uint16_t *winMask, unsigned int bldcnt, unsigned int eva, + unsigned int evb, unsigned int evy) +{ + unsigned int charBase = (control >> 2) & 3; + unsigned int screenBase = (control & BGCNT_SCREENBASE_MASK) >> 8; + unsigned int is8bpp = (control >> 7) & 1; + + unsigned int mapW = bgMapSizes[control >> 14][0]; + unsigned int mapPxW = mapW << 3; + unsigned int mapPxH = bgMapSizes[control >> 14][1] << 3; + unsigned int wMask = mapPxW - 1; + unsigned int hMask = mapPxH - 1; + + uint8_t *tiles = (uint8_t *)BG_CHAR_ADDR(charBase); + uint16_t *map = (uint16_t *)BG_SCREEN_ADDR(screenBase); + uint16_t *pal = (uint16_t *)PLTT; + + bool hasMosaic = control & BGCNT_MOSAIC; + if (hasMosaic) + lineNum = ApplyMosaicBGY(lineNum); + + hoffs &= 0x1FF; + voffs &= 0x1FF; + + unsigned int yy = (lineNum + voffs) & hMask; + unsigned int mapY = yy >> 3; + unsigned int tileY = yy & 7; + unsigned int rowBase = mapY * mapW; + + bool bgIsTargetA = (blendMode != 0) && (bldcnt & (1 << bgNum)); + bool useWindows = windowsEnabled && (winMask != NULL); + unsigned int winBgBit = 1 << bgNum; + + // slow path: 8bpp or mosaic + if (hasMosaic || is8bpp) { + for (unsigned int x = 0; x < DISPLAY_WIDTH; x++) { + unsigned int xx = hasMosaic ? 
(ApplyMosaicBGX(x) + hoffs) & wMask : (x + hoffs) & wMask; + + uint16_t entry = map[rowBase + (xx >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int tx = xx & 7; + unsigned int ty = tileY; + if (TILE_HFLIP(entry)) + tx = 7 - tx; + if (TILE_VFLIP(entry)) + ty = 7 - ty; + + uint8_t pixel; + if (!is8bpp) { + uint8_t pair = tiles[(tileNum << 5) + (ty << 2) + (tx >> 1)]; + pixel = (tx & 1) ? (pair >> 4) : (pair & 0xF); + } else { + pixel = tiles[(tileNum << 6) + (ty << 3) + tx]; + } + + if (pixel == 0) + continue; + + uint16_t color = !is8bpp ? pal[(palNum << 4) + pixel] | COLOR_OPAQUE : pal[pixel] | COLOR_OPAQUE; + + if (useWindows && !(winMask[x] & winBgBit)) + continue; + + if (bgIsTargetA && (!useWindows || (winMask[x] & WINMASK_CLR))) { + uint16_t src = color; + switch (blendMode) { + case 1: + if (IsBlendTargetB(layerIds[x], bldcnt)) + color = alphaBlendColor(src, output[x], eva, evb); + break; + case 2: + color = alphaBrightnessIncrease(src, evy); + break; + case 3: + color = alphaBrightnessDecrease(src, evy); + break; + } + } + + output[x] = color; + layerIds[x] = bgNum; + } + return; + } + + // fast path: 4bpp batched with inline blend + unsigned int x = 0; + + // left edge partial tile + { + unsigned int startX = hoffs & wMask; + unsigned int startOff = startX & 7; + + if (startOff != 0) { + uint16_t entry = map[rowBase + (startX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; + bool hflip = TILE_HFLIP(entry); + + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint16_t *palBase = pal + (palNum << 4); + + unsigned int partial = 8 - startOff; + if (partial > DISPLAY_WIDTH) + partial = DISPLAY_WIDTH; + + for (unsigned int t = 0; t < partial && x < DISPLAY_WIDTH; t++, x++) { + unsigned int tx = startOff + t; + if (hflip) + tx = 7 - tx; + uint8_t pixel = (row >> (tx << 2)) & 
0xF; + if (pixel) + WriteBGPixelBlended(x, pixel, palBase, bgNum, output, layerIds, blendMode, bgIsTargetA, useWindows, winBgBit, winMask, + bldcnt, eva, evb, evy); + } + } + } + + // middle: full tiles + while (x + 8 <= DISPLAY_WIDTH) { + unsigned int srcX = (x + hoffs) & wMask; + uint16_t entry = map[rowBase + (srcX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; + + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint16_t *palBase = pal + (palNum << 4); + +#define BLEND_PX(off, shift) \ + do { \ + uint8_t p = (row >> (shift)) & 0xF; \ + if (p) \ + WriteBGPixelBlended(x + (off), p, palBase, bgNum, output, layerIds, blendMode, bgIsTargetA, useWindows, winBgBit, winMask, \ + bldcnt, eva, evb, evy); \ + } while (0) + + if (!TILE_HFLIP(entry)) { + BLEND_PX(0, 0); + BLEND_PX(1, 4); + BLEND_PX(2, 8); + BLEND_PX(3, 12); + BLEND_PX(4, 16); + BLEND_PX(5, 20); + BLEND_PX(6, 24); + BLEND_PX(7, 28); + } else { + BLEND_PX(0, 28); + BLEND_PX(1, 24); + BLEND_PX(2, 20); + BLEND_PX(3, 16); + BLEND_PX(4, 12); + BLEND_PX(5, 8); + BLEND_PX(6, 4); + BLEND_PX(7, 0); + } + +#undef BLEND_PX + + x += 8; + } + + // right edge partial tile + if (x < DISPLAY_WIDTH) { + unsigned int srcX = (x + hoffs) & wMask; + uint16_t entry = map[rowBase + (srcX >> 3)]; + unsigned int tileNum = TILE_NUM(entry); + unsigned int palNum = TILE_PALETTE(entry); + unsigned int ty = tileY; + if (TILE_VFLIP(entry)) + ty = 7 - ty; + bool hflip = TILE_HFLIP(entry); + + uint32_t row = *(uint32_t *)(tiles + (tileNum << 5) + (ty << 2)); + uint16_t *palBase = pal + (palNum << 4); + + for (unsigned int t = 0; x < DISPLAY_WIDTH; t++, x++) { + unsigned int tx = hflip ? 
(7 - t) : t; + uint8_t pixel = (row >> (tx << 2)) & 0xF; + if (pixel) + WriteBGPixelBlended(x, pixel, palBase, bgNum, output, layerIds, blendMode, bgIsTargetA, useWindows, winBgBit, winMask, + bldcnt, eva, evb, evy); + } + } +} + +static void RenderAffineBG(int bgNum, uint16_t control, int lineNum, uint16_t *output) +{ + vBgCnt *bgcnt = (vBgCnt *)&control; + + uint8_t *tiles = (uint8_t *)(VRAM + bgcnt->charBaseBlock * 0x4000); + uint8_t *map = (uint8_t *)(VRAM + bgcnt->screenBaseBlock * 0x800); + uint16_t *pal = (uint16_t *)PLTT; + + if (control & BGCNT_MOSAIC) + lineNum = ApplyMosaicBGY(lineNum); + + s16 pa = GetBgPA(bgNum); + s16 pb = GetBgPB(bgNum); + s16 pc = GetBgPC(bgNum); + s16 pd = GetBgPD(bgNum); + + // always square: 128/256/512/1024 + int size = 128; + switch (bgcnt->screenSize) { + case 1: + size = 256; + break; + case 2: + size = 512; + break; + case 3: + size = 1024; + break; + } + int mask = size - 1; + int yshift = ((control >> 14) & 3) + 4; + + // sign-extend 28-bit reference point, advance by scanline + s32 refX = GetBgRefX(bgNum); + s32 refY = GetBgRefY(bgNum); + refX = (refX & (1 << 27)) ? refX | 0xF0000000 : refX; + refY = (refY & (1 << 27)) ? 
refY | 0xF0000000 : refY; + refX += lineNum * pb; + refY += lineNum * pd; + + int curX = refX; + int curY = refY; + + if (bgcnt->areaOverflowMode) { + // wraparound + for (int x = 0; x < DISPLAY_WIDTH; x++) { + int tx = (curX >> 8) & mask; + int ty = (curY >> 8) & mask; + int tile = map[(tx >> 3) + ((ty >> 3) << yshift)]; + uint8_t pixel = tiles[(tile << 6) + ((ty & 7) << 3) + (tx & 7)]; + if (pixel) + output[x] = pal[pixel] | COLOR_OPAQUE; + curX += pa; + curY += pc; + } + } else { + // clamp: outside the map = transparent + for (int x = 0; x < DISPLAY_WIDTH; x++) { + int tx = curX >> 8; + int ty = curY >> 8; + if (tx >= 0 && ty >= 0 && tx < size && ty < size) { + int tile = map[(tx >> 3) + ((ty >> 3) << yshift)]; + uint8_t pixel = tiles[(tile << 6) + ((ty & 7) << 3) + (tx & 7)]; + if (pixel) + output[x] = pal[pixel] | COLOR_OPAQUE; + } + curX += pa; + curY += pc; + } + } + + // horizontal mosaic as a post-pass + if ((control & BGCNT_MOSAIC) && MOSAIC_BG_X > 0) { + for (int x = 0; x < DISPLAY_WIDTH; x++) + output[x] = output[ApplyMosaicBGX(x)]; + } +} + +// same deal with blend/window support +static void RenderAffineBGBlend(int bgNum, uint16_t control, int lineNum, uint16_t *output, uint8_t *layerIds, unsigned int blendMode, + bool windowsEnabled, uint16_t *winMask, unsigned int bldcnt, unsigned int eva, unsigned int evb, + unsigned int evy) +{ + vBgCnt *bgcnt = (vBgCnt *)&control; + + uint8_t *tiles = (uint8_t *)(VRAM + bgcnt->charBaseBlock * 0x4000); + uint8_t *map = (uint8_t *)(VRAM + bgcnt->screenBaseBlock * 0x800); + uint16_t *pal = (uint16_t *)PLTT; + + if (control & BGCNT_MOSAIC) + lineNum = ApplyMosaicBGY(lineNum); + + s16 pa = GetBgPA(bgNum); + s16 pb = GetBgPB(bgNum); + s16 pc = GetBgPC(bgNum); + s16 pd = GetBgPD(bgNum); + + int size = 128; + switch (bgcnt->screenSize) { + case 1: + size = 256; + break; + case 2: + size = 512; + break; + case 3: + size = 1024; + break; + } + int mask = size - 1; + int yshift = ((control >> 14) & 3) + 4; + + s32 refX = 
GetBgRefX(bgNum); + s32 refY = GetBgRefY(bgNum); + refX = (refX & (1 << 27)) ? refX | 0xF0000000 : refX; + refY = (refY & (1 << 27)) ? refY | 0xF0000000 : refY; + refX += lineNum * pb; + refY += lineNum * pd; + + int curX = refX; + int curY = refY; + + bool bgIsTargetA = (blendMode != 0) && (bldcnt & (1 << bgNum)); + + for (int x = 0; x < DISPLAY_WIDTH; x++) { + int tx, ty; + + if (bgcnt->areaOverflowMode) { + tx = (curX >> 8) & mask; + ty = (curY >> 8) & mask; + } else { + tx = curX >> 8; + ty = curY >> 8; + if (tx < 0 || ty < 0 || tx >= size || ty >= size) { + curX += pa; + curY += pc; + continue; + } + } + + int tile = map[(tx >> 3) + ((ty >> 3) << yshift)]; + uint8_t pixel = tiles[(tile << 6) + ((ty & 7) << 3) + (tx & 7)]; + + curX += pa; + curY += pc; + + if (pixel == 0) + continue; + + uint16_t color = pal[pixel] | COLOR_OPAQUE; + + if (windowsEnabled && winMask && !(winMask[x] & (1 << bgNum))) + continue; + + bool winAllowsBlend = true; + if (windowsEnabled && winMask) + winAllowsBlend = (winMask[x] & WINMASK_CLR) >> 5; + + if (bgIsTargetA && winAllowsBlend) { + uint16_t src = color; + switch (blendMode) { + case 1: + if (IsBlendTargetB(layerIds[x], bldcnt)) + color = alphaBlendColor(src, output[x], eva, evb); + break; + case 2: + color = alphaBrightnessIncrease(src, evy); + break; + case 3: + color = alphaBrightnessDecrease(src, evy); + break; + } + } + + output[x] = color; + layerIds[x] = bgNum; + } + + if ((control & BGCNT_MOSAIC) && MOSAIC_BG_X > 0) { + for (int x = 0; x < DISPLAY_WIDTH; x++) + output[x] = output[ApplyMosaicBGX(x)]; + } +} + +#define MAX_SPRITES_PER_PRIORITY 32 + +typedef struct { + uint8_t oamIndex; +} ActiveSprite; + +static ActiveSprite sActiveSprites[4][MAX_SPRITES_PER_PRIORITY]; +static int sActiveSpriteCount[4]; + +static void PrefilterSprites(uint16_t vcount) +{ + sActiveSpriteCount[0] = 0; + sActiveSpriteCount[1] = 0; + sActiveSpriteCount[2] = 0; + sActiveSpriteCount[3] = 0; + + if (!(REG_DISPCNT & DISPCNT_OBJ_ON)) + return; + + 
// back-to-front so lower oam indices (higher hw priority) draw last + for (int i = OAM_ENTRY_COUNT - 1; i >= 0; i--) { + OamData *oam = &((OamData *)OAM)[i]; + + bool isAffine = oam->split.affineMode & 1; + bool isDisabled = (oam->split.affineMode >> 1) & 1; + + if (!isAffine && isDisabled) + continue; + + s32 idx = (oam->split.shape << 2) | oam->split.size; + unsigned int width = gOamShapesSizes[idx][0]; + unsigned int height = gOamShapesSizes[idx][1]; + int halfW = width / 2; + int halfH = height / 2; + + int32_t sx = oam->split.x; + int32_t sy = oam->split.y; +#if !EXTENDED_OAM + if (sx >= DISPLAY_WIDTH) + sx -= 512; + if (sy >= DISPLAY_HEIGHT) + sy -= 256; +#endif + + // double-size affine sprites have 2x bounding box + if (isAffine && isDisabled) { + halfW *= 2; + halfH *= 2; + } + + if ((int)vcount < sy || (int)vcount >= sy + halfH * 2) + continue; + if (sx + halfW * 2 < 0 || sx >= DISPLAY_WIDTH) + continue; + + int pri = oam->split.priority; + if (sActiveSpriteCount[pri] < MAX_SPRITES_PER_PRIORITY) { + sActiveSprites[pri][sActiveSpriteCount[pri]].oamIndex = i; + sActiveSpriteCount[pri]++; + } + } +} + +static void DrawSpritesAtPriority(int priority, uint16_t vcount, uint16_t *output, uint8_t *layerIds, bool windowsEnabled, + uint16_t *winMask, unsigned int blendMode, bool objWinOnly, unsigned int bldcnt, unsigned int eva, + unsigned int evb, unsigned int evy) +{ + uint8_t *tiledata = (uint8_t *)OBJ_VRAM0; + uint16_t *sprpal = (uint16_t *)PLTT + (0x200 / 2); + int16_t matrix[2][2]; + + // only 1-D tile mapping supported + if (!(REG_DISPCNT & (1 << 6))) + return; + + for (int s = 0; s < sActiveSpriteCount[priority]; s++) { + int i = sActiveSprites[priority][s].oamIndex; + OamData *oam = &((OamData *)OAM)[i]; + + bool isAffine = oam->split.affineMode & 1; + bool doubleSize = (oam->split.affineMode >> 1) & 1; + + s32 idx = (oam->split.shape << 2) | oam->split.size; + unsigned int width = gOamShapesSizes[idx][0]; + unsigned int height = gOamShapesSizes[idx][1]; 
+ int halfW = width / 2; + int halfH = height / 2; + + int32_t x = oam->split.x; + int32_t y = oam->split.y; +#if !EXTENDED_OAM + if (x >= DISPLAY_WIDTH) + x -= 512; + if (y >= DISPLAY_HEIGHT) + y -= 256; +#endif + if (isAffine && doubleSize) { + halfW *= 2; + halfH *= 2; + } + + bool isSemiTransparent = (oam->split.objMode == 1); + bool isObjWin = (oam->split.objMode == 2); + + if (objWinOnly && !isObjWin) + continue; + if (!objWinOnly && isObjWin) + continue; + + int rectWidth = width; + int rectHeight = height; + + if (isAffine) { + u8 matrixNum = oam->split.matrixNum * 4; + OamData *m0 = &((OamData *)OAM)[matrixNum]; + OamData *m1 = &((OamData *)OAM)[matrixNum + 1]; + OamData *m2 = &((OamData *)OAM)[matrixNum + 2]; + OamData *m3 = &((OamData *)OAM)[matrixNum + 3]; + matrix[0][0] = m0->all.affineParam; + matrix[0][1] = m1->all.affineParam; + matrix[1][0] = m2->all.affineParam; + matrix[1][1] = m3->all.affineParam; + if (doubleSize) { + rectWidth *= 2; + rectHeight *= 2; + } + } else { + matrix[0][0] = 0x100; // identity in 8.8 fixed point + matrix[0][1] = 0; + matrix[1][0] = 0; + matrix[1][1] = 0x100; + } + + x += halfW; + y += halfH; + + int localY = (oam->split.mosaic == 1) ? ApplyMosaicSprY(vcount) - y : vcount - y; + bool flipX = !isAffine && ((oam->split.matrixNum >> 3) & 1); + bool flipY = !isAffine && ((oam->split.matrixNum >> 4) & 1); + bool is8bpp = oam->split.bpp & 1; + + int startLX = -halfW; + int endLX = halfW; + if (startLX + x < 0) + startLX = -x; + if (endLX + x >= DISPLAY_WIDTH) + endLX = DISPLAY_WIDTH - 1 - x; + + // fast path: non-affine 4bpp, no mosaic -- batched tile row reads + if (!isAffine && !is8bpp && !oam->split.mosaic) { + int texY = localY + halfH; + if (flipY) + texY = height - texY - 1; + if (texY < 0 || texY >= (int)height) + continue; + + int tileRowY = texY & 7; + int blockY = texY >> 3; + int tilesPerRow = (REG_DISPCNT & 0x40) ? 
((int)width >> 3) : 16; + int tileBase = blockY * tilesPerRow + oam->split.tileNum; + int rowByteOff = tileRowY << 2; + uint16_t *pixpal = sprpal + (oam->split.paletteNum << 4); + + int lx = startLX; + while (lx <= endLX) { + int rawX = lx + halfW; + int texX = flipX ? ((int)width - 1 - rawX) : rawX; + + if (texX < 0 || texX >= (int)width) { + lx++; + continue; + } + + int blockX = texX >> 3; + int tileXStart = texX & 7; + + uint32_t rowData = *(uint32_t *)(tiledata + ((tileBase + blockX) << 5) + rowByteOff); + + int pixelsInTile = !flipX ? (8 - tileXStart) : (tileXStart + 1); + int remain = endLX - lx + 1; + if (pixelsInTile > remain) + pixelsInTile = remain; + + if (!flipX) { + int texRemain = (int)width - texX; + if (pixelsInTile > texRemain) + pixelsInTile = texRemain; + } else { + int texRemain = texX + 1; + if (pixelsInTile > texRemain) + pixelsInTile = texRemain; + } + + for (int p = 0; p < pixelsInTile; p++, lx++) { + int curTX = flipX ? (tileXStart - p) : (tileXStart + p); + uint8_t pixel = (rowData >> (curTX << 2)) & 0xF; + if (pixel == 0) + continue; + + int gx = lx + x; + uint16_t color = pixpal[pixel]; + + // obj window sprites modify the window mask, not the framebuffer + if (isObjWin) { + if (windowsEnabled && winMask && (winMask[gx] & WINMASK_WINOUT)) + winMask[gx] = (REG_WINOUT >> 8) & 0x3F; + continue; + } + + if (layerIds && blendMode != 0) + color = BlendSpritePixel(color, gx, output, layerIds, isSemiTransparent, blendMode, bldcnt, windowsEnabled, winMask, + eva, evb, evy); + + if (windowsEnabled && winMask && !(winMask[gx] & WINMASK_OBJ)) + continue; + + output[gx] = color | COLOR_OPAQUE; + if (layerIds) + layerIds[gx] = LAYER_OBJ; + } + } + continue; + } + + // generic path: affine, 8bpp, or mosaic -- per pixel + for (int localX = startLX; localX <= endLX; localX++) { + int gx = localX + x; + int texX, texY; + + if (!isAffine) { + int lmx = localX; + if (oam->split.mosaic == 1) + lmx = ApplyMosaicSprX(gx) - x; + texX = lmx + halfW; + texY = 
localY + halfH; + if (flipX) + texX = width - texX - 1; + if (flipY) + texY = height - texY - 1; + } else { + int lmx = localX; + int lmy = localY; + if (oam->split.mosaic == 1) { + lmx = ApplyMosaicSprX(gx) - x; + lmy = ApplyMosaicSprY(vcount) - y; + } + // apply 2x2 affine matrix (8.8 fixed point) + texX = ((matrix[0][0] * lmx + matrix[0][1] * lmy) >> 8) + (width / 2); + texY = ((matrix[1][0] * lmx + matrix[1][1] * lmy) >> 8) + (height / 2); + } + + if (texX < 0 || texY < 0 || texX >= (int)width || texY >= (int)height) + continue; + + int tileX = texX & 7; + int tileY = texY & 7; + int blockX = texX >> 3; + int blockY = texY >> 3; + int blockOffset = blockY * (REG_DISPCNT & 0x40 ? ((int)width >> 3) : 16) + blockX; + + uint16_t pixel = 0; + uint16_t *pixpal; + + if (!is8bpp) { + int tdi = ((blockOffset + oam->split.tileNum) << 5) + (tileY << 2) + (tileX >> 1); + pixel = tiledata[tdi]; + if (tileX & 1) + pixel >>= 4; + else + pixel &= 0xF; + pixpal = sprpal + (oam->split.paletteNum << 4); + } else { + pixel = tiledata[((blockOffset * 2 + oam->split.tileNum) << 5) + (tileY << 3) + tileX]; + pixpal = sprpal; + } + + if (pixel == 0) + continue; + + uint16_t color = pixpal[pixel]; + + if (isObjWin) { + if (windowsEnabled && winMask && (winMask[gx] & WINMASK_WINOUT)) + winMask[gx] = (REG_WINOUT >> 8) & 0x3F; + continue; + } + + if (layerIds && blendMode != 0) + color = BlendSpritePixel(color, gx, output, layerIds, isSemiTransparent, blendMode, bldcnt, windowsEnabled, winMask, eva, + evb, evy); + + if (windowsEnabled && winMask && !(winMask[gx] & WINMASK_OBJ)) + continue; + + output[gx] = color | COLOR_OPAQUE; + if (layerIds) + layerIds[gx] = LAYER_OBJ; + } + } +} + +static void DrawScanline(uint16_t *pixels, uint16_t vcount) +{ + unsigned int mode = REG_DISPCNT & 3; + unsigned int numBGs = (mode == 0) ? 
4 : 3; + unsigned int blendMode = (REG_BLDCNT >> 6) & 3; + unsigned int enabledBgs = (REG_DISPCNT >> 8) & 0xF; + + // sort bgs by priority + uint16_t bgcnts[4]; + char bgPriority[4]; + char bgsByPri[4][4]; + char bgsByPriCount[4] = { 0, 0, 0, 0 }; + + for (int bg = 0; bg < (int)numBGs; bg++) { + uint16_t cnt = *(uint16_t *)(REG_ADDR_BG0CNT + bg * 2); + bgcnts[bg] = cnt; + uint16_t pri = cnt & 3; + bgPriority[bg] = pri; + bgsByPri[pri][bgsByPriCount[pri]] = bg; + bgsByPriCount[pri]++; + } + + // window setup + bool windowsEnabled = false; + u16 win0Bot, win0Top, win0Right, win0Left; + u16 win1Bot, win1Top, win1Right, win1Left; + bool win0Active = false, win1Active = false; + static uint16_t winMask[DISPLAY_WIDTH]; + + if (REG_DISPCNT & DISPCNT_WIN0_ON) { + win0Bot = WIN_GET_HIGHER(REG_WIN0V); + win0Top = WIN_GET_LOWER(REG_WIN0V); + win0Right = WIN_GET_HIGHER(REG_WIN0H); + win0Left = WIN_GET_LOWER(REG_WIN0H); + if (win0Top > win0Bot) + win0Active = (vcount >= win0Top || vcount < win0Bot); + else + win0Active = (vcount >= win0Top && vcount < win0Bot); + windowsEnabled = true; + } + if (REG_DISPCNT & DISPCNT_WIN1_ON) { + win1Bot = WIN_GET_HIGHER(REG_WIN1V); + win1Top = WIN_GET_LOWER(REG_WIN1V); + win1Right = WIN_GET_HIGHER(REG_WIN1H); + win1Left = WIN_GET_LOWER(REG_WIN1H); + if (win1Top > win1Bot) + win1Active = (vcount >= win1Top || vcount < win1Bot); + else + win1Active = (vcount >= win1Top && vcount < win1Bot); + windowsEnabled = true; + } + if ((REG_DISPCNT & DISPCNT_OBJWIN_ON) && (REG_DISPCNT & DISPCNT_OBJ_ON)) + windowsEnabled = true; + + // build per-pixel window mask + if (windowsEnabled) { + for (unsigned int xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { + if (win0Active && WindowContainsX(win0Left, win0Right, xpos)) + winMask[xpos] = REG_WININ & 0x3F; + else if (win1Active && WindowContainsX(win1Left, win1Right, xpos)) + winMask[xpos] = (REG_WININ >> 8) & 0x3F; + else + winMask[xpos] = (REG_WINOUT & 0x3F) | WINMASK_WINOUT; + } + } + + PrefilterSprites(vcount); + 
+ // layerIds tracks who wrote each pixel so alpha blend can find target-b + static uint8_t layerIds[DISPLAY_WIDTH]; + bool needLayerIds = (blendMode != 0 || windowsEnabled); + uint8_t *lids = needLayerIds ? layerIds : NULL; + uint16_t *wmask = windowsEnabled ? winMask : NULL; + + if (needLayerIds) + memset(layerIds, LAYER_BACKDROP, DISPLAY_WIDTH); + + // grab blend regs once per scanline + unsigned int bldcnt = REG_BLDCNT; + unsigned int bld_eva = REG_BLDALPHA & 0x1F; + unsigned int bld_evb = (REG_BLDALPHA >> 8) & 0x1F; + unsigned int bld_evy = REG_BLDY & 0x1F; + + // obj window pass -- these sprites modify the window mask, not the framebuffer + if (windowsEnabled && (REG_DISPCNT & DISPCNT_OBJWIN_ON) && (REG_DISPCNT & DISPCNT_OBJ_ON)) { + for (int pri = 0; pri < 4; pri++) + DrawSpritesAtPriority(pri, vcount, pixels, lids, windowsEnabled, wmask, blendMode, + /*objWinOnly=*/true, bldcnt, bld_eva, bld_evb, bld_evy); + } + + // back-to-front: priority 3 first, 0 last (0 is topmost) + for (int pri = 3; pri >= 0; pri--) { + for (int sub = bgsByPriCount[pri] - 1; sub >= 0; sub--) { + int bg = bgsByPri[pri][sub]; + if (!IsBGEnabled(bg)) + continue; + + if (!needLayerIds) { + switch (mode) { + case 0: + RenderTextBG(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), + vcount, pixels); + break; + case 1: + if (bg == 2) + RenderAffineBG(bg, bgcnts[bg], vcount, pixels); + else + RenderTextBG(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), + vcount, pixels); + break; + } + } else { + switch (mode) { + case 0: + RenderTextBGBlend(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), + *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), vcount, pixels, lids, blendMode, windowsEnabled, wmask, + bldcnt, bld_eva, bld_evb, bld_evy); + break; + case 1: + if (bg == 2) + RenderAffineBGBlend(bg, bgcnts[bg], vcount, pixels, lids, blendMode, windowsEnabled, wmask, bldcnt, bld_eva, + bld_evb, 
bld_evy); + else + RenderTextBGBlend(bg, bgcnts[bg], *(uint16_t *)(REG_ADDR_BG0HOFS + bg * 4), + *(uint16_t *)(REG_ADDR_BG0VOFS + bg * 4), vcount, pixels, lids, blendMode, windowsEnabled, + wmask, bldcnt, bld_eva, bld_evb, bld_evy); + break; + } + } + } + + if (REG_DISPCNT & DISPCNT_OBJ_ON) + DrawSpritesAtPriority(pri, vcount, pixels, lids, windowsEnabled, wmask, blendMode, + /*objWinOnly=*/false, bldcnt, bld_eva, bld_evb, bld_evy); + } +} + +void DrawFrame_Fast(uint16_t *pixels) +{ + for (int i = 0; i < DISPLAY_HEIGHT; i++) { + uint16_t *scanline = &pixels[i * DISPLAY_WIDTH]; + + REG_VCOUNT = i; + if (((REG_DISPSTAT >> 8) & 0xFF) == REG_VCOUNT) { + REG_DISPSTAT |= INTR_FLAG_VCOUNT; + if (REG_DISPSTAT & DISPSTAT_VCOUNT_INTR) + gIntrTable[INTR_INDEX_VCOUNT](); + } + + Memset16(scanline, *(uint16_t *)PLTT, DISPLAY_WIDTH); + DrawScanline(scanline, i); + + REG_DISPSTAT |= INTR_FLAG_HBLANK; + RunDMAs(DMA_HBLANK); + if (REG_DISPSTAT & DISPSTAT_HBLANK_INTR) + gIntrTable[INTR_INDEX_HBLANK](); + + REG_DISPSTAT &= ~INTR_FLAG_HBLANK; + REG_DISPSTAT &= ~INTR_FLAG_VCOUNT; + } +} + +#endif