diff --git a/.gitignore b/.gitignore index 60cc4e932..cf418a746 100644 --- a/.gitignore +++ b/.gitignore @@ -85,6 +85,12 @@ libagbsyscall/*.s *.exe *.dll *.sdl +*.iso + +# PSP build outputs +EBOOT.PBP +PARAM.SFO +sa2_debug.log # third party deps /ext diff --git a/Makefile b/Makefile index b972750e0..e07a3e3cc 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,16 @@ else ifeq ($(CPU_ARCH),i386) TOOLCHAIN := /usr/x86_64-w64-mingw32/ PREFIX := x86_64-w64-mingw32- endif +# PSP +else ifeq ($(PLATFORM),psp) + PSPDEV ?= $(HOME)/pspdev + PSPSDK := $(PSPDEV)/psp/sdk + export PATH := $(PSPDEV)/bin:$(PATH) + PREFIX := psp- +else ifeq ($(PLATFORM),sdl_ps2) + PREFIX := mips64r5900el-ps2-elf- +else ifeq ($(PLATFORM),ps2) + PREFIX := mips64r5900el-ps2-elf- else # Native ifneq ($(PLATFORM),sdl) @@ -74,6 +84,7 @@ CC1 := tools/agbcc/bin/agbcc$(EXE) CC1_OLD := tools/agbcc/bin/old_agbcc$(EXE) else CC1 := $(PREFIX)gcc$(EXE) +CXX := $(PREFIX)g++$(EXE) CC1_OLD := $(CC1) endif @@ -120,6 +131,18 @@ else ifeq ($(PLATFORM),sdl) ROM := $(BUILD_NAME).sdl ELF := $(ROM).elf MAP := $(ROM).map +else ifeq ($(PLATFORM),psp) +ROM := EBOOT.PBP +ELF := $(BUILD_NAME).psp.elf +MAP := $(BUILD_NAME).psp.map +else ifeq ($(PLATFORM),sdl_ps2) +ROM := $(BUILD_NAME).$(PLATFORM).iso +ELF := $(ROM:.iso=.elf) +MAP := $(ROM:.iso=.map) +else ifeq ($(PLATFORM),ps2) +ROM := $(BUILD_NAME).$(PLATFORM).iso +ELF := $(ROM:.iso=.elf) +MAP := $(ROM:.iso=.map) else ROM := $(BUILD_NAME).$(PLATFORM).exe ELF := $(ROM:.exe=.elf) @@ -156,16 +179,30 @@ TILESETS_SUBDIR = graphics/tilesets/ ifeq ($(PLATFORM),gba) C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/*") else ifeq ($(PLATFORM),sdl) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*" -not -path "*/platform/ps2/*") +else ifeq ($(PLATFORM),psp) +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path 
"*/platform/win32/*" -not -path "*/platform/ps2/*") +else ifeq ($(PLATFORM),sdl_ps2) +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*" -not -path "*/platform/ps2/*") +else ifeq ($(PLATFORM),ps2) +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*" -not -path "*/platform/pret_sdl/*") else ifeq ($(PLATFORM),sdl_win32) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/win32/*" -not -path "*/platform/psp/*" -not -path "*/platform/ps2/*") else ifeq ($(PLATFORM),win32) -C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/pret_sdl/*") +C_SRCS := $(shell find $(C_SUBDIR) -name "*.c" -not -path "*/platform/pret_sdl/*" -not -path "*/platform/psp/*" -not -path "*/platform/ps2/*") else C_SRCS := $(shell find $(C_SUBDIR) -name "*.c") endif C_OBJS := $(patsubst $(C_SUBDIR)/%.c,$(C_BUILDDIR)/%.o,$(C_SRCS)) +ifeq ($(PLATFORM),gba) +CXX_SRCS := $(shell find $(C_SUBDIR) -name "*.cc" -not -path "*/platform/*") +else +CXX_SRCS := $(shell find $(C_SUBDIR) -name "*.cc") +endif + +CXX_OBJS := $(patsubst $(C_SUBDIR)/%.cc,$(C_BUILDDIR)/%.o,$(CXX_SRCS)) + # Platform not included as we only need the headers for decomp scratches C_HEADERS := $(shell find $(INCLUDE_DIRS) -name "*.h" -not -path "*/platform/*") @@ -189,7 +226,7 @@ MID_OBJS := $(patsubst $(MID_SUBDIR)/%.mid,$(MID_BUILDDIR)/%.o,$(MID_SRCS)) SOUND_ASM_SRCS := $(wildcard $(SOUND_ASM_SUBDIR)/*.s) SOUND_ASM_OBJS := $(patsubst $(SOUND_ASM_SUBDIR)/%.s,$(SOUND_ASM_BUILDDIR)/%.o,$(SOUND_ASM_SRCS)) -OBJS := $(C_OBJS) $(ASM_OBJS) $(C_ASM_OBJS) $(DATA_ASM_OBJS) $(SONG_OBJS) $(MID_OBJS) +OBJS := $(C_OBJS) $(CXX_OBJS) $(ASM_OBJS) $(C_ASM_OBJS) $(DATA_ASM_OBJS) $(SONG_OBJS) $(MID_OBJS) OBJS_REL := $(patsubst $(OBJ_DIR)/%,%,$(OBJS)) FORMAT_SRC_PATHS := $(shell find . -name "*.c" ! -path '*/src/data/*' ! 
-path '*/build/*' ! -path '*/ext/*') @@ -225,6 +262,15 @@ else ifeq ($(PLATFORM),sdl) CC1FLAGS += -Wno-parentheses-equality -Wno-unused-value CPPFLAGS += -D TITLE_BAR=$(BUILD_NAME).$(PLATFORM) -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 $(shell sdl2-config --cflags) + else ifeq ($(PLATFORM),psp) + CC1FLAGS += -G0 + CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 -D SDL_MAIN_HANDLED -I$(PSPDEV)/psp/include/SDL2 -I$(PSPDEV)/psp/include -I$(PSPSDK)/include -D_PSP_FW_VERSION=600 + else ifeq ($(PLATFORM),sdl_ps2) + CC1FLAGS += -G0 -Wno-parentheses-equality -Wno-unused-value -ffast-math + CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 -D SDL_MAIN_HANDLED -D_EE -D__PS2__ -I$(PS2SDK)/common/include -I$(PS2SDK)/ee/include -I$(PS2SDK)/ports/include $(shell $(PS2SDK)/ports/bin/sdl2-config --cflags) + else ifeq ($(PLATFORM),ps2) + CC1FLAGS += -G0 -Wno-parentheses-equality -Wno-unused-value -ffast-math + CPPFLAGS += -D PLATFORM_GBA=0 -D PLATFORM_SDL=0 -D PLATFORM_WIN32=0 -D_EE -D__PS2__ -I$(PS2SDK)/common/include -I$(PS2SDK)/ee/include -I$(PS2DEV)/gsKit/include -I$(PS2SDK)/ports/include else ifeq ($(PLATFORM),sdl_win32) CPPFLAGS += -D TITLE_BAR=$(BUILD_NAME).$(PLATFORM) -D PLATFORM_GBA=0 -D PLATFORM_SDL=1 -D PLATFORM_WIN32=0 $(SDL_MINGW_FLAGS) else ifeq ($(PLATFORM),win32) @@ -241,24 +287,20 @@ else endif endif -ifeq ($(PLATFORM),gba) - ASFLAGS += -mcpu=arm7tdmi -mthumb-interwork - CC1FLAGS += -mthumb-interwork -else - ifeq ($(PLATFORM), sdl) - # for modern we are using a modern compiler - # so instead of CPP we can use gcc -E to "preprocess only" - CPP := $(CC1) -E - endif - # Allow file input through stdin on modern GCC and set it to "compile only" - CC1FLAGS += -x c -S -endif - ifeq ($(DEBUG),1) CC1FLAGS += -g3 -O0 CPPFLAGS += -D DEBUG=1 else - CC1FLAGS += -O2 + ifeq ($(PLATFORM),psp) + # -O3 for PSP (Allegrex MIPS, small D-cache) + CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer + else ifeq ($(PLATFORM),sdl_ps2) + 
CC1FLAGS += -O3 -funroll-loops -fomit-frame-pointer + else ifeq ($(PLATFORM),ps2) + CC1FLAGS += -O3 -fomit-frame-pointer + else + CC1FLAGS += -O2 + endif CPPFLAGS += -D DEBUG=0 endif @@ -285,6 +327,28 @@ else CPPFLAGS += -D ENABLE_DECOMP_CREDITS=1 endif +CXXFLAGS := $(CC1FLAGS) $(CPPFLAGS) -fno-rtti -fno-exceptions -std=c++11 + +ifeq ($(PLATFORM),gba) + ASFLAGS += -mcpu=arm7tdmi -mthumb-interwork + CC1FLAGS += -mthumb-interwork +else + ifeq ($(PLATFORM), sdl) + # for modern we are using a modern compiler + # so instead of CPP we can use gcc -E to "preprocess only" + CPP := $(CC1) -E + else ifeq ($(PLATFORM), psp) + CPP := $(CC1) -E + else ifeq ($(PLATFORM), sdl_ps2) + ASFLAGS += -msingle-float + else ifeq ($(PLATFORM), ps2) + ASFLAGS += -msingle-float + endif + # Allow file input through stdin on modern gcc/g++ and set it to "compile only" + CC1FLAGS += -x c -S + CXXFLAGS += -x c++ -S +endif + ### LINKER FLAGS ### # GBA @@ -297,6 +361,13 @@ else ifeq ($(PLATFORM),sdl) else MAP_FLAG := -Xlinker -Map= endif +# PSP +else ifeq ($(PLATFORM),psp) + MAP_FLAG := -Xlinker -Map= +else ifeq ($(PLATFORM),sdl_ps2) + MAP_FLAG := -Xlinker -Map= +else ifeq ($(PLATFORM),ps2) + MAP_FLAG := -Xlinker -Map= # Win32 else MAP_FLAG := -Xlinker -Map= @@ -307,6 +378,12 @@ ifeq ($(PLATFORM),gba) LIBS := $(ROOT_DIR)/tools/agbcc/lib/libgcc.a $(ROOT_DIR)/tools/agbcc/lib/libc.a $(LIBABGSYSCALL_LIBS) else ifeq ($(PLATFORM),sdl) LIBS := $(shell sdl2-config --cflags --libs) +else ifeq ($(PLATFORM),psp) + LIBS := -L$(PSPDEV)/psp/lib -L$(PSPSDK)/lib -lSDL2 -lm -lGL -lpspvram -lpspaudio -lpspvfpu -lpspdisplay -lpspgu -lpspge -lpsphprm -lpspctrl -lpsppower -lpspdebug -lpspnet -lpspnet_apctl -Wl,-zmax-page-size=128 +else ifeq ($(PLATFORM),sdl_ps2) + LIBS := -lSDL2 $(shell $(PS2SDK)/ports/bin/sdl2-config --libs) -T$(PS2SDK)/ee/startup/linkfile -L$(PS2SDK)/common/lib -L$(PS2SDK)/ee/lib -L$(PS2DEV)/gsKit/lib -Wl,-zmax-page-size=128 +else ifeq ($(PLATFORM),ps2) + LIBS := -T$(PS2SDK)/ee/startup/linkfile 
-L$(PS2SDK)/common/lib -L$(PS2SDK)/ee/lib -L$(PS2DEV)/gsKit/lib -L$(PS2SDK)/ports/lib -lgskit -ldmakit -lps2_drivers -lmc -lpatches -Wl,-zmax-page-size=128 else ifeq ($(PLATFORM),sdl_win32) LIBS := -mwin32 -lkernel32 -lwinmm -lmingw32 -lxinput $(SDL_MINGW_LIBS) else ifeq ($(PLATFORM), win32) @@ -316,7 +393,7 @@ endif #### MAIN TARGETS #### # these commands will run regardless of deps being completed -.PHONY: clean tools tidy clean-tools $(TOOLDIRS) libagbsyscall +.PHONY: clean tools tidy clean-tools $(TOOLDIRS) libagbsyscall psp sdl_ps2 ps2 # Ensure required directories exist $(shell mkdir -p $(C_BUILDDIR) $(ASM_BUILDDIR) $(DATA_ASM_BUILDDIR) $(SOUND_ASM_BUILDDIR) $(SONG_BUILDDIR) $(MID_BUILDDIR)) @@ -397,7 +474,8 @@ clean-tools: tidy: $(RM) -r build/* $(RM) SDL2.dll - $(RM) $(BUILD_NAME)*.exe $(BUILD_NAME)*.elf $(BUILD_NAME)*.map $(BUILD_NAME)*.sdl $(BUILD_NAME)*.gba + $(RM) $(BUILD_NAME)*.exe $(BUILD_NAME)*.elf $(BUILD_NAME)*.map $(BUILD_NAME)*.sdl $(BUILD_NAME)*.gba $(BUILD_NAME)*.iso + $(RM) EBOOT.PBP PARAM.SFO usa_beta: ; @$(MAKE) GAME_REGION=USA GAME_VARIANT=BETA @@ -409,6 +487,12 @@ europe: ; @$(MAKE) GAME_REGION=EUROPE sdl: ; @$(MAKE) PLATFORM=sdl +psp: ; @$(MAKE) PLATFORM=psp + +sdl_ps2: ; @$(MAKE) PLATFORM=sdl_ps2 + +ps2: ; @$(MAKE) PLATFORM=ps2 + tas_sdl: ; @$(MAKE) sdl TAS_TESTING=1 sdl_win32: @@ -459,7 +543,7 @@ data/mb_chao_garden_japan.gba.lz: data/mb_chao_garden_japan.gba %.bin: %.aif ; $(AIF) $< $@ -$(ELF): $(OBJS) libagbsyscall +$(ELF): $(OBJS) $(if $(filter gba win32,$(PLATFORM)),libagbsyscall) ifeq ($(PLATFORM),gba) @echo "$(LD) -T $(LDSCRIPT) $(MAP_FLAG) $(MAP) -o $@" @$(CPP) -P $(CPPFLAGS) $(LDSCRIPT) > $(OBJ_DIR)/$(LDSCRIPT) @@ -470,14 +554,35 @@ else @cd $(OBJ_DIR) && $(CC1) $(MAP_FLAG)$(ROOT_DIR)/$(MAP) $(OBJS_REL) $(LIBS) -o $(ROOT_DIR)/$@ endif -$(ROM): $(ELF) + ifeq ($(PLATFORM),gba) +$(ROM): $(ELF) libagbsyscall $(OBJCOPY) -O binary --pad-to 0x8400000 $< $@ $(FIX) $@ -p -t"$(TITLE)" -c$(GAME_CODE) -m$(MAKER_CODE) -r$(GAME_REVISION) --silent -else ifeq ($(PLATFORM),sdl) - cp $< $@ -else +else ifneq 
($(filter win32 sdl_win32,$(PLATFORM)),) # sdl_win32 also needs the objcopy-to-PE step +$(ROM): $(ELF) libagbsyscall $(OBJCOPY) -O pei-x86-64 $< $@ +else +$(ROM): $(ELF) +ifeq ($(PLATFORM),sdl) + cp $< $@ +else ifeq ($(PLATFORM),psp) + psp-fixup-imports $< + mksfoex 'Sonic Advance 2' PARAM.SFO + psp-strip $< -o $(BUILD_NAME).psp_strip.elf + pack-pbp $@ PARAM.SFO NULL NULL NULL NULL NULL $(BUILD_NAME).psp_strip.elf NULL + $(RM) $(BUILD_NAME).psp_strip.elf +else ifeq ($(PLATFORM),sdl_ps2) + @echo Creating $(ROM) from $(ELF) + @mkdir -p $(OBJ_DIR)/iso && cp -r ps2/ntsc/. $(OBJ_DIR)/iso/ # copy contents so a rebuild does not nest iso/ntsc + @cp $< $(OBJ_DIR)/iso/$(PS2_GAME_CODE) + @mkisofs -o $(ROM) $(OBJ_DIR)/iso/ +else ifeq ($(PLATFORM),ps2) + @echo Creating $(ROM) from $(ELF) + @mkdir -p $(OBJ_DIR)/iso && cp -r ps2/ntsc/. $(OBJ_DIR)/iso/ # copy contents so a rebuild does not nest iso/ntsc + @cp $< $(OBJ_DIR)/iso/$(PS2_GAME_CODE) + @mkisofs -o $(ROM) $(OBJ_DIR)/iso/ +endif endif # Build c sources, and ensure alignment @@ -491,11 +596,21 @@ ifeq ($(PLATFORM), gba) endif @$(AS) $(ASFLAGS) $(C_BUILDDIR)/$*.s -o $@ +$(C_BUILDDIR)/%.o: $(C_SUBDIR)/%.cc + @echo "$(CXX) -o $@ $<" + @$(shell mkdir -p $(shell dirname '$(C_BUILDDIR)/$*.o')) + @$(CXX) $(CXXFLAGS) -o $(C_BUILDDIR)/$*.s $< + @$(AS) $(ASFLAGS) $(C_BUILDDIR)/$*.s -o $@ + # Scan the src dependencies to determine if any dependent files have changed $(C_BUILDDIR)/%.d: $(C_SUBDIR)/%.c @$(shell mkdir -p $(shell dirname '$(C_BUILDDIR)/$*.d')) $(SCANINC) -M $@ $(INCLUDE_SCANINC_ARGS) $< +$(C_BUILDDIR)/%.d: $(C_SUBDIR)/%.cc + @$(shell mkdir -p $(shell dirname '$(C_BUILDDIR)/$*.d')) + $(SCANINC) -M $@ $(INCLUDE_SCANINC_ARGS) $< + # rule for sources from the src dir (parts of libraries) $(C_BUILDDIR)/%.o: $(C_SUBDIR)/%.s @echo "$(AS) -o $@ $<" @@ -515,6 +630,7 @@ $(DATA_ASM_BUILDDIR)/%.d: $(DATA_ASM_SUBDIR)/%.s ifneq ($(NODEP),1) -include $(addprefix $(OBJ_DIR)/,$(C_SRCS:.c=.d)) +-include $(addprefix $(OBJ_DIR)/,$(CXX_SRCS:.cc=.d)) -include $(addprefix $(OBJ_DIR)/,$(DATA_ASM_SRCS:.s=.d)) endif diff --git a/README.md b/README.md index 382027ade..1dd07df9f 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ It can also build: * **sa2.sdl** `make 
sdl` (Linux/MacOS SDL 64bit port) * **sa2.sdl_win32.exe** `make sdl_win32` (Windows SDL 64bit port) * :construction: **sa2.win32.exe** `make win32` (Win32 native port, not functional) +* **EBOOT.PBP** `make psp` (PlayStation Portable homebrew port, requires [PSPDEV](https://github.com/pspdev/pspdev)) ## Current state diff --git a/asm/macros/portable.inc b/asm/macros/portable.inc index b389fb26a..fd66638b8 100644 --- a/asm/macros/portable.inc +++ b/asm/macros/portable.inc @@ -10,6 +10,8 @@ .macro mPtr value #if defined(__aarch64__) || defined(__x86_64__) .quad \value +#elif defined(__mips__) + .4byte \value #else .int \value #endif diff --git a/config.mk b/config.mk index 5f687443a..15affde18 100644 --- a/config.mk +++ b/config.mk @@ -63,6 +63,7 @@ MAKER_CODE := 78 BUILD_NAME := sa2 TITLE := SONICADVANC2 GAME_CODE := A2N +PS2_GAME_CODE := SLUS_054.02 # Revision diff --git a/include/config.h b/include/config.h index 15b9df4ec..dd1e301d2 100644 --- a/include/config.h +++ b/include/config.h @@ -39,14 +39,20 @@ #define TAS_TESTING_WIDESCREEN_HACK 1 -#define RENDERER_SOFTWARE 0 -#define RENDERER_OPENGL 1 -#define RENDERER_COUNT 2 -#if PLATFORM_WIN32 && !PLATFORM_SDL +#define RENDERER_SOFTWARE 0 +#define RENDERER_OPENGL 1 +#define RENDERER_SOFTWARE_FAST 2 +#define RENDERER_COUNT 3 + +#ifndef RENDERER +#if defined(__PSP__) || defined(__PS2__) +#define RENDERER RENDERER_SOFTWARE_FAST +#elif PLATFORM_WIN32 && !PLATFORM_SDL // TODO: Only win32 for now #define RENDERER RENDERER_OPENGL #else -#define RENDERER RENDERER_SOFTWARE +#define RENDERER RENDERER_SOFTWARE_FAST +#endif #endif #endif // GUARD_SA2_CONFIG_H diff --git a/include/gba/defines.h b/include/gba/defines.h index b904ee74d..45371a2e1 100644 --- a/include/gba/defines.h +++ b/include/gba/defines.h @@ -39,8 +39,18 @@ #define OAM_ENTRY_COUNT 128 #if PORTABLE // NOTE: Used in gba/types.h, so they have to be defined before the #include +#if defined(__PSP__) +// PSP: Use GBA-native resolution, SDL scales to 480x272 +#define 
DISPLAY_WIDTH 240 +#define DISPLAY_HEIGHT 160 +#elif defined(__PS2__) +// Runs at 60fps with the "fast draw" +#define DISPLAY_WIDTH 320 +#define DISPLAY_HEIGHT 180 +#else #define DISPLAY_WIDTH 426 #define DISPLAY_HEIGHT 240 +#endif // NOTE: We shouldn't consider WIDESCREEN_HACK a permanent thing. // This hack should best be removed once there's a "native" platform layer. diff --git a/include/gba/types.h b/include/gba/types.h index 72e721ad7..419e1fdda 100644 --- a/include/gba/types.h +++ b/include/gba/types.h @@ -20,6 +20,9 @@ typedef struct __attribute__((packed)) name struct_body name; #endif +#ifdef __PS2__ +#include +#else typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; @@ -28,6 +31,7 @@ typedef int8_t s8; typedef int16_t s16; typedef int32_t s32; typedef int64_t s64; +#endif #if (GAME == GAME_SA1) typedef u8 MetatileIndexType; @@ -38,12 +42,12 @@ typedef u16 MetatileIndexType; // If the DISPLAY_HEIGHT was >255, scanline effects would break, // so we have to make this variable bigger. // (u16 should be plenty for screen coordinates, right?) -#if !defined(DISPLAY_HEIGHT) -#error DISPLAY_HEIGHT not defined. +#if !defined(WIDESCREEN_HACK) +#error WIDESCREEN_HACK not defined. #endif /// TODO: Technically this should only be #if (DISPLAY_HEIGHT > 255), // we should probably replace uses of int_vcount with a different type where a high DISPLAY_WIDTH necessitates u16. 
-#if ((DISPLAY_WIDTH > 255) || (DISPLAY_HEIGHT > 255)) +#if WIDESCREEN_HACK typedef u16 int_vcount; #else typedef u8 int_vcount; diff --git a/include/lib/m4a/m4a_internal.h b/include/lib/m4a/m4a_internal.h index 7755591c6..0ae01ef88 100644 --- a/include/lib/m4a/m4a_internal.h +++ b/include/lib/m4a/m4a_internal.h @@ -243,6 +243,8 @@ struct SoundMixerState { #if PLATFORM_GBA s8 pcmBuffer[PCM_DMA_BUF_SIZE * 2]; #else + // TODO: avoid float samples here; floating-point math is slow + // on older systems float pcmBuffer[PCM_DMA_BUF_SIZE * 2]; #endif }; diff --git a/include/platform/platform.h b/include/platform/platform.h index 0a44b8f1f..504c23586 100644 --- a/include/platform/platform.h +++ b/include/platform/platform.h @@ -21,6 +21,6 @@ extern void Platform_RLFree(unsigned char *dest); extern void Platform_LZDecompressUnsafe(unsigned char *src, unsigned char *dest); extern void Platform_RLDecompressUnsafe(unsigned char *src, unsigned char *dest); -extern void Platform_QueueAudio(const void *data, u32 numBytes); +extern void Platform_QueueAudio(const float *data, u32 numBytes); #endif // GUARD_SA2_PLATFORM_H diff --git a/include/platform/shared/rendering/sw_renderer_common.h b/include/platform/shared/rendering/sw_renderer_common.h new file mode 100644 index 000000000..ddb85d7ff --- /dev/null +++ b/include/platform/shared/rendering/sw_renderer_common.h @@ -0,0 +1,64 @@ +#ifndef GUARD_SW_RENDERER_COMMON_H +#define GUARD_SW_RENDERER_COMMON_H + +// shared color math for the gba ppu blend unit +// used by both the normal (multi-pass) and fast (single-pass) software renderers + +#include <stdint.h> + +// bgr555 channel extraction +#define getAlphaBit(x) (((x) >> 15) & 1) +#define getRedChannel(x) (((x) >> 0) & 0x1F) +#define getGreenChannel(x) (((x) >> 5) & 0x1F) +#define getBlueChannel(x) (((x) >> 10) & 0x1F) +#define COLOR_OPAQUE 0x8000 + +static inline uint16_t alphaBlendColor(uint16_t targetA, uint16_t targetB, unsigned int eva, unsigned int evb) +{ + unsigned int r = ((getRedChannel(targetA) 
* eva) + (getRedChannel(targetB) * evb)) >> 4; + unsigned int g = ((getGreenChannel(targetA) * eva) + (getGreenChannel(targetB) * evb)) >> 4; + unsigned int b = ((getBlueChannel(targetA) * eva) + (getBlueChannel(targetB) * evb)) >> 4; + + if (r > 31) + r = 31; + if (g > 31) + g = 31; + if (b > 31) + b = 31; + + return r | (g << 5) | (b << 10) | COLOR_OPAQUE; +} + +static inline uint16_t alphaBrightnessIncrease(uint16_t targetA, unsigned int evy) +{ + unsigned int r = getRedChannel(targetA) + (31 - getRedChannel(targetA)) * evy / 16; + unsigned int g = getGreenChannel(targetA) + (31 - getGreenChannel(targetA)) * evy / 16; + unsigned int b = getBlueChannel(targetA) + (31 - getBlueChannel(targetA)) * evy / 16; + + if (r > 31) + r = 31; + if (g > 31) + g = 31; + if (b > 31) + b = 31; + + return r | (g << 5) | (b << 10) | COLOR_OPAQUE; +} + +static inline uint16_t alphaBrightnessDecrease(uint16_t targetA, unsigned int evy) +{ + unsigned int r = getRedChannel(targetA) - getRedChannel(targetA) * evy / 16; + unsigned int g = getGreenChannel(targetA) - getGreenChannel(targetA) * evy / 16; + unsigned int b = getBlueChannel(targetA) - getBlueChannel(targetA) * evy / 16; + + if (r > 31) + r = 31; + if (g > 31) + g = 31; + if (b > 31) + b = 31; + + return r | (g << 5) | (b << 10) | COLOR_OPAQUE; +} + +#endif // GUARD_SW_RENDERER_COMMON_H diff --git a/libagbsyscall/Makefile b/libagbsyscall/Makefile index 654a44e4b..7f6c55693 100644 --- a/libagbsyscall/Makefile +++ b/libagbsyscall/Makefile @@ -34,6 +34,10 @@ else ifeq ($(CPU_ARCH),i386) TOOLCHAIN := /usr/x86_64-w64-mingw32/ PREFIX := x86_64-w64-mingw32- endif +else ifeq ($(PLATFORM),psp) + PSPDEV ?= $(HOME)/pspdev + export PATH := $(PSPDEV)/bin:$(PATH) + PREFIX := psp- else ifneq ($(PLATFORM),sdl) $(error Unknown CPU architecture $(CPU_ARCH)) endif # (PLATFORM == gba) diff --git a/ps2/ntsc/SYSTEM.CNF b/ps2/ntsc/SYSTEM.CNF new file mode 100644 index 000000000..9c440527d --- /dev/null +++ b/ps2/ntsc/SYSTEM.CNF @@ -0,0 +1,3 @@ 
+BOOT2 = cdrom0:\SLUS_054.02;1 +VER = 1.00 +VMODE = NTSC \ No newline at end of file diff --git a/src/background.c b/src/background.c index 7fba66626..190160224 100644 --- a/src/background.c +++ b/src/background.c @@ -650,7 +650,7 @@ END_NONMATCH void UpdateBgAnimationTiles(Background *bg) { -#if (RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != RENDERER_OPENGL) Tilemap *tilemap = gTilemapsRef[bg->tilemapId]; if (tilemap->animFrameCount > 0) { if (tilemap->animDelay <= ++bg->animDelayCounter) { @@ -872,7 +872,7 @@ NONMATCH("asm/non_matching/engine/sub_80039E4.inc", bool32 sub_80039E4(void)) return TRUE; #endif -#if (RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != RENDERER_OPENGL) if (gBgSpritesCount != 0) { OamDataShort oam; s32 r5; diff --git a/src/core.c b/src/core.c index ea52547b7..a5ffe374d 100644 --- a/src/core.c +++ b/src/core.c @@ -924,7 +924,7 @@ bool32 ProcessVramGraphicsCopyQueue(void) if ((graphics->src != 0) && (graphics->dest != 0)) #endif { -#if (RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != RENDERER_OPENGL) DmaCopy16(3, (void *)(graphics->src + offset), (void *)(graphics->dest + offset), COPY_CHUNK_SIZE); #endif graphics->size -= COPY_CHUNK_SIZE; @@ -939,7 +939,7 @@ bool32 ProcessVramGraphicsCopyQueue(void) if ((graphics->src != 0) && (graphics->dest != 0)) #endif { -#if (RENDERER == RENDERER_SOFTWARE) +#if (RENDERER != RENDERER_OPENGL) DmaCopy16(3, (void *)(graphics->src + offset), (void *)(graphics->dest + offset), graphics->size); #endif } diff --git a/src/game/special_stage/world.c b/src/game/special_stage/world.c index eb293bf74..a41ac6d5c 100644 --- a/src/game/special_stage/world.c +++ b/src/game/special_stage/world.c @@ -239,14 +239,14 @@ void sub_806EA04(void) *unk1884++ = (Q_16_16_TO_INT(temp) * cos) >> 0x10; // BG2PA // HACK: in SDL we don't handle these PB and PD values properly -#if PLATFORM_SDL +#if !PLATFORM_GBA && (RENDERER == RENDERER_SOFTWARE_FAST || RENDERER == RENDERER_SOFTWARE) *unk1884++ = 0; #else *unk1884++ = 
(Q_16_16_TO_INT(temp) * sin) >> 0x10; // BG2PB #endif *unk1884++ = (Q_16_16_TO_INT(temp) * -sin) >> 0x10; // BG2PC -#if PLATFORM_SDL +#if !PLATFORM_GBA && (RENDERER == RENDERER_SOFTWARE_FAST || RENDERER == RENDERER_SOFTWARE) *unk1884++ = 0; #else *unk1884++ = (Q_16_16_TO_INT(temp) * cos) >> 0x10; // BG2PD diff --git a/src/platform/pret_sdl/sdl2.c b/src/platform/pret_sdl/sdl2.c index f4ceaf79a..f63b7ba7e 100644 --- a/src/platform/pret_sdl/sdl2.c +++ b/src/platform/pret_sdl/sdl2.c @@ -10,6 +10,11 @@ #include #endif +#ifdef __PSP__ +#include +extern int setupPspCallbacks(void); +#endif + #include #include "global.h" @@ -21,6 +26,7 @@ #include "lib/agb_flash/flash_internal.h" #include "platform/shared/dma.h" #include "platform/shared/input.h" +#include "platform/shared/rendering/sw_renderer_common.h" #if ENABLE_AUDIO #include "platform/shared/audio/cgb_audio.h" @@ -92,6 +98,16 @@ bool paused = false; bool stepOneFrame = false; bool headless = false; +#if defined(__PSP__) || defined(__PS2__) +static SDL_Joystick *joystick = NULL; +#endif + +#ifdef __PSP__ +#define PSP_SCREEN_W 480 +#define PSP_SCREEN_H 272 +static SDL_Rect pspDestRect; +#endif + double lastGameTime = 0; double curGameTime = 0; double fixedTimestep = 1.0 / 60.0; // 16.666667ms @@ -120,8 +136,57 @@ void *Platform_malloc(size_t numBytes) { return HeapAlloc(GetProcessHeap(), HEAP void Platform_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); } #endif +#ifdef __PS2__ +// TODO: clean these for what is needed +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +void reset_IOP() +{ + SifInitRpc(0); + while (!SifIopReset(NULL, 0)) { } // Comment this line if you want to "debug" through ps2link + while (!SifIopSync()) { } +} + +static void prepare_IOP() +{ + reset_IOP(); + SifInitRpc(0); + sbv_patch_enable_lmb(); + sbv_patch_disable_prefix_check(); +} + +static void init_drivers() +{ + init_only_boot_ps2_filesystem_driver(); + init_memcard_driver(true); +} + +static 
void deinit_drivers() +{ + deinit_memcard_driver(true); + deinit_only_boot_ps2_filesystem_driver(); +} +#endif + int main(int argc, char **argv) { +#ifdef __PSP__ + setupPspCallbacks(); +#endif + +#ifdef __PS2__ + prepare_IOP(); +#endif + const char *headlessEnv = getenv("HEADLESS"); if (headlessEnv && strcmp(headlessEnv, "true") == 0) { @@ -144,15 +209,19 @@ int main(int argc, char **argv) freopen("CON", "w", stdout); #endif +#ifndef __PS2__ ReadSaveFile("sa2.sav"); +#endif // Prevent the multiplayer screen from being drawn ( see core.c:EngineInit() ) REG_RCNT = 0x8000; REG_KEYINPUT = 0x3FF; if (headless) { +#if ENABLE_AUDIO // Required or it makes an infinite loop cgb_audio_init(48000); +#endif AgbMain(); return 1; } @@ -162,14 +231,26 @@ int main(int argc, char **argv) return 1; } +#if defined(__PSP__) || defined(__PS2__) + if (SDL_NumJoysticks() > 0) { + joystick = SDL_JoystickOpen(0); + } +#endif + #ifdef TITLE_BAR const char *title = STR(TITLE_BAR); #else const char *title = "SAT-R sa2"; #endif +#ifdef __PSP__ + sdlWindow = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 480, 272, SDL_WINDOW_SHOWN); +#elif defined(__PS2__) + sdlWindow = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 640, 448, SDL_WINDOW_SHOWN); +#else sdlWindow = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, DISPLAY_WIDTH * videoScale, DISPLAY_HEIGHT * videoScale, SDL_WINDOW_SHOWN | SDL_WINDOW_RESIZABLE); +#endif if (sdlWindow == NULL) { fprintf(stderr, "Window could not be created! 
SDL_Error: %s\n", SDL_GetError()); return 1; @@ -191,7 +272,17 @@ int main(int argc, char **argv) } #endif +#ifdef __PSP__ + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC); + if (sdlRenderer == NULL) + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_ACCELERATED); + if (sdlRenderer == NULL) + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, 0); +#elif defined(__PS2__) + sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_ACCELERATED); +#else sdlRenderer = SDL_CreateRenderer(sdlWindow, -1, SDL_RENDERER_PRESENTVSYNC); +#endif if (sdlRenderer == NULL) { fprintf(stderr, "Renderer could not be created! SDL_Error: %s\n", SDL_GetError()); return 1; @@ -208,7 +299,12 @@ int main(int argc, char **argv) SDL_SetRenderDrawColor(sdlRenderer, 0, 0, 0, 255); SDL_RenderClear(sdlRenderer); SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "0"); +#ifdef __PSP__ + // SDL_RenderSetLogicalSize is broken on PSP, stretch to fill manually + pspDestRect = (SDL_Rect) { 0, 0, PSP_SCREEN_W, PSP_SCREEN_H }; +#else SDL_RenderSetLogicalSize(sdlRenderer, DISPLAY_WIDTH, DISPLAY_HEIGHT); +#endif #if ENABLE_VRAM_VIEW SDL_SetRenderDrawColor(vramRenderer, 0, 0, 0, 255); SDL_RenderClear(vramRenderer); @@ -229,6 +325,12 @@ int main(int argc, char **argv) } #endif +#ifdef __PS2__ + SDL_SetTextureScaleMode(sdlTexture, SDL_ScaleModeLinear); + // For some reason we are WAY blown out on the PS2 + SDL_SetTextureColorMod(sdlTexture, 140, 140, 140); +#endif + #if ENABLE_AUDIO SDL_AudioSpec want; @@ -239,9 +341,9 @@ int main(int argc, char **argv) want.samples = (want.freq / 60); cgb_audio_init(want.freq); - if (SDL_OpenAudio(&want, 0) < 0) + if (SDL_OpenAudio(&want, 0) < 0) { SDL_Log("Failed to open audio: %s", SDL_GetError()); - else { + } else { if (want.format != AUDIO_F32) /* we let this one thing change. 
*/ SDL_Log("We didn't get Float32 audio format."); SDL_PauseAudio(0); @@ -259,12 +361,10 @@ int main(int argc, char **argv) bool newFrameRequested = FALSE; -// Every GBA frame we process the SDL events and render the number of times -// SDL requires us to for vsync. When we need another frame we break out of -// the loop via a return +// called every gba frame. we process sdl events and render as many times +// as vsync needs, then return when a new game frame is needed. void VBlankIntrWait(void) { - // ((struct MultiSioPacket *)gMultiSioArea.nextSendBufp) #define HANDLE_VBLANK_INTRS() \ ({ \ REG_DISPSTAT |= INTR_FLAG_VBLANK; \ @@ -281,16 +381,22 @@ void VBlankIntrWait(void) } bool frameAvailable = TRUE; + bool frameDrawn = false; +#if defined(__PSP__) || defined(__PS2__) + static int frames_skipped = 0; +#define MAX_FRAME_SKIP 2 +#endif while (isRunning) { +#if !defined(__PS2__) && !defined(__PSP__) ProcessSDLEvents(); +#endif if (!paused || stepOneFrame) { double dt = fixedTimestep / timeScale; // TODO: Fix speedup - // Hack to emulate the behaviour of threaded sdl - // it will not add any new values to the accumulator - // when a new frame was requested within a frame cycle + // don't accumulate time if we already requested a new frame + // this frame cycle (emulates threaded sdl behavior) if (!newFrameRequested) { double deltaTime = 0; @@ -312,8 +418,21 @@ void VBlankIntrWait(void) while (accumulator >= dt) { REG_KEYINPUT = KEYS_MASK ^ Platform_GetKeyInput(); if (frameAvailable) { +#if defined(__PSP__) || defined(__PS2__) + // frame skip: let game logic catch up when behind + if (accumulator >= dt * 2.0 && frames_skipped < MAX_FRAME_SKIP) { + frames_skipped++; + frameAvailable = FALSE; + HANDLE_VBLANK_INTRS(); + accumulator -= dt; + newFrameRequested = TRUE; + return; + } + frames_skipped = 0; +#endif VDraw(sdlTexture); frameAvailable = FALSE; + frameDrawn = true; HANDLE_VBLANK_INTRS(); @@ -329,6 +448,21 @@ void VBlankIntrWait(void) } } + // present +#ifdef 
__PSP__ + // manual blit since SDL_RenderSetLogicalSize doesn't work on psp + if (frameDrawn) { + SDL_RenderCopy(sdlRenderer, sdlTexture, NULL, &pspDestRect); + SDL_RenderPresent(sdlRenderer); + frameDrawn = false; + } else { + SDL_Delay(1); + } +#else +#ifdef __PS2__ + // Allow audio to play + DelayThread(800); +#endif SDL_RenderClear(sdlRenderer); SDL_RenderCopy(sdlRenderer, sdlTexture, NULL, NULL); @@ -345,6 +479,7 @@ void VBlankIntrWait(void) SDL_RenderPresent(sdlRenderer); #if ENABLE_VRAM_VIEW SDL_RenderPresent(vramRenderer); +#endif #endif } @@ -352,8 +487,11 @@ void VBlankIntrWait(void) SDL_DestroyWindow(sdlWindow); SDL_Quit(); +#ifdef __PSP__ + sceKernelExitGame(); +#endif exit(0); -#undef RUN_VBLANK_INTRS +#undef HANDLE_VBLANK_INTRS } static void ReadSaveFile(char *path) @@ -421,10 +559,75 @@ static void CloseSaveFile() static u16 keys; +#if defined(__PSP__) || defined(__PS2__) + +#ifdef __PS2__ +#define BTN_TRIANGLE 12 +#define BTN_CIRCLE 13 +#define BTN_CROSS 14 +#define BTN_SQUARE 15 +#define BTN_LTRIGGER 10 +#define BTN_RTRIGGER 11 +#define BTN_DOWN 6 +#define BTN_LEFT 7 +#define BTN_UP 4 +#define BTN_RIGHT 5 +#define BTN_SELECT 0 +#define BTN_START 3 +#else +#define BTN_TRIANGLE 0 +#define BTN_CIRCLE 1 +#define BTN_CROSS 2 +#define BTN_SQUARE 3 +#define BTN_LTRIGGER 4 +#define BTN_RTRIGGER 5 +#define BTN_DOWN 6 +#define BTN_LEFT 7 +#define BTN_UP 8 +#define BTN_RIGHT 9 +#define BTN_SELECT 10 +#define BTN_START 11 +#endif + +static u16 PollJoystickButtons(void) +{ + u16 keys = 0; + if (joystick == NULL) + return keys; + + SDL_JoystickUpdate(); + + if (SDL_JoystickGetButton(joystick, BTN_CROSS)) + keys |= A_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_CIRCLE)) + keys |= B_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_SQUARE)) + keys |= B_BUTTON; // Square also B + if (SDL_JoystickGetButton(joystick, BTN_START)) + keys |= START_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_SELECT)) + keys |= SELECT_BUTTON; + if 
(SDL_JoystickGetButton(joystick, BTN_LTRIGGER)) + keys |= L_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_RTRIGGER)) + keys |= R_BUTTON; + if (SDL_JoystickGetButton(joystick, BTN_UP)) + keys |= DPAD_UP; + if (SDL_JoystickGetButton(joystick, BTN_DOWN)) + keys |= DPAD_DOWN; + if (SDL_JoystickGetButton(joystick, BTN_LEFT)) + keys |= DPAD_LEFT; + if (SDL_JoystickGetButton(joystick, BTN_RIGHT)) + keys |= DPAD_RIGHT; + + return keys; +} +#endif + u32 fullScreenFlags = 0; static SDL_DisplayMode sdlDispMode = { 0 }; -void Platform_QueueAudio(const void *data, uint32_t bytesCount) +void Platform_QueueAudio(const float *data, uint32_t bytesCount) { if (headless) { return; @@ -561,18 +764,33 @@ u16 Platform_GetKeyInput(void) return (gamepadKeys != 0) ? gamepadKeys : keys; #endif +#if defined(__PSP__) || defined(__PS2__) + return keys | PollJoystickButtons(); +#endif + return keys; } // BIOS function implementations are based on the VBA-M source code. -static uint32_t CPUReadMemory(const void *src) { return *(uint32_t *)src; } +// safe unaligned access for MIPS +static uint32_t CPUReadMemory(const void *src) +{ + uint32_t val; + memcpy(&val, src, sizeof(val)); + return val; +} -static void CPUWriteMemory(void *dest, uint32_t val) { *(uint32_t *)dest = val; } +static void CPUWriteMemory(void *dest, uint32_t val) { memcpy(dest, &val, sizeof(val)); } -static uint16_t CPUReadHalfWord(const void *src) { return *(uint16_t *)src; } +static uint16_t CPUReadHalfWord(const void *src) +{ + uint16_t val; + memcpy(&val, src, sizeof(val)); + return val; +} -static void CPUWriteHalfWord(void *dest, uint16_t val) { *(uint16_t *)dest = val; } +static void CPUWriteHalfWord(void *dest, uint16_t val) { memcpy(dest, &val, sizeof(val)); } static uint8_t CPUReadByte(const void *src) { return *(uint8_t *)src; } @@ -968,25 +1186,26 @@ static const uint16_t bgMapSizes[][2] = { #define applySpriteHorizontalMosaicEffect(x) (x - (x % (mosaicSpriteEffectX + 1))) #define 
applySpriteVerticalMosaicEffect(y) (y - (y % (mosaicSpriteEffectY + 1))) -// NOTE: This is the corrected function. static void RenderBGScanline(int bgNum, uint16_t control, uint16_t hoffs, uint16_t voffs, int lineNum, uint16_t *line) { unsigned int charBaseBlock = (control >> 2) & 3; unsigned int screenBaseBlock = (control & BGCNT_SCREENBASE_MASK) >> 8; - unsigned int bitsPerPixel = ((control >> 7) & 1) ? 8 : 4; + unsigned int is8bpp = (control >> 7) & 1; // Determine background dimensions from the control register unsigned int mapWidth = bgMapSizes[control >> 14][0]; // in tiles - unsigned int mapHeight = bgMapSizes[control >> 14][1]; // in tiles - unsigned int mapPixelWidth = mapWidth * TILE_WIDTH; - unsigned int mapPixelHeight = mapHeight * TILE_WIDTH; + unsigned int mapPixelWidth = mapWidth << 3; + unsigned int mapPixelHeight = bgMapSizes[control >> 14][1] << 3; + unsigned int pixelWidthMask = mapPixelWidth - 1; + unsigned int pixelHeightMask = mapPixelHeight - 1; uint8_t *bgtiles = (uint8_t *)BG_CHAR_ADDR(charBaseBlock); uint16_t *bgmap = (uint16_t *)BG_SCREEN_ADDR(screenBaseBlock); uint16_t *pal = (uint16_t *)PLTT; // Apply vertical mosaic effect to the entire scanline if enabled - if (control & BGCNT_MOSAIC) { + bool hasMosaic = control & BGCNT_MOSAIC; + if (hasMosaic) { lineNum = applyBGVerticalMosaicEffect(lineNum); } @@ -994,29 +1213,22 @@ static void RenderBGScanline(int bgNum, uint16_t control, uint16_t hoffs, uint16 hoffs &= 0x1FF; voffs &= 0x1FF; + unsigned int yy = (lineNum + voffs) & pixelHeightMask; + unsigned int mapY = yy >> 3; + unsigned int tileY = yy & 7; + unsigned int mapRowBase = mapY * mapWidth; + for (unsigned int x = 0; x < DISPLAY_WIDTH; x++) { - unsigned int xx, yy; + unsigned int xx; - // Calculate the source coordinate in the background map, applying scroll and mosaic - if (control & BGCNT_MOSAIC) { - xx = applyBGHorizontalMosaicEffect(x) + hoffs; + if (hasMosaic) { + xx = (applyBGHorizontalMosaicEffect(x) + hoffs) & pixelWidthMask; 
} else { - xx = x + hoffs; + xx = (x + hoffs) & pixelWidthMask; } - yy = lineNum + voffs; - // Wrap the coordinates based on the background's actual pixel dimensions. - // This fixes issues with backgrounds that are not 256x256. - xx &= (mapPixelWidth - 1); - yy &= (mapPixelHeight - 1); - - // Convert pixel coordinates to tile coordinates - unsigned int mapX = xx / TILE_WIDTH; - unsigned int mapY = yy / TILE_WIDTH; - - // Calculate the 1D index into the tilemap. This was the primary source of bugs, - // as the original code used a hardcoded map width of 32 tiles. - unsigned int mapIndex = mapY * mapWidth + mapX; + unsigned int mapX = xx >> 3; + unsigned int mapIndex = mapRowBase + mapX; uint16_t entry = bgmap[mapIndex]; unsigned int tileNum = entry & 0x3FF; @@ -1026,40 +1238,30 @@ static void RenderBGScanline(int bgNum, uint16_t control, uint16_t hoffs, uint16 vramPalIdBuffer[tileNum] = paletteNum; #endif - // Get the coordinate within the specific tile - unsigned int tileX = xx % TILE_WIDTH; - unsigned int tileY = yy % TILE_WIDTH; + unsigned int tx = xx & 7; + unsigned int ty = tileY; - // Handle horizontal and vertical tile flipping if (entry & (1 << 10)) - tileX = (TILE_WIDTH - 1) - tileX; // H-flip + tx = 7 - tx; if (entry & (1 << 11)) - tileY = (TILE_WIDTH - 1) - tileY; // V-flip + ty = 7 - ty; - // Calculate address of the pixel data and extract the color - if (bitsPerPixel == 4) { - uint32_t tileDataOffset = tileNum * TILE_SIZE_4BPP; - uint32_t pixelByteOffset = (tileY * TILE_WIDTH + tileX) / 2; + if (!is8bpp) { + uint32_t tileDataOffset = tileNum << 5; + uint32_t pixelByteOffset = (ty << 2) + (tx >> 1); uint8_t pixelPair = bgtiles[tileDataOffset + pixelByteOffset]; - uint8_t pixel; - if (tileX & 1) { - pixel = pixelPair >> 4; - } else { - pixel = pixelPair & 0xF; - } + uint8_t pixel = (tx & 1) ? 
(pixelPair >> 4) : (pixelPair & 0xF); if (pixel != 0) { - line[x] = pal[16 * paletteNum + pixel] | 0x8000; + line[x] = pal[(paletteNum << 4) + pixel] | 0x8000; } } else { // 8 bits per pixel - uint32_t tileDataOffset = tileNum * TILE_SIZE_8BPP; - uint32_t pixelByteOffset = tileY * TILE_WIDTH + tileX; + uint32_t tileDataOffset = tileNum << 6; + uint32_t pixelByteOffset = (ty << 3) + tx; uint8_t pixel = bgtiles[tileDataOffset + pixelByteOffset]; if (pixel != 0) { - // For 8bpp tiles, the palette number in the tile entry is ignored. - // The pixel value is a direct index into the 256-color palette. line[x] = pal[pixel] | 0x8000; } } @@ -1257,64 +1459,7 @@ const u8 spriteSizes[][2] = { { 32, 64 }, }; -#define getAlphaBit(x) ((x >> 15) & 1) -#define getRedChannel(x) ((x >> 0) & 0x1F) -#define getGreenChannel(x) ((x >> 5) & 0x1F) -#define getBlueChannel(x) ((x >> 10) & 0x1F) -#define isbgEnabled(x) ((REG_DISPCNT >> 8) & 0xF) & (1 << x) - -static uint16_t alphaBlendColor(uint16_t targetA, uint16_t targetB) -{ - unsigned int eva = REG_BLDALPHA & 0x1F; - unsigned int evb = (REG_BLDALPHA >> 8) & 0x1F; - // shift right by 4 = division by 16 - unsigned int r = ((getRedChannel(targetA) * eva) + (getRedChannel(targetB) * evb)) >> 4; - unsigned int g = ((getGreenChannel(targetA) * eva) + (getGreenChannel(targetB) * evb)) >> 4; - unsigned int b = ((getBlueChannel(targetA) * eva) + (getBlueChannel(targetB) * evb)) >> 4; - - if (r > 31) - r = 31; - if (g > 31) - g = 31; - if (b > 31) - b = 31; - - return r | (g << 5) | (b << 10) | (1 << 15); -} - -static uint16_t alphaBrightnessIncrease(uint16_t targetA) -{ - unsigned int evy = (REG_BLDY & 0x1F); - unsigned int r = getRedChannel(targetA) + (31 - getRedChannel(targetA)) * evy / 16; - unsigned int g = getGreenChannel(targetA) + (31 - getGreenChannel(targetA)) * evy / 16; - unsigned int b = getBlueChannel(targetA) + (31 - getBlueChannel(targetA)) * evy / 16; - - if (r > 31) - r = 31; - if (g > 31) - g = 31; - if (b > 31) - b = 31; - - 
return r | (g << 5) | (b << 10) | (1 << 15); -} - -static uint16_t alphaBrightnessDecrease(uint16_t targetA) -{ - unsigned int evy = (REG_BLDY & 0x1F); - unsigned int r = getRedChannel(targetA) - getRedChannel(targetA) * evy / 16; - unsigned int g = getGreenChannel(targetA) - getGreenChannel(targetA) * evy / 16; - unsigned int b = getBlueChannel(targetA) - getBlueChannel(targetA) * evy / 16; - - if (r > 31) - r = 31; - if (g > 31) - g = 31; - if (b > 31) - b = 31; - - return r | (g << 5) | (b << 10) | (1 << 15); -} +#define isbgEnabled(x) ((REG_DISPCNT >> 8) & 0xF) & (1 << x) // outputs the blended pixel in colorOutput, the prxxx are the bg priority and // subpriority, pixelpos is pixel offset in scanline @@ -1396,8 +1541,6 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool bool isAffine = oam->split.affineMode & 1; bool doubleSizeOrDisabled = (oam->split.affineMode >> 1) & 1; - bool isSemiTransparent = (oam->split.objMode == 1); - bool isObjWin = (oam->split.objMode == 2); if (!(isAffine) && doubleSizeOrDisabled) // disable for non-affine { @@ -1408,31 +1551,41 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool width = gOamShapesSizes[index][0]; height = gOamShapesSizes[index][1]; - int rect_width = width; - int rect_height = height; - int half_width = width / 2; int half_height = height / 2; - pixels = scanline->spriteLayers[oam->split.priority]; - int32_t x = oam->split.x; int32_t y = oam->split.y; #if !EXTENDED_OAM - // The regular, unextended values are 9 and 8 unsigned bits for x and y respectively. - // Once they have exceeded the screen's right or bottom, they get treated as signed values on original hardware. - // This is done so that, for example, a sprite at 0 on either axis that moves left or up will not suddenly disappear. - // - // With EXTENDED_OAM we are using signed 16 bit values, so we don't want to change the raw value. 
if (x >= DISPLAY_WIDTH) x -= 512; if (y >= DISPLAY_HEIGHT) y -= 256; #endif + if (isAffine && doubleSizeOrDisabled) { + half_width *= 2; + half_height *= 2; + } + + int spriteTop = y; + int spriteBottom = y + (half_height * 2); + if ((int)vcount < spriteTop || (int)vcount >= spriteBottom) + continue; + + int spriteLeft = x; + int spriteRight = x + (half_width * 2); + if (spriteRight < 0 || spriteLeft >= DISPLAY_WIDTH) + continue; + + bool isSemiTransparent = (oam->split.objMode == 1); + bool isObjWin = (oam->split.objMode == 2); + + int rect_width = width; + int rect_height = height; + if (isAffine) { - // TODO: there is probably a better way to do this u8 matrixNum = oam->split.matrixNum * 4; OamData *oam1 = &((OamData *)OAM)[matrixNum]; @@ -1445,26 +1598,22 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool matrix[1][0] = oam3->all.affineParam; matrix[1][1] = oam4->all.affineParam; - if (doubleSizeOrDisabled) // double size for affine - { + if (doubleSizeOrDisabled) { rect_width *= 2; rect_height *= 2; - half_width *= 2; - half_height *= 2; } } else { - // Identity matrix[0][0] = 0x100; matrix[0][1] = 0; matrix[1][0] = 0; matrix[1][1] = 0x100; } + pixels = scanline->spriteLayers[oam->split.priority]; x += half_width; y += half_height; - // Does this sprite actually draw on this scanline? - if (vcount >= (y - half_height) && vcount < (y + half_height)) { + { int local_y = (oam->split.mosaic == 1) ? 
applySpriteVerticalMosaicEffect(vcount) - y : vcount - y; int number = oam->split.tileNum; int palette = oam->split.paletteNum; @@ -1472,96 +1621,100 @@ static void DrawOamSprites(struct scanlineData *scanline, uint16_t vcount, bool bool flipY = !isAffine && ((oam->split.matrixNum >> 4) & 1); bool is8BPP = oam->split.bpp & 1; - for (int local_x = -half_width; local_x <= half_width; local_x++) { + { uint8_t *tiledata = (uint8_t *)objtiles; - uint16_t *palette = (uint16_t *)(PLTT + (0x200 / 2)); - int local_mosaicX; - int tex_x; - int tex_y; - - unsigned int global_x = local_x + x; - - if (global_x < 0 || global_x >= DISPLAY_WIDTH) - continue; - - if (oam->split.mosaic == 1) { - // mosaic effect has to be applied to global coordinates otherwise - // the mosaic will scroll - local_mosaicX = applySpriteHorizontalMosaicEffect(global_x) - x; - tex_x = ((matrix[0][0] * local_mosaicX + matrix[0][1] * local_y) >> 8) + (width / 2); - tex_y = ((matrix[1][0] * local_mosaicX + matrix[1][1] * local_y) >> 8) + (height / 2); - } else { - tex_x = ((matrix[0][0] * local_x + matrix[0][1] * local_y) >> 8) + (width / 2); - tex_y = ((matrix[1][0] * local_x + matrix[1][1] * local_y) >> 8) + (height / 2); - } + uint16_t *sprpal = (uint16_t *)(PLTT + (0x200 / 2)); + for (int local_x = -half_width; local_x <= half_width; local_x++) { + int local_mosaicX; + int tex_x; + int tex_y; - /* Check if transformed coordinates are inside bounds. */ - - if (tex_x >= width || tex_y >= height || tex_x < 0 || tex_y < 0) - continue; - - if (flipX) - tex_x = width - tex_x - 1; - if (flipY) - tex_y = height - tex_y - 1; - - int tile_x = tex_x % 8; - int tile_y = tex_y % 8; - int block_x = tex_x / 8; - int block_y = tex_y / 8; - int block_offset = ((block_y * (REG_DISPCNT & 0x40 ? 
(width / 8) : 16)) + block_x); - uint16_t pixel = 0; - - if (!is8BPP) { - int tileDataIndex = (block_offset + oam->split.tileNum) * 32 + (tile_y * 4) + (tile_x / 2); - pixel = tiledata[tileDataIndex]; - if (tile_x & 1) - pixel >>= 4; - else - pixel &= 0xF; - palette += oam->split.paletteNum * 16; -#if ENABLE_VRAM_VIEW - vramPalIdBuffer[0x800 + (tileDataIndex / 32)] = 16 + oam->split.paletteNum; -#endif - } else { - pixel = tiledata[(block_offset * 2 + oam->split.tileNum) * 32 + (tile_y * 8) + tile_x]; - } + unsigned int global_x = local_x + x; - if (pixel != 0) { - uint16_t color = palette[pixel]; + if (global_x < 0 || global_x >= DISPLAY_WIDTH) + continue; - // if sprite mode is 2 then write to the window mask instead - if (isObjWin) { - if (scanline->winMask[global_x] & WINMASK_WINOUT) - scanline->winMask[global_x] = (REG_WINOUT >> 8) & 0x3F; + if (oam->split.mosaic == 1) { + // mosaic effect has to be applied to global coordinates otherwise + // the mosaic will scroll + local_mosaicX = applySpriteHorizontalMosaicEffect(global_x) - x; + tex_x = ((matrix[0][0] * local_mosaicX + matrix[0][1] * local_y) >> 8) + (width / 2); + tex_y = ((matrix[1][0] * local_mosaicX + matrix[1][1] * local_y) >> 8) + (height / 2); + } else { + tex_x = ((matrix[0][0] * local_x + matrix[0][1] * local_y) >> 8) + (width / 2); + tex_y = ((matrix[1][0] * local_x + matrix[1][1] * local_y) >> 8) + (height / 2); + } + + /* Check if transformed coordinates are inside bounds. */ + + if (tex_x >= width || tex_y >= height || tex_x < 0 || tex_y < 0) continue; + + if (flipX) + tex_x = width - tex_x - 1; + if (flipY) + tex_y = height - tex_y - 1; + + int tile_x = tex_x & 7; + int tile_y = tex_y & 7; + int block_x = tex_x >> 3; + int block_y = tex_y >> 3; + int block_offset = ((block_y * (REG_DISPCNT & 0x40 ? 
(width >> 3) : 16)) + block_x); + uint16_t pixel = 0; + + uint16_t *pixpal; + if (!is8BPP) { + int tileDataIndex = ((block_offset + oam->split.tileNum) << 5) + (tile_y << 2) + (tile_x >> 1); + pixel = tiledata[tileDataIndex]; + if (tile_x & 1) + pixel >>= 4; + else + pixel &= 0xF; + pixpal = sprpal + (oam->split.paletteNum << 4); +#if ENABLE_VRAM_VIEW + vramPalIdBuffer[0x800 + (tileDataIndex >> 5)] = 16 + oam->split.paletteNum; +#endif + } else { + pixel = tiledata[((block_offset * 2 + oam->split.tileNum) << 5) + (tile_y << 3) + tile_x]; + pixpal = sprpal; } - // this code runs if pixel is to be drawn - if (global_x < DISPLAY_WIDTH && global_x >= 0) { - // check if its enabled in the window (if window is enabled) - winShouldBlendPixel = (windowsEnabled == false || scanline->winMask[global_x] & WINMASK_CLR); - - // has to be separated from the blend mode switch statement - // because of OBJ semi transparancy feature - if ((blendMode == 1 && REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) || isSemiTransparent) { - uint16_t targetA = color; - uint16_t targetB = 0; - if (alphaBlendSelectTargetB(scanline, &targetB, oam->split.priority, 0, global_x, false)) { - color = alphaBlendColor(targetA, targetB); - } - } else if (REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) { - switch (blendMode) { - case 2: - color = alphaBrightnessIncrease(color); - break; - case 3: - color = alphaBrightnessDecrease(color); - break; - } + + if (pixel != 0) { + uint16_t color = pixpal[pixel]; + + // if sprite mode is 2 then write to the window mask instead + if (isObjWin) { + if (scanline->winMask[global_x] & WINMASK_WINOUT) + scanline->winMask[global_x] = (REG_WINOUT >> 8) & 0x3F; + continue; } + // this code runs if pixel is to be drawn + if (global_x < DISPLAY_WIDTH && global_x >= 0) { + // check if its enabled in the window (if window is enabled) + winShouldBlendPixel = (windowsEnabled == false || scanline->winMask[global_x] & WINMASK_CLR); + + // has to be separated from the 
blend mode switch statement + // because of OBJ semi transparancy feature + if ((blendMode == 1 && REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) || isSemiTransparent) { + uint16_t targetA = color; + uint16_t targetB = 0; + if (alphaBlendSelectTargetB(scanline, &targetB, oam->split.priority, 0, global_x, false)) { + color = alphaBlendColor(targetA, targetB, REG_BLDALPHA & 0x1F, (REG_BLDALPHA >> 8) & 0x1F); + } + } else if (REG_BLDCNT & BLDCNT_TGT1_OBJ && winShouldBlendPixel) { + switch (blendMode) { + case 2: + color = alphaBrightnessIncrease(color, REG_BLDY & 0x1F); + break; + case 3: + color = alphaBrightnessDecrease(color, REG_BLDY & 0x1F); + break; + } + } - // write pixel to pixel framebuffer - pixels[global_x] = color | (1 << 15); + // write pixel to pixel framebuffer + pixels[global_x] = color | (1 << 15); + } } } } @@ -1574,14 +1727,19 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) unsigned int mode = REG_DISPCNT & 3; unsigned char numOfBgs = (mode == 0 ? 4 : 3); int bgnum, prnum; - struct scanlineData scanline; + static struct scanlineData scanline; unsigned int blendMode = (REG_BLDCNT >> 6) & 3; unsigned int xpos; + unsigned int enabledBgs = (REG_DISPCNT >> 8) & 0xF; - // initialize all priority bookkeeping data - memset(scanline.layers, 0, sizeof(scanline.layers)); - memset(scanline.winMask, 0, sizeof(scanline.winMask)); - memset(scanline.spriteLayers, 0, sizeof(scanline.spriteLayers)); + // Only zero the layers that are actually enabled, + // instead of blindly zeroing all 4+4 layers (~8KB total) every scanline. 
+ for (bgnum = 0; bgnum < numOfBgs; bgnum++) { + if (enabledBgs & (1 << bgnum)) + memset(scanline.layers[bgnum], 0, sizeof(scanline.layers[bgnum])); + } + if (REG_DISPCNT & DISPCNT_OBJ_ON) + memset(scanline.spriteLayers, 0, sizeof(scanline.spriteLayers)); memset(scanline.prioritySortedBgsCount, 0, sizeof(scanline.prioritySortedBgsCount)); for (bgnum = 0; bgnum < numOfBgs; bgnum++) { @@ -1696,63 +1854,88 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) if (REG_DISPCNT & DISPCNT_OBJ_ON) DrawOamSprites(&scanline, vcount, windowsEnabled); - // iterate trough every priority in order - for (prnum = 3; prnum >= 0; prnum--) { - for (char prsub = scanline.prioritySortedBgsCount[prnum] - 1; prsub >= 0; prsub--) { - char bgnum = scanline.prioritySortedBgs[prnum][prsub]; - // if background is enabled then draw it - if (isbgEnabled(bgnum)) { - uint16_t *src = scanline.layers[bgnum]; - // copy all pixels to framebuffer + // iterate through every priority in order + if (blendMode == 0 && !windowsEnabled) { + for (prnum = 3; prnum >= 0; prnum--) { + for (char prsub = scanline.prioritySortedBgsCount[prnum] - 1; prsub >= 0; prsub--) { + char bgnum = scanline.prioritySortedBgs[prnum][prsub]; + if (isbgEnabled(bgnum)) { + uint16_t *src = scanline.layers[bgnum]; + for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { + uint16_t color = src[xpos]; + if (color & 0x8000) // alpha bit set = opaque + pixels[xpos] = color; + } + } + } + // draw sprites on current priority + if (REG_DISPCNT & DISPCNT_OBJ_ON) { + uint16_t *src = scanline.spriteLayers[prnum]; for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { - uint16_t color = src[xpos]; - bool winEffectEnable = true; - - if (!getAlphaBit(color)) - continue; // do nothing if alpha bit is not set + if (src[xpos] & 0x8000) + pixels[xpos] = src[xpos]; + } + } + } + } else { + // FULL PATH: blending and/or windows are active + for (prnum = 3; prnum >= 0; prnum--) { + for (char prsub = scanline.prioritySortedBgsCount[prnum] - 1; prsub >= 0; 
prsub--) { + char bgnum = scanline.prioritySortedBgs[prnum][prsub]; + // if background is enabled then draw it + if (isbgEnabled(bgnum)) { + uint16_t *src = scanline.layers[bgnum]; + // copy all pixels to framebuffer + for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { + uint16_t color = src[xpos]; + bool winEffectEnable = true; + + if (!getAlphaBit(color)) + continue; // do nothing if alpha bit is not set + + if (windowsEnabled) { + winEffectEnable = ((scanline.winMask[xpos] & WINMASK_CLR) >> 5); + // if bg is disabled inside the window then do not draw the pixel + if (!(scanline.winMask[xpos] & 1 << bgnum)) + continue; + } - if (windowsEnabled) { - winEffectEnable = ((scanline.winMask[xpos] & WINMASK_CLR) >> 5); - // if bg is disabled inside the window then do not draw the pixel - if (!(scanline.winMask[xpos] & 1 << bgnum)) - continue; - } + // blending code + if (blendMode != 0 && REG_BLDCNT & (1 << bgnum) && winEffectEnable) { + uint16_t targetA = color; + uint16_t targetB = 0; - // blending code - if (blendMode != 0 && REG_BLDCNT & (1 << bgnum) && winEffectEnable) { - uint16_t targetA = color; - uint16_t targetB = 0; - - switch (blendMode) { - case 1: { - char isSpriteBlendingEnabled = REG_BLDCNT & BLDCNT_TGT2_OBJ ? 1 : 0; - // find targetB and blend it - if (alphaBlendSelectTargetB(&scanline, &targetB, prnum, prsub + 1, xpos, isSpriteBlendingEnabled)) { - color = alphaBlendColor(targetA, targetB); - } - } break; - case 2: - color = alphaBrightnessIncrease(targetA); - break; - case 3: - color = alphaBrightnessDecrease(targetA); - break; + switch (blendMode) { + case 1: { + char isSpriteBlendingEnabled = REG_BLDCNT & BLDCNT_TGT2_OBJ ? 
1 : 0; + // find targetB and blend it + if (alphaBlendSelectTargetB(&scanline, &targetB, prnum, prsub + 1, xpos, isSpriteBlendingEnabled)) { + color = alphaBlendColor(targetA, targetB, REG_BLDALPHA & 0x1F, (REG_BLDALPHA >> 8) & 0x1F); + } + } break; + case 2: + color = alphaBrightnessIncrease(targetA, REG_BLDY & 0x1F); + break; + case 3: + color = alphaBrightnessDecrease(targetA, REG_BLDY & 0x1F); + break; + } } + // write the pixel to scanline buffer output + pixels[xpos] = color; } - // write the pixel to scanline buffer output - pixels[xpos] = color; } } - } - // draw sprites on current priority - uint16_t *src = scanline.spriteLayers[prnum]; - for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { - if (getAlphaBit(src[xpos])) { - // check if sprite pixel draws inside window - if (windowsEnabled && !(scanline.winMask[xpos] & WINMASK_OBJ)) - continue; - // draw the pixel - pixels[xpos] = src[xpos]; + // draw sprites on current priority + uint16_t *src = scanline.spriteLayers[prnum]; + for (xpos = 0; xpos < DISPLAY_WIDTH; xpos++) { + if (getAlphaBit(src[xpos])) { + // check if sprite pixel draws inside window + if (windowsEnabled && !(scanline.winMask[xpos] & WINMASK_OBJ)) + continue; + // draw the pixel + pixels[xpos] = src[xpos]; + } } } } @@ -1760,21 +1943,25 @@ static void DrawScanline(uint16_t *pixels, uint16_t vcount) uint16_t *memsetu16(uint16_t *dst, uint16_t fill, size_t count) { - for (int i = 0; i < count; i++) { - *dst++ = fill; + uint32_t fill32 = ((uint32_t)fill << 16) | fill; + uint32_t *dst32 = (uint32_t *)dst; + size_t pairs = count >> 1; + for (size_t i = 0; i < pairs; i++) { + dst32[i] = fill32; } - - return 0; + if (count & 1) { + dst[count - 1] = fill; + } + return dst; } static void DrawFrame(uint16_t *pixels) { int i; - int j; - static uint16_t scanlines[DISPLAY_HEIGHT][DISPLAY_WIDTH]; - unsigned int blendMode = (REG_BLDCNT >> 6) & 3; for (i = 0; i < DISPLAY_HEIGHT; i++) { + uint16_t *scanline = &pixels[i * DISPLAY_WIDTH]; + REG_VCOUNT = i; if 
(((REG_DISPSTAT >> 8) & 0xFF) == REG_VCOUNT) { REG_DISPSTAT |= INTR_FLAG_VCOUNT; @@ -1782,10 +1969,10 @@ static void DrawFrame(uint16_t *pixels) gIntrTable[INTR_INDEX_VCOUNT](); } - // Render the backdrop color before the each individual scanline. - // HBlank interrupt code could have changed it inbetween lines. - memsetu16(scanlines[i], *(uint16_t *)PLTT, DISPLAY_WIDTH); - DrawScanline(scanlines[i], i); + // Render the backdrop color before each individual scanline. + // HBlank interrupt code could have changed it in between lines. + memsetu16(scanline, *(uint16_t *)PLTT, DISPLAY_WIDTH); + DrawScanline(scanline, i); REG_DISPSTAT |= INTR_FLAG_HBLANK; @@ -1797,14 +1984,6 @@ static void DrawFrame(uint16_t *pixels) REG_DISPSTAT &= ~INTR_FLAG_HBLANK; REG_DISPSTAT &= ~INTR_FLAG_VCOUNT; } - - // Copy to screen - for (i = 0; i < DISPLAY_HEIGHT; i++) { - uint16_t *src = scanlines[i]; - for (j = 0; j < DISPLAY_WIDTH; j++) { - pixels[i * DISPLAY_WIDTH + j] = src[j]; - } - } } #if ENABLE_VRAM_VIEW @@ -1845,8 +2024,14 @@ void VramDraw(SDL_Texture *texture) void VDraw(SDL_Texture *texture) { - memset(gameImage, 0, sizeof(gameImage)); +#if RENDERER == RENDERER_SOFTWARE_FAST + { + extern void DrawFrame_Fast(uint16_t * pixels); + DrawFrame_Fast(gameImage); + } +#else DrawFrame(gameImage); +#endif SDL_UpdateTexture(texture, NULL, gameImage, DISPLAY_WIDTH * sizeof(Uint16)); REG_VCOUNT = DISPLAY_HEIGHT + 1; // prep for being in VBlank period } diff --git a/src/platform/ps2/ps2.c b/src/platform/ps2/ps2.c new file mode 100644 index 000000000..b0541e2e6 --- /dev/null +++ b/src/platform/ps2/ps2.c @@ -0,0 +1,897 @@ +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include "audsrv.h" + +#include "global.h" +#include "core.h" +#include "multi_sio.h" +#include "gba/defines.h" +#include "gba/io_reg.h" +#include "gba/types.h" +#include "lib/agb_flash/flash_internal.h" +#include "platform/shared/dma.h" + +static GSGLOBAL *gsGlobal; +static GSTEXTURE 
screen; + +#include "platform/shared/audio/cgb_audio.h" + +#ifndef TILE_WIDTH +#define TILE_WIDTH 8 +#endif + +extern IntrFunc gIntrTable[16]; + +ALIGNED(256) uint16_t gameImage[DISPLAY_WIDTH * DISPLAY_HEIGHT]; + +struct VidMode { + const char *name; + s16 mode; + s16 interlace; + s16 field; + int max_width; + int max_height; + int width; + int height; + int vck; + int iPassCount; + int x_off; + int y_off; +}; + +static const struct VidMode vid_modes[] = { + { "240p", GS_MODE_NTSC, GS_NONINTERLACED, GS_FRAME, 652, 224, 320, 224, 2, 1, 0, 0 }, +#if !defined(VERSION_EU) + // NTSC + { "480i", GS_MODE_NTSC, GS_INTERLACED, GS_FIELD, 704, 480, 704, 452, 4, 1, 0, 0 }, + { "480p", GS_MODE_DTV_480P, GS_NONINTERLACED, GS_FRAME, 704, 480, 704, 452, 2, 1, 0, 0 }, +#else + // PAL + { "576i", GS_MODE_PAL, GS_INTERLACED, GS_FIELD, 704, 576, 704, 536, 4, 1, 0, 0 }, + { "576p", GS_MODE_DTV_576P, GS_NONINTERLACED, GS_FRAME, 704, 576, 704, 536, 2, 1, 0, 0 }, +#endif + // HDTV + { "720p", GS_MODE_DTV_720P, GS_NONINTERLACED, GS_FRAME, 1280, 720, 1280, 720, 1, 2, 0, 0 }, + { "1080i", GS_MODE_DTV_1080I, GS_INTERLACED, GS_FRAME, 1920, 1080, 1920, 1080, 1, 2, 0, 0 }, +}; + +static int vsync_sema_1st_id; +static int vsync_sema_2nd_id; +static int vsync_sema_id = -1; +static int vsync_id = -1; + +static const struct VidMode *vid_mode; +static bool use_hires = false; + +bool speedUp = false; +bool isRunning = true; +bool paused = false; +bool stepOneFrame = false; +bool headless = false; + +double lastGameTime = 0; +double curGameTime = 0; +double fixedTimestep = 1.0 / 60.0; // 16.666667ms +double timeScale = 1.0; +double accumulator = 0.0; + +static FILE *sSaveFile = NULL; + +extern void AgbMain(void); +void DoSoftReset(void) {}; + +void VDraw(void); +void UpdateTexture(void); + +static void ReadSaveFile(char *path); +static void StoreSaveFile(void); +static void CloseSaveFile(void); + +u16 Platform_GetKeyInput(void); + +#define SAMPLES_HIGH 544 +#define SAMPLES_LOW 528 + +static bool 
audio_ps2_init(void) +{ + if (init_audio_driver() != 0) + return false; + audsrv_set_volume(MAX_VOLUME); + + audsrv_fmt_t fmt; + + fmt.freq = 48000; + fmt.bits = 16; + fmt.channels = 2; + + if (audsrv_set_format(&fmt)) { + printf("audio_ps2: unsupported sound format\n"); + audsrv_quit(); + return false; + } + + return true; +} + +static int audio_ps2_buffered(void) { return audsrv_queued() / 4; } + +static void audio_ps2_play(const uint8_t *buf, size_t len) +{ + if (audio_ps2_buffered() < 6000) { + audsrv_play_audio(buf, len); + } +} + +void reset_IOP() +{ + SifInitRpc(0); + while (!SifIopReset(NULL, 0)) { } // Comment this line if you want to "debug" through ps2link + while (!SifIopSync()) { } +} + +static void prepare_IOP() +{ + reset_IOP(); + SifInitRpc(0); + sbv_patch_enable_lmb(); + sbv_patch_disable_prefix_check(); +} + +static void init_drivers() +{ + init_only_boot_ps2_filesystem_driver(); + init_memcard_driver(true); +} + +static void deinit_drivers() +{ + deinit_memcard_driver(true); + deinit_only_boot_ps2_filesystem_driver(); +} + +void platform_video_init(void) +{ + if (vid_mode == NULL) { + vid_mode = &vid_modes[3]; // Standard def 480p + } else { + if (use_hires) { + gsKit_hires_deinit_global(gsGlobal); + } else { + gsKit_deinit_global(gsGlobal); + if (vsync_id != -1) { + gsKit_remove_vsync_handler(vsync_id); + } + vsync_sema_id = -1; + } + } + use_hires = (vid_mode->mode == GS_MODE_DTV_720P || vid_mode->mode == GS_MODE_DTV_1080I); + + if (use_hires) { + gsGlobal = gsKit_hires_init_global(); + } else { + gsGlobal = gsKit_init_global(); + } + + dmaKit_init(D_CTRL_RELE_OFF, D_CTRL_MFD_OFF, D_CTRL_STS_UNSPEC, D_CTRL_STD_OFF, D_CTRL_RCYC_8, 1 << DMA_CHANNEL_GIF); + + dmaKit_chan_init(DMA_CHANNEL_GIF); + + gsGlobal->Mode = vid_mode->mode; + gsGlobal->Width = vid_mode->width; + gsGlobal->Height = vid_mode->height; + if (gsGlobal->Mode == GS_MODE_DTV_1080I) { + gsGlobal->Height /= 2; + } + + gsGlobal->Interlace = vid_mode->interlace; + gsGlobal->Field = 
vid_mode->field; + gsGlobal->ZBuffering = GS_SETTING_ON; + gsGlobal->DoubleBuffering = GS_SETTING_ON; + gsGlobal->PrimAAEnable = GS_SETTING_OFF; + gsGlobal->Dithering = GS_SETTING_OFF; + gsGlobal->PSM = GS_PSM_CT16; + gsGlobal->PSMZ = GS_PSMZ_16; + + if (use_hires) { + gsKit_hires_init_screen(gsGlobal, vid_mode->iPassCount); + } else { + gsKit_init_screen(gsGlobal); + } + // hires sets the texture pointer to the wrong location. Ensure it's correct. + gsGlobal->TexturePointer = gsGlobal->CurrentPointer; + gsKit_TexManager_init(gsGlobal); + + screen.Width = DISPLAY_WIDTH; + screen.Height = DISPLAY_HEIGHT; + screen.PSM = GS_PSM_CT16; + screen.Mem = (void *)gameImage; +} + +int main(int argc, char **argv) +{ + prepare_IOP(); + init_drivers(); + + // ReadSaveFile("sa2.sav"); + + // Prevent the multiplayer screen from being drawn ( see core.c:EngineInit() ) + REG_RCNT = 0x8000; + REG_KEYINPUT = 0x3FF; + + audio_ps2_init(); + platform_video_init(); + // controller init + + cgb_audio_init(48000); + + VDraw(); + // while (true) { + // UpdateTexture(); + // gsKit_sync_flip(gsGlobal); + // gsKit_queue_exec(gsGlobal); + // } + AgbMain(); + + return 0; +} + +bool newFrameRequested = FALSE; +int skipFrame = 0; + +// called every gba frame. we process sdl events and render as many times +// as vsync needs, then return when a new game frame is needed. 
+/**
+ * Platform replacement for the GBA BIOS VBlankIntrWait call.
+ *
+ * While isRunning is set, this polls input, renders (or re-submits) one frame,
+ * runs the VBlank DMAs and the VBlank interrupt handler, presents the frame
+ * and returns. Once isRunning has been cleared it closes the save file, shuts
+ * the drivers down and terminates the process instead of returning.
+ */
+void VBlankIntrWait(void)
+{
+    if (!isRunning) {
+        CloseSaveFile();
+
+        deinit_drivers();
+        exit(0);
+    }
+
+    // GBA key registers are active-low, hence the XOR with the key mask.
+    REG_KEYINPUT = KEYS_MASK ^ Platform_GetKeyInput();
+
+    // NOTE: skipFrame is not modified in this function. Halving the render
+    // rate for widescreen builds (DISPLAY_WIDTH > 240), because the draw
+    // function is too slow for the PS2, is currently disabled.
+    if (skipFrame == 0) {
+        VDraw();
+    } else {
+        // Skipped frame: only re-submit the previously drawn image.
+        UpdateTexture();
+    }
+
+    // Emulate the VBlank period: flag it, run VBlank-triggered DMAs and call
+    // the game's VBlank handler if that interrupt is enabled in DISPSTAT.
+    REG_DISPSTAT |= INTR_FLAG_VBLANK;
+    RunDMAs(DMA_VBLANK);
+    if (REG_DISPSTAT & DISPSTAT_VBLANK_INTR)
+        gIntrTable[INTR_INDEX_VBLANK]();
+    REG_DISPSTAT &= ~INTR_FLAG_VBLANK;
+
+    // Skipped frames are not presented.
+    if (skipFrame != 0) {
+        return;
+    }
+
+    if (use_hires) {
+        gsKit_hires_flip_ext(gsGlobal, GSFLIP_RATE_LIMIT_1);
+    } else {
+        gsKit_sync_flip(gsGlobal);
+        gsKit_queue_exec(gsGlobal);
+    }
+    gsKit_TexManager_nextFrame(gsGlobal);
+}
+
+/**
+ * Opens the save file at `path` (creating it when it does not exist yet) and
+ * loads its contents into FLASH_BASE. Every byte of FLASH_BASE that is not
+ * covered by file data is set to 0xFF.
+ *
+ * The stream is kept open in sSaveFile for StoreSaveFile()/CloseSaveFile().
+ * If the file can be neither opened nor created, sSaveFile stays NULL (which
+ * turns StoreSaveFile() into a no-op) and FLASH_BASE is filled with 0xFF.
+ */
+static void ReadSaveFile(char *path)
+{
+    size_t bytesRead = 0;
+
+    // Check whether the saveFile exists, and create it if not
+    sSaveFile = fopen(path, "r+b");
+    if (sSaveFile == NULL) {
+        sSaveFile = fopen(path, "w+b");
+    }
+
+    if (sSaveFile != NULL) {
+        long fileSize = 0;
+
+        // ftell() reports -1 on failure; treat that like an empty file.
+        if (fseek(sSaveFile, 0, SEEK_END) == 0) {
+            fileSize = ftell(sSaveFile);
+        }
+        fseek(sSaveFile, 0, SEEK_SET);
+
+        if (fileSize > 0) {
+            // Only read as many bytes as fit inside the buffer
+            // or as many bytes as are in the file
+            size_t bytesToRead = ((size_t)fileSize < sizeof(FLASH_BASE)) ? (size_t)fileSize : sizeof(FLASH_BASE);
+
+            bytesRead = fread(FLASH_BASE, 1, bytesToRead, sSaveFile);
+        }
+    }
+
+    // Fill the buffer if the savefile was just created or smaller than the buffer itself
+    for (size_t i = bytesRead; i < sizeof(FLASH_BASE); i++) {
+        FLASH_BASE[i] = 0xFF;
+    }
+}
+
+/**
+ * Writes the whole FLASH_BASE buffer to the start of the open save file.
+ * Does nothing when no save file is open.
+ */
+static void StoreSaveFile(void)
+{
+    if (sSaveFile == NULL) {
+        return;
+    }
+
+    fseek(sSaveFile, 0, SEEK_SET);
+    fwrite(FLASH_BASE, 1, sizeof(FLASH_BASE), sSaveFile);
+    // The stream stays open until shutdown, so flush now; otherwise the save
+    // only lives in the stdio buffer until CloseSaveFile() runs.
+    fflush(sSaveFile);
+}
+
+/** Public entry point that persists the current FLASH_BASE contents. */
+void Platform_StoreSaveFile(void) { StoreSaveFile(); }
+
+/** Closes the save file, if one is open, and forgets the stream. */
+static void CloseSaveFile(void)
+{
+    if (sSaveFile != NULL) {
+        fclose(sSaveFile);
+        // Prevent a later StoreSaveFile() from touching the closed stream.
+        sSaveFile = NULL;
+    }
+}
+
+// Scratch buffer for the signed 16-bit samples handed to audio_ps2_play().
+s16 converted_audio[4096];
+
+/**
+ * Converts `length` float samples to signed 16-bit PCM.
+ *
+ * Samples are clamped to [-1.0, 1.0], scaled by 32767 and rounded to the
+ * nearest integer (half away from zero). Does nothing if either pointer is
+ * NULL.
+ */
+void float_audio_to_s16(const float *input, int16_t *output, size_t length)
+{
+    if (!input || !output)
+        return;
+
+    for (size_t i = 0; i < length; i++) {
+        float sample = input[i];
+
+        if (sample > 1.0f)
+            sample = 1.0f;
+        else if (sample < -1.0f)
+            sample = -1.0f;
+
+        output[i] = (int16_t)(sample * 32767.0f + (sample >= 0 ? 0.5f : -0.5f));
+    }
+}
+
+/**
+ * Converts a buffer of float samples to 16-bit PCM and passes it to the PS2
+ * audio driver.
+ *
+ * data       - float samples to play
+ * bytesCount - size of `data` in bytes (not in samples)
+ *
+ * Input larger than converted_audio is submitted in several chunks instead of
+ * being written past the end of that buffer.
+ */
+void Platform_QueueAudio(const float *data, uint32_t bytesCount)
+{
+    const size_t capacity = sizeof(converted_audio) / sizeof(converted_audio[0]);
+    size_t remaining = bytesCount / sizeof(float);
+
+    if (data == NULL)
+        return;
+
+    while (remaining > 0) {
+        size_t chunk = (remaining < capacity) ? remaining : capacity;
+
+        float_audio_to_s16(data, converted_audio, chunk);
+        // NOTE(review): reusing converted_audio for the next chunk assumes
+        // audio_ps2_play() has consumed or copied the data when it returns - confirm.
+        audio_ps2_play((void *)converted_audio, chunk * sizeof(converted_audio[0]));
+
+        data += chunk;
+        remaining -= chunk;
+    }
+}
+
+// TODO: handle input
+// Always reports "no keys pressed", so VBlankIntrWait() writes KEYS_MASK
+// unchanged into REG_KEYINPUT.
+u16 Platform_GetKeyInput(void) { return 0; }
+
+// BIOS function implementations are based on the VBA-M source code.
+ +// safe unaligned access for MIPS +static uint32_t CPUReadMemory(const void *src) +{ + uint32_t val; + memcpy(&val, src, sizeof(val)); + return val; +} + +static void CPUWriteMemory(void *dest, uint32_t val) { memcpy(dest, &val, sizeof(val)); } + +static uint16_t CPUReadHalfWord(const void *src) +{ + uint16_t val; + memcpy(&val, src, sizeof(val)); + return val; +} + +static void CPUWriteHalfWord(void *dest, uint16_t val) { memcpy(dest, &val, sizeof(val)); } + +static uint8_t CPUReadByte(const void *src) { return *(uint8_t *)src; } + +static void CPUWriteByte(void *dest, uint8_t val) { *(uint8_t *)dest = val; } + +void CpuSet(const void *src, void *dst, u32 cnt) +{ + if (dst == NULL) { + puts("Attempted to CpuSet to NULL\n"); + return; + } + + int count = cnt & 0x1FFFFF; + + const u8 *source = src; + u8 *dest = dst; + + // 32-bit ? + if ((cnt >> 26) & 1) { + // assert(((uintptr_t)src & ~3) == (uintptr_t)src); + // assert(((uintptr_t)dst & ~3) == (uintptr_t)dst); + + // needed for 32-bit mode! + // source = (u8 *)((uint32_t )source & ~3); + // dest = (u8 *)((uint32_t )dest & ~3); + + // fill ? + if ((cnt >> 24) & 1) { + uint32_t value = CPUReadMemory(source); + while (count) { + CPUWriteMemory(dest, value); + dest += 4; + count--; + } + } else { + // copy + while (count) { + CPUWriteMemory(dest, CPUReadMemory(source)); + source += 4; + dest += 4; + count--; + } + } + } else { + // No align on 16-bit fill? + // assert(((uintptr_t)src & ~1) == (uintptr_t)src); + // assert(((uintptr_t)dst & ~1) == (uintptr_t)dst); + + // 16-bit fill? 
+ if ((cnt >> 24) & 1) { + uint16_t value = CPUReadHalfWord(source); + while (count) { + CPUWriteHalfWord(dest, value); + dest += 2; + count--; + } + } else { + // copy + while (count) { + CPUWriteHalfWord(dest, CPUReadHalfWord(source)); + source += 2; + dest += 2; + count--; + } + } + } +} + +void CpuFastSet(const void *src, void *dst, u32 cnt) +{ + if (dst == NULL) { + puts("Attempted to CpuFastSet to NULL\n"); + return; + } + + int count = cnt & 0x1FFFFF; + + const u8 *source = src; + u8 *dest = dst; + + // source = (u8 *)((uint32_t )source & ~3); + // dest = (u8 *)((uint32_t )dest & ~3); + + // fill? + if ((cnt >> 24) & 1) { + uint32_t value = CPUReadMemory(source); + while (count > 0) { + // BIOS always transfers 32 bytes at a time + for (int i = 0; i < 8; i++) { + CPUWriteMemory(dest, value); + dest += 4; + } + count -= 8; + } + } else { + // copy + while (count > 0) { + // BIOS always transfers 32 bytes at a time + for (int i = 0; i < 8; i++) { + uint32_t value = CPUReadMemory(source); + CPUWriteMemory(dest, value); + source += 4; + dest += 4; + } + count -= 8; + } + } +} + +void LZ77UnCompVram(const void *src_, void *dest_) +{ + const u8 *src = (const u8 *)src_; + u8 *dest = dest_; + int destSize = (src[3] << 16) | (src[2] << 8) | src[1]; + int srcPos = 4; + int destPos = 0; + + for (;;) { + unsigned char flags = src[srcPos++]; + + for (int i = 0; i < 8; i++) { + if (flags & 0x80) { + int blockSize = (src[srcPos] >> 4) + 3; + int blockDistance = (((src[srcPos] & 0xF) << 8) | src[srcPos + 1]) + 1; + + srcPos += 2; + + int blockPos = destPos - blockDistance; + + // Some Ruby/Sapphire tilesets overflow. 
+ if (destPos + blockSize > destSize) { + blockSize = destSize - destPos; + // fprintf(stderr, "Destination buffer overflow.\n"); + puts("Destination buffer overflow.\n"); + } + + if (blockPos < 0) + goto fail; + + for (int j = 0; j < blockSize; j++) + dest[destPos++] = dest[blockPos + j]; + } else { + if (destPos >= destSize) + goto fail; + + dest[destPos++] = src[srcPos++]; + } + + if (destPos == destSize) { + return; + } + + flags <<= 1; + } + } + +fail: + puts("Fatal error while decompressing LZ file.\n"); +} + +void LZ77UnCompWram(const void *src, void *dst) +{ + const uint8_t *source = src; + uint8_t *dest = dst; + + uint32_t header = CPUReadMemory(source); + source += 4; + + int len = header >> 8; + + while (len > 0) { + uint8_t d = CPUReadByte(source++); + + if (d) { + for (int i = 0; i < 8; i++) { + if (d & 0x80) { + uint16_t data = CPUReadByte(source++) << 8; + data |= CPUReadByte(source++); + int length = (data >> 12) + 3; + int offset = (data & 0x0FFF); + uint8_t *windowOffset = dest - offset - 1; + for (int i2 = 0; i2 < length; i2++) { + CPUWriteByte(dest++, CPUReadByte(windowOffset++)); + len--; + if (len == 0) + return; + } + } else { + CPUWriteByte(dest++, CPUReadByte(source++)); + len--; + if (len == 0) + return; + } + d <<= 1; + } + } else { + for (int i = 0; i < 8; i++) { + CPUWriteByte(dest++, CPUReadByte(source++)); + len--; + if (len == 0) + return; + } + } + } +} + +void RLUnCompWram(const void *src, void *dest) +{ + int remaining = CPUReadMemory(src) >> 8; + int blockHeader; + int block; + src += 4; + while (remaining > 0) { + blockHeader = CPUReadByte(src); + src++; + if (blockHeader & 0x80) // Compressed? 
+ { + blockHeader &= 0x7F; + blockHeader += 3; + block = CPUReadByte(src); + src++; + while (blockHeader-- && remaining) { + remaining--; + CPUWriteByte(dest, block); + dest++; + } + } else // Uncompressed + { + blockHeader++; + while (blockHeader-- && remaining) { + remaining--; + u8 byte = CPUReadByte(src); + src++; + CPUWriteByte(dest, byte); + dest++; + } + } + } +} + +void RLUnCompVram(const void *src, void *dest) +{ + int remaining = CPUReadMemory(src) >> 8; + int padding = (4 - remaining) & 0x3; + int blockHeader; + int block; + int halfWord = 0; + src += 4; + while (remaining > 0) { + blockHeader = CPUReadByte(src); + src++; + if (blockHeader & 0x80) // Compressed? + { + blockHeader &= 0x7F; + blockHeader += 3; + block = CPUReadByte(src); + src++; + while (blockHeader-- && remaining) { + remaining--; + if ((uintptr_t)dest & 1) { + halfWord |= block << 8; + CPUWriteHalfWord((void *)((uintptr_t)dest ^ 1), halfWord); + } else + halfWord = block; + dest++; + } + } else // Uncompressed + { + blockHeader++; + while (blockHeader-- && remaining) { + remaining--; + u8 byte = CPUReadByte(src); + src++; + if ((uintptr_t)dest & 1) { + halfWord |= byte << 8; + CPUWriteHalfWord((void *)((uintptr_t)dest ^ 1), halfWord); + } else + halfWord = byte; + dest++; + } + } + } + if ((uintptr_t)dest & 1) { + padding--; + dest++; + } + for (; padding > 0; padding -= 2, dest += 2) + CPUWriteHalfWord(dest, 0); +} + +const s16 sineTable[256] + = { (s16)0x0000, (s16)0x0192, (s16)0x0323, (s16)0x04B5, (s16)0x0645, (s16)0x07D5, (s16)0x0964, (s16)0x0AF1, (s16)0x0C7C, (s16)0x0E05, + (s16)0x0F8C, (s16)0x1111, (s16)0x1294, (s16)0x1413, (s16)0x158F, (s16)0x1708, (s16)0x187D, (s16)0x19EF, (s16)0x1B5D, (s16)0x1CC6, + (s16)0x1E2B, (s16)0x1F8B, (s16)0x20E7, (s16)0x223D, (s16)0x238E, (s16)0x24DA, (s16)0x261F, (s16)0x275F, (s16)0x2899, (s16)0x29CD, + (s16)0x2AFA, (s16)0x2C21, (s16)0x2D41, (s16)0x2E5A, (s16)0x2F6B, (s16)0x3076, (s16)0x3179, (s16)0x3274, (s16)0x3367, (s16)0x3453, + (s16)0x3536, 
(s16)0x3612, (s16)0x36E5, (s16)0x37AF, (s16)0x3871, (s16)0x392A, (s16)0x39DA, (s16)0x3A82, (s16)0x3B20, (s16)0x3BB6, + (s16)0x3C42, (s16)0x3CC5, (s16)0x3D3E, (s16)0x3DAE, (s16)0x3E14, (s16)0x3E71, (s16)0x3EC5, (s16)0x3F0E, (s16)0x3F4E, (s16)0x3F84, + (s16)0x3FB1, (s16)0x3FD3, (s16)0x3FEC, (s16)0x3FFB, (s16)0x4000, (s16)0x3FFB, (s16)0x3FEC, (s16)0x3FD3, (s16)0x3FB1, (s16)0x3F84, + (s16)0x3F4E, (s16)0x3F0E, (s16)0x3EC5, (s16)0x3E71, (s16)0x3E14, (s16)0x3DAE, (s16)0x3D3E, (s16)0x3CC5, (s16)0x3C42, (s16)0x3BB6, + (s16)0x3B20, (s16)0x3A82, (s16)0x39DA, (s16)0x392A, (s16)0x3871, (s16)0x37AF, (s16)0x36E5, (s16)0x3612, (s16)0x3536, (s16)0x3453, + (s16)0x3367, (s16)0x3274, (s16)0x3179, (s16)0x3076, (s16)0x2F6B, (s16)0x2E5A, (s16)0x2D41, (s16)0x2C21, (s16)0x2AFA, (s16)0x29CD, + (s16)0x2899, (s16)0x275F, (s16)0x261F, (s16)0x24DA, (s16)0x238E, (s16)0x223D, (s16)0x20E7, (s16)0x1F8B, (s16)0x1E2B, (s16)0x1CC6, + (s16)0x1B5D, (s16)0x19EF, (s16)0x187D, (s16)0x1708, (s16)0x158F, (s16)0x1413, (s16)0x1294, (s16)0x1111, (s16)0x0F8C, (s16)0x0E05, + (s16)0x0C7C, (s16)0x0AF1, (s16)0x0964, (s16)0x07D5, (s16)0x0645, (s16)0x04B5, (s16)0x0323, (s16)0x0192, (s16)0x0000, (s16)0xFE6E, + (s16)0xFCDD, (s16)0xFB4B, (s16)0xF9BB, (s16)0xF82B, (s16)0xF69C, (s16)0xF50F, (s16)0xF384, (s16)0xF1FB, (s16)0xF074, (s16)0xEEEF, + (s16)0xED6C, (s16)0xEBED, (s16)0xEA71, (s16)0xE8F8, (s16)0xE783, (s16)0xE611, (s16)0xE4A3, (s16)0xE33A, (s16)0xE1D5, (s16)0xE075, + (s16)0xDF19, (s16)0xDDC3, (s16)0xDC72, (s16)0xDB26, (s16)0xD9E1, (s16)0xD8A1, (s16)0xD767, (s16)0xD633, (s16)0xD506, (s16)0xD3DF, + (s16)0xD2BF, (s16)0xD1A6, (s16)0xD095, (s16)0xCF8A, (s16)0xCE87, (s16)0xCD8C, (s16)0xCC99, (s16)0xCBAD, (s16)0xCACA, (s16)0xC9EE, + (s16)0xC91B, (s16)0xC851, (s16)0xC78F, (s16)0xC6D6, (s16)0xC626, (s16)0xC57E, (s16)0xC4E0, (s16)0xC44A, (s16)0xC3BE, (s16)0xC33B, + (s16)0xC2C2, (s16)0xC252, (s16)0xC1EC, (s16)0xC18F, (s16)0xC13B, (s16)0xC0F2, (s16)0xC0B2, (s16)0xC07C, (s16)0xC04F, (s16)0xC02D, + (s16)0xC014, (s16)0xC005, 
(s16)0xC000, (s16)0xC005, (s16)0xC014, (s16)0xC02D, (s16)0xC04F, (s16)0xC07C, (s16)0xC0B2, (s16)0xC0F2, + (s16)0xC13B, (s16)0xC18F, (s16)0xC1EC, (s16)0xC252, (s16)0xC2C2, (s16)0xC33B, (s16)0xC3BE, (s16)0xC44A, (s16)0xC4E0, (s16)0xC57E, + (s16)0xC626, (s16)0xC6D6, (s16)0xC78F, (s16)0xC851, (s16)0xC91B, (s16)0xC9EE, (s16)0xCACA, (s16)0xCBAD, (s16)0xCC99, (s16)0xCD8C, + (s16)0xCE87, (s16)0xCF8A, (s16)0xD095, (s16)0xD1A6, (s16)0xD2BF, (s16)0xD3DF, (s16)0xD506, (s16)0xD633, (s16)0xD767, (s16)0xD8A1, + (s16)0xD9E1, (s16)0xDB26, (s16)0xDC72, (s16)0xDDC3, (s16)0xDF19, (s16)0xE075, (s16)0xE1D5, (s16)0xE33A, (s16)0xE4A3, (s16)0xE611, + (s16)0xE783, (s16)0xE8F8, (s16)0xEA71, (s16)0xEBED, (s16)0xED6C, (s16)0xEEEF, (s16)0xF074, (s16)0xF1FB, (s16)0xF384, (s16)0xF50F, + (s16)0xF69C, (s16)0xF82B, (s16)0xF9BB, (s16)0xFB4B, (s16)0xFCDD, (s16)0xFE6E }; + +void SoftReset(u32 resetFlags) { } + +void SoftResetExram(u32 resetFlags) { } + +// Following functions taken from mGBA's source +u16 ArcTan(s16 i) +{ + s32 a = -((i * i) >> 14); + s32 b = ((0xA9 * a) >> 14) + 0x390; + b = ((b * a) >> 14) + 0x91C; + b = ((b * a) >> 14) + 0xFB6; + b = ((b * a) >> 14) + 0x16AA; + b = ((b * a) >> 14) + 0x2081; + b = ((b * a) >> 14) + 0x3651; + b = ((b * a) >> 14) + 0xA2F9; + + return (i * b) >> 16; +} + +u16 ArcTan2(s16 x, s16 y) +{ + if (!y) { + if (x >= 0) + return 0; + return 0x8000; + } + if (!x) { + if (y >= 0) + return 0x4000; + return 0xC000; + } + if (y >= 0) { + if (x >= 0) { + if (x >= y) + return ArcTan((y << 14) / x); + } else if (-x >= y) + return ArcTan((y << 14) / x) + 0x8000; + return 0x4000 - ArcTan((x << 14) / y); + } else { + if (x <= 0) { + if (-x > -y) + return ArcTan((y << 14) / x) + 0x8000; + } else if (x >= -y) + return ArcTan((y << 14) / x) + 0x10000; + return 0xC000 - ArcTan((x << 14) / y); + } +} + +u16 Sqrt(u32 num) +{ + if (!num) + return 0; + u32 lower; + u32 upper = num; + u32 bound = 1; + while (bound < upper) { + upper >>= 1; + bound <<= 1; + } + while (1) { + upper = 
num; + u32 accum = 0; + lower = bound; + while (1) { + u32 oldLower = lower; + if (lower <= upper >> 1) + lower <<= 1; + if (oldLower >= upper >> 1) + break; + } + while (1) { + accum <<= 1; + if (upper >= lower) { + ++accum; + upper -= lower; + } + if (lower == bound) + break; + lower >>= 1; + } + u32 oldBound = bound; + bound += accum; + bound >>= 1; + if (bound >= oldBound) { + bound = oldBound; + break; + } + } + return bound; +} + +int MultiBoot(struct MultiBootParam *mp) { return 0; } + +void VDraw(void) +{ + extern void DrawFrame_Fast(uint16_t * pixels); + DrawFrame_Fast(gameImage); + UpdateTexture(); + REG_VCOUNT = DISPLAY_HEIGHT + 1; // prep for being in VBlank period +} + +void UpdateTexture(void) +{ + gsKit_TexManager_invalidate(gsGlobal, &screen); + gsKit_TexManager_bind(gsGlobal, &screen); + + int startX = (gsGlobal->Width); + int startY = (gsGlobal->Height); + + gsKit_clear(gsGlobal, GS_SETREG_RGBAQ(0, 0, 0, 0, 0)); + + gsKit_prim_sprite_texture(gsGlobal, &screen, + 0.0f, // X1 + 0.0f, // Y2 + 0.0f, // U1 + 0.0f, // V1 + startX, // X2 + startY, // Y2 + gsGlobal->Width, // U2 + gsGlobal->Height, // V2 + 0, GS_SETREG_RGBAQ(128, 128, 128, 0, 0)); +} diff --git a/src/platform/psp/psp_module.c b/src/platform/psp/psp_module.c new file mode 100644 index 000000000..18040b1e1 --- /dev/null +++ b/src/platform/psp/psp_module.c @@ -0,0 +1,40 @@ +#include +#include +#include +#include + +PSP_MODULE_INFO("SonicAdvance2", 0, 1, 0); +PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER | THREAD_ATTR_VFPU); +PSP_HEAP_SIZE_KB(-1024); + +unsigned int sce_newlib_stack_size = 512 * 1024; + +extern bool isRunning; + +int exitCallback(int arg1, int arg2, void *common) +{ + (void)arg1; + (void)arg2; + (void)common; + isRunning = false; + return 0; +} + +int callbackThread(SceSize args, void *argp) +{ + (void)args; + (void)argp; + int cbid = sceKernelCreateCallback("Exit Callback", exitCallback, NULL); + sceKernelRegisterExitCallback(cbid); + sceKernelSleepThreadCB(); + return 0; +} + +int 
setupPspCallbacks(void) +{ + int thid = sceKernelCreateThread("update_thread", callbackThread, 0x11, 0xFA0, 0, 0); + if (thid >= 0) { + sceKernelStartThread(thid, 0, 0); + } + return thid; +} diff --git a/src/platform/shared/audio/m4a_sound_mixer.c b/src/platform/shared/audio/m4a_sound_mixer.c index 49b0d65bc..6482d6382 100644 --- a/src/platform/shared/audio/m4a_sound_mixer.c +++ b/src/platform/shared/audio/m4a_sound_mixer.c @@ -31,6 +31,9 @@ struct SoundMixerState *SOUND_INFO_PTR = &sSoundInfo; void SoundMain(void) { +#if !ENABLE_AUDIO + return; +#endif struct SoundMixerState *mixer = SOUND_INFO_PTR; if (mixer->lockStatus != ID_NUMBER) { @@ -908,7 +911,6 @@ void m4aSoundVSync(void) float *m4aBuffer = mixer->pcmBuffer; float *cgbBuffer = cgb_get_buffer(); s32 dmaCounter = mixer->dmaCounter; - bool8 shouldQueue = FALSE; if (dmaCounter > 1) { m4aBuffer += samplesPerFrame * (mixer->framesPerDmaCycle - (dmaCounter - 1)); @@ -916,14 +918,9 @@ void m4aSoundVSync(void) for (u32 i = 0; i < samplesPerFrame; i++) { audioBuffer[i] = m4aBuffer[i] + cgbBuffer[i]; - if (audioBuffer[i] != 0) { - shouldQueue = TRUE; - } } - if (shouldQueue) { - Platform_QueueAudio(audioBuffer, samplesPerFrame * 4); - } + Platform_QueueAudio(audioBuffer, samplesPerFrame * sizeof(float)); if ((s8)(--mixer->dmaCounter) <= 0) mixer->dmaCounter = mixer->framesPerDmaCycle; } diff --git a/src/platform/shared/dma.c b/src/platform/shared/dma.c index 60ad6144f..d2f55e045 100644 --- a/src/platform/shared/dma.c +++ b/src/platform/shared/dma.c @@ -1,10 +1,26 @@ #include +#include #include "global.h" #include "platform/shared/dma.h" +// safe unaligned access for MIPS +static inline void dma_copy32(void *dst, const void *src) +{ + u32 tmp; + memcpy(&tmp, src, 4); + memcpy(dst, &tmp, 4); +} + +static inline void dma_copy16(void *dst, const void *src) +{ + u16 tmp; + memcpy(&tmp, src, 2); + memcpy(dst, &tmp, 2); +} + struct DMATransfer DMAList[DMA_COUNT] = { 0 }; -void RunDMAs(u32 type) +void RunDMAs(DmaStartTypes 
type) { for (int dmaNum = 0; dmaNum < DMA_COUNT; dmaNum++) { struct DMATransfer *dma = &DMAList[dmaNum]; @@ -23,9 +39,9 @@ void RunDMAs(u32 type) // printf("DMA%d src=%p, dest=%p, control=%d\n", dmaNum, dma->src, dma->dst, dma->control); for (int i = 0; i < dma->size; i++) { if ((dma->control) & DMA_32BIT) - *dma->dst32 = *dma->src32; + dma_copy32(dma->dst, dma->src); else - *dma->dst16 = *dma->src16; + dma_copy16(dma->dst, dma->src); // process destination pointer changes if (((dma->control) & DMA_DEST_MASK) == DMA_DEST_INC) { diff --git a/src/platform/shared/rendering/sw_renderer_fast.cc b/src/platform/shared/rendering/sw_renderer_fast.cc new file mode 100644 index 000000000..10a505865 --- /dev/null +++ b/src/platform/shared/rendering/sw_renderer_fast.cc @@ -0,0 +1,2298 @@ +/* gameplaySP - Modified to fit the SA2 codebase (FreshOllie - 2026) + * + * Copyright (C) 2006 Exophase + * Copyright (C) 2023 David Guillen Fandos + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +extern "C" { +#include "config.h" +} + +#if RENDERER == RENDERER_SOFTWARE_FAST + +#include +#include + +extern "C" { +#include "global.h" +#include "core.h" +#include "gba/defines.h" +#include "gba/io_reg.h" +#include "gba/types.h" + +#include "platform/shared/dma.h" +} + +#define eswap16(value) (value) +#define eswap32(value) (value) + +#define GBA_SCREEN_PITCH DISPLAY_WIDTH + +typedef u32 fixed16_16; +typedef u32 fixed8_24; + +#define float_to_fp16_16(value) (fixed16_16)((value)*65536.0) + +#define fp16_16_to_float(value) (float)((value) / 65536.0) + +#define u32_to_fp16_16(value) ((value) << 16) + +#define fp16_16_to_u32(value) ((value) >> 16) + +#define fp16_16_fractional_part(value) ((value)&0xFFFF) + +#define float_to_fp8_24(value) (fixed8_24)((value)*16777216.0) + +#define fp8_24_fractional_part(value) ((value)&0xFFFFFF) + +#define fixed_div(numerator, denominator, bits) (((numerator * (1 << bits)) + (denominator / 2)) / denominator) + +#define read_ioreg(regaddr) (eswap16(*(u16 *)(regaddr))) +#define read_ioreg32(regaddr) (read_ioreg(regaddr) | (read_ioreg((regaddr) + sizeof(u16)) << 16)) + +#define convert_palette(value) (value & 0x7FFF) + +u16 *gba_screen_pixels = NULL; + +#define get_screen_pixels() gba_screen_pixels +#define get_screen_pitch() GBA_SCREEN_PITCH + +#define REG_ADDR_BGxCNT(n) (REG_ADDR_BG0CNT + (n) * sizeof(u16)) +#define REG_ADDR_WINxH(n) (REG_ADDR_WIN0H + (n) * sizeof(winreg_t)) +#define REG_ADDR_WINxV(n) (REG_ADDR_WIN0V + (n) * sizeof(winreg_t)) +#define REG_ADDR_BGxHOFS(n) (REG_ADDR_BG0HOFS + ((n)*2) * sizeof(u16)) +#define REG_ADDR_BGxVOFS(n) (REG_ADDR_BG0VOFS + ((n)*2) * sizeof(u16)) +#define REG_ADDR_BGxPA(n) (REG_ADDR_BG2PA + ((n)-2) * 8 * sizeof(u16)) +#define REG_ADDR_BGxPB(n) (REG_ADDR_BG2PB + ((n)-2) * 8 
* sizeof(u16)) +#define REG_ADDR_BGxPC(n) (REG_ADDR_BG2PC + ((n)-2) * 8 * sizeof(u16)) +#define REG_ADDR_BGxPD(n) (REG_ADDR_BG2PD + ((n)-2) * 8 * sizeof(u16)) + +typedef struct { + u16 pad0[OAM_DATA_COUNT_AFFINE - 1]; + u16 dx; + u16 pad1[OAM_DATA_COUNT_AFFINE - 1]; + u16 dmx; + u16 pad2[OAM_DATA_COUNT_AFFINE - 1]; + u16 dy; + u16 pad3[OAM_DATA_COUNT_AFFINE - 1]; + u16 dmy; +} t_affp; + +typedef void (*bitmap_render_function)(u32 start, u32 end, void *dest_ptr, const u16 *pal); +typedef void (*tile_render_function)(u32 layer, u32 start, u32 end, void *dest_ptr, const u16 *pal); + +typedef void (*render_function_u16)(u32 start, u32 end, u16 *scanline, u32 enable_flags); +typedef void (*render_function_u32)(u32 start, u32 end, u32 *scanline, u32 enable_flags); + +typedef void (*window_render_function)(u16 *scanline, u32 start, u32 end); + +static void render_scanline_conditional(u32 start, u32 end, u16 *scanline, u32 enable_flags = 0x3F); + +typedef struct { + bitmap_render_function blit_render; + bitmap_render_function scale_render; + bitmap_render_function affine_render; +} bitmap_layer_render_struct; + +typedef struct { + render_function_u16 fullcolor; + render_function_u16 indexed_u16; + render_function_u32 indexed_u32; + render_function_u32 stacked; +} layer_render_struct; + +// Object blending modes +#define OBJ_MOD_NORMAL 0 +#define OBJ_MOD_SEMITRAN 1 +#define OBJ_MOD_WINDOW 2 +#define OBJ_MOD_INVALID 3 + +// BLDCNT color effect modes +#define COL_EFFECT_NONE 0x0 +#define COL_EFFECT_BLEND 0x1 +#define COL_EFFECT_BRIGHT 0x2 +#define COL_EFFECT_DARK 0x3 + +// Background render modes +#define RENDER_NORMAL 0 +#define RENDER_COL16 1 +#define RENDER_COL32 2 +#define RENDER_ALPHA 3 + +// Byte lengths of complete tiles and tile rows in 4bpp and 8bpp. 
+
+#define tile_width_4bpp 4
+#define tile_size_4bpp 32
+#define tile_width_8bpp 8
+#define tile_size_8bpp 64
+
+// Sprite rendering cycle budgets
+#define REND_CYC_MAX 32768 /* Theoretical max is 17920 */
+#define REND_CYC_SCANLINE 1210
+#define REND_CYC_REDUCED 954
+
+// Builds the blending flags for a pixel of the given layer from BLDCNT:
+// bit 9 is set when the layer is a 1st target (BLDCNT bit `layer`), bit 10
+// when it is a 2nd target (BLDCNT bit `layer + 8`). Used later for blending.
+static inline u16 color_flags(u32 layer)
+{
+    const u32 bldcnt = read_ioreg(REG_ADDR_BLDCNT);
+    const u32 first_target = (bldcnt >> layer) & 0x01;
+    // Shifting by layer + 7 leaves the 2nd-target bit in bit position 1.
+    const u32 second_target = (bldcnt >> (layer + 7)) & 0x02;
+
+    return (first_target | second_target) << 9;
+}
+
+// Tilemap width in pixels, indexed by the 2-bit screen-size field of BGxCNT.
+static const u32 map_widths[] = { 256, 512, 256, 512 };
+
+typedef enum {
+    FULLCOLOR, // Plain rendering: writes the final 16 bit color
+    INDXCOLOR, // Writes palette indices so dark/bright can be applied afterwards
+    STCKCOLOR, // Keeps two indexed pixels (+flags) stacked for blending
+    PIXCOPY // Sprite-only mode that makes obj-window drawing possible
+} rendtype;
+
+// Affine reference points: index 0 belongs to BG2, index 1 to BG3.
+s32 affine_reference_x[2];
+s32 affine_reference_y[2];
+
+// Sign-extends the low 28 bits of `value` to a full 32-bit signed integer.
+static inline s32 signext28(u32 value)
+{
+    return ((s32)(value << 4)) >> 4;
+}
+
+// Reloads the affine reference points from the BG2X/BG2Y and BG3X/BG3Y
+// registers. Per the original note, this happens every Vblank.
+void video_reload_counters()
+{
+    const u32 bg2_x = read_ioreg32(REG_ADDR_BG2X_L);
+    affine_reference_x[0] = signext28(bg2_x);
+    const u32 bg2_y = read_ioreg32(REG_ADDR_BG2Y_L);
+    affine_reference_y[0] = signext28(bg2_y);
+
+    const u32 bg3_x = read_ioreg32(REG_ADDR_BG3X_L);
+    affine_reference_x[1] = signext28(bg3_x);
+    const u32 bg3_y = read_ioreg32(REG_ADDR_BG3Y_L);
+    affine_reference_y[1] = signext28(bg3_y);
+}
+
+// Renders non-affine tiled background layer.
+// Will process a full or partial tile (start and end within 0..8) and draw
+// it in either 8 or 4 bpp mode. Honors vertical and horizontal flip.
+
+// tile contains the tile info (contains tile index, flip bits, pal info).
+// hflip causes the tile pixels lookup to be reversed (from MSB to LSB).
+// If isbase is not set, color 0 is interpreted as transparent, otherwise
+// we are drawing the base layer, so palette[0] is used (backdrop).
+ +template +static inline void rend_part_tile_Nbpp(u32 bg_comb, u32 px_comb, dtype *dest_ptr, u32 start, u32 end, u16 tile, const u8 *tile_base, + int vertical_pixel_flip, const u16 *paltbl) +{ + // Seek to the specified tile, using the tile number and size. + // tile_base already points to the right tile-line vertical offset + const u8 *tile_ptr = &tile_base[(tile & 0x3FF) * (is8bpp ? 64 : 32)]; + u16 bgcolor = paltbl[0]; + + // On vertical flip, apply the mirror offset + if (tile & 0x800) + tile_ptr += vertical_pixel_flip; + + if (is8bpp) { + // Each byte is a color, mapped to a palete. 8 bytes can be read as 64bit + for (u32 i = start; i < end; i++, dest_ptr++) { + // Honor hflip by selecting bytes in the correct order + u32 sel = hflip ? (7 - i) : i; + u8 pval = tile_ptr[sel]; + // Alhpa mode stacks previous value (unless rendering the first layer) + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = paltbl[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_comb; // Add combine flags + else if (rdtype == STCKCOLOR) + // Stack pixels on top of the pixel value and combine flags + *dest_ptr = pval | px_comb | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + if (rdtype == FULLCOLOR) + *dest_ptr = bgcolor; + else + *dest_ptr = 0 | bg_comb; // Add combine flags + } + } + } else { + // In 4bpp mode, the tile[15..12] bits contain the sub-palette number. + u16 tilepal = (tile >> 12) << 4; + u16 pxflg = px_comb | tilepal; + const u16 *subpal = &paltbl[tilepal]; + // Read packed pixel data, skip start pixels + u32 tilepix = eswap32(*(u32 *)tile_ptr); + if (hflip) + tilepix <<= (start * 4); + else + tilepix >>= (start * 4); + // Only 32 bits (8 pixels * 4 bits) + for (u32 i = start; i < end; i++, dest_ptr++) { + u8 pval = hflip ? 
tilepix >> 28 : tilepix & 0xF; + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = subpal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pxflg | pval; + else if (rdtype == STCKCOLOR) // Stack pixels + *dest_ptr = pxflg | pval | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + if (rdtype == FULLCOLOR) + *dest_ptr = bgcolor; + else + *dest_ptr = 0 | bg_comb; + } + // Advance to next packed data + if (hflip) + tilepix <<= 4; + else + tilepix >>= 4; + } + } +} + +// Same as above, but optimized for full tiles. Skip comments here. +template +static inline void render_tile_Nbpp(u32 bg_comb, u32 px_comb, dtype *dest_ptr, u16 tile, const u8 *tile_base, int vertical_pixel_flip, + const u16 *paltbl) +{ + const u8 *tile_ptr = &tile_base[(tile & 0x3FF) * (is8bpp ? 64 : 32)]; + u16 bgcolor = paltbl[0]; + + if (tile & 0x800) + tile_ptr += vertical_pixel_flip; + + if (is8bpp) { + for (u32 j = 0; j < 2; j++) { + u32 tilepix = eswap32(((u32 *)tile_ptr)[hflip ? 1 - j : j]); + if (tilepix) { + for (u32 i = 0; i < 4; i++, dest_ptr++) { + u8 pval = hflip ? (tilepix >> (24 - i * 8)) : (tilepix >> (i * 8)); + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = paltbl[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_comb; // Add combine flags + else if (rdtype == STCKCOLOR) + *dest_ptr = pval | px_comb | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } + } else { + for (u32 i = 0; i < 4; i++, dest_ptr++) + if (isbase) + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } + } else { + u32 tilepix = eswap32(*(u32 *)tile_ptr); + if (tilepix) { // We can skip it all if the row is transparent + u16 tilepal = (tile >> 12) << 4; + u16 pxflg = px_comb | tilepal; + const u16 *subpal = &paltbl[tilepal]; + for (u32 i = 0; i < 8; i++, dest_ptr++) { + u8 pval = (hflip ? 
(tilepix >> ((7 - i) * 4)) : (tilepix >> (i * 4))) & 0xF; + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = subpal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pxflg | pval; + else if (rdtype == STCKCOLOR) + *dest_ptr = pxflg | pval | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } + } else if (isbase) { + // In this case we simply fill the pixels with background pixels + for (u32 i = 0; i < 8; i++, dest_ptr++) + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } +} + +template +static void render_scanline_text_fast(u32 layer, u32 start, u32 end, void *scanline, const u16 *paltbl) +{ + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + u16 vcount = read_ioreg(REG_ADDR_VCOUNT); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 hoffset = (start + read_ioreg(REG_ADDR_BGxHOFS(layer))) % 512; + u32 voffset = (vcount + read_ioreg(REG_ADDR_BGxVOFS(layer))) % 512; + stype *dest_ptr = ((stype *)scanline) + start; + + // Calculate combine masks. These store 2 bits of info: 1st and 2nd target. + // If set, the current pixel belongs to a layer that is 1st or 2nd target. + u32 bg_comb = color_flags(5), px_comb = color_flags(layer); + + // Background map data is in VRAM, at an offset specified in 2K blocks. + // (each map data block is 32x32 tiles, at 16bpp, so 2KB) + u32 base_block = (bg_control & BGCNT_SCREENBASE_MASK) >> 8; + u16 *map_base = (u16 *)BG_SCREEN_ADDR(base_block); + u16 *map_ptr, *second_ptr; + + end -= start; + + // Skip the top one/two block(s) if using the bottom half + if ((map_size & 0x02) && (voffset >= 256)) + map_base += ((map_width / 8) * 32); + + // Skip the top tiles within the block + map_base += (((voffset % 256) / 8) * (map_width / 8)); + + // we might need to render from two charblocks, store a second pointer. 
+ second_ptr = map_ptr = map_base; + + if (map_size & 0x01) { // If background is 512 pixels wide + if (hoffset >= 256) { + // If we are rendering the right block, skip a whole charblock + hoffset -= 256; + map_ptr += ((map_width / 8) * 32); + } else { + // If we are rendering the left block, we might overrun into the right + second_ptr += ((map_width / 8) * 32); + } + } else { + hoffset %= 256; // Background is 256 pixels wide + } + + // Skip the left blocks within the block + map_ptr += hoffset / 8; + + // Render a single scanline of text tiles + u32 tilewidth = is8bpp ? tile_width_8bpp : tile_width_4bpp; + u32 vert_pix_offset = (voffset % 8) * tilewidth; + // Calculate the pixel offset between a line and its "flipped" mirror. + // The values can be {56, 40, 24, 8, -8, -24, -40, -56} + s32 vflip_off + = is8bpp ? tile_size_8bpp - 2 * vert_pix_offset - tile_width_8bpp : tile_size_4bpp - 2 * vert_pix_offset - tile_width_4bpp; + + // The tilemap base is selected via bgcnt (16KiB chunks) + u32 tilecntrl = (bg_control >> 2) & 0x03; + // Account for the base offset plus the tile vertical offset + u8 *tile_base = BG_CHAR_ADDR(tilecntrl) + vert_pix_offset; + // Number of pixels available until the end of the tile block + u32 pixel_run = map_width - hoffset; + + u32 tile_hoff = hoffset % 8; + u32 partial_hcnt = 8 - tile_hoff; + + if (tile_hoff) { + // First partial tile, only right side is visible. + u32 todraw = MIN(end, partial_hcnt); // [1..7] + u32 stop = tile_hoff + todraw; // Usually 8, unless short run. 
+ + u16 tile = eswap16(*map_ptr++); + if (tile & 0x400) // Tile horizontal flip + rend_part_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile_hoff, stop, tile, tile_base, + vflip_off, paltbl); + else + rend_part_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile_hoff, stop, tile, tile_base, + vflip_off, paltbl); + + dest_ptr += todraw; + end -= todraw; + pixel_run -= todraw; + } + + if (!end) + return; + + // Now render full tiles + u32 todraw = MIN(end, pixel_run) / 8; + + for (u32 i = 0; i < todraw; i++, dest_ptr += 8) { + u16 tile = eswap16(*map_ptr++); + if (tile & 0x400) // Tile horizontal flip + render_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile, tile_base, vflip_off, paltbl); + else + render_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile, tile_base, vflip_off, paltbl); + } + + end -= todraw * 8; + pixel_run -= todraw * 8; + + if (!end) + return; + + // Switch to the next char block if we ran out of tiles + if (!pixel_run) + map_ptr = second_ptr; + + todraw = end / 8; + for (u32 i = 0; i < todraw; i++, dest_ptr += 8) { + u16 tile = eswap16(*map_ptr++); + if (tile & 0x400) // Tile horizontal flip + render_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile, tile_base, vflip_off, paltbl); + else + render_tile_Nbpp(bg_comb, px_comb, dest_ptr, tile, tile_base, vflip_off, paltbl); + } + + end -= todraw * 8; + + // Finalize the tile rendering the left side of it (from 0 up to "end"). 
+ if (end) { + u16 tile = eswap16(*map_ptr++); + if (tile & 0x400) // Tile horizontal flip + rend_part_tile_Nbpp(bg_comb, px_comb, dest_ptr, 0, end, tile, tile_base, vflip_off, + paltbl); + else + rend_part_tile_Nbpp(bg_comb, px_comb, dest_ptr, 0, end, tile, tile_base, vflip_off, + paltbl); + } +} + +// A slow version of the above function that allows for mosaic effects +template +static void render_scanline_text_mosaic(u32 layer, u32 start, u32 end, void *scanline, const u16 *paltbl) +{ + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + const u32 mosh = (read_ioreg(REG_ADDR_MOSAIC) & 0xF) + 1; + const u32 mosv = ((read_ioreg(REG_ADDR_MOSAIC) >> 4) & 0xF) + 1; + u16 vcount = read_ioreg(REG_ADDR_VCOUNT); + u32 map_size = (bg_control >> 14) & 0x03; + u32 map_width = map_widths[map_size]; + u32 hoffset = (start + read_ioreg(REG_ADDR_BGxHOFS(layer))) % 512; + u16 vmosoff = vcount - vcount % mosv; + u32 voffset = (vmosoff + read_ioreg(REG_ADDR_BGxVOFS(layer))) % 512; + stype *dest_ptr = ((stype *)scanline) + start; + + u32 bg_comb = color_flags(5), px_comb = color_flags(layer); + + u32 base_block = (bg_control & BGCNT_SCREENBASE_MASK) >> 8; + u16 *map_base = (u16 *)BG_SCREEN_ADDR(base_block); + u16 *map_ptr, *second_ptr; + + if ((map_size & 0x02) && (voffset >= 256)) + map_base += ((map_width / 8) * 32); + + map_base += (((voffset % 256) / 8) * (map_width / 8)); + + second_ptr = map_ptr = map_base; + + if (map_size & 0x01) { // If background is 512 pixels wide + if (hoffset >= 256) { + // If we are rendering the right block, skip a whole charblock + hoffset -= 256; + map_ptr += ((map_width / 8) * 32); + } else { + // If we are rendering the left block, we might overrun into the right + second_ptr += ((map_width / 8) * 32); + } + } else { + hoffset %= 256; // Background is 256 pixels wide + } + + // Skip the left blocks within the block + map_ptr += hoffset / 8; + + // Render a single scanline of text tiles + u32 tilewidth = is8bpp ? 
tile_width_8bpp : tile_width_4bpp; + u32 vert_pix_offset = (voffset % 8) * tilewidth; + // Calculate the pixel offset between a line and its "flipped" mirror. + // The values can be {56, 40, 24, 8, -8, -24, -40, -56} + s32 vflip_off + = is8bpp ? tile_size_8bpp - 2 * vert_pix_offset - tile_width_8bpp : tile_size_4bpp - 2 * vert_pix_offset - tile_width_4bpp; + + // The tilemap base is selected via bgcnt (16KiB chunks) + u32 tilecntrl = (bg_control >> 2) & 0x03; + // Account for the base offset plus the tile vertical offset + u8 *tile_base = BG_CHAR_ADDR(tilecntrl) + vert_pix_offset; + + u16 bgcolor = paltbl[0]; + + // Iterate pixel by pixel, loading data every N pixels to honor mosaic effect + u8 pval = 0; + for (u32 i = 0; start < end; start++, i++, dest_ptr++) { + u16 tile = eswap16(*map_ptr); + + if (!(i % mosh)) { + const u8 *tile_ptr = &tile_base[(tile & 0x3FF) * (is8bpp ? 64 : 32)]; + + bool hflip = (tile & 0x400); + if (tile & 0x800) + tile_ptr += vflip_off; + + // Load byte or nibble with pixel data. + if (is8bpp) { + if (hflip) + pval = tile_ptr[7 - hoffset % 8]; + else + pval = tile_ptr[hoffset % 8]; + } else { + if (hflip) + pval = (tile_ptr[(7 - hoffset % 8) >> 1] >> (((hoffset & 1) ^ 1) * 4)) & 0xF; + else + pval = (tile_ptr[(hoffset % 8) >> 1] >> ((hoffset & 1) * 4)) & 0xF; + } + } + + if (is8bpp) { + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = paltbl[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_comb; // Add combine flags + else if (rdtype == STCKCOLOR) + *dest_ptr = pval | px_comb | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } else { + u16 tilepal = (tile >> 12) << 4; + u16 pxflg = px_comb | tilepal; + const u16 *subpal = &paltbl[tilepal]; + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = subpal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pxflg | pval; + else if (rdtype == STCKCOLOR) + *dest_ptr = pxflg | pval | ((isbase ? 
bg_comb : *dest_ptr) << 16); + } else if (isbase) { + *dest_ptr = (rdtype == FULLCOLOR) ? bgcolor : 0 | bg_comb; + } + } + + // Need to continue from the next charblock + hoffset++; + if (hoffset % 8 == 0) + map_ptr++; + if (hoffset >= map_width) { + hoffset = 0; + map_ptr = second_ptr; + } + } +} + +template +static void render_scanline_text(u32 layer, u32 start, u32 end, void *scanline, const u16 *paltbl) +{ + // Tile mode has 4 and 8 bpp modes. + u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer)); + bool is8bpp = (read_ioreg(REG_ADDR_BGxCNT(layer)) & 0x80); + const u32 mosamount = read_ioreg(REG_ADDR_MOSAIC) & 0xFF; + bool has_mosaic = (bg_control & 0x40) && (mosamount != 0); + + if (has_mosaic) { + if (is8bpp) + render_scanline_text_mosaic(layer, start, end, scanline, paltbl); + else + render_scanline_text_mosaic(layer, start, end, scanline, paltbl); + } else { + if (is8bpp) + render_scanline_text_fast(layer, start, end, scanline, paltbl); + else + render_scanline_text_fast(layer, start, end, scanline, paltbl); + } +} + +static inline u8 lookup_pix_8bpp(u32 px, u32 py, const u8 *tile_base, const u8 *map_base, u32 map_size) +{ + // Pitch represents the log2(number of tiles per row) (from 16 to 128) + u32 map_pitch = map_size + 4; + // Given coords (px,py) in the background space, find the tile. + u32 mapoff = (px / 8) + ((py / 8) << map_pitch); + // Each tile is 8x8, so 64 bytes each. + const u8 *tile_ptr = &tile_base[map_base[mapoff] * tile_size_8bpp]; + // Read the 8bit color within the tile. + return tile_ptr[(px % 8) + ((py % 8) * 8)]; +} + +template +static inline void rend_pix_8bpp(dsttype *dest_ptr, u8 pval, u32 bg_comb, u32 px_comb, const u16 *pal) +{ + // Alhpa mode stacks previous value (unless rendering the first layer) + if (pval) { + if (rdtype == FULLCOLOR) + *dest_ptr = pal[pval]; + else if (rdtype == INDXCOLOR) + *dest_ptr = pval | px_comb; // Add combine flags + else if (rdtype == STCKCOLOR) + // Stack pixels. If base, stack the base pixel. 
+ *dest_ptr = pval | px_comb | ((isbase ? bg_comb : *dest_ptr) << 16); + } else if (isbase) { + // Transparent pixel, but we are base layer, so render background. + if (rdtype == FULLCOLOR) + *dest_ptr = pal[0]; + else + *dest_ptr = 0 | bg_comb; // Just backdrop color and combine flags + } +} + +template static inline void render_bdrop_pixel_8bpp(dsttype *dest_ptr, u32 bg_comb, u16 bgcol) +{ + // Alhpa mode stacks previous value (unless rendering the first layer) + if (rdtype == FULLCOLOR) + *dest_ptr = bgcol; + else + *dest_ptr = 0 | bg_comb; +} + +typedef void (*affine_render_function)(u32 layer, u32 start, u32 cnt, const u8 *map_base, u32 map_size, const u8 *tile_base, void *dst_ptr, + const u16 *pal); + +// Affine background rendering logic. +// wrap extends the background infinitely, otherwise transparent/backdrop fill +// rotate indicates if there's any rotation (optimized version for no-rotation) +// mosaic applies to horizontal mosaic (vertical is adjusted via affine ref) +template +static inline void render_affine_background(u32 layer, u32 start, u32 cnt, const u8 *map_base, u32 map_size, const u8 *tile_base, + void *dst_ptr_raw, const u16 *pal) +{ + + dtype *dst_ptr = (dtype *)dst_ptr_raw; + // Backdrop and current layer combine bits. + u32 bg_comb = color_flags(5); + u32 px_comb = color_flags(layer); + + s32 dx = (s16)read_ioreg(REG_ADDR_BGxPA(layer)); + s32 dy = (s16)read_ioreg(REG_ADDR_BGxPC(layer)); + + s32 source_x = affine_reference_x[layer - 2] + (start * dx); + s32 source_y = affine_reference_y[layer - 2] + (start * dy); + + // Maps are squared, four sizes available (128x128 to 1024x1024) + u32 width_height = 128 << map_size; + + // Horizontal mosaic effect. + const u32 mosh = (mosaic ? 
(read_ioreg(REG_ADDR_MOSAIC)) & 0xF : 0) + 1; + + if (wrap) { + // In wrap mode the entire space is covered, since it "wraps" at the edges + u8 pval = 0; + if (rotate) { + for (u32 i = 0; cnt; i++, cnt--) { + u32 pix_x = (u32)(source_x >> 8) & (width_height - 1); + u32 pix_y = (u32)(source_y >> 8) & (width_height - 1); + + // Lookup pixel and draw it (only every Nth if mosaic is on) + if (!mosaic || !(i % mosh)) + pval = lookup_pix_8bpp(pix_x, pix_y, tile_base, map_base, map_size); + rend_pix_8bpp(dst_ptr++, pval, bg_comb, px_comb, pal); + + source_x += dx; + source_y += dy; // Move to the next pixel + } + } else { + // Y coordinate stays contant across the walk. + const u32 pix_y = (u32)(source_y >> 8) & (width_height - 1); + for (u32 i = 0; cnt; i++, cnt--) { + u32 pix_x = (u32)(source_x >> 8) & (width_height - 1); + if (!mosaic || !(i % mosh)) + pval = lookup_pix_8bpp(pix_x, pix_y, tile_base, map_base, map_size); + rend_pix_8bpp(dst_ptr++, pval, bg_comb, px_comb, pal); + source_x += dx; // Only moving in the X direction. + } + } + } else { + u16 bgcol = pal[0]; + if (rotate) { + // Draw backdrop pixels if necessary until we reach the background edge. + while (cnt) { + // Draw backdrop pixels if they lie outside of the background. + u32 pix_x = (u32)(source_x >> 8), pix_y = (u32)(source_y >> 8); + + // Stop once we find a pixel that is actually *inside* the map. + if (pix_x < width_height && pix_y < width_height) + break; + + // Draw a backdrop pixel if we are the base layer. + if (isbase) + render_bdrop_pixel_8bpp(dst_ptr, bg_comb, bgcol); + + dst_ptr++; + source_x += dx; + source_y += dy; + cnt--; + } + + // Draw background pixels by looking them up in the map + u8 pval = 0; + for (u32 i = 0; cnt; i++, cnt--) { + u32 pix_x = (u32)(source_x >> 8), pix_y = (u32)(source_y >> 8); + + // Check if we run out of background pixels, stop drawing. + if (pix_x >= width_height || pix_y >= width_height) + break; + + // Lookup pixel and draw it. 
+ if (!mosaic || !(i % mosh)) + pval = lookup_pix_8bpp(pix_x, pix_y, tile_base, map_base, map_size); + rend_pix_8bpp(dst_ptr++, pval, bg_comb, px_comb, pal); + + // Move to the next pixel, update coords accordingly + source_x += dx; + source_y += dy; + } + } else { + // Specialized version for scaled-only backgrounds + u8 pval = 0; + const u32 pix_y = (u32)(source_y >> 8); + if (pix_y < width_height) { // Check if within Y-coord range + // Draw/find till left edge + while (cnt) { + u32 pix_x = (u32)(source_x >> 8); + if (pix_x < width_height) + break; + + if (isbase) + render_bdrop_pixel_8bpp(dst_ptr, bg_comb, bgcol); + + dst_ptr++; + source_x += dx; + cnt--; + } + // Draw actual background + for (u32 i = 0; cnt; i++, cnt--) { + u32 pix_x = (u32)(source_x >> 8); + if (pix_x >= width_height) + break; + + if (!mosaic || !(i % mosh)) + pval = lookup_pix_8bpp(pix_x, pix_y, tile_base, map_base, map_size); + rend_pix_8bpp(dst_ptr++, pval, bg_comb, px_comb, pal); + + source_x += dx; + } + } + } + + // Complete the line on the right, if we ran out over the bg edge. + // Only necessary for the base layer, otherwise we can safely finish. + if (isbase) + while (cnt--) + render_bdrop_pixel_8bpp(dst_ptr++, bg_comb, bgcol); + } +} + +// Renders affine backgrounds. These differ substantially from non-affine +// ones. Tile maps are byte arrays (instead of 16 bit), limiting the map to +// 256 different tiles (with no flip bits and just one single 256 color pal). +// Optimize for common cases: wrap/non-wrap, scaling/rotation. 
+// NOTE(review): every template parameter list and explicit template-argument
+// list between here and render_affine_object was missing from the text under
+// review (the angle-bracket contents had been stripped). They are rebuilt from
+// the names each body uses; the parameter ORDER is an assumption and must be
+// confirmed against the callers and against render_affine_background.
+template <typename dsttype, rendtype rdtype, bool isbase>
+static void render_scanline_affine(u32 layer, u32 start, u32 end, void *scanline, const u16 *pal)
+{
+    // Entry point for one affine BG layer: decodes BGxCNT, then dispatches to
+    // the render_affine_background specialization matching the layer's
+    // wrap / rotation / mosaic configuration for the pixels [start, end).
+    u32 bg_control = read_ioreg(REG_ADDR_BGxCNT(layer));
+    u32 map_size = (bg_control >> 14) & 0x03;
+
+    // Char block base pointer
+    u32 base_block = (bg_control & BGCNT_SCREENBASE_MASK) >> 8;
+    u8 *map_base = BG_SCREEN_ADDR(base_block);
+    // The tilemap base is selected via bgcnt (16KiB chunks)
+    u32 tilecntrl = (bg_control >> 2) & 0x03;
+    u8 *tile_base = BG_CHAR_ADDR(tilecntrl);
+
+    dsttype *dest_ptr = ((dsttype *)scanline) + start;
+    const u32 mosamount = read_ioreg(REG_ADDR_MOSAIC) & 0xFF;
+
+    // Mosaic only counts when the BG enables it AND the BG mosaic size is non-zero.
+    bool has_mosaic = (bg_control & 0x40) && (mosamount != 0);
+    // A zero PC term means Y does not change along the scanline (scale only).
+    bool has_rotation = read_ioreg(REG_ADDR_BGxPC(layer)) != 0;
+    bool has_wrap = (bg_control >> 13) & 1;
+
+    // Number of pixels to render
+    u32 cnt = end - start;
+
+    // Eight specialized versions for faster rendering on specific cases like
+    // scaling only or non-wrapped backgrounds.
+    // Index bits: 0x4 = wrap, 0x2 = rotation, 0x1 = mosaic.
+    u32 fidx = (has_wrap ? 0x4 : 0) | (has_rotation ? 0x2 : 0) | (has_mosaic ? 0x1 : 0);
+
+    static const affine_render_function rdfns[8] = {
+        render_affine_background<dsttype, rdtype, isbase, false, false, false>,
+        render_affine_background<dsttype, rdtype, isbase, false, false, true>,
+        render_affine_background<dsttype, rdtype, isbase, false, true, false>,
+        render_affine_background<dsttype, rdtype, isbase, false, true, true>,
+        render_affine_background<dsttype, rdtype, isbase, true, false, false>,
+        render_affine_background<dsttype, rdtype, isbase, true, false, true>,
+        render_affine_background<dsttype, rdtype, isbase, true, true, false>,
+        render_affine_background<dsttype, rdtype, isbase, true, true, true>,
+    };
+
+    rdfns[fidx](layer, start, cnt, map_base, map_size, tile_base, dest_ptr, pal);
+}
+
+// Writes one bitmap-mode pixel to the destination buffer.
+// For modes other than 4 the value is a direct color and is always written
+// (through convert_palette). For mode 4 the value is a palette index: index 0
+// is skipped (nothing is written) and any other index is written according to
+// the render type (final color, index + flags, or stacked on the old pixel).
+template <rendtype rdmode, typename buftype, unsigned mode, typename pixfmt>
+static inline void bitmap_pixel_write(buftype *dst_ptr, pixfmt val, const u16 *palptr, u16 px_attr)
+{
+    if (mode != 4)
+        *dst_ptr = convert_palette(val); // Direct color, u16 bitmap
+    else if (val) {
+        if (rdmode == FULLCOLOR)
+            *dst_ptr = palptr[val];
+        else if (rdmode == INDXCOLOR)
+            *dst_ptr = val | px_attr; // Add combine flags
+        else if (rdmode == STCKCOLOR)
+            *dst_ptr = val | px_attr | ((*dst_ptr) << 16); // Stack pixels
+    }
+}
+
+typedef enum {
+    BLIT, // The bitmap has no scaling nor rotation on the X axis
+    SCALED, // The bitmap features some scaling (on the X axis) but no rotation
+    ROTATED // Bitmap has rotation (and perhaps scaling too)
+} bm_rendmode;
+
+// Renders a bitmap honoring the pixel mode and any affine transformations.
+// There's optimized versions for bitmaps without scaling / rotation.
+//
+// Draws the pixels [start, end) of the current scanline into `scanline`.
+// Source coordinates are 24.8 fixed point, taken from affine_reference_x/y[0]
+// and stepped by BG2PA / BG2PC. Pixels whose source coordinate falls outside
+// the width x height bitmap are left untouched.
+
+template <rendtype rdtype, typename dsttype, // Rendering target type and format
+          unsigned mode, typename pixfmt, // Background mode and pixel type
+          unsigned width, unsigned height, // Bitmap size
+          bm_rendmode rdmode, // Rendering mode optimization.
+          bool mosaic> // Whether mosaic effect is used.
+static inline void render_scanline_bitmap(u32 start, u32 end, void *scanline, const u16 *palptr)
+{
+    s32 dx = (s16)read_ioreg(REG_ADDR_BG2PA);
+    s32 dy = (s16)read_ioreg(REG_ADDR_BG2PC);
+    s32 source_x = affine_reference_x[0] + (start * dx); // Always BG2
+    s32 source_y = affine_reference_y[0] + (start * dy);
+
+    // Premature abort render optimization if bitmap out of Y coordinate.
+    // (The unsigned compare also rejects negative Y.)
+    if ((rdmode != ROTATED) && ((u32)(source_y >> 8)) >= height)
+        return;
+
+    // Modes 4 and 5 feature double buffering.
+    bool second_frame = (mode >= 4) && (read_ioreg(REG_ADDR_DISPCNT) & 0x10);
+    pixfmt *src_ptr = (pixfmt *)&VRAM[second_frame ? 0xA000 : 0x0000];
+    dsttype *dst_ptr = ((dsttype *)scanline) + start;
+    u16 px_attr = color_flags(2); // Always BG2
+
+    // Horizontal mosaic period: a new source pixel is sampled every `mosh`
+    // output pixels (1 when mosaic is disabled).
+    const u32 mosh = (mosaic ? (read_ioreg(REG_ADDR_MOSAIC)) & 0xF : 0) + 1;
+
+    if (rdmode == BLIT) {
+        // We just blit pixels (copy) from buffer to buffer.
+        const u32 pixel_y = (u32)(source_y >> 8);
+        if (source_x < 0) {
+            // The bitmap starts somewhere after "start", skip those pixels.
+            u32 delta = (-source_x + 255) >> 8;
+            dst_ptr += delta;
+            start += delta;
+            source_x = 0;
+        }
+
+        u32 pixel_x = (u32)(source_x >> 8);
+        // Nothing to draw if the skip above moved past the window, or if the
+        // first sample already lies to the right of the bitmap. Without this
+        // check the unsigned subtractions below wrap around and the copy loop
+        // writes outside [start, end) or reads outside the bitmap row.
+        if (start >= end || pixel_x >= width)
+            return;
+
+        u32 pixcnt = MIN(end - start, width - pixel_x);
+        pixfmt *valptr = &src_ptr[pixel_x + (pixel_y * width)];
+        pixfmt val = 0;
+        for (u32 i = 0; pixcnt; i++, pixcnt--, valptr++) {
+            // Pretty much pixel copier
+            if (!mosaic || !(i % mosh))
+                val = sizeof(pixfmt) == 2 ? eswap16(*valptr) : *valptr;
+            bitmap_pixel_write<rdtype, dsttype, mode, pixfmt>(dst_ptr++, val, palptr, px_attr);
+        }
+    } else if (rdmode == SCALED) {
+        // Similarly to above, but now we need to sample pixels instead.
+        const u32 pixel_y = (u32)(source_y >> 8);
+
+        // Find the "inside" of the bitmap
+        while (start < end) {
+            u32 pixel_x = (u32)(source_x >> 8);
+            if (pixel_x < width)
+                break;
+            source_x += dx;
+            start++;
+            dst_ptr++;
+        }
+
+        u32 cnt = end - start;
+        pixfmt val = 0;
+        for (u32 i = 0; cnt; i++, cnt--) {
+            u32 pixel_x = (u32)(source_x >> 8);
+            if (pixel_x >= width)
+                break; // We reached the end of the bitmap
+
+            if (!mosaic || !(i % mosh)) {
+                pixfmt *valptr = &src_ptr[pixel_x + (pixel_y * width)];
+                val = sizeof(pixfmt) == 2 ? eswap16(*valptr) : *valptr;
+            }
+
+            bitmap_pixel_write<rdtype, dsttype, mode, pixfmt>(dst_ptr++, val, palptr, px_attr);
+            source_x += dx;
+        }
+    } else {
+        // Look for the first pixel to be drawn.
+        while (start < end) {
+            u32 pixel_x = (u32)(source_x >> 8), pixel_y = (u32)(source_y >> 8);
+            if (pixel_x < width && pixel_y < height)
+                break;
+            start++;
+            dst_ptr++;
+            source_x += dx;
+            source_y += dy;
+        }
+
+        pixfmt val = 0;
+        // `i` counts drawn pixels and must advance every iteration: it drives
+        // the mosaic sampling period below (previously it stayed at 0, so
+        // horizontal mosaic was silently ignored for rotated bitmaps).
+        for (u32 i = 0; start < end; start++, i++) {
+            u32 pixel_x = (u32)(source_x >> 8), pixel_y = (u32)(source_y >> 8);
+
+            // Check if we run out of background pixels, stop drawing.
+            if (pixel_x >= width || pixel_y >= height)
+                break;
+
+            // Lookup pixel and draw it.
+            if (!mosaic || !(i % mosh)) {
+                pixfmt *valptr = &src_ptr[pixel_x + (pixel_y * width)];
+                val = sizeof(pixfmt) == 2 ? eswap16(*valptr) : *valptr;
+            }
+
+            bitmap_pixel_write<rdtype, dsttype, mode, pixfmt>(dst_ptr++, val, palptr, px_attr);
+
+            // Move to the next pixel, update coords accordingly
+            source_x += dx;
+            source_y += dy;
+        }
+    }
+}
+
+// Object/Sprite rendering logic
+
+// Sprite dimensions in pixels, indexed as [shape][size][0 = width, 1 = height].
+static const u8 obj_dim_table[3][4][2] = { { { 8, 8 }, { 16, 16 }, { 32, 32 }, { 64, 64 } },
+                                           { { 16, 8 }, { 32, 8 }, { 32, 16 }, { 64, 32 } },
+                                           { { 8, 16 }, { 8, 32 }, { 16, 32 }, { 32, 64 } } };
+
+// Per-priority, per-scanline lists of OAM indices and their entry counts.
+static u8 obj_priority_list[5][DISPLAY_HEIGHT][128];
+static u8 obj_priority_count[5][DISPLAY_HEIGHT];
+static u8 obj_alpha_count[DISPLAY_HEIGHT];
+
+// Decoded position/size of one sprite plus a pointer to its raw OAM entry.
+typedef struct {
+    s32 obj_x, obj_y;
+    s32 obj_w, obj_h;
+    const OamData *oam_data;
+    bool is_double;
+} t_sprite;
+
+// Renders a tile row (8 pixels) for a regular (non-affine) object/sprite.
+// tile_offset points to the VRAM offset where the data lives.
+// Only the pixels [start, end) of the row are drawn; zero-valued (transparent)
+// pixels leave the destination untouched.
+// NOTE(review): PIXCOPY reads dest_ptr[DISPLAY_WIDTH] while render_scanline_objs
+// fills its temporary line at scanline[GBA_SCREEN_PITCH] - confirm that both
+// constants are the same value.
+template <typename dsttype, rendtype rdtype, bool is8bpp, bool hflip>
+static inline void render_obj_part_tile_Nbpp(u32 px_comb, dsttype *dest_ptr, u32 start, u32 end, u32 tile_offset, u16 palette,
+                                             const u16 *pal)
+{
+    // Note that the last VRAM bank wrap around, hence the offset aliasing
+    const u8 *tile_ptr = OBJ_VRAM0 + (tile_offset & 0x7FFF);
+    u32 px_attr = px_comb | palette | 0x100; // Combine flags + high palette bit
+
+    if (is8bpp) {
+        // Each byte is a color, mapped to a palette.
+        for (u32 i = start; i < end; i++, dest_ptr++) {
+            // Honor hflip by selecting bytes in the correct order
+            u32 sel = hflip ? (7 - i) : i;
+            u8 pval = tile_ptr[sel];
+            // Alpha mode stacks previous value
+            if (pval) {
+                if (rdtype == FULLCOLOR)
+                    *dest_ptr = pal[pval];
+                else if (rdtype == INDXCOLOR)
+                    *dest_ptr = pval | px_attr; // Add combine flags
+                else if (rdtype == STCKCOLOR) {
+                    // Stack pixels on top of the pixel value and combine flags
+                    // We do not stack OBJ on OBJ, rather overwrite the previous object
+                    if (*dest_ptr & 0x100)
+                        *dest_ptr = pval | px_attr | ((*dest_ptr) & 0xFFFF0000);
+                    else
+                        *dest_ptr = pval | px_attr | ((*dest_ptr) << 16);
+                } else if (rdtype == PIXCOPY)
+                    *dest_ptr = dest_ptr[DISPLAY_WIDTH];
+            }
+        }
+    } else {
+        // Only 32 bits (8 pixels * 4 bits)
+        for (u32 i = start; i < end; i++, dest_ptr++) {
+            // Byte and nibble holding pixel i (mirrored when flipped)
+            u32 selb = hflip ? (3 - i / 2) : i / 2;
+            u32 seln = hflip ? ((i & 1) ^ 1) : (i & 1);
+            u8 pval = (tile_ptr[selb] >> (seln * 4)) & 0xF;
+            const u16 *subpal = &pal[palette];
+            if (pval) {
+                if (rdtype == FULLCOLOR)
+                    *dest_ptr = subpal[pval];
+                else if (rdtype == INDXCOLOR)
+                    *dest_ptr = pval | px_attr;
+                else if (rdtype == STCKCOLOR) {
+                    if (*dest_ptr & 0x100)
+                        *dest_ptr = pval | px_attr | ((*dest_ptr) & 0xFFFF0000);
+                    else
+                        *dest_ptr = pval | px_attr | ((*dest_ptr) << 16); // Stack pixels
+                } else if (rdtype == PIXCOPY)
+                    *dest_ptr = dest_ptr[DISPLAY_WIDTH];
+            }
+        }
+    }
+}
+
+// Same as above but optimized for full tiles
+// Always draws a whole 8-pixel row; the row is loaded as 32-bit words so that
+// fully transparent words can be skipped without touching the destination.
+template <typename dsttype, rendtype rdtype, bool is8bpp, bool hflip>
+static inline void render_obj_tile_Nbpp(u32 px_comb, dsttype *dest_ptr, u32 tile_offset, u16 palette, const u16 *pal)
+{
+    const u8 *tile_ptr = &VRAM[0x10000 + (tile_offset & 0x7FFF)];
+    u32 px_attr = px_comb | palette | 0x100; // Combine flags + high palette bit
+
+    if (is8bpp) {
+        // Two words of four 8-bit pixels each (word order swapped when flipped)
+        for (u32 j = 0; j < 2; j++) {
+            u32 tilepix = eswap32(((u32 *)tile_ptr)[hflip ? 1 - j : j]);
+            if (tilepix) {
+                for (u32 i = 0; i < 4; i++, dest_ptr++) {
+                    u8 pval = hflip ? (tilepix >> (24 - i * 8)) : (tilepix >> (i * 8));
+                    if (pval) {
+                        if (rdtype == FULLCOLOR)
+                            *dest_ptr = pal[pval];
+                        else if (rdtype == INDXCOLOR)
+                            *dest_ptr = pval | px_attr; // Add combine flags
+                        else if (rdtype == STCKCOLOR) {
+                            if (*dest_ptr & 0x100)
+                                *dest_ptr = pval | px_attr | ((*dest_ptr) & 0xFFFF0000);
+                            else
+                                *dest_ptr = pval | px_attr | ((*dest_ptr) << 16);
+                        } else if (rdtype == PIXCOPY)
+                            *dest_ptr = dest_ptr[DISPLAY_WIDTH];
+                    }
+                }
+            } else
+                dest_ptr += 4;
+        }
+    } else {
+        u32 tilepix = eswap32(*(u32 *)tile_ptr);
+        if (tilepix) { // Can skip all pixels if the row is just transparent
+            for (u32 i = 0; i < 8; i++, dest_ptr++) {
+                u8 pval = (hflip ? (tilepix >> ((7 - i) * 4)) : (tilepix >> (i * 4))) & 0xF;
+                const u16 *subpal = &pal[palette];
+                if (pval) {
+                    if (rdtype == FULLCOLOR)
+                        *dest_ptr = subpal[pval];
+                    else if (rdtype == INDXCOLOR)
+                        *dest_ptr = pval | px_attr;
+                    else if (rdtype == STCKCOLOR) { // Stack background, replace sprite
+                        if (*dest_ptr & 0x100)
+                            *dest_ptr = pval | px_attr | ((*dest_ptr) & 0xFFFF0000);
+                        else
+                            *dest_ptr = pval | px_attr | ((*dest_ptr) << 16);
+                    } else if (rdtype == PIXCOPY)
+                        *dest_ptr = dest_ptr[DISPLAY_WIDTH];
+                }
+            }
+        }
+    }
+}
+
+// Renders a regular sprite (non-affine) row to screen.
+// delta_x is the object X coordinate referenced from the window start.
+// cnt is the maximum number of pixels to draw, honoring window, obj width, etc.
+// The row is drawn as: optional partial tile on the left, whole tiles, then an
+// optional partial tile on the right.
+template <typename stype, rendtype rdtype, bool is8bpp, bool hflip>
+static void render_object(s32 delta_x, u32 cnt, stype *dst_ptr, u32 tile_offset, u32 px_comb, u16 palette, const u16 *palptr)
+{
+    // Tile size in bytes for each mode
+    const u32 tile_bsize = is8bpp ? tile_size_8bpp : tile_size_4bpp;
+    // Number of bytes to advance (or rewind) on the tile map
+    const s32 tile_size_off = hflip ? -tile_bsize : tile_bsize;
+
+    if (delta_x < 0) { // Left part is outside of the screen/window.
+        u32 offx = -delta_x; // How many pixels did we skip from the object?
+        s32 block_off = offx / 8;
+        u32 tile_off = offx % 8;
+
+        // Skip the first object tiles (skips in the flip direction)
+        tile_offset += block_off * tile_size_off;
+
+        // Render a partial tile to the left
+        if (tile_off) {
+            u32 residual = 8 - tile_off; // Pixel count to complete the first tile
+            u32 maxpix = MIN(residual, cnt);
+            render_obj_part_tile_Nbpp<stype, rdtype, is8bpp, hflip>(px_comb, dst_ptr, tile_off, tile_off + maxpix, tile_offset, palette,
+                                                                    palptr);
+
+            // Move to the next tile
+            tile_offset += tile_size_off;
+            // Account for drawn pixels
+            cnt -= maxpix;
+            dst_ptr += maxpix;
+        }
+    } else {
+        // Render object completely from the left. Skip the empty space to the left
+        dst_ptr += delta_x;
+    }
+
+    // Render full tiles to the scan line.
+    s32 num_tiles = cnt / 8;
+    while (num_tiles--) {
+        // Render full tiles
+        render_obj_tile_Nbpp<stype, rdtype, is8bpp, hflip>(px_comb, dst_ptr, tile_offset, palette, palptr);
+        tile_offset += tile_size_off;
+        dst_ptr += 8;
+    }
+
+    // Render any partial tile on the end
+    cnt = cnt % 8;
+    if (cnt)
+        render_obj_part_tile_Nbpp<stype, rdtype, is8bpp, hflip>(px_comb, dst_ptr, 0, cnt, tile_offset, palette, palptr);
+}
+
+// A slower version of the version above, that renders objects pixel by pixel.
+// This allows proper mosaic effects whenever necessary.
+// A new source pixel is fetched every `mosh` output pixels and repeated in
+// between.
+template <typename stype, rendtype rdtype, bool is8bpp, bool hflip>
+static void render_object_mosaic(s32 delta_x, u32 cnt, stype *dst_ptr, u32 base_tile_offset, u32 mosh, u32 px_comb, u16 palette,
+                                 const u16 *pal)
+{
+    const u32 tile_bsize = is8bpp ? tile_size_8bpp : tile_size_4bpp;
+    const s32 tile_size_off = hflip ? -tile_bsize : tile_bsize;
+
+    u32 offx = 0;
+    if (delta_x < 0) { // Left part is outside of the screen/window.
+        offx = -delta_x; // Number of skipped pixels
+    } else {
+        dst_ptr += delta_x;
+    }
+
+    u32 px_attr = px_comb | palette | 0x100; // Combine flags + high palette bit
+
+    u8 pval = 0;
+    for (u32 i = 0; i < cnt; i++, offx++, dst_ptr++) {
+        if (!(i % mosh)) {
+            // Load tile pixel color.
+            u32 tile_offset = base_tile_offset + (offx / 8) * tile_size_off;
+            const u8 *tile_ptr = &VRAM[0x10000 + (tile_offset & 0x7FFF)];
+
+            // Lookup for each mode and flip value.
+            if (is8bpp) {
+                if (hflip)
+                    pval = tile_ptr[7 - offx % 8];
+                else
+                    pval = tile_ptr[offx % 8];
+            } else {
+                if (hflip)
+                    pval = (tile_ptr[(7 - offx % 8) >> 1] >> (((offx & 1) ^ 1) * 4)) & 0xF;
+                else
+                    pval = (tile_ptr[(offx % 8) >> 1] >> ((offx & 1) * 4)) & 0xF;
+            }
+        }
+
+        // Write the pixel value as required
+        const u16 *subpal = &pal[palette];
+        if (pval) {
+            if (rdtype == FULLCOLOR)
+                *dst_ptr = is8bpp ? pal[pval] : subpal[pval];
+            else if (rdtype == INDXCOLOR)
+                *dst_ptr = pval | px_attr; // Add combine flags
+            else if (rdtype == STCKCOLOR) {
+                if (*dst_ptr & 0x100)
+                    *dst_ptr = pval | px_attr | ((*dst_ptr) & 0xFFFF0000);
+                else
+                    *dst_ptr = pval | px_attr | ((*dst_ptr) << 16);
+            } else if (rdtype == PIXCOPY)
+                *dst_ptr = dst_ptr[DISPLAY_WIDTH];
+        }
+    }
+}
+
+// Renders an affine sprite row to screen.
+// They support 4bpp and 8bpp modes. 1D and 2D tile mapping modes.
+// Their render area is limited to their size (and optionally double size)
+// The caller must guarantee that the sprite's on-screen box overlaps
+// [start, end): the pixel count is computed with unsigned arithmetic.
+template <typename stype, rendtype rdtype, bool is8bpp, bool rotate, bool mosaic>
+static void render_affine_object(const t_sprite *obji, const t_affp *affp, bool is_double, u32 start, u32 end, stype *dst_ptr, u32 mosv,
+                                 u32 mosh, u32 base_tile, u32 pxcomb, u16 palette, const u16 *palptr)
+{
+    // Tile size in bytes for each mode
+    const u32 tile_bsize = is8bpp ? tile_size_8bpp : tile_size_4bpp;
+    const u32 tile_bwidth = is8bpp ? tile_width_8bpp : tile_width_4bpp;
+
+    // Affine params
+    s32 dx = (s16)eswap16(affp->dx);
+    s32 dy = (s16)eswap16(affp->dy);
+    s32 dmx = (s16)eswap16(affp->dmx);
+    s32 dmy = (s16)eswap16(affp->dmy);
+
+    // Object dimensions and boundaries
+    u32 obj_dimw = obji->obj_w;
+    u32 obj_dimh = obji->obj_h;
+    s32 middle_x = is_double ? obji->obj_w : (obji->obj_w / 2);
+    s32 middle_y = is_double ? obji->obj_h : (obji->obj_h / 2);
+    s32 obj_width = is_double ? obji->obj_w * 2 : obji->obj_w;
+    s32 obj_height = is_double ? obji->obj_h * 2 : obji->obj_h;
+
+    s32 vcount = read_ioreg(REG_ADDR_VCOUNT);
+    if (mosaic)
+        vcount -= vcount % mosv;
+    s32 y_delta = vcount - (obji->obj_y + middle_y);
+
+    // If the sprite starts left of the window, begin sampling further in.
+    if (obji->obj_x < (signed)start)
+        middle_x -= (start - obji->obj_x);
+    // Texture coordinates (24.8 fixed point) of the first on-screen pixel,
+    // measured from the sprite centre (obj_dim << 7 is half the size in .8).
+    s32 source_x = (obj_dimw << 7) + (y_delta * dmx) - (middle_x * dx);
+    s32 source_y = (obj_dimh << 7) + (y_delta * dmy) - (middle_x * dy);
+
+    // Early optimization if Y-coord is out completely for this line.
+    // (if there's no rotation Y coord remains identical throughout the line).
+    if (!rotate && ((u32)(source_y >> 8)) >= (u32)obj_height)
+        return;
+
+    u32 d_start = MAX((signed)start, obji->obj_x);
+    u32 d_end = MIN((signed)end, obji->obj_x + obj_width);
+    u32 cnt = d_end - d_start;
+    dst_ptr += d_start;
+
+    bool obj1dmap = read_ioreg(REG_ADDR_DISPCNT) & 0x40;
+    const u32 tile_pitch = obj1dmap ? (obj_dimw / 8) * tile_bsize : 1024;
+    u32 px_attr = pxcomb | palette | 0x100; // Combine flags + high palette bit
+
+    // Skip pixels outside of the sprite area, until we reach the sprite "inside"
+    while (cnt) {
+        u32 pixel_x = (u32)(source_x >> 8), pixel_y = (u32)(source_y >> 8);
+
+        // Stop once we find a pixel that is actually *inside* the map.
+        if (pixel_x < obj_dimw && pixel_y < obj_dimh)
+            break;
+
+        dst_ptr++;
+        source_x += dx;
+        if (rotate)
+            source_y += dy;
+        cnt--;
+    }
+
+    // Draw sprite pixels by looking them up first. Lookup address is tricky!
+    u8 pixval = 0;
+    for (u32 i = 0; i < cnt; i++) {
+        u32 pixel_x = (u32)(source_x >> 8), pixel_y = (u32)(source_y >> 8);
+
+        // Check if we run out of the sprite, then we can safely abort.
+        if (pixel_x >= obj_dimw || pixel_y >= obj_dimh)
+            return;
+
+        // For mosaic, we "remember" the last looked up pixel.
+        if (!mosaic || !(i % mosh)) {
+            // Lookup pixel and draw it.
+            if (is8bpp) {
+                // We lookup the byte directly and render it.
+                const u32 tile_off = base_tile + // Character base
+                    ((pixel_y >> 3) * tile_pitch) + // Skip vertical blocks
+                    ((pixel_x >> 3) * tile_bsize) + // Skip horizontal blocks
+                    ((pixel_y & 0x7) * tile_bwidth) + // Skip vertical rows to the pixel
+                    (pixel_x & 0x7); // Skip the horizontal offset
+
+                pixval = *(OBJ_VRAM0 + (tile_off & 0x7FFF)); // Read pixel value!
+            } else {
+                const u32 tile_off = base_tile + // Character base
+                    ((pixel_y >> 3) * tile_pitch) + // Skip vertical blocks
+                    ((pixel_x >> 3) * tile_bsize) + // Skip horizontal blocks
+                    ((pixel_y & 0x7) * tile_bwidth) + // Skip vertical rows to the pixel
+                    ((pixel_x >> 1) & 0x3); // Skip the horizontal offset
+
+                u8 pixpair = *(OBJ_VRAM0 + (tile_off & 0x7FFF)); // Read 2 pixels @4bpp
+                pixval = ((pixel_x & 1) ? pixpair >> 4 : pixpair & 0xF);
+            }
+        }
+
+        // Render the pixel value
+        if (pixval) {
+            if (rdtype == FULLCOLOR)
+                *dst_ptr = palptr[pixval | palette];
+            else if (rdtype == INDXCOLOR)
+                *dst_ptr = pixval | px_attr; // Add combine flags
+            else if (rdtype == STCKCOLOR) {
+                // Stack pixels on top of the pixel value and combine flags
+                if (*dst_ptr & 0x100)
+                    *dst_ptr = pixval | px_attr | ((*dst_ptr) & 0xFFFF0000);
+                else
+                    *dst_ptr = pixval | px_attr | ((*dst_ptr) << 16); // Stack pixels
+            } else if (rdtype == PIXCOPY)
+                *dst_ptr = dst_ptr[DISPLAY_WIDTH];
+        }
+
+        // Move to the next pixel, update coords accordingly
+        dst_ptr++;
+        source_x += dx;
+        if (rotate)
+            source_y += dy;
+    }
+}
+
+// Renders a single sprite on the current scanline.
+// This function calls the affine or regular renderer depending on the sprite.
+// Will calculate whether sprite has certain effects (flip, rotation ...) to
+// use an optimized renderer function.
+// NOTE(review): the template parameter list and the explicit template
+// arguments of the four renderer calls below were missing from the text under
+// review (angle-bracket contents stripped). They are rebuilt from the names
+// the body uses; the parameter ORDER is an assumption - confirm against
+// render_scanline_objs and the renderer definitions.
+template <typename stype, rendtype rdtype, bool is8bpp, bool mosaic>
+inline static void render_sprite(const t_sprite *obji, bool is_affine, u32 start, u32 end, stype *scanline, u32 pxcomb, const u16 *palptr)
+{
+    const OamData *attrs = obji->oam_data;
+    s32 line = read_ioreg(REG_ADDR_VCOUNT);
+    bool map_1d = read_ioreg(REG_ADDR_DISPCNT) & 0x40;
+
+    // Character data address: 32 bytes per tile number. With 8bpp sprites in
+    // 2D mapping the lowest tile-number bit is dropped.
+    u32 tile_num = attrs->split.tileNum;
+    if (is8bpp && !map_1d)
+        tile_num &= ~1;
+    u32 char_base = tile_num * 32;
+
+    // OBJ mosaic periods (register field + 1); both stay 1 without mosaic.
+    u32 mos_rows = 1;
+    u32 mos_cols = 1;
+    if (mosaic) {
+        mos_rows += (read_ioreg(REG_ADDR_MOSAIC) >> 12) & 0xF;
+        mos_cols += (read_ioreg(REG_ADDR_MOSAIC) >> 8) & 0xF;
+    }
+
+    // 4bpp sprites select one 16-color sub-palette; 8bpp sprites use offset 0.
+    u16 pal_base = 0;
+    if (!is8bpp)
+        pal_base = attrs->split.paletteNum << 4;
+
+    if (is_affine) {
+        // Affine parameter groups are read out of OAM, indexed by matrixNum.
+        const u32 group = attrs->split.matrixNum;
+        const t_affp *matrix = &((const t_affp *)OAM)[group];
+
+        if (matrix->dy != 0) // Full rotation and scaling
+            render_affine_object<stype, rdtype, is8bpp, true, mosaic>(obji, matrix, obji->is_double, start, end, scanline, mos_rows,
+                                                                      mos_cols, char_base, pxcomb, pal_base, palptr);
+        else // No rotation happening (just scale)
+            render_affine_object<stype, rdtype, is8bpp, false, mosaic>(obji, matrix, obji->is_double, start, end, scanline, mos_rows,
+                                                                       mos_cols, char_base, pxcomb, pal_base, palptr);
+        return;
+    }
+
+    // Regular sprite: nothing to do when it lies entirely outside the window.
+    if (obji->obj_x >= (signed)end || obji->obj_x + obji->obj_w <= (signed)start)
+        return;
+
+    // For regular sprites, bits 3 and 4 of the matrixNum field are the
+    // horizontal and vertical flip flags.
+    const u32 flip_bits = attrs->split.matrixNum >> 3;
+    bool hflip = flip_bits & 1;
+    bool vflip = (flip_bits >> 1) & 1;
+
+    // Sprite row shown on this scanline (counted from the bottom when vflip),
+    // snapped to the vertical mosaic period.
+    u32 row;
+    if (vflip)
+        row = obji->obj_y + obji->obj_h - line - 1;
+    else
+        row = line - obji->obj_y;
+    if (mosaic)
+        row -= row % mos_rows;
+
+    const u32 bytes_per_tile = is8bpp ? tile_size_8bpp : tile_size_4bpp;
+    const u32 bytes_per_row = is8bpp ? tile_width_8bpp : tile_width_4bpp;
+    const u32 tiles_wide = obji->obj_w / 8;
+    // Distance between vertically adjacent tiles: sprite width in 1D mapping,
+    // a fixed 1024 bytes in 2D mapping.
+    const u32 row_pitch = map_1d ? tiles_wide * bytes_per_tile : 1024;
+    // Flipped sprites start at their last tile and walk backwards.
+    const u32 flip_skip = hflip ? (tiles_wide - 1) * bytes_per_tile : 0;
+
+    const u32 row_offset = char_base + // Char offset
+        (row / 8) * row_pitch + // Select tile row offset
+        (row % 8) * bytes_per_row + // Skip tile rows
+        flip_skip; // Account for horizontal flip
+
+    // Express everything relative to the window start and clip to it.
+    s32 rel_x = obji->obj_x - start;
+    u32 visible_w;
+    u32 window_w;
+    if (rel_x >= 0) {
+        visible_w = obji->obj_w;
+        window_w = end - obji->obj_x;
+    } else {
+        visible_w = obji->obj_w + rel_x;
+        window_w = end - start;
+    }
+    u32 draw_cnt = MIN(window_w, visible_w);
+    stype *window = &scanline[start];
+
+    // The per-pixel renderer is only needed for a real horizontal mosaic.
+    const bool use_mosaic = mosaic && mos_cols > 1;
+    if (use_mosaic && hflip)
+        render_object_mosaic<stype, rdtype, is8bpp, true>(rel_x, draw_cnt, window, row_offset, mos_cols, pxcomb, pal_base, palptr);
+    else if (use_mosaic)
+        render_object_mosaic<stype, rdtype, is8bpp, false>(rel_x, draw_cnt, window, row_offset, mos_cols, pxcomb, pal_base, palptr);
+    else if (hflip)
+        render_object<stype, rdtype, is8bpp, true>(rel_x, draw_cnt, window, row_offset, pxcomb, pal_base, palptr);
+    else
+        render_object<stype, rdtype, is8bpp, false>(rel_x, draw_cnt, window, row_offset, pxcomb, pal_base, palptr);
+}
+
+// Renders objects on a scanline for a given priority.
+// This function assumes that order_obj has been called to prepare the objects.
+template void render_scanline_objs(u32 priority, u32 start, u32 end, void *raw_ptr, const u16 *palptr) +{ + stype *scanline = (stype *)raw_ptr; + s32 vcount = read_ioreg(REG_ADDR_VCOUNT); + s32 objn; + u32 objcnt = obj_priority_count[priority][vcount]; + u8 *objlist = obj_priority_list[priority][vcount]; + + // Render all the visible objects for this priority (back to front) + for (objn = objcnt - 1; objn >= 0; objn--) { + // Objects in the list are pre-filtered and sorted in the appropriate order + u32 objoff = objlist[objn]; + const OamData *oam_data = (OamData *)&OAM[objoff * OAM_DATA_SIZE_AFFINE]; + + u16 obj_shape = oam_data->split.shape; + u16 obj_size = oam_data->split.size; + bool is_affine = oam_data->split.affineMode & 1; + bool is_trans = oam_data->split.objMode == OBJ_MOD_SEMITRAN; + s32 obj_x = oam_data->split.x; + s32 obj_y = oam_data->split.y; +#if !EXTENDED_OAM + if (obj_x > DISPLAY_WIDTH) + obj_x -= 512; +#endif + + t_sprite obji = { + .obj_x = obj_x, + .obj_y = obj_y, + .obj_w = obj_dim_table[obj_shape][obj_size][0], + .obj_h = obj_dim_table[obj_shape][obj_size][1], + .oam_data = oam_data, + .is_double = !!((oam_data->split.affineMode >> 1) & 1), + }; + + s32 obj_maxw = (is_affine && obji.is_double) ? obji.obj_w * 2 : obji.obj_w; + + // The object could be out of the window, check and skip. + if (obji.obj_x >= (signed)end || obji.obj_x + obj_maxw <= (signed)start) + continue; + + // ST-OBJs force 1st target bit (forced blending) + bool forcebld = is_trans && rdtype != FULLCOLOR; +#if !EXTENDED_OAM + if (obji.obj_y > DISPLAY_HEIGHT) + obji.obj_y -= 256; +#endif + // In PIXCOPY mode, we have already some stuff rendered (winout) and now + // we render the "win-in" area for this object. The PIXCOPY function will + // copy (merge) the two pixels depending on the result of the sprite render + // The temporary buffer is rendered on the next scanline area. 
+ if (rdtype == PIXCOPY) { + u32 sec_start = MAX((signed)start, obji.obj_x); + u32 sec_end = MIN((signed)end, obji.obj_x + obj_maxw); + u32 obj_enable = read_ioreg(REG_ADDR_WINOUT) >> 8; + + // Render at the next scanline! + u16 *tmp_ptr = (u16 *)&scanline[GBA_SCREEN_PITCH]; + render_scanline_conditional(sec_start, sec_end, tmp_ptr, obj_enable); + } + + // Calculate combine masks. These store 2 bits of info: 1st and 2nd target. + // If set, the current pixel belongs to a layer that is 1st or 2nd target. + // For ST-objs, we set an extra bit, for later blending. + u32 pxcomb = (forcebld ? 0x800 : 0) | color_flags(4); + + bool emosaic = oam_data->split.mosaic; + bool is_8bpp = oam_data->split.bpp; + + // Some games enable mosaic but set it to size 0 (1), so ignore. + const u32 mosreg = read_ioreg(REG_ADDR_MOSAIC) & 0xFF00; + + if (emosaic && mosreg) { + if (is_8bpp) + render_sprite(&obji, is_affine, start, end, scanline, pxcomb, palptr); + else + render_sprite(&obji, is_affine, start, end, scanline, pxcomb, palptr); + } else { + if (is_8bpp) + render_sprite(&obji, is_affine, start, end, scanline, pxcomb, palptr); + else + render_sprite(&obji, is_affine, start, end, scanline, pxcomb, palptr); + } + } +} + +int sprite_limit = 0; + +// Goes through the object list in the OAM (from #0 to #127) and adds objects +// into a per-priority list for every row they cover. +// Invisible objects are discarded. ST-objects are flagged. Cycle counting is +// performed to discard excessive objects (to match HW capabilities). +static void order_obj(u32 video_mode) +{ + u32 obj_num; + u32 row; + u16 rend_cycles[DISPLAY_HEIGHT]; + + bool hblank_free = read_ioreg(REG_ADDR_DISPCNT) & 0x20; + u16 max_rend_cycles = !sprite_limit ? REND_CYC_MAX : hblank_free ? 
REND_CYC_REDUCED : REND_CYC_SCANLINE; + + memset(obj_priority_count, 0, sizeof(obj_priority_count)); + memset(obj_alpha_count, 0, sizeof(obj_alpha_count)); + memset(rend_cycles, 0, sizeof(rend_cycles)); + + for (obj_num = 0; obj_num < 128; obj_num++) { + const OamData *oam_data = (OamData *)&OAM[obj_num * OAM_DATA_SIZE_AFFINE]; + + // Bit 9 disables regular sprites (that is, non-affine ones). + if (oam_data->split.affineMode == 2) + continue; + + u16 obj_shape = oam_data->split.shape; + u32 obj_mode = oam_data->split.objMode; + + // Prohibited shape and mode + if ((obj_shape == 0x3) || (obj_mode == OBJ_MOD_INVALID)) + continue; + + // On bitmap modes, OBJ tiles 0-511 are not usable, ignore objects using them. + if ((video_mode >= 3) && (!(oam_data->split.tileNum & 0x200))) + continue; + + // Calculate object size (from size and shape attr bits) + u16 obj_size = oam_data->split.size; + s32 obj_height = obj_dim_table[obj_shape][obj_size][1]; + s32 obj_width = obj_dim_table[obj_shape][obj_size][0]; + s32 obj_y = oam_data->split.y; + +#if !EXTENDED_OAM + if (obj_y > DISPLAY_HEIGHT) + obj_y -= 256; +#endif + // Double size for affine sprites with double bit set + if ((oam_data->split.affineMode >> 1) & 1) { + obj_height *= 2; + obj_width *= 2; + } + + if (((obj_y + obj_height) > 0) && (obj_y < DISPLAY_HEIGHT)) { + s32 obj_x = oam_data->split.x; + +#if !EXTENDED_OAM + if (obj_x > DISPLAY_WIDTH) + obj_x -= 512; +#endif + if (((obj_x + obj_width) > 0) && (obj_x < DISPLAY_WIDTH)) { + u32 obj_priority = oam_data->split.priority; + bool is_affine = oam_data->split.affineMode & 1; + // Clip Y coord and height to the 0..159 interval + u32 starty = MAX(obj_y, 0); + u32 endy = MIN(obj_y + obj_height, DISPLAY_HEIGHT); + + // Calculate needed cycles to render the sprite + u16 cyccnt = is_affine ? 
(10 + obj_width * 2) : obj_width; + + switch (obj_mode) { + case OBJ_MOD_SEMITRAN: + for (row = starty; row < endy; row++) { + if (rend_cycles[row] < max_rend_cycles) { + u32 cur_cnt = obj_priority_count[obj_priority][row]; + obj_priority_list[obj_priority][row][cur_cnt] = obj_num; + obj_priority_count[obj_priority][row] = cur_cnt + 1; + rend_cycles[row] += cyccnt; + // Mark the row as having semi-transparent objects + obj_alpha_count[row] = 1; + } + } + break; + case OBJ_MOD_WINDOW: + obj_priority = 4; + /* fallthrough */ + case OBJ_MOD_NORMAL: + // Add the object to the list. + for (row = starty; row < endy; row++) { + if (rend_cycles[row] < max_rend_cycles) { + u32 cur_cnt = obj_priority_count[obj_priority][row]; + obj_priority_list[obj_priority][row][cur_cnt] = obj_num; + obj_priority_count[obj_priority][row] = cur_cnt + 1; + rend_cycles[row] += cyccnt; + } + } + break; + }; + } + } + } +} + +u32 layer_order[16]; +u32 layer_count; + +// Sorts active BG/OBJ layers and generates an ordered list of layers. +// Things are drawn back to front, so lowest priority goes first. 
+static void order_layers(u32 layer_flags, u32 vcnt) +{ + bool obj_enabled = (layer_flags & 0x10); + s32 priority; + + layer_count = 0; + + for (priority = 3; priority >= 0; priority--) { + bool anyobj = obj_priority_count[priority][vcnt] > 0; + s32 lnum; + + for (lnum = 3; lnum >= 0; lnum--) { + if (((layer_flags >> lnum) & 1) && ((read_ioreg(REG_ADDR_BGxCNT(lnum)) & 0x03) == priority)) { + layer_order[layer_count++] = lnum; + } + } + + if (obj_enabled && anyobj) + layer_order[layer_count++] = priority | 0x04; + } +} + +// Blending is performed by separating an RGB value into 0G0R0B (32 bit) +// Since blending factors are at most 16, mult/add operations do not overflow +// to the neighbouring color and can be performed much faster than separately + +// Here follow the mask used to separate/expand the color to 32 bit (BLND_MSK), +// the masks to detect overflows in the blend operation (OVFx_MSK) and the +// masks used to saturate a channel once it has overflowed (SATx_MSK). + +#define BLND_MSK (SATR_MSK | SATG_MSK | SATB_MSK) + +#define OVFG_MSK 0x04000000 +#define OVFR_MSK 0x00008000 +#define OVFB_MSK 0x00000020 +#define SATG_MSK 0x03E00000 +#define SATR_MSK 0x00007C00 +#define SATB_MSK 0x0000001F + +typedef enum { + OBJ_BLEND, // No effects, just blend forced-blend pixels (ie. ST objects) + BLEND_ONLY, // Just alpha blending (if the pixels are 1st and 2nd target) + BLEND_BRIGHT, // Perform alpha blending if appropriate, and brighten otherwise + BLEND_DARK, // Same but with darken effect +} blendtype; + +// Applies blending (and optional brighten/darken) effect to a bunch of +// color-indexed pixel pairs. Depending on the mode and the pixel target +// number, blending, darken/brighten or no effect will be applied. 
+// Bits 0-8 encode the color index (paletted colors) +// Bit 9 is set if the pixel belongs to a 1st target layer +// Bit 10 is set if the pixel belongs to a 2nd target layer +// Bit 11 is set if the pixel belongs to a ST-object +// The upper halfword describes the pixel underneath: bits 16-24 are its color +// index and bit 26 is set if it belongs to a 2nd target layer +template static void merge_blend(u32 start, u32 end, u16 *dst, u32 *src) +{ + u32 bldalpha = read_ioreg(REG_ADDR_BLDALPHA); + u32 brightf = MIN(16, read_ioreg(REG_ADDR_BLDY) & 0x1F); + u32 blend_a = MIN(16, (bldalpha >> 0) & 0x1F); + u32 blend_b = MIN(16, (bldalpha >> 8) & 0x1F); + + bool can_saturate = blend_a + blend_b > 16; + + if (can_saturate) { + // If blending can result in saturation, we need to clamp output values. + while (start < end) { + u32 pixpair = src[start]; + // If ST-OBJ, force blending mode (has priority over other effects). + // If regular blending mode, blend if 1st/2nd bits are set respectively. + // Otherwise, apply other color effects if 1st bit is set. + bool force_blend = (pixpair & 0x04000800) == 0x04000800; + bool do_blend = (pixpair & 0x04000200) == 0x04000200; + if ((st_objs && force_blend) || (do_blend && bldtype == BLEND_ONLY)) { + // Top pixel is 1st target, pixel below is 2nd target. Blend! + u16 p1 = PLTT[(pixpair >> 0) & 0x1FF]; + u16 p2 = PLTT[(pixpair >> 16) & 0x1FF]; + u32 p1e = (p1 | (p1 << 16)) & BLND_MSK; + u32 p2e = (p2 | (p2 << 16)) & BLND_MSK; + u32 pfe = (((p1e * blend_a) + (p2e * blend_b)) >> 4); + + // If the overflow bit is set, saturate (set) all bits to one. + if (pfe & (OVFR_MSK | OVFG_MSK | OVFB_MSK)) { + if (pfe & OVFG_MSK) + pfe |= SATG_MSK; + if (pfe & OVFR_MSK) + pfe |= SATR_MSK; + if (pfe & OVFB_MSK) + pfe |= SATB_MSK; + } + pfe &= BLND_MSK; + dst[start++] = (pfe >> 16) | pfe; + } else if ((bldtype == BLEND_DARK || bldtype == BLEND_BRIGHT) && (pixpair & 0x200) == 0x200) { + // Top pixel is 1st-target, can still apply bright/dark effect. + u16 pidx = PLTT[pixpair & 0x1FF]; + u32 epixel = (pidx | (pidx << 16)) & BLND_MSK; + u32 pa = bldtype == BLEND_DARK ? 
0 : ((BLND_MSK * brightf) >> 4) & BLND_MSK; + u32 pb = ((epixel * (16 - brightf)) >> 4) & BLND_MSK; + epixel = (pa + pb) & BLND_MSK; + dst[start++] = (epixel >> 16) | epixel; + } else { + dst[start++] = PLTT[pixpair & 0x1FF]; // No effects + } + } + } else { + while (start < end) { + u32 pixpair = src[start]; + bool do_blend = (pixpair & 0x04000200) == 0x04000200; + bool force_blend = (pixpair & 0x04000800) == 0x04000800; + if ((st_objs && force_blend) || (do_blend && bldtype == BLEND_ONLY)) { + // Top pixel is 1st target, pixel below is 2nd target. Blend! + u16 p1 = PLTT[(pixpair >> 0) & 0x1FF]; + u16 p2 = PLTT[(pixpair >> 16) & 0x1FF]; + u32 p1e = (p1 | (p1 << 16)) & BLND_MSK; + u32 p2e = (p2 | (p2 << 16)) & BLND_MSK; + u32 pfe = (((p1e * blend_a) + (p2e * blend_b)) >> 4) & BLND_MSK; + dst[start++] = (pfe >> 16) | pfe; + } else if ((bldtype == BLEND_DARK || bldtype == BLEND_BRIGHT) && (pixpair & 0x200) == 0x200) { + // Top pixel is 1st-target, can still apply bright/dark effect. + u16 pidx = PLTT[pixpair & 0x1FF]; + u32 epixel = (pidx | (pidx << 16)) & BLND_MSK; + u32 pa = bldtype == BLEND_DARK ? 0 : ((BLND_MSK * brightf) >> 4) & BLND_MSK; + u32 pb = ((epixel * (16 - brightf)) >> 4) & BLND_MSK; + epixel = (pa + pb) & BLND_MSK; + dst[start++] = (epixel >> 16) | epixel; + } else { + dst[start++] = PLTT[pixpair & 0x1FF]; // No effects + } + } + } +} + +// Applies brighten/darken effect to a bunch of color-indexed pixels. +template static void merge_brightness(u32 start, u32 end, u16 *srcdst) +{ + u32 brightness = MIN(16, read_ioreg(REG_ADDR_BLDY) & 0x1F); + + while (start < end) { + u16 spix = srcdst[start]; + u16 pixcol = PLTT[spix & 0x1FF]; + + if ((spix & 0x200) == 0x200) { + // Pixel is 1st target, can apply color effect. + u32 epixel = (pixcol | (pixcol << 16)) & BLND_MSK; + u32 pa = bldtype == BLEND_DARK ? 
0 : ((BLND_MSK * brightness) >> 4) & BLND_MSK; // Black (darken) or white (brighten) + u32 pb = ((epixel * (16 - brightness)) >> 4) & BLND_MSK; // Pixel color + epixel = (pa + pb) & BLND_MSK; + pixcol = (epixel >> 16) | epixel; + } + + srcdst[start++] = pixcol; + } +} + +// Fills a segment using the backdrop color (in the right mode). +template void fill_line_background(u32 start, u32 end, dsttype *scanline) +{ + dsttype bgcol = PLTT[0]; + u16 bg_comb = color_flags(5); + while (start < end) + if (rdmode == FULLCOLOR) + scanline[start++] = bgcol; + else + scanline[start++] = 0 | bg_comb; +} + +// Renders the backdrop color (ie. whenever no layer is active) applying +// any effects that might still apply (usually darken/brighten). +static void render_backdrop(u32 start, u32 end, u16 *scanline) +{ + u16 bldcnt = read_ioreg(REG_ADDR_BLDCNT); + u16 pixcol = PLTT[0]; + u32 effect = (bldcnt >> 6) & 0x03; + u32 bd_1st_target = ((bldcnt >> 0x5) & 0x01); + + if (bd_1st_target && effect == COL_EFFECT_BRIGHT) { + u32 brightness = MIN(16, read_ioreg(REG_ADDR_BLDY) & 0x1F); + + // Unpack 16 bit pixel for fast blending operation + u32 epixel = (pixcol | (pixcol << 16)) & BLND_MSK; + u32 pa = ((BLND_MSK * brightness) >> 4) & BLND_MSK; // White color + u32 pb = ((epixel * (16 - brightness)) >> 4) & BLND_MSK; // Pixel color + epixel = (pa + pb) & BLND_MSK; + pixcol = (epixel >> 16) | epixel; + } else if (bd_1st_target && effect == COL_EFFECT_DARK) { + u32 brightness = MIN(16, read_ioreg(REG_ADDR_BLDY) & 0x1F); + u32 epixel = (pixcol | (pixcol << 16)) & BLND_MSK; + epixel = ((epixel * (16 - brightness)) >> 4) & BLND_MSK; // Pixel color + pixcol = (epixel >> 16) | epixel; + } + + // Fill the line with that color + while (start < end) + scanline[start++] = pixcol; +} + +// Renders all the available and enabled layers (in tiled mode). +// Walks the list of layers in visibility order and renders them in the +// specified mode (taking into consideration the first layer, etc). 
+template +void tile_render_layers(u32 start, u32 end, dsttype *dst_ptr, u32 enabled_layers) +{ + u32 lnum; + u32 base_done = 0; + u16 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u16 video_mode = dispcnt & 0x07; + bool obj_enabled = (enabled_layers & 0x10); // Objects are visible + + bool objlayer_is_1st_tgt = ((read_ioreg(REG_ADDR_BLDCNT) >> 4) & 1) != 0; + bool has_trans_obj = obj_alpha_count[read_ioreg(REG_ADDR_VCOUNT)]; + + for (lnum = 0; lnum < layer_count; lnum++) { + u32 layer = layer_order[lnum]; + bool is_obj = layer & 0x4; + if (is_obj && obj_enabled) { + bool can_skip_blend = !has_trans_obj && !objlayer_is_1st_tgt; + + // If it's the first layer, make sure to fill with backdrop color. + if (!base_done) + fill_line_background(start, end, dst_ptr); + + // Optimization: skip blending mode if no blending can happen to this layer + if (objmode == STCKCOLOR && can_skip_blend) + render_scanline_objs(layer & 0x3, start, end, dst_ptr, &PLTT[0x100]); + else + render_scanline_objs(layer & 0x3, start, end, dst_ptr, &PLTT[0x100]); + + base_done = 1; + } else if (!is_obj && ((1 << layer) & enabled_layers)) { + bool layer_is_1st_tgt = ((read_ioreg(REG_ADDR_BLDCNT) >> layer) & 1) != 0; + bool can_skip_blend = !has_trans_obj && !layer_is_1st_tgt; + + bool is_affine = (video_mode >= 1) && (layer >= 2); + u32 fnidx = (base_done) | (is_affine ? 2 : 0); + + // Can optimize rendering if no blending can really happen. 
+ // If stack mode, no blending and not base layer, we might speed up a bit + if (bgmode == STCKCOLOR && can_skip_blend) { + static const tile_render_function rdfns[4] = { + render_scanline_text, + render_scanline_text, + render_scanline_affine, + render_scanline_affine, + }; + rdfns[fnidx](layer, start, end, dst_ptr, PLTT); + } else { + static const tile_render_function rdfns[4] = { + render_scanline_text, + render_scanline_text, + render_scanline_affine, + render_scanline_affine, + }; + rdfns[fnidx](layer, start, end, dst_ptr, PLTT); + } + + base_done = 1; + } + } + + // Render background if we did not render any active layer. + if (!base_done) + fill_line_background(start, end, dst_ptr); +} + +// Renders all layers honoring color effects (blending, brighten/darken). +// It uses different rendering routines depending on the coloring effect +// requirements, speeding up common cases where no effects are used. + +// No effects use the `fullcolor` renderer (color values are written on the +// buffer), unless the line has semi-transparent objects, which still need the +// `stacked` renderer followed by merge_blend. +// For blending, we use the `stacked` renderer to record the two top-most pixels. +// For other effects we use `indexed_u16`, which records an indexed color in the +// buffer (used for darken/brighten effects at later passes) or `indexed_u32`, +// which similarly records indexed colors but into a 32 bit buffer that is then +// resolved by merge_blend (used when the line has semi-transparent objects). + +static void render_w_effects(u32 start, u32 end, u16 *scanline, u32 enable_flags, const layer_render_struct *renderers) +{ + bool effects_enabled = enable_flags & 0x20; // Window bit for effects. + bool obj_blend = obj_alpha_count[read_ioreg(REG_ADDR_VCOUNT)] > 0; + u16 bldcnt = read_ioreg(REG_ADDR_BLDCNT); + + // If the window bits disable effects, default to NONE + u32 effect_type = effects_enabled ? ((bldcnt >> 6) & 0x03) : COL_EFFECT_NONE; + + switch (effect_type) { + case COL_EFFECT_BRIGHT: { + // If no layers are 1st target, no effect will really happen. 
+ bool some_1st_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x3F) != 0; + // If the factor is zero, it's the same as "regular" rendering. + bool non_zero_blend = (read_ioreg(REG_ADDR_BLDY) & 0x1F) != 0; + if (some_1st_tgt && non_zero_blend) { + if (obj_blend) { + u32 tmp_buf[DISPLAY_WIDTH]; + renderers->indexed_u32(start, end, tmp_buf, enable_flags); + merge_blend(start, end, scanline, tmp_buf); + } else { + renderers->indexed_u16(start, end, scanline, enable_flags); + merge_brightness(start, end, scanline); + } + return; + } + } break; + + case COL_EFFECT_DARK: { + // If no layers are 1st target, no effect will really happen. + bool some_1st_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x3F) != 0; + // If the factor is zero, it's the same as "regular" rendering. + bool non_zero_blend = (read_ioreg(REG_ADDR_BLDY) & 0x1F) != 0; + if (some_1st_tgt && non_zero_blend) { + if (obj_blend) { + u32 tmp_buf[DISPLAY_WIDTH]; + renderers->indexed_u32(start, end, tmp_buf, enable_flags); + merge_blend(start, end, scanline, tmp_buf); + } else { + renderers->indexed_u16(start, end, scanline, enable_flags); + merge_brightness(start, end, scanline); + } + return; + } + } break; + + case COL_EFFECT_BLEND: { + // If no layers are 1st or 2nd target, no effect will really happen. + bool some_1st_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x003F) != 0; + bool some_2nd_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x3F00) != 0; + // If 1st target is 100% opacity and 2nd is 0%, just render regularly. + // NOTE(review): only the raw value 0x1F/0x00 is matched here; merge_blend + // clamps both factors to 16, so 0x10..0x1E with a zero 2nd factor gives the + // same result but still takes the blend path - confirm this is intended. + bool non_trns_tgt = (read_ioreg(REG_ADDR_BLDALPHA) & 0x1F1F) != 0x001F; + if (some_1st_tgt && some_2nd_tgt && non_trns_tgt) { + u32 tmp_buf[DISPLAY_WIDTH]; + renderers->stacked(start, end, tmp_buf, enable_flags); + if (obj_blend) + merge_blend(start, end, scanline, tmp_buf); + else + merge_blend(start, end, scanline, tmp_buf); + return; + } + } break; + + case COL_EFFECT_NONE: + // Default case, see below. + break; + }; + + // Default rendering mode, without layer effects (except perhaps sprites). 
+ if (obj_blend) { + u32 tmp_buf[DISPLAY_WIDTH]; + renderers->stacked(start, end, tmp_buf, enable_flags); + merge_blend(start, end, scanline, tmp_buf); + } else { + renderers->fullcolor(start, end, scanline, enable_flags); + } +} + +#define bitmap_layer_render_functions(rdmode, dsttype, mode, ttype, w, h) \ + { \ + { \ + render_scanline_bitmap, \ + render_scanline_bitmap, \ + render_scanline_bitmap, \ + }, \ + { \ + render_scanline_bitmap, \ + render_scanline_bitmap, \ + render_scanline_bitmap, \ + } \ + } + +static const bitmap_layer_render_struct idx32_bmrend[3][2] + = { bitmap_layer_render_functions(INDXCOLOR, u32, 3, u16, DISPLAY_WIDTH, DISPLAY_HEIGHT), + bitmap_layer_render_functions(INDXCOLOR, u32, 4, u8, DISPLAY_WIDTH, DISPLAY_HEIGHT), + bitmap_layer_render_functions(INDXCOLOR, u32, 5, u16, DISPLAY_HEIGHT, 128) }; + +// Render the BG and OBJ in a bitmap scanline from start to end ONLY if +// enable_flag allows that layer/OBJ. + +template +static void bitmap_render_layers(u32 start, u32 end, dsttype *scanline, u32 enable_flags) +{ + u16 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + bool has_trans_obj = obj_alpha_count[read_ioreg(REG_ADDR_VCOUNT)]; + bool objlayer_is_1st_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x10) != 0; + bool bg2_is_1st_tgt = (read_ioreg(REG_ADDR_BLDCNT) & 0x4) != 0; + + // Fill in the renderers for a layer based on the mode type, + static const bitmap_layer_render_struct renderers[3][2] + = { bitmap_layer_render_functions(bgmode, dsttype, 3, u16, DISPLAY_WIDTH, DISPLAY_HEIGHT), + bitmap_layer_render_functions(bgmode, dsttype, 4, u8, DISPLAY_WIDTH, DISPLAY_HEIGHT), + bitmap_layer_render_functions(bgmode, dsttype, 5, u16, DISPLAY_HEIGHT, 128) }; + + const u32 mosamount = read_ioreg(REG_ADDR_MOSAIC) & 0xFF; + u32 bg_control = read_ioreg(REG_ADDR_BG2CNT); + u32 mmode = ((bg_control & 0x40) && (mosamount != 0)) ? 
1 : 0; + + unsigned modeidx = (dispcnt & 0x07) - 3; + const bitmap_layer_render_struct *mode_rend = &renderers[modeidx][mmode]; + const bitmap_layer_render_struct *idxm_rend = &idx32_bmrend[modeidx][mmode]; + + u32 current_layer; + u32 layer_order_pos; + + fill_line_background(start, end, scanline); + + for (layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++) { + current_layer = layer_order[layer_order_pos]; + if (current_layer & 0x04) { + if (enable_flags & 0x10) { + bool can_skip_blend = !has_trans_obj && !objlayer_is_1st_tgt; + + // Optimization: skip blending mode if no blending can happen to this layer + if (objmode == STCKCOLOR && can_skip_blend) + render_scanline_objs(current_layer & 3, start, end, scanline, &PLTT[0x100]); + else + render_scanline_objs(current_layer & 3, start, end, scanline, &PLTT[0x100]); + } + } else { + if (enable_flags & 0x04) { + s32 dx = (s16)read_ioreg(REG_ADDR_BG2PA); + s32 dy = (s16)read_ioreg(REG_ADDR_BG2PC); + + // Optimization: Skip stack mode if there's no blending happening. + bool can_skip_blend = !has_trans_obj && !bg2_is_1st_tgt; + const bitmap_layer_render_struct *rd = (bgmode == STCKCOLOR && can_skip_blend) ? idxm_rend : mode_rend; + + if (dy) + rd->affine_render(start, end, scanline, PLTT); + else if (dx == 256) + rd->blit_render(start, end, scanline, PLTT); + else + rd->scale_render(start, end, scanline, PLTT); + } + } + } +} + +static const layer_render_struct tile_mode_renderers = { + .fullcolor = tile_render_layers, + .indexed_u16 = tile_render_layers, + .indexed_u32 = tile_render_layers, + .stacked = tile_render_layers, +}; + +static const layer_render_struct bitmap_mode_renderers = { + .fullcolor = bitmap_render_layers, + .indexed_u16 = bitmap_render_layers, + .indexed_u32 = bitmap_render_layers, + .stacked = bitmap_render_layers, +}; + +// Renders a full scanline, given an enable_flags mask (for which layers and +// effects are enabled). 
+static void render_scanline_conditional(u32 start, u32 end, u16 *scanline, u32 enable_flags) +{ + u16 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u32 video_mode = dispcnt & 0x07; + + // Check if any layer is actually active. + if (layer_count && (enable_flags & 0x1F)) { + // Color effects currently only supported in indexed-color modes (tiled and mode 4) + if (video_mode < 3) + render_w_effects(start, end, scanline, enable_flags, &tile_mode_renderers); + else if (video_mode == 4) + render_w_effects(start, end, scanline, enable_flags, &bitmap_mode_renderers); + else + // TODO: Implement mode 3 & 5 color effects (at least partially, ie. ST objs) + bitmap_mode_renderers.fullcolor(start, end, scanline, enable_flags); + } else + // Render the backdrop color, since no layers are enabled/visible. + render_backdrop(start, end, scanline); +} + +// Renders the area outside of all active windows +static void render_windowout_pass(u16 *scanline, u32 start, u32 end) +{ + u32 winout = read_ioreg(REG_ADDR_WINOUT); + u32 wndout_enable = winout & 0x3F; + + render_scanline_conditional(start, end, scanline, wndout_enable); +} + +// Renders window-obj. This is a pixel-level windowing effect, based on sprites +// (objects) with a special rendering mode (the sprites are not themselves +// visible but rather "enable" other pixels to be rendered conditionally). +static void render_windowobj_pass(u16 *scanline, u32 start, u32 end) +{ + u32 winout = read_ioreg(REG_ADDR_WINOUT); + u32 wndout_enable = winout & 0x3F; + + // First we render the "window-out" segment. + render_scanline_conditional(start, end, scanline, wndout_enable); + + // Now we render the objects in "copy" mode. This renders the scanline in + // WinObj-mode to a temporary buffer and performs a "copy-mode" render. + // In this mode, we copy pixels from the temp buffer to the final buffer + // whenever an object pixel is rendered. 
+ render_scanline_objs(4, start, end, scanline, NULL); + + // TODO: Evaluate whether it's better to render the whole line and copy, + // or render subsegments and copy as we go (depends on the pixel/obj count) +} + +// If the window Y coordinates are out of the window range we can skip +// rendering the inside of the window. +inline bool in_window_y(u32 vcount, u32 top, u32 bottom) +{ + // TODO: check if these are reversed when top-bottom are also reversed. + // NOTE(review): the reversed case below tests `> top` / `<= bottom` while the + // regular case tests `>= top` / `< bottom` - confirm the edge rows are intended. + if (top > DISPLAY_HEIGHT + 67) // This causes the window to be invisible + return false; + if (bottom > DISPLAY_HEIGHT + 67) // This makes it all visible + return true; + + if (top > bottom) /* Reversed: if not in the "band" */ + return vcount > top || vcount <= bottom; + + return vcount >= top && vcount < bottom; +} + +// Renders window 0/1. Checks boundaries and divides the segment into +// subsegments (if necessary) rendering each one in their right mode. +// outfn is called for "out-of-window" rendering. +template static void render_window_n_pass(u16 *scanline, u32 start, u32 end) +{ + u32 vcount = read_ioreg(REG_ADDR_VCOUNT); + // Check the Y coordinates to check if they fall in the right row + u32 win_top = WIN_GET_LOWER(*(winreg_t *)(REG_ADDR_WINxV(winnum))); + u32 win_bot = WIN_GET_HIGHER(*(winreg_t *)(REG_ADDR_WINxV(winnum))); + // Check the X coordinates and generate up to three segments + // Clip the coordinates to the [start, end) range. + u32 win_lraw = WIN_GET_LOWER(*(winreg_t *)(REG_ADDR_WINxH(winnum))); + u32 win_rraw = WIN_GET_HIGHER(*(winreg_t *)(REG_ADDR_WINxH(winnum))); + u32 win_l = MAX(start, MIN(end, win_lraw)); + u32 win_r = MAX(start, MIN(end, win_rraw)); + + bool goodwin = win_lraw < win_rraw; + + if (!in_window_y(vcount, win_top, win_bot) || (win_lraw == win_rraw)) + // WindowN is completely out, just render all out. 
+ outfn(scanline, start, end); + else { + // Render window within the clipped range + // Enable bits for stuff inside the window (and outside) + u32 winin = (*(winreg_t *)REG_ADDR_WININ) & 0xFFFF; + u32 wndn_enable = (winin >> (8 * winnum)) & 0x3F; + + // If the window is defined reversed (left > right), the areas are inverted. + if (goodwin) { + // Render [start, win_l) range (which is outside the window) + if (win_l != start) + outfn(scanline, start, win_l); + // Render the actual windowN pixels + render_scanline_conditional(win_l, win_r, scanline, wndn_enable); + // Render the [win_r, end) range (outside) + if (win_r != end) + outfn(scanline, win_r, end); + } else { + // Render [start, win_r) range (which is "inside" windowN) + if (win_r != start) + render_scanline_conditional(start, win_r, scanline, wndn_enable); + // The actual window is now outside, render recursively + outfn(scanline, win_r, win_l); + // Render the [win_l, end) range ("inside") + if (win_l != end) + render_scanline_conditional(win_l, end, scanline, wndn_enable); + } + } +} + +// Renders a full scanline, taking into consideration windowing effects. +// Breaks the rendering step into N steps, for each windowed region. +static void render_scanline_window(u16 *scanline) +{ + u16 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u32 win_ctrl = (dispcnt >> 13); + + // Priority decoding for windows + switch (win_ctrl) { + case 0x0: // No windows are active. 
+ render_scanline_conditional(0, DISPLAY_WIDTH, scanline); + break; + + case 0x1: // Window 0 + render_window_n_pass(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x2: // Window 1 + render_window_n_pass(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x3: // Window 0 & 1 + render_window_n_pass, 0>(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x4: // Window Obj + render_windowobj_pass(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x5: // Window 0 & Obj + render_window_n_pass(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x6: // Window 1 & Obj + render_window_n_pass(scanline, 0, DISPLAY_WIDTH); + break; + + case 0x7: // Window 0, 1 & Obj + render_window_n_pass, 0>(scanline, 0, DISPLAY_WIDTH); + break; + } +} + +static const u8 active_layers[] = { + 0x1F, // Mode 0, Tile BG0-3 and OBJ + 0x17, // Mode 1, Tile BG0-2 and OBJ + 0x1C, // Mode 2, Tile BG2-3 and OBJ + 0x14, // Mode 3, BMP BG2 and OBJ + 0x14, // Mode 4, BMP BG2 and OBJ + 0x14, // Mode 5, BMP BG2 and OBJ + 0, // Unused + 0, +}; + +void update_scanline(void) +{ + u32 pitch = get_screen_pitch(); + u16 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u32 vcount = read_ioreg(REG_ADDR_VCOUNT); + u16 *screen_offset = get_screen_pixels() + (vcount * pitch); + u32 video_mode = dispcnt & 0x07; + + order_layers((dispcnt >> 8) & active_layers[video_mode], vcount); + + // If the screen is in forced blank draw pure white. + if (dispcnt & 0x80) + memset(screen_offset, 0xff, DISPLAY_WIDTH * sizeof(u16)); + else + render_scanline_window(screen_offset); + + // Mode 0 does not use any affine params at all. + if (video_mode) { + // Account for vertical mosaic effect, by correcting affine references. 
+ const u32 bgmosv = ((read_ioreg(REG_ADDR_MOSAIC) >> 4) & 0xF) + 1; + + if (read_ioreg(REG_ADDR_BG2CNT) & 0x40) { // Mosaic enabled for this BG + if ((vcount % bgmosv) == bgmosv - 1) { // Correct after the last line + affine_reference_x[0] += (s16)read_ioreg(REG_ADDR_BG2PB) * bgmosv; + affine_reference_y[0] += (s16)read_ioreg(REG_ADDR_BG2PD) * bgmosv; + } + } else { + affine_reference_x[0] += (s16)read_ioreg(REG_ADDR_BG2PB); + affine_reference_y[0] += (s16)read_ioreg(REG_ADDR_BG2PD); + } + + if (read_ioreg(REG_ADDR_BG3CNT) & 0x40) { + if ((vcount % bgmosv) == bgmosv - 1) { + affine_reference_x[1] += (s16)read_ioreg(REG_ADDR_BG3PB) * bgmosv; + affine_reference_y[1] += (s16)read_ioreg(REG_ADDR_BG3PD) * bgmosv; + } + } else { + affine_reference_x[1] += (s16)read_ioreg(REG_ADDR_BG3PB); + affine_reference_y[1] += (s16)read_ioreg(REG_ADDR_BG3PD); + } + } +} + +extern "C" void DrawFrame_Fast(u16 *pixels) +{ + int i; + + gba_screen_pixels = pixels; + // convert_whole_palette(); + + // assume that the oam is only updated once before the frame + // starts to be drawn + u32 dispcnt = read_ioreg(REG_ADDR_DISPCNT); + u32 video_mode = dispcnt & 0x07; + order_obj(video_mode); + + for (i = 0; i < DISPLAY_HEIGHT; i++) { + + REG_VCOUNT = i; + if (((REG_DISPSTAT >> 8) & 0xFF) == REG_VCOUNT) { + REG_DISPSTAT |= INTR_FLAG_VCOUNT; + if (REG_DISPSTAT & DISPSTAT_VCOUNT_INTR) + gIntrTable[INTR_INDEX_VCOUNT](); + } + + // Render the backdrop color before each individual scanline. + // HBlank interrupt code could have changed it in between lines. 
+ update_scanline(); + + REG_DISPSTAT |= INTR_FLAG_HBLANK; + + RunDMAs(DMA_HBLANK); + + if (REG_DISPSTAT & DISPSTAT_HBLANK_INTR) + gIntrTable[INTR_INDEX_HBLANK](); + + REG_DISPSTAT &= ~INTR_FLAG_HBLANK; + REG_DISPSTAT &= ~INTR_FLAG_VCOUNT; + } + + video_reload_counters(); +} + +#endif diff --git a/src/platform/win32/win32.c b/src/platform/win32/win32.c index 261a18e60..60d375dfb 100644 --- a/src/platform/win32/win32.c +++ b/src/platform/win32/win32.c @@ -449,4 +449,4 @@ void *Platform_malloc(size_t numBytes) { return HeapAlloc(GetProcessHeap(), HEAP void Platform_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); } -void Platform_QueueAudio(const u8 *data, u32 numBytes) { } +void Platform_QueueAudio(const float *data, u32 numBytes) { } diff --git a/src/sprite.c b/src/sprite.c index 94f677da0..234ae233e 100644 --- a/src/sprite.c +++ b/src/sprite.c @@ -9,7 +9,7 @@ #include "animation_commands.h" #include "platform/platform.h" -#if !PLATFORM_GBA && !PLATFORM_SDL +#if !PLATFORM_GBA && RENDERER != RENDERER_SOFTWARE_FAST && RENDERER != RENDERER_SOFTWARE extern void Platform_DisplaySprite(Sprite *sprite, u8 oamPaletteNum); #endif @@ -722,7 +722,7 @@ void DisplaySprite(Sprite *sprite) oam->split.paletteNum += sprite->palId; #endif -#if !PLATFORM_GBA && !PLATFORM_SDL +#if !PLATFORM_GBA && (RENDERER != RENDERER_SOFTWARE_FAST && RENDERER != RENDERER_SOFTWARE) // TEMP // Quick hack for getting output in OpenGL test // The whole function call should be replaced by this! diff --git a/tools/scaninc/source_file.cpp b/tools/scaninc/source_file.cpp index 9d188eb73..53e258d95 100644 --- a/tools/scaninc/source_file.cpp +++ b/tools/scaninc/source_file.cpp @@ -31,7 +31,7 @@ SourceFileType GetFileType(std::string& path) std::string extension = path.substr(pos + 1); - if (extension == "c") + if (extension == "c" || extension == "cc") return SourceFileType::Cpp; else if (extension == "s") return SourceFileType::Asm;