Skip to content

Commit

Permalink
gba: cycle-based bitmap and affine backgrounds (#1714)
Browse files Browse the repository at this point in the history
The pixel accuracy setting will now emulate the sub-pixel timing
behaviours for bitmap and affine backgrounds described in [fleroviux's
PPU
docs](https://github.com/nba-emu/hw-docs/blob/main/src/ppu/background.md).

This PR also splits the PPU into two separate libco threads: one for
rendering and one for raising IRQs and starting DMAs. This allows the
CPU and PPU to be run more asynchronously, which helps mitigate the
performance cost of finer-grained PPU emulation.
  • Loading branch information
png183 authored Dec 17, 2024
1 parent a752b85 commit 1bb1669
Show file tree
Hide file tree
Showing 17 changed files with 399 additions and 190 deletions.
2 changes: 2 additions & 0 deletions ares/gba/GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ ares.objects += ares-gba-player
ares.objects += ares-gba-cpu
ares.objects += ares-gba-ppu
ares.objects += ares-gba-apu
ares.objects += ares-gba-display

$(object.path)/ares-gba-memory.o: $(ares.path)/gba/memory/memory.cpp
$(object.path)/ares-gba-system.o: $(ares.path)/gba/system/system.cpp
Expand All @@ -15,3 +16,4 @@ $(object.path)/ares-gba-player.o: $(ares.path)/gba/player/player.cpp
$(object.path)/ares-gba-cpu.o: $(ares.path)/gba/cpu/cpu.cpp
$(object.path)/ares-gba-ppu.o: $(ares.path)/gba/ppu/ppu.cpp
$(object.path)/ares-gba-apu.o: $(ares.path)/gba/apu/apu.cpp
$(object.path)/ares-gba-display.o: $(ares.path)/gba/display/display.cpp
6 changes: 3 additions & 3 deletions ares/gba/cpu/bus.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ auto CPU::set(u32 mode, n32 address, n32 word) -> void {
if(address < 0x0200'0000) bios.write(mode, address, word);
else if(address < 0x0300'0000) writeEWRAM(mode, address, word);
else if(address < 0x0400'0000) writeIWRAM(mode, address, word);
else if(address >= 0x0700'0000) ppu.writeOAM(mode, address, word);
else if(address >= 0x0600'0000) ppu.writeVRAM(mode, address, word);
else if(address >= 0x0500'0000) ppu.writePRAM(mode, address, word);
else if(address >= 0x0700'0000) { synchronize(ppu); ppu.writeOAM(mode, address, word); }
else if(address >= 0x0600'0000) { synchronize(ppu); ppu.writeVRAM(mode, address, word); }
else if(address >= 0x0500'0000) { synchronize(ppu); ppu.writePRAM(mode, address, word); }
else if((address & 0xffff'fc00) == 0x0400'0000) bus.io[address & 0x3ff]->writeIO(mode, address, word);
else if((address & 0xff00'ffff) == 0x0400'0800) ((IO*)this)->writeIO(mode, 0x0400'0800 | (address & 3), word);
}
Expand Down
6 changes: 3 additions & 3 deletions ares/gba/cpu/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,13 @@ auto CPU::step(u32 clocks) -> void {
}

Thread::step(clocks);
Thread::synchronize(ppu, player);
Thread::synchronize(display, player);

//occasionally synchronize with APU in case CPU has not recently interacted with it
//occasionally synchronize with PPU and APU in case CPU has not recently interacted with them
static u32 counter = 0;
counter += clocks;
if(counter >= 1024) {
Thread::synchronize(apu);
Thread::synchronize(ppu, apu);
counter = 0;
}
}
Expand Down
89 changes: 89 additions & 0 deletions ares/gba/display/display.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include <gba/gba.hpp>

//The only PPU state the CPU needs on every cycle is raised IRQs and DMAs,
//which occur independently of the render process.
//Display exists to put these events on a separate thread,
//so the CPU and PPU can run out-of-order.

//hdraw: 1006 cycles
//hblank: 226 cycles
//scanline: 1232 cycles

//vdraw: 160 scanlines (197120 cycles)
//vblank: 68 scanlines ( 83776 cycles)
//frame: 228 scanlines (280896 cycles)

namespace ares::GameBoyAdvance {

Display display;
#include "io.cpp"
#include "serialization.cpp"

//Appends a Node::Object named "Display" to the given parent node
//and keeps the resulting handle in `node`.
auto Display::load(Node::Object parent) -> void {
node = parent->append<Node::Object>("Display");
}

//Drops the node handle that load() stored in `node`.
auto Display::unload() -> void {
node.reset();
}

//Advances this thread's clock by `clocks` cycles and then synchronizes with the CPU thread.
//main() calls this between events, so each IRQ/DMA is issued at its own cycle offset within the scanline.
auto Display::step(u32 clocks) -> void {
Thread::step(clocks);
Thread::synchronize(cpu);
}

//Runs the display timing for exactly one scanline.
//The step() calls below add up to 1 + 1 + 3 + 1002 + 1 + 1 + 223 = 1232 cycles (one scanline).
//Along the way it updates the DISPSTAT status flags, raises the VBlank / VCoincidence / HBlank
//interrupts on the CPU, triggers the matching DMAs, and finally advances the line counter.
//The order of statements relative to step() defines the cycle at which each event fires.
auto Display::main() -> void {
//keypad is run once per scanline, at the very start of the line
cpu.keypad.run();

//the vblank status flag is set on lines 160-226 and clear on line 227
io.vblank = io.vcounter >= 160 && io.vcounter <= 226;

step(1);

//cycle 1: latch the line-compare result and raise the VBlank IRQ on the first vblank line
io.vcoincidence = io.vcounter == io.vcompare;

if(io.vcounter == 160) {
if(io.irqvblank) cpu.setInterruptFlag(CPU::Interrupt::VBlank);
}

step(1);

//cycle 2: VCoincidence IRQ (uses the flag latched one cycle earlier) and VBlank DMA
if(io.irqvcoincidence) {
if(io.vcoincidence) cpu.setInterruptFlag(CPU::Interrupt::VCoincidence);
}

if(io.vcounter == 160) {
cpu.dmaVblank();
}

step(3);

//cycle 5: DMA3 video capture handling
//on line 162, a capture that was active is stopped by disabling DMA3;
//a new capture only starts if none was active and DMA3 is enabled with timingMode 3
if(io.vcounter == 162) {
if(videoCapture) cpu.dma[3].enable = 0;
videoCapture = !videoCapture && cpu.dma[3].timingMode == 3 && cpu.dma[3].enable;
}
//while a capture is active, the transfer is triggered on lines 2-161
if(io.vcounter >= 2 && io.vcounter < 162 && videoCapture) cpu.dmaHDMA();

step(1002);

//cycle 1007: the hblank status flag becomes visible
io.hblank = 1;

step(1);
//cycle 1008: HBlank IRQ is raised on every line, including those at or beyond line 160
if(io.irqhblank) cpu.setInterruptFlag(CPU::Interrupt::HBlank);

step(1);
//cycle 1009: HBlank DMA is only triggered on lines 0-159
if(io.vcounter < 160) cpu.dmaHblank();

step(223);
//cycle 1232: end of scanline; the line counter wraps after 228 lines (0-227)
io.hblank = 0;
if(++io.vcounter == 228) io.vcounter = 0;
}

auto Display::power() -> void {
Thread::create(system.frequency(), {&Display::main, this});

for(u32 n = 0x004; n <= 0x007; n++) bus.io[n] = this;

io = {};
}

}
34 changes: 34 additions & 0 deletions ares/gba/display/display.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
//Display timing component: a separate thread that maintains DISPSTAT/VCOUNT,
//raises the display-related IRQs and triggers the display-related DMAs on the CPU.
struct Display : Thread, IO {
Node::Object node;

auto load(Node::Object) -> void;
auto unload() -> void;

//step: advance the thread clock and synchronize with the CPU
//main: run one scanline worth of timing events
auto step(u32 clocks) -> void;
auto main() -> void;

auto power() -> void;

//io.cpp
//byte-wide accessors for 0x0400'0004-0x0400'0007 (DISPSTAT, VCOUNT)
auto readIO(n32 address) -> n8;
auto writeIO(n32 address, n8 byte) -> void;

//serialization.cpp
auto serialize(serializer&) -> void;

//register state; reset as a whole by power()
struct IO {
//DISPSTAT low byte, bits 0-2: status flags maintained by main() (not written by writeIO)
n1 vblank;
n1 hblank;
n1 vcoincidence;
//DISPSTAT low byte, bits 3-5: IRQ enables
n1 irqvblank;
n1 irqhblank;
n1 irqvcoincidence;
//DISPSTAT high byte: line number compared against vcounter
n8 vcompare;

//VCOUNT: current scanline, 0-227
n16 vcounter;
} io;

//set while a DMA3 transfer with timingMode 3 is being driven per scanline by main();
//kept outside of io, so `io = {}` does not touch it
n1 videoCapture = 0;
};

extern Display display;
40 changes: 40 additions & 0 deletions ares/gba/display/io.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//Byte read handler for the display timing registers.
//Addresses that are not handled here return the CPU's open bus value.
auto Display::readIO(n32 address) -> n8 {
  //DISPSTAT, low byte: status flags (bits 0-2) and IRQ enables (bits 3-5); bits 6-7 read as zero
  if(address == 0x0400'0004) {
    n8 status = 0;
    status.bit(0) = io.vblank;
    status.bit(1) = io.hblank;
    status.bit(2) = io.vcoincidence;
    status.bit(3) = io.irqvblank;
    status.bit(4) = io.irqhblank;
    status.bit(5) = io.irqvcoincidence;
    return status;
  }

  //DISPSTAT, high byte: line compare value
  if(address == 0x0400'0005) return io.vcompare;

  //VCOUNT, low and high byte
  if(address == 0x0400'0006) return io.vcounter.byte(0);
  if(address == 0x0400'0007) return io.vcounter.byte(1);

  return cpu.openBus.get(Byte, address);
}

//Byte write handler for the display timing registers.
//Only DISPSTAT is writable here; writes to any other address are ignored.
auto Display::writeIO(n32 address, n8 data) -> void {
  //DISPSTAT, low byte: only the IRQ enables (bits 3-5) are stored;
  //the status flags in bits 0-2 are left untouched
  if(address == 0x0400'0004) {
    io.irqvblank       = data.bit(3);
    io.irqhblank       = data.bit(4);
    io.irqvcoincidence = data.bit(5);
    return;
  }

  //DISPSTAT, high byte: line compare value
  if(address == 0x0400'0005) {
    io.vcompare = data;
    return;
  }
}
12 changes: 12 additions & 0 deletions ares/gba/display/serialization.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
//Passes every piece of Display state through the serializer: all io fields, then videoCapture.
//NOTE(review): the field order presumably defines the save-state layout - confirm before reordering.
auto Display::serialize(serializer& s) -> void {
//DISPSTAT status flags
s(io.vblank);
s(io.hblank);
s(io.vcoincidence);
//DISPSTAT IRQ enables
s(io.irqvblank);
s(io.irqhblank);
s(io.irqvcoincidence);
//line compare value and current scanline
s(io.vcompare);
s(io.vcounter);

//internal state that is not part of io
s(videoCapture);
}
1 change: 1 addition & 0 deletions ares/gba/gba.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ namespace ares::GameBoyAdvance {
#include <gba/cpu/cpu.hpp>
#include <gba/ppu/ppu.hpp>
#include <gba/apu/apu.hpp>
#include <gba/display/display.hpp>
}
92 changes: 51 additions & 41 deletions ares/gba/ppu/background.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,20 @@ auto PPU::Background::setEnable(n1 status) -> void {
}

auto PPU::Background::scanline(u32 y) -> void {
memory::move(io.enable, io.enable + 1, sizeof(io.enable) - 1);
mosaicOffset = 0;
for(auto& pixel : output) pixel = {};
}

auto PPU::Background::run(u32 x, u32 y) -> void {
output = {};
if(ppu.blank() || !io.enable[0]) {
mosaic = {};
return;
//Latches the pixel that this background contributes at column x into `mosaic`,
//applying the horizontal mosaic effect. `y` is not used here.
//NOTE(review): output[x] is indexed without a bounds check - presumably callers only pass x in 0-239; confirm.
auto PPU::Background::outputPixel(u32 x, u32 y) -> void {
//horizontal mosaic
//with mosaic disabled, every call latches output[x];
//with mosaic enabled, a new pixel is only latched when the countdown has reached zero,
//so the same pixel is kept for (1 + io.mosaicWidth) consecutive calls
if(!io.mosaic || !mosaicOffset) {
mosaicOffset = 1 + io.mosaicWidth;
mosaic = output[x];
}
mosaicOffset--;
}

auto PPU::Background::run(u32 x, u32 y) -> void {
switch(id) {
case PPU::BG0:
if(io.mode <= 1) { linear(x, y); break; }
Expand All @@ -32,25 +35,21 @@ auto PPU::Background::run(u32 x, u32 y) -> void {

case PPU::BG2:
if(io.mode == 0) { linear(x, y); break; }
if(io.mode <= 2) { affine(x, y); break; }
if(io.mode <= 2) { affineFetchTileMap(x, y); affineFetchTileData(x, y); break; }
if(io.mode <= 5) { bitmap(x, y); break; }
break;

case PPU::BG3:
if(io.mode == 0) { linear(x, y); break; }
if(io.mode == 2) { affine(x, y); break; }
if(io.mode == 2) { affineFetchTileMap(x, y); affineFetchTileData(x, y); break; }
break;
}

//horizontal mosaic
if(!io.mosaic || !mosaicOffset) {
mosaicOffset = 1 + io.mosaicWidth;
mosaic = output;
}
mosaicOffset--;
}

auto PPU::Background::linear(u32 x, u32 y) -> void {
if(x > 239) return;
if(ppu.blank() || !io.enable[0]) return;

if(x == 0) {
if(!io.mosaic || (y % (1 + io.mosaicHeight)) == 0) {
vmosaic = y;
Expand Down Expand Up @@ -84,23 +83,26 @@ auto PPU::Background::linear(u32 x, u32 y) -> void {
if(io.colorMode == 0) {
u32 offset = (io.characterBase << 14) + (latch.character << 5) + (py << 2) + (px >> 1);
if(n4 color = ppu.readVRAM_BG(Byte, offset) >> (px & 1 ? 4 : 0)) {
output.enable = true;
output.priority = io.priority;
output.color = latch.palette << 4 | color;
output[x].enable = true;
output[x].priority = io.priority;
output[x].color = latch.palette << 4 | color;
}
} else {
u32 offset = (io.characterBase << 14) + (latch.character << 6) + (py << 3) + (px);
if(n8 color = ppu.readVRAM_BG(Byte, offset)) {
output.enable = true;
output.priority = io.priority;
output.color = color;
output[x].enable = true;
output[x].priority = io.priority;
output[x].color = color;
}
}

fx++;
}

auto PPU::Background::affine(u32 x, u32 y) -> void {
auto PPU::Background::affineFetchTileMap(u32 x, u32 y) -> void {
if(x > 239) return;
if(ppu.blank() || !io.enable[0]) return;

if(x == 0) {
if(!io.mosaic || (y % (1 + io.mosaicHeight)) == 0) {
hmosaic = io.lx;
Expand All @@ -110,24 +112,29 @@ auto PPU::Background::affine(u32 x, u32 y) -> void {
fy = vmosaic;
}

u32 screenSize = 16 << io.screenSize;
u32 screenWrap = (1 << (io.affineWrap ? 7 + io.screenSize : 20)) - 1;
affine.screenSize = 16 << io.screenSize;
affine.screenWrap = (1 << (io.affineWrap ? 7 + io.screenSize : 20)) - 1;

u32 cx = (fx >> 8) & screenWrap;
u32 cy = (fy >> 8) & screenWrap;
affine.cx = (fx >> 8) & affine.screenWrap;
affine.cy = (fy >> 8) & affine.screenWrap;

u32 tx = cx >> 3;
u32 ty = cy >> 3;
affine.tx = affine.cx >> 3;
affine.ty = affine.cy >> 3;

n3 px = cx;
n3 py = cy;
affine.character = ppu.readVRAM(Byte, (io.screenBase << 11) + affine.ty * affine.screenSize + affine.tx);
}

if(tx < screenSize && ty < screenSize) {
n8 character = ppu.readVRAM(Byte, (io.screenBase << 11) + ty * screenSize + tx);
if(n8 color = ppu.readVRAM_BG(Byte, (io.characterBase << 14) + (character << 6) + (py << 3) + px)) {
output.enable = true;
output.priority = io.priority;
output.color = color;
auto PPU::Background::affineFetchTileData(u32 x, u32 y) -> void {
if(x > 239) return;
if(ppu.blank() || !io.enable[0]) return;

if(affine.tx < affine.screenSize && affine.ty < affine.screenSize) {
n3 px = affine.cx;
n3 py = affine.cy;
if(n8 color = ppu.readVRAM_BG(Byte, (io.characterBase << 14) + (affine.character << 6) + (py << 3) + px)) {
output[x].enable = true;
output[x].priority = io.priority;
output[x].color = color;
}
}

Expand All @@ -141,6 +148,9 @@ auto PPU::Background::affine(u32 x, u32 y) -> void {
}

auto PPU::Background::bitmap(u32 x, u32 y) -> void {
if(x > 239) return;
if(ppu.blank() || !io.enable[0]) return;

if(x == 0) {
if(!io.mosaic || (y % (1 + io.mosaicHeight)) == 0) {
hmosaic = io.lx;
Expand All @@ -165,10 +175,10 @@ auto PPU::Background::bitmap(u32 x, u32 y) -> void {
n15 color = ppu.readVRAM_BG(mode, baseAddress + (offset << depth));

if(depth || color) { //8bpp color 0 is transparent; 15bpp color is always opaque
if(depth) output.directColor = true;
output.enable = true;
output.priority = io.priority;
output.color = color;
if(depth) output[x].directColor = true;
output[x].enable = true;
output[x].priority = io.priority;
output[x].color = color;
}
}

Expand All @@ -186,7 +196,7 @@ auto PPU::Background::power(u32 id) -> void {

io = {};
latch = {};
output = {};
for(auto& pixel : output) pixel = {};
mosaic = {};
mosaicOffset = 0;
hmosaic = 0;
Expand Down
Loading

0 comments on commit 1bb1669

Please sign in to comment.