From 8a2a08516c98ac206195755cf2c3bc5071e1edaf Mon Sep 17 00:00:00 2001 From: invertego Date: Sat, 14 Dec 2024 23:29:21 -0500 Subject: [PATCH] n64: step rsp dma during rsp execution Instead of advancing RSP DMA only after each CPU basic block, allow it to run between each RSP block. This prevents the RSP from racing with the DMAs it initiates in the event of a long running CPU block (which can trigger multiple consecutive blocks of RSP execution). This fixes hangs in Tarzan and possibly other games that use MusyX. Another observable side effect of this change is that when the RSP is halted, it no longer risks falling increasingly behind the CPU, because ares will now step multiple times if needed instead of just once. --- ares/n64/cpu/cpu.cpp | 1 - ares/n64/n64.hpp | 1 - ares/n64/rsp/dma.cpp | 21 +++++++++++++++++---- ares/n64/rsp/io.cpp | 4 ++-- ares/n64/rsp/rsp.cpp | 11 +++++++++-- ares/n64/rsp/rsp.hpp | 6 +++++- ares/n64/rsp/serialization.cpp | 1 + ares/n64/system/serialization.cpp | 2 +- 8 files changed, 35 insertions(+), 12 deletions(-) diff --git a/ares/n64/cpu/cpu.cpp b/ares/n64/cpu/cpu.cpp index fd5ed0b827..69dbe6587c 100644 --- a/ares/n64/cpu/cpu.cpp +++ b/ares/n64/cpu/cpu.cpp @@ -67,7 +67,6 @@ auto CPU::synchronize() -> void { queue.step(clocks, [](u32 event) { switch(event) { - case Queue::RSP_DMA: return rsp.dmaTransferStep(); case Queue::PI_DMA_Read: return pi.dmaFinished(); case Queue::PI_DMA_Write: return pi.dmaFinished(); case Queue::PI_BUS_Write: return pi.writeFinished(); diff --git a/ares/n64/n64.hpp b/ares/n64/n64.hpp index 79cdfcdb4d..0a31c8ad43 100644 --- a/ares/n64/n64.hpp +++ b/ares/n64/n64.hpp @@ -68,7 +68,6 @@ namespace ares::Nintendo64 { struct Queue : priority_queue { enum : u32 { - RSP_DMA, PI_DMA_Read, PI_DMA_Write, PI_BUS_Write, diff --git a/ares/n64/rsp/dma.cpp b/ares/n64/rsp/dma.cpp index 72b14eec4a..145c496dba 100644 --- a/ares/n64/rsp/dma.cpp +++ b/ares/n64/rsp/dma.cpp @@ -1,10 +1,23 @@ -auto RSP::dmaTransferStart(void) -> void { +auto RSP::dmaQueue(u32 clocks, Thread& thread) -> void { + dma.clock = (Thread::clock - thread.clock) - clocks; +} + +auto RSP::dmaStep(u32 clocks) -> void { + if(dma.busy.any()) { + dma.clock += clocks; + if(dma.clock >= 0) { + dmaTransferStep(); + } + } +} + +auto RSP::dmaTransferStart(Thread& thread) -> void { if(dma.busy.any()) return; if(dma.full.any()) { dma.current = dma.pending; dma.busy = dma.full; dma.full = {0,0}; - queue.insert(Queue::RSP_DMA, (dma.current.length+8) / 8 * 3); + dmaQueue((dma.current.length+8) / 8 * 3, thread); } } @@ -39,10 +52,10 @@ auto RSP::dmaTransferStep() -> void { if(dma.current.count) { dma.current.count -= 1; dma.current.dramAddress += dma.current.skip; - queue.insert(Queue::RSP_DMA, (dma.current.length+8) / 8 * 3); + dmaQueue((dma.current.length+8) / 8 * 3, *this); } else { dma.busy = {0,0}; dma.current.length = 0xFF8; - dmaTransferStart(); + dmaTransferStart(*this); } } diff --git a/ares/n64/rsp/io.cpp b/ares/n64/rsp/io.cpp index b776f9d485..db889288df 100644 --- a/ares/n64/rsp/io.cpp +++ b/ares/n64/rsp/io.cpp @@ -100,7 +100,7 @@ auto RSP::ioWrite(u32 address, u32 data_, Thread& thread) -> void { dma.full.read = 1; dma.full.write = 0; // printf("RSP DMA Read: %08x => %08x %08x\n", dma.pending.dramAddress, dma.pending.pbusAddress, dma.pending.length); - dmaTransferStart(); + dmaTransferStart(thread); } if(address == 3) { @@ -112,7 +112,7 @@ auto RSP::ioWrite(u32 address, u32 data_, Thread& thread) -> void { dma.pending.originPc = dma.pending.originCpu ? cpu.ipu.pc : (u64)rsp.ipu.r[31].u32; dma.full.write = 1; dma.full.read = 0; - dmaTransferStart(); + dmaTransferStart(thread); } if(address == 4) { diff --git a/ares/n64/rsp/rsp.cpp b/ares/n64/rsp/rsp.cpp index 6776425fb5..805b7dc3fb 100644 --- a/ares/n64/rsp/rsp.cpp +++ b/ares/n64/rsp/rsp.cpp @@ -31,8 +31,15 @@ auto RSP::unload() -> void { auto RSP::main() -> void { while(Thread::clock < 0) { - if(status.halted) return step(128); - instruction(); + auto clock = Thread::clock; + + if(status.halted) { + step(128); + } else { + instruction(); + } + + dmaStep(Thread::clock - clock); } } diff --git a/ares/n64/rsp/rsp.hpp b/ares/n64/rsp/rsp.hpp index c443316bf1..8fe66926fe 100644 --- a/ares/n64/rsp/rsp.hpp +++ b/ares/n64/rsp/rsp.hpp @@ -208,7 +208,9 @@ struct RSP : Thread, Memory::RCP { } pipeline; //dma.cpp - auto dmaTransferStart() -> void; + auto dmaQueue(u32 clocks, Thread& thread) -> void; + auto dmaStep(u32 clocks) -> void; + auto dmaTransferStart(Thread& thread) -> void; auto dmaTransferStep() -> void; //io.cpp @@ -240,6 +242,8 @@ struct RSP : Thread, Memory::RCP { auto any() -> n1 { return read | write; } } busy, full; + + s64 clock; } dma; struct Status : Memory::RCP { diff --git a/ares/n64/rsp/serialization.cpp b/ares/n64/rsp/serialization.cpp index 9f8ab6781d..87eee3900d 100644 --- a/ares/n64/rsp/serialization.cpp +++ b/ares/n64/rsp/serialization.cpp @@ -18,6 +18,7 @@ auto RSP::serialize(serializer& s) -> void { s(dma.busy.write); s(dma.full.read); s(dma.full.write); + s(dma.clock); s(status.semaphore); s(status.halted); diff --git a/ares/n64/system/serialization.cpp b/ares/n64/system/serialization.cpp index 330d7147fd..d06abdf467 100644 --- a/ares/n64/system/serialization.cpp +++ b/ares/n64/system/serialization.cpp @@ -1,4 +1,4 @@ -static const string SerializerVersion = "v135"; +static const string SerializerVersion = "v141.1"; auto System::serialize(bool synchronize) -> serializer { serializer s;