Skip to content

Commit

Permalink
Merge pull request #1 from wheremyfoodat/arm64-jot
Browse files Browse the repository at this point in the history
Arm64 jot
  • Loading branch information
GabrielBRDeveloper authored Jan 9, 2024
2 parents 0768342 + aaef3b6 commit 20ec498
Show file tree
Hide file tree
Showing 7 changed files with 1,095 additions and 5 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,6 @@
[submodule "third_party/zep"]
path = third_party/zep
url = https://github.com/Panda3DS-emu/zep
[submodule "third_party/oaknut"]
path = third_party/oaknut
url = https://github.com/merryhime/oaknut
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ endif()
# Check for arm64
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
set(HOST_ARM64 TRUE)
add_subdirectory(third_party/oaknut) # Add Oaknut submodule for arm64 JITs
include_directories(third_party/oaknut/include)
add_compile_definitions(PANDA3DS_DYNAPICA_SUPPORTED)
add_compile_definitions(PANDA3DS_ARM64_HOST)
else()
set(HOST_ARM64 FALSE)
Expand Down Expand Up @@ -176,6 +179,7 @@ set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services
set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA/shader_unit.cpp
src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp
src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp
src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp
)

set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp)
Expand Down Expand Up @@ -240,6 +244,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp
include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp
include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp
include/PICA/dynapica/shader_rec_emitter_arm64.hpp
)

cmrc_add_resource_library(
Expand Down
4 changes: 3 additions & 1 deletion include/PICA/dynapica/shader_rec.hpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
#pragma once
#include "PICA/shader.hpp"

#if defined(PANDA3DS_DYNAPICA_SUPPORTED) && defined(PANDA3DS_X64_HOST)
#if defined(PANDA3DS_DYNAPICA_SUPPORTED) && (defined(PANDA3DS_X64_HOST) || defined(PANDA3DS_ARM64_HOST))
#define PANDA3DS_SHADER_JIT_SUPPORTED
#include <memory>
#include <unordered_map>

#ifdef PANDA3DS_X64_HOST
#include "shader_rec_emitter_x64.hpp"
#elif defined(PANDA3DS_ARM64_HOST)
#include "shader_rec_emitter_arm64.hpp"
#endif
#endif

Expand Down
130 changes: 130 additions & 0 deletions include/PICA/dynapica/shader_rec_emitter_arm64.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#pragma once

// Only do anything if we're on an arm64 target with JIT support enabled
#if defined(PANDA3DS_DYNAPICA_SUPPORTED) && defined(PANDA3DS_ARM64_HOST)
#include <array>
#include <oaknut/code_block.hpp>
#include <oaknut/oaknut.hpp>
#include <vector>

#include "PICA/shader.hpp"
#include "helpers.hpp"
#include "logger.hpp"

// Emitter for the arm64 PICA shader recompiler. It owns a block of executable memory (oaknut::CodeBlock)
// and emits code into it through oaknut::CodeGenerator.
// Usage, as far as this header shows: call compile() on a shader, then fetch the prologue with
// getPrologueCallback() and per-PC entrypoints with getInstructionCallback().
class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
    static constexpr size_t executableMemorySize = PICAShader::maxInstructionCount * 96;  // How much executable memory to alloc for each shader
    // Allocate some extra space as padding for security purposes in the extremely unlikely occasion we manage to overflow the above size
    static constexpr size_t allocSize = executableMemorySize + 0x1000;

    // If the swizzle field is this value then the swizzle pattern is .xyzw so we don't need a shuffle
    static constexpr uint noSwizzle = 0x1B;

    using f24 = Floats::f24;
    using vec4f = std::array<f24, 4>;

    // An array of labels (incl pointers) to each compiled (to arm64) PICA instruction
    std::array<oaknut::Label, PICAShader::maxInstructionCount> instructionLabels;
    // A vector of PCs that can potentially return based on the state of the PICA callstack.
    // Filled before compiling a shader by scanning the code for call instructions
    std::vector<u32> returnPCs;

    // An array of 128-bit masks for blending registers together to perform masked writes.
    // Eg for writing only the x and y components, the mask is 0x00000000'00000000'FFFFFFFF'FFFFFFFF
    oaknut::Label blendMasks;

    u32 recompilerPC = 0;  // PC the recompiler is currently recompiling @
    u32 loopLevel = 0;     // The current loop nesting level (0 = not in a loop)

    // Shows whether the loaded shader has any log2 and exp2 instructions
    bool codeHasLog2 = false;
    bool codeHasExp2 = false;

    // Labels for the log2/exp2 helper routines, and the functions that emit those routines
    oaknut::Label log2Func, exp2Func;
    oaknut::Label emitLog2Func();
    oaknut::Label emitExp2Func();

    // Compile all instructions from [current recompiler PC, end)
    void compileUntil(const PICAShader& shaderUnit, u32 endPC);
    // Compile a single instruction
    // NOTE(review): presumably the instruction at recompilerPC - confirm against the implementation
    void compileInstruction(const PICAShader& shaderUnit);

    // Returns true if the instruction's opcode (its top 6 bits) is CALL, CALLC or CALLU.
    // Static because it only inspects its argument and never touches emitter state
    static bool isCall(u32 instruction) {
        const u32 opcode = instruction >> 26;
        return (opcode == ShaderOpcodes::CALL) || (opcode == ShaderOpcodes::CALLC) || (opcode == ShaderOpcodes::CALLU);
    }

    // Scan the shader code for call instructions to fill up the returnPCs vector before starting compilation
    // We also scan for log2/exp2 instructions to see whether to emit the relevant functions
    void scanCode(const PICAShader& shaderUnit);

    // Load the PICA source register with number "src", indexed by index "idx", into the arm64 register "dest"
    template <int sourceIndex>
    void loadRegister(oaknut::QReg dest, const PICAShader& shader, u32 src, u32 idx, u32 operandDescriptor);
    // Store the arm64 register "source" into the PICA destination register with number "dest"
    void storeRegister(oaknut::QReg source, const PICAShader& shader, u32 dest, u32 operandDescriptor);

    const vec4f& getSourceRef(const PICAShader& shader, u32 src);
    const vec4f& getDestRef(const PICAShader& shader, u32 dest);

    // Check the value of the cmp register for instructions like ifc and callc
    // Result is returned in the zero flag. If the comparison is true then zero == 1, else zero == 0
    void checkCmpRegister(const PICAShader& shader, u32 instruction);

    // Check the value of the bool uniform for instructions like ifu and callu
    // Result is returned in the zero flag. If the comparison is true then zero == 0, else zero == 1 (Opposite of checkCmpRegister)
    void checkBoolUniform(const PICAShader& shader, u32 instruction);

    // Instruction recompilation functions, each named after the PICA opcode it recompiles
    void recADD(const PICAShader& shader, u32 instruction);
    void recCALL(const PICAShader& shader, u32 instruction);
    void recCALLC(const PICAShader& shader, u32 instruction);
    void recCALLU(const PICAShader& shader, u32 instruction);
    void recCMP(const PICAShader& shader, u32 instruction);
    void recDP3(const PICAShader& shader, u32 instruction);
    void recDP4(const PICAShader& shader, u32 instruction);
    void recDPH(const PICAShader& shader, u32 instruction);
    void recEMIT(const PICAShader& shader, u32 instruction);
    void recEND(const PICAShader& shader, u32 instruction);
    void recEX2(const PICAShader& shader, u32 instruction);
    void recFLR(const PICAShader& shader, u32 instruction);
    void recIFC(const PICAShader& shader, u32 instruction);
    void recIFU(const PICAShader& shader, u32 instruction);
    void recJMPC(const PICAShader& shader, u32 instruction);
    void recJMPU(const PICAShader& shader, u32 instruction);
    void recLG2(const PICAShader& shader, u32 instruction);
    void recLOOP(const PICAShader& shader, u32 instruction);
    void recMAD(const PICAShader& shader, u32 instruction);
    void recMAX(const PICAShader& shader, u32 instruction);
    void recMIN(const PICAShader& shader, u32 instruction);
    void recMOVA(const PICAShader& shader, u32 instruction);
    void recMOV(const PICAShader& shader, u32 instruction);
    void recMUL(const PICAShader& shader, u32 instruction);
    void recRCP(const PICAShader& shader, u32 instruction);
    void recRSQ(const PICAShader& shader, u32 instruction);
    void recSETEMIT(const PICAShader& shader, u32 instruction);
    void recSGE(const PICAShader& shader, u32 instruction);
    void recSLT(const PICAShader& shader, u32 instruction);

    MAKE_LOG_FUNCTION(log, shaderJITLogger)

  public:
    // Callback type used for instructions
    using InstructionCallback = const void (*)(PICAShader& shaderUnit);
    // Callback type used for the JIT prologue. This is what the caller will call
    using PrologueCallback = const void (*)(PICAShader& shaderUnit, InstructionCallback cb);

    // Entrypoint of the emitted prologue. Stays nullptr until something assigns it
    // NOTE(review): presumably set by compile() - confirm against the implementation
    PrologueCallback prologueCb = nullptr;

    // Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer
    ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {}

    // PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
    InstructionCallback getInstructionCallback(u32 pc) {
        // Take the address of the label for this PC and reinterpret it as a callable function pointer
        u8* ptr = instructionLabels.at(pc).ptr<u8*>();
        return reinterpret_cast<InstructionCallback>(ptr);
    }

    // Returns the prologue callback (nullptr if none has been set yet)
    PrologueCallback getPrologueCallback() const { return prologueCb; }
    // Compile the given shader. Implemented out of line
    void compile(const PICAShader& shaderUnit);
};

#endif // arm64 recompiler check
Loading

0 comments on commit 20ec498

Please sign in to comment.