From 57b8be463a937e5acee025f4cccdfd4ac6b73ec9 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Mon, 17 Jun 2024 09:09:34 -0500 Subject: [PATCH] Revert [mlir][Target] Improve ROCDL gpu serialization API (#95790) Reverts llvm/llvm-project#95456 --- mlir/include/mlir/Target/LLVM/ROCDL/Utils.h | 41 +-- mlir/lib/Dialect/GPU/CMakeLists.txt | 2 +- mlir/lib/Target/LLVM/CMakeLists.txt | 7 +- mlir/lib/Target/LLVM/ROCDL/Target.cpp | 279 +++++++++----------- 4 files changed, 136 insertions(+), 193 deletions(-) diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h index 44c9ded317fa58..374fa65bd02e3b 100644 --- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h +++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h @@ -27,19 +27,6 @@ namespace ROCDL { /// 5. Returns an empty string. StringRef getROCMPath(); -/// Helper enum for specifying the AMD GCN device libraries required for -/// compilation. -enum class AMDGCNLibraries : uint32_t { - None = 0, - Ockl = 1, - Ocml = 2, - OpenCL = 4, - Hip = 8, - LastLib = Hip, - LLVM_MARK_AS_BITMASK_ENUM(LastLib), - All = (LastLib << 1) - 1 -}; - /// Base class for all ROCDL serializations from GPU modules into binary /// strings. By default this class serializes into LLVM bitcode. class SerializeGPUModuleBase : public LLVM::ModuleToObject { @@ -62,8 +49,8 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// Returns the bitcode files to be loaded. ArrayRef getFileList() const; - /// Appends standard ROCm device libraries to `fileList`. - LogicalResult appendStandardLibs(AMDGCNLibraries libs); + /// Appends standard ROCm device libraries like `ocml.bc`, `ockl.bc`, etc. + LogicalResult appendStandardLibs(); /// Loads the bitcode files in `fileList`. virtual std::optional>> @@ -76,20 +63,15 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { LogicalResult handleBitcodeFile(llvm::Module &module) override; protected: - /// Adds `oclc` control variables to the LLVM module. - void addControlVariables(llvm::Module &module, AMDGCNLibraries libs, - bool wave64, bool daz, bool finiteOnly, - bool unsafeMath, bool fastMath, bool correctSqrt, - StringRef abiVer); + /// Appends the paths of common ROCm device libraries to `libs`. + LogicalResult getCommonBitcodeLibs(llvm::SmallVector &libs, + SmallVector &libPath, + StringRef isaVersion); - /// Compiles assembly to a binary. - virtual std::optional> - compileToBinary(const std::string &serializedISA); - - /// Default implementation of `ModuleToObject::moduleToObject`. - std::optional> - moduleToObjectImpl(const gpu::TargetOptions &targetOptions, - llvm::Module &llvmModule); + /// Adds `oclc` control variables to the LLVM module. + void addControlVariables(llvm::Module &module, bool wave64, bool daz, + bool finiteOnly, bool unsafeMath, bool fastMath, + bool correctSqrt, StringRef abiVer); /// Returns the assembled ISA. std::optional> assembleIsa(StringRef isa); @@ -102,9 +84,6 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// List of LLVM bitcode files to link to. SmallVector fileList; - - /// AMD GCN libraries to use when linking, the default is using none. - AMDGCNLibraries deviceLibs = AMDGCNLibraries::None; }; } // namespace ROCDL } // namespace mlir diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 08c8aea36fac9e..61ab298ebfb986 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -106,7 +106,7 @@ if(MLIR_ENABLE_ROCM_CONVERSIONS) "Building mlir with ROCm support requires the AMDGPU backend") endif() - set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs") + set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") target_compile_definitions(obj.MLIRGPUTransforms PRIVATE __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}" diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt index 4393ff1775ef9a..5a3fa160850b43 100644 --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -123,12 +123,17 @@ add_mlir_dialect_library(MLIRROCDLTarget ) if(MLIR_ENABLE_ROCM_CONVERSIONS) + if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)) + message(SEND_ERROR + "Building mlir with ROCm support requires the AMDGPU backend") + endif() + if (DEFINED ROCM_PATH) set(DEFAULT_ROCM_PATH "${ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") elseif(DEFINED ENV{ROCM_PATH}) set(DEFAULT_ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") else() - set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs") + set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") endif() message(VERBOSE "MLIR Default ROCM toolkit path: ${DEFAULT_ROCM_PATH}") diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index fdf1c93b372105..cc13e5b7436ea7 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -17,6 +17,9 @@ #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Target/LLVM/ROCDL/Utils.h" +#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/IR/Constants.h" @@ -109,9 +112,8 @@ SerializeGPUModuleBase::SerializeGPUModuleBase( if (auto file = dyn_cast(attr)) fileList.push_back(file.str()); - // By default add all libraries if the toolkit path is not empty. - if (!getToolkitPath().empty()) - deviceLibs = AMDGCNLibraries::All; + // Append standard ROCm device bitcode libraries to the files to be loaded. + (void)appendStandardLibs(); } void SerializeGPUModuleBase::init() { @@ -136,67 +138,29 @@ ArrayRef SerializeGPUModuleBase::getFileList() const { return fileList; } -LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) { - if (libs == AMDGCNLibraries::None) - return success(); +LogicalResult SerializeGPUModuleBase::appendStandardLibs() { StringRef pathRef = getToolkitPath(); - // Fail if the toolkit is empty. - if (pathRef.empty()) - return failure(); - - // Get the path for the device libraries - SmallString<256> path; - path.insert(path.begin(), pathRef.begin(), pathRef.end()); - llvm::sys::path::append(path, "amdgcn", "bitcode"); - pathRef = StringRef(path.data(), path.size()); - - // Fail if the path is invalid. - if (!llvm::sys::fs::is_directory(pathRef)) { - getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef - << " does not exist or is not a directory."; - return failure(); - } - - // Get the ISA version. - StringRef isaVersion = - llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); - isaVersion.consume_front("gfx"); - - // Helper function for adding a library. - auto addLib = [&](const Twine &lib) -> bool { - auto baseSize = path.size(); - llvm::sys::path::append(path, lib); - StringRef pathRef(path.data(), path.size()); - if (!llvm::sys::fs::is_regular_file(pathRef)) { - getOperation().emitRemark() << "Bitcode library path: " << pathRef - << " does not exist or is not a file.\n"; - return true; + if (!pathRef.empty()) { + SmallVector path; + path.insert(path.begin(), pathRef.begin(), pathRef.end()); + llvm::sys::path::append(path, "amdgcn", "bitcode"); + pathRef = StringRef(path.data(), path.size()); + if (!llvm::sys::fs::is_directory(pathRef)) { + getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef + << " does not exist or is not a directory."; + return failure(); } - fileList.push_back(pathRef.str()); - path.truncate(baseSize); - return false; - }; - - // Add ROCm device libraries. Fail if any of the libraries is not found, ie. - // if any of the `addLib` failed. - if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) || - (any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) || - (any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) || - (any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc")) || - (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl)) && - addLib("oclc_isa_version_" + isaVersion + ".bc"))) - return failure(); + StringRef isaVersion = + llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); + isaVersion.consume_front("gfx"); + return getCommonBitcodeLibs(fileList, path, isaVersion); + } return success(); } std::optional>> SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { SmallVector> bcFiles; - // Return if there are no libs to load. - if (deviceLibs == AMDGCNLibraries::None && fileList.empty()) - return bcFiles; - if (failed(appendStandardLibs(deviceLibs))) - return std::nullopt; if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles, true))) return std::nullopt; @@ -210,79 +174,80 @@ LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { // Stop spamming us with clang version numbers if (auto *ident = module.getNamedMetadata("llvm.ident")) module.eraseNamedMetadata(ident); - // Override the libModules datalayout and target triple with the compiler's - // data layout should there be a discrepency. - setDataLayoutAndTriple(module); return success(); } void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { - std::optional targetMachine = + [[maybe_unused]] std::optional targetMachine = getOrCreateTargetMachine(); assert(targetMachine && "expect a TargetMachine"); - // If all libraries are not set, traverse the module to determine which - // libraries are required. - if (deviceLibs != AMDGCNLibraries::All) { - for (llvm::Function &f : module.functions()) { - if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { - StringRef funcName = f.getName(); - if ("printf" == funcName) - deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl | - AMDGCNLibraries::Ocml; - if (funcName.starts_with("__ockl_")) - deviceLibs |= AMDGCNLibraries::Ockl; - if (funcName.starts_with("__ocml_")) - deviceLibs |= AMDGCNLibraries::Ocml; - } - } - } - addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(), + addControlVariables(module, target.hasWave64(), target.hasDaz(), target.hasFiniteOnly(), target.hasUnsafeMath(), target.hasFastMath(), target.hasCorrectSqrt(), target.getAbi()); } -void SerializeGPUModuleBase::addControlVariables( - llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, - bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, - StringRef abiVer) { - // Return if no device libraries are required. - if (libs == AMDGCNLibraries::None) - return; - // Helper function for adding control variables. - auto addControlVariable = [&module](StringRef name, uint32_t value, - uint32_t bitwidth) { - if (module.getNamedGlobal(name)) { - return; +// Get the paths of ROCm device libraries. +LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs( + llvm::SmallVector &libs, SmallVector &libPath, + StringRef isaVersion) { + auto addLib = [&](StringRef path) -> bool { + if (!llvm::sys::fs::is_regular_file(path)) { + getOperation().emitRemark() << "Bitcode library path: " << path + << " does not exist or is not a file.\n"; + return true; } - llvm::IntegerType *type = - llvm::IntegerType::getIntNTy(module.getContext(), bitwidth); + libs.push_back(path.str()); + return false; + }; + auto getLibPath = [&libPath](Twine lib) { + auto baseSize = libPath.size(); + llvm::sys::path::append(libPath, lib + ".bc"); + std::string path(StringRef(libPath.data(), libPath.size()).str()); + libPath.truncate(baseSize); + return path; + }; + + // Add ROCm device libraries. Fail if any of the libraries is not found. + if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) || + addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) || + addLib(getLibPath("oclc_isa_version_" + isaVersion))) + return failure(); + return success(); +} + +void SerializeGPUModuleBase::addControlVariables( + llvm::Module &module, bool wave64, bool daz, bool finiteOnly, + bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) { + llvm::Type *i8Ty = llvm::Type::getInt8Ty(module.getContext()); + auto addControlVariable = [i8Ty, &module](StringRef name, bool enable) { llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable( - module, /*isConstant=*/type, true, - llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, - llvm::ConstantInt::get(type, value), name, /*before=*/nullptr, - /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, - /*addressSpace=*/4); + module, i8Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, + llvm::ConstantInt::get(i8Ty, enable), name, nullptr, + llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); controlVariable->setVisibility( llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); - controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8)); + controlVariable->setAlignment(llvm::MaybeAlign(1)); controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); }; - // Add ocml related control variables. - if (any(libs & AMDGCNLibraries::Ocml)) { - addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8); - addControlVariable("__oclc_daz_opt", daz || fastMath, 8); - addControlVariable("__oclc_correctly_rounded_sqrt32", - correctSqrt && !fastMath, 8); - addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8); - } - // Add ocml or ockl related control variables. - if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) { - addControlVariable("__oclc_wavefrontsize64", wave64, 8); - int abi = 500; - abiVer.getAsInteger(0, abi); - addControlVariable("__oclc_ABI_version", abi, 32); - } + addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath); + addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath); + addControlVariable("__oclc_daz_opt", daz || fastMath); + addControlVariable("__oclc_correctly_rounded_sqrt32", + correctSqrt && !fastMath); + addControlVariable("__oclc_wavefrontsize64", wave64); + + llvm::Type *i32Ty = llvm::Type::getInt32Ty(module.getContext()); + int abi = 500; + abiVer.getAsInteger(0, abi); + llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable( + module, i32Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, + llvm::ConstantInt::get(i32Ty, abi), "__oclc_ABI_version", nullptr, + llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); + abiVersion->setVisibility( + llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); + abiVersion->setAlignment(llvm::MaybeAlign(4)); + abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); } std::optional> @@ -347,11 +312,43 @@ SerializeGPUModuleBase::assembleIsa(StringRef isa) { parser->setTargetParser(*tap); parser->Run(false); + return result; } +#if MLIR_ENABLE_ROCM_CONVERSIONS +namespace { +class AMDGPUSerializer : public SerializeGPUModuleBase { +public: + AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions); + + gpu::GPUModuleOp getOperation(); + + // Compile to HSA. + std::optional> + compileToBinary(const std::string &serializedISA); + + std::optional> + moduleToObject(llvm::Module &llvmModule) override; + +private: + // Target options. + gpu::TargetOptions targetOptions; +}; +} // namespace + +AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions) + : SerializeGPUModuleBase(module, target, targetOptions), + targetOptions(targetOptions) {} + +gpu::GPUModuleOp AMDGPUSerializer::getOperation() { + return dyn_cast(&SerializeGPUModuleBase::getOperation()); +} + std::optional> -SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { +AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { // Assemble the ISA. std::optional> isaBinary = assembleIsa(serializedISA); @@ -410,13 +407,13 @@ SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { return SmallVector(buffer.begin(), buffer.end()); } -std::optional> SerializeGPUModuleBase::moduleToObjectImpl( - const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) { +std::optional> +AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { // Return LLVM IR if the compilation target is offload. #define DEBUG_TYPE "serialize-to-llvm" LLVM_DEBUG({ - llvm::dbgs() << "LLVM IR for module: " - << cast(getOperation()).getNameAttr() << "\n" + llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr() + << "\n" << llvmModule << "\n"; }); #undef DEBUG_TYPE @@ -440,8 +437,7 @@ std::optional> SerializeGPUModuleBase::moduleToObjectImpl( } #define DEBUG_TYPE "serialize-to-isa" LLVM_DEBUG({ - llvm::dbgs() << "ISA for module: " - << cast(getOperation()).getNameAttr() << "\n" + llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n" << *serializedISA << "\n"; }); #undef DEBUG_TYPE @@ -452,38 +448,6 @@ std::optional> SerializeGPUModuleBase::moduleToObjectImpl( // Compile to binary. return compileToBinary(*serializedISA); } - -#if MLIR_ENABLE_ROCM_CONVERSIONS -namespace { -class AMDGPUSerializer : public SerializeGPUModuleBase { -public: - AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, - const gpu::TargetOptions &targetOptions); - - gpu::GPUModuleOp getOperation(); - - std::optional> - moduleToObject(llvm::Module &llvmModule) override; - -private: - // Target options. - gpu::TargetOptions targetOptions; -}; -} // namespace - -AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, - const gpu::TargetOptions &targetOptions) - : SerializeGPUModuleBase(module, target, targetOptions), - targetOptions(targetOptions) {} - -gpu::GPUModuleOp AMDGPUSerializer::getOperation() { - return dyn_cast(&SerializeGPUModuleBase::getOperation()); -} - -std::optional> -AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { - return moduleToObjectImpl(targetOptions, llvmModule); -} #endif // MLIR_ENABLE_ROCM_CONVERSIONS std::optional> ROCDLTargetAttrImpl::serializeToObject( @@ -513,15 +477,10 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, const SmallVector &object, const gpu::TargetOptions &options) const { gpu::CompilationTarget format = options.getCompilationTarget(); - // If format is `fatbin` transform it to binary as `fatbin` is not yet - // supported. - if (format > gpu::CompilationTarget::Binary) - format = gpu::CompilationTarget::Binary; - - DictionaryAttr properties{}; Builder builder(attribute.getContext()); return builder.getAttr( - attribute, format, - builder.getStringAttr(StringRef(object.data(), object.size())), - properties); + attribute, + format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary + : format, + builder.getStringAttr(StringRef(object.data(), object.size())), nullptr); }