From fea5914c926e2f013a8b5e27eaa74c7047fb2c71 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 26 Jul 2024 16:38:56 -0500 Subject: [PATCH] Revert "[OpenMP][libc] Remove special handling for OpenMP printf (#98940)" This reverts commit 069e8bcd82c4420239f95c7e6a09e1f756317cfc. Summary: Some tests failing, revert this for now. --- clang/lib/CodeGen/CGBuiltin.cpp | 2 + clang/lib/CodeGen/CGGPUBuiltin.cpp | 29 +++++++ clang/lib/CodeGen/CodeGenFunction.h | 1 + libc/config/gpu/entrypoints.txt | 1 + libc/spec/gpu_ext.td | 8 ++ libc/src/gpu/CMakeLists.txt | 12 +++ libc/src/gpu/rpc_fprintf.cpp | 75 +++++++++++++++++++ libc/src/gpu/rpc_fprintf.h | 23 ++++++ .../AMDGPU/AMDGPUPrintfRuntimeBinding.cpp | 3 +- offload/DeviceRTL/include/LibC.h | 1 + offload/DeviceRTL/src/LibC.cpp | 44 +++++++---- 11 files changed, 183 insertions(+), 16 deletions(-) create mode 100644 libc/src/gpu/rpc_fprintf.cpp create mode 100644 libc/src/gpu/rpc_fprintf.h diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f0651c280ff954..0c4d0efb70ea51 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5986,6 +5986,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, getTarget().getTriple().isAMDGCN() || (getTarget().getTriple().isSPIRV() && getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) { + if (getLangOpts().OpenMPIsTargetDevice) + return EmitOpenMPDevicePrintfCallExpr(E); if (getTarget().getTriple().isNVPTX()) return EmitNVPTXDevicePrintfCallExpr(E); if ((getTarget().getTriple().isAMDGCN() || diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp index 84adf29e8db87e..b2340732afeb5a 100644 --- a/clang/lib/CodeGen/CGGPUBuiltin.cpp +++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -42,6 +42,28 @@ llvm::Function *GetVprintfDeclaration(llvm::Module &M) { VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M); } +llvm::Function 
*GetOpenMPVprintfDeclaration(CodeGenModule &CGM) { + const char *Name = "__llvm_omp_vprintf"; + llvm::Module &M = CGM.getModule(); + llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()), + llvm::PointerType::getUnqual(M.getContext()), + llvm::Type::getInt32Ty(M.getContext())}; + llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( + llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); + + if (auto *F = M.getFunction(Name)) { + if (F->getFunctionType() != VprintfFuncType) { + CGM.Error(SourceLocation(), + "Invalid type declaration for __llvm_omp_vprintf"); + return nullptr; + } + return F; + } + + return llvm::Function::Create( + VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M); +} + // Transforms a call to printf into a call to the NVPTX vprintf syscall (which // isn't particularly special; it's invoked just like a regular function). // vprintf takes two args: A format string, and a pointer to a buffer containing @@ -191,3 +213,10 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) { Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint()); return RValue::get(Printf); } + +RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) { + assert(getTarget().getTriple().isNVPTX() || + getTarget().getTriple().isAMDGCN()); + return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM), + true); +} diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 89cc819c43bb56..bd62c65d8cce64 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4536,6 +4536,7 @@ class CodeGenFunction : public CodeGenTypeCache { RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E); RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E); + RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E); RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); 
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index 157f6f8af00a97..df7aa9e3196242 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -226,6 +226,7 @@ set(TARGET_LIBC_ENTRYPOINTS # gpu/rpc.h entrypoints libc.src.gpu.rpc_host_call + libc.src.gpu.rpc_fprintf ) set(TARGET_LIBM_ENTRYPOINTS diff --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td index dce81ff7786203..5400e0afa7564a 100644 --- a/libc/spec/gpu_ext.td +++ b/libc/spec/gpu_ext.td @@ -10,6 +10,14 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> { RetValSpec, [ArgSpec, ArgSpec, ArgSpec] >, + FunctionSpec< + "rpc_fprintf", + RetValSpec, + [ArgSpec, + ArgSpec, + ArgSpec, + ArgSpec] + >, ] >; let Headers = [ diff --git a/libc/src/gpu/CMakeLists.txt b/libc/src/gpu/CMakeLists.txt index e20228516b5112..4508abea7a8886 100644 --- a/libc/src/gpu/CMakeLists.txt +++ b/libc/src/gpu/CMakeLists.txt @@ -8,3 +8,15 @@ add_entrypoint_object( libc.src.__support.RPC.rpc_client libc.src.__support.GPU.utils ) + +add_entrypoint_object( + rpc_fprintf + SRCS + rpc_fprintf.cpp + HDRS + rpc_fprintf.h + DEPENDS + libc.src.stdio.gpu.gpu_file + libc.src.__support.RPC.rpc_client + libc.src.__support.GPU.utils +) diff --git a/libc/src/gpu/rpc_fprintf.cpp b/libc/src/gpu/rpc_fprintf.cpp new file mode 100644 index 00000000000000..70056daa25e2ec --- /dev/null +++ b/libc/src/gpu/rpc_fprintf.cpp @@ -0,0 +1,75 @@ +//===-- GPU implementation of fprintf -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "rpc_fprintf.h"
+
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/GPU/utils.h"
+#include "src/__support/RPC/rpc_client.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/stdio/gpu/file.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+template <uint16_t opcode>
+int fprintf_impl(::FILE *__restrict file, const char *__restrict format,
+                 size_t format_size, void *args, size_t args_size) {
+  uint64_t mask = gpu::get_lane_mask();
+  rpc::Client::Port port = rpc::client.open<opcode>();
+
+  if constexpr (opcode == RPC_PRINTF_TO_STREAM) {
+    port.send([&](rpc::Buffer *buffer) {
+      buffer->data[0] = reinterpret_cast<uintptr_t>(file);
+    });
+  }
+
+  port.send_n(format, format_size);
+  port.recv([&](rpc::Buffer *buffer) {
+    args_size = static_cast<size_t>(buffer->data[0]);
+  });
+  port.send_n(args, args_size);
+
+  uint32_t ret = 0;
+  for (;;) {
+    const char *str = nullptr;
+    port.recv([&](rpc::Buffer *buffer) {
+      ret = static_cast<uint32_t>(buffer->data[0]);
+      str = reinterpret_cast<const char *>(buffer->data[1]);
+    });
+    // If any lanes have a string argument it needs to be copied back.
+    if (!gpu::ballot(mask, str))
+      break;
+
+    uint64_t size = str ? internal::string_length(str) + 1 : 0;
+    port.send_n(str, size);
+  }
+
+  port.close();
+  return ret;
+}
+
+// TODO: Delete this and port OpenMP to use `printf`. This is a stand-in that
+// takes a pointer to the arguments and their size in place of varargs. Once
+// varargs support is added we will use that to implement the real version.
+LLVM_LIBC_FUNCTION(int, rpc_fprintf,
+                   (::FILE *__restrict stream, const char *__restrict format,
+                    void *args, size_t size)) {
+  cpp::string_view str(format);
+  if (stream == stdout)
+    return fprintf_impl<RPC_PRINTF_TO_STDOUT>(stream, format, str.size() + 1,
+                                              args, size);
+  else if (stream == stderr)
+    return fprintf_impl<RPC_PRINTF_TO_STDERR>(stream, format, str.size() + 1,
+                                              args, size);
+  else
+    return fprintf_impl<RPC_PRINTF_TO_STREAM>(stream, format, str.size() + 1,
+                                              args, size);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/gpu/rpc_fprintf.h b/libc/src/gpu/rpc_fprintf.h
new file mode 100644
index 00000000000000..7658b214c07c29
--- /dev/null
+++ b/libc/src/gpu/rpc_fprintf.h
@@ -0,0 +1,23 @@
+//===-- Implementation header for RPC functions -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_GPU_RPC_FPRINTF_H
+#define LLVM_LIBC_SRC_GPU_RPC_FPRINTF_H
+
+#include "hdr/types/FILE.h"
+#include "src/__support/macros/config.h"
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+int rpc_fprintf(::FILE *__restrict stream, const char *__restrict format,
+                void *args, size_t size);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_GPU_RPC_FPRINTF_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 02b0d436451a3f..42a6bac4fa6f24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -437,8 +437,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
     return false;
 
   auto PrintfFunction = M.getFunction("printf");
-  if (!PrintfFunction || !PrintfFunction->isDeclaration() ||
-      M.getModuleFlag("openmp"))
+  if (!PrintfFunction ||
!PrintfFunction->isDeclaration()) return false; for (auto &U : PrintfFunction->uses()) { diff --git a/offload/DeviceRTL/include/LibC.h b/offload/DeviceRTL/include/LibC.h index 59a795cc62e0e6..dde86af783af95 100644 --- a/offload/DeviceRTL/include/LibC.h +++ b/offload/DeviceRTL/include/LibC.h @@ -18,6 +18,7 @@ extern "C" { int memcmp(const void *lhs, const void *rhs, size_t count); void memset(void *dst, int C, size_t count); + int printf(const char *format, ...); } diff --git a/offload/DeviceRTL/src/LibC.cpp b/offload/DeviceRTL/src/LibC.cpp index 291ceb023a69c5..4bca5d29643fe9 100644 --- a/offload/DeviceRTL/src/LibC.cpp +++ b/offload/DeviceRTL/src/LibC.cpp @@ -11,33 +11,44 @@ #pragma omp begin declare target device_type(nohost) namespace impl { -int32_t omp_vprintf(const char *Format, __builtin_va_list vlist); +int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t); } -#ifndef OMPTARGET_HAS_LIBC -namespace impl { #pragma omp begin declare variant match( \ device = {arch(nvptx, nvptx64)}, \ implementation = {extension(match_any)}) -extern "C" int vprintf(const char *format, ...); -int omp_vprintf(const char *Format, __builtin_va_list vlist) { - return vprintf(Format, vlist); +extern "C" int32_t vprintf(const char *, void *); +namespace impl { +int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) { + return vprintf(Format, Arguments); } +} // namespace impl #pragma omp end declare variant #pragma omp begin declare variant match(device = {arch(amdgcn)}) -int omp_vprintf(const char *Format, __builtin_va_list) { return -1; } -#pragma omp end declare variant -} // namespace impl -extern "C" int printf(const char *Format, ...) { - __builtin_va_list vlist; - __builtin_va_start(vlist, Format); - return impl::omp_vprintf(Format, vlist); +#ifdef OMPTARGET_HAS_LIBC +// TODO: Remove this handling once we have varargs support. 
+extern "C" struct FILE *stdout; +extern "C" int32_t rpc_fprintf(FILE *, const char *, void *, uint64_t); + +namespace impl { +int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t Size) { + return rpc_fprintf(stdout, Format, Arguments, Size); } -#endif // OMPTARGET_HAS_LIBC +} // namespace impl +#else +// We do not have a vprintf implementation for AMD GPU so we use a stub. +namespace impl { +int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) { + return -1; +} +} // namespace impl +#endif +#pragma omp end declare variant extern "C" { + [[gnu::weak]] int memcmp(const void *lhs, const void *rhs, size_t count) { auto *L = reinterpret_cast(lhs); auto *R = reinterpret_cast(rhs); @@ -54,6 +65,11 @@ extern "C" { for (size_t I = 0; I < count; ++I) dstc[I] = C; } + +/// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf +int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) { + return impl::omp_vprintf(Format, Arguments, Size); +} } #pragma omp end declare target