From 9016641211513ffc9ef5b90eb78f2be064e6d3b5 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Wed, 10 Apr 2024 12:32:52 -0400 Subject: [PATCH] [AMDGPU] add macro `__AMDGCN_CDNA_VERSION__` If a processor belongs to CDNA generation, pre-define macro `__AMDGCN_CDNA_VERSION__` as an integer. Fixes: https://github.com/ROCm/llvm-project/issues/59 --- clang/docs/AMDGPUSupport.rst | 2 ++ clang/lib/Basic/Targets/AMDGPU.cpp | 18 ++++++++++++++++++ clang/lib/Basic/Targets/AMDGPU.h | 2 ++ clang/test/Driver/amdgpu-macros.cl | 11 ++++++----- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/clang/docs/AMDGPUSupport.rst b/clang/docs/AMDGPUSupport.rst index e63c0e1ba7d67b..551a16961ea924 100644 --- a/clang/docs/AMDGPUSupport.rst +++ b/clang/docs/AMDGPUSupport.rst @@ -45,6 +45,8 @@ Predefined Macros - Defined with the target ID as a string. * - ``__amdgcn_feature___`` - Defined for each supported target feature. The value is 1 if the feature is enabled and 0 if it is disabled. Allowed feature names are sramecc and xnack. + * - ``__AMDGCN_CDNA_VERSION__`` + - Defined with the CDNA version as an integer if the processor belongs to the CDNA generation. * - ``__AMDGCN_CUMODE__`` - Defined as 1 if the CU mode is enabled and 0 if the WGP mode is enabled. * - ``__AMDGCN_UNSAFE_FP_ATOMICS__`` diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 5742885df0461b..bc392531c62aa6 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -249,6 +249,22 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, for (auto F : {"image-insts", "gws"}) ReadOnlyFeatures.insert(F); HalfArgsAndReturns = true; + + switch (GPUKind) { + case llvm::AMDGPU::GK_GFX908: + CDNAVersion = 1; + break; + case llvm::AMDGPU::GK_GFX90A: + CDNAVersion = 2; + break; + case llvm::AMDGPU::GK_GFX940: + case llvm::AMDGPU::GK_GFX941: + case llvm::AMDGPU::GK_GFX942: + CDNAVersion = 3; + break; + default: + CDNAVersion = 0; + } } void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { @@ -299,6 +315,8 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind); Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) + Twine("__")); + if (CDNAVersion) + Builder.defineMacro("__AMDGCN_CDNA_VERSION__", Twine(CDNAVersion)); Builder.defineMacro("__amdgcn_processor__", Twine("\"") + Twine(CanonName) + Twine("\"")); Builder.defineMacro("__amdgcn_target_id__", diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 94d9ba93ed226f..b0f601fa27bbb7 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -51,6 +51,8 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { llvm::StringMap OffloadArchFeatures; std::string TargetID; + unsigned CDNAVersion = 0; + bool hasFP64() const { return getTriple().getArch() == llvm::Triple::amdgcn || !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64); diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index 004619321b271f..08c5aa8a8a4d5c 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -103,14 +103,14 @@ // RUN: %clang -E -dM -target amdgcn -mcpu=gfx902 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx902 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx904 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx904 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx906 -DFAMILY=GFX9 -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx908 -DFAMILY=GFX9 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,CDNA,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx908 -DFAMILY=GFX9 -DCDNA=1 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx908 -munsafe-fp-atomics %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,UNSAFEFPATOMIC %s -DWAVEFRONT_SIZE=64 -DCPU=gfx908 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx909 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx909 -DFAMILY=GFX9 -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90a -DFAMILY=GFX9 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,CDNA,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90a -DFAMILY=GFX9 -DCDNA=2 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90c -DFAMILY=GFX9 -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9 -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9 -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,CDNA,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9 -DCDNA=3 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,CDNA,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9 -DCDNA=3 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,CDNA,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9 -DCDNA=3 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10 @@ -150,6 +150,7 @@ // ARCH-GCN-DAG: #define __[[CPU]]__ 1 // ARCH-GCN-DAG: #define __[[FAMILY]]__ 1 // ARCH-GCN-DAG: #define __amdgcn_processor__ "[[CPU]]" +// CDNA-DAG: #define __AMDGCN_CDNA_VERSION__ [[CDNA]] // ARCH-GCN-DAG: #define __AMDGCN_WAVEFRONT_SIZE [[WAVEFRONT_SIZE]] // UNSAFEFPATOMIC-DAG: #define __AMDGCN_UNSAFE_FP_ATOMICS__ 1