From f7572c468ead8a281d855e2527e9f374af48c2c0 Mon Sep 17 00:00:00 2001
From: Dmitry Sidorov <dmitry.sidorov@intel.com>
Date: Mon, 21 Oct 2024 16:05:49 +0200
Subject: [PATCH 1/4] [Backport to 14] Add fast math flag translation for
 OpenCL std lib

This capability was added in SPIR-V 1.6.
Note: this backport does not add handling of the nofpclass attribute, as
that attribute is not yet present in this LLVM version.

Signed-off-by: Sidorov, Dmitry <dmitry.sidorov@intel.com>
---
 lib/SPIRV/SPIRVReader.cpp                     |  7 +++-
 lib/SPIRV/SPIRVUtil.cpp                       |  3 ++
 lib/SPIRV/SPIRVWriter.cpp                     |  9 ++++-
 test/transcoding/fast-math-opencl-builtins.ll | 40 +++++++++++++++++++
 4 files changed, 55 insertions(+), 4 deletions(-)
 create mode 100644 test/transcoding/fast-math-opencl-builtins.ll
diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp
index f536216203..114f865660 100644
--- a/lib/SPIRV/SPIRVReader.cpp
+++ b/lib/SPIRV/SPIRVReader.cpp
@@ -2460,8 +2460,11 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F,
   case OpExtInst: {
     auto *ExtInst = static_cast<SPIRVExtInst *>(BV);
     switch (ExtInst->getExtSetKind()) {
-    case SPIRVEIS_OpenCL:
-      return mapValue(BV, transOCLBuiltinFromExtInst(ExtInst, BB));
+    case SPIRVEIS_OpenCL: {
+      auto *V = mapValue(BV, transOCLBuiltinFromExtInst(ExtInst, BB));
+      applyFPFastMathModeDecorations(BV, static_cast<Instruction *>(V));
+      return V;
+    }
     case SPIRVEIS_Debug:
     case SPIRVEIS_OpenCL_DebugInfo_100:
     case SPIRVEIS_NonSemantic_Shader_DebugInfo_100:
diff --git a/lib/SPIRV/SPIRVUtil.cpp b/lib/SPIRV/SPIRVUtil.cpp
index fa62763802..1b90197f39 100644
--- a/lib/SPIRV/SPIRVUtil.cpp
+++ b/lib/SPIRV/SPIRVUtil.cpp
@@ -787,6 +787,9 @@ CallInst *mutateCallInst(
   NewCI->copyMetadata(*CI);
   NewCI->setAttributes(CI->getAttributes());
   NewCI->setTailCall(CI->isTailCall());
+  if (isa<FPMathOperator>(CI))
+    NewCI->setFastMathFlags(CI->getFastMathFlags());
+
   if (CI->hasFnAttr("fpbuiltin-max-error")) {
     auto Attr = CI->getFnAttr("fpbuiltin-max-error");
     NewCI->addFnAttr(Attr);
diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp
index 0a98114872..2bf1a98e9e 100644
--- a/lib/SPIRV/SPIRVWriter.cpp
+++ b/lib/SPIRV/SPIRVWriter.cpp
@@ -2715,7 +2715,8 @@ bool LLVMToSPIRVBase::transDecoration(Value *V, SPIRVValue *BV) {
     if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
         Opcode == Instruction::FMul || Opcode == Instruction::FDiv ||
         Opcode == Instruction::FRem ||
-        ((Opcode == Instruction::FNeg || Opcode == Instruction::FCmp) &&
+        ((Opcode == Instruction::FNeg || Opcode == Instruction::FCmp ||
+          BV->isExtInst()) &&
          BM->isAllowedToUseVersion(VersionNumber::SPIRV_1_6))) {
       FastMathFlags FMF = BVF->getFastMathFlags();
       SPIRVWord M{0};
@@ -2742,8 +2743,12 @@ bool LLVMToSPIRVBase::transDecoration(Value *V, SPIRVValue *BV) {
           }
         }
       }
-      if (M != 0)
+      if (M != 0) {
         BV->setFPFastMathMode(M);
+        if (Opcode == Instruction::FNeg || Opcode == Instruction::FCmp ||
+            BV->isExtInst())
+          BM->setMinSPIRVVersion(VersionNumber::SPIRV_1_6);
+      }
     }
   }
   if (Instruction *Inst = dyn_cast<Instruction>(V)) {
diff --git a/test/transcoding/fast-math-opencl-builtins.ll b/test/transcoding/fast-math-opencl-builtins.ll
new file mode 100644
index 0000000000..7cec9434d6
--- /dev/null
+++ b/test/transcoding/fast-math-opencl-builtins.ll
@@ -0,0 +1,40 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv -spirv-text %t.bc -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: llvm-spirv %t.bc -o %t.spv
+; RUN: spirv-val %t.spv
+; RUN: llvm-spirv -r %t.spv -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-LLVM-OCL
+; RUN: llvm-spirv -r --spirv-target-env=SPV-IR %t.spv -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-LLVM-SPV
+
+; RUN: llvm-spirv -spirv-text --spirv-max-version=1.5 %t.bc -o - | FileCheck %s --check-prefix=CHECK-SPIRV-NEG
+
+; CHECK-SPIRV: Decorate [[#FPDec4:]] FPFastMathMode 16
+; CHECK-SPIRV: ExtInst [[#]] [[#FPDec4]] [[#]] fmax [[#]] [[#]]
+
+; CHECK-SPIRV-NEG-NOT: Decorate [[#]] FPFastMathMode [[#]]
+
+; CHECK-LLVM-OCL: call fast spir_func float @_Z4fmaxff(float %[[#]], float %[[#]])
+
+; CHECK-LLVM-SPV: call fast spir_func float @_Z16__spirv_ocl_fmaxff(float %[[#]], float %[[#]])
+
+; ModuleID = 'test.bc'
+source_filename = "test.cpp"
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+
+declare dso_local spir_func noundef float @_Z16__spirv_ocl_fmaxff(float noundef, float noundef) local_unnamed_addr
+
+define weak_odr dso_local spir_kernel void @nofpclass_fast(ptr addrspace(1) noundef align 4 %_arg_data, ptr addrspace(1) noundef align 4 %_arg_dat1, ptr addrspace(1) noundef align 4 %_arg_dat2) local_unnamed_addr  {
+entry:
+  %0 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32
+  %arrayidx.i = getelementptr inbounds float, ptr addrspace(1) %_arg_data, i64 %0
+  %arrayidx3.i = getelementptr inbounds float, ptr addrspace(1) %_arg_dat1, i64 %0
+  %cmp.i = icmp ult i64 %0, 2147483648
+  %arrayidx5.i = getelementptr inbounds float, ptr addrspace(1) %_arg_dat2, i64 %0
+  %1 = load float, ptr addrspace(1) %arrayidx3.i, align 4
+  %2 = load float, ptr addrspace(1) %arrayidx5.i, align 4
+  %call.i.i = tail call fast spir_func noundef float @_Z16__spirv_ocl_fmaxff(float noundef %1, float noundef %2)
+  store float %call.i.i, ptr addrspace(1) %arrayidx.i, align 4
+  ret void
+}

From 7e5902eb9e1c73bdbe69e1de13eab365e6dcb43c Mon Sep 17 00:00:00 2001
From: "Sidorov, Dmitry" <dmitry.sidorov@intel.com>
Date: Thu, 7 Nov 2024 09:00:28 -0800
Subject: [PATCH 2/4] Fix setMinSPIRVVersion argument type

Signed-off-by: Sidorov, Dmitry <dmitry.sidorov@intel.com>
---
 lib/SPIRV/SPIRVWriter.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp
index 2bf1a98e9e..bb83c0c470 100644
--- a/lib/SPIRV/SPIRVWriter.cpp
+++ b/lib/SPIRV/SPIRVWriter.cpp
@@ -2747,7 +2747,8 @@ bool LLVMToSPIRVBase::transDecoration(Value *V, SPIRVValue *BV) {
         BV->setFPFastMathMode(M);
         if (Opcode == Instruction::FNeg || Opcode == Instruction::FCmp ||
             BV->isExtInst())
-          BM->setMinSPIRVVersion(VersionNumber::SPIRV_1_6);
+          BM->setMinSPIRVVersion(
+              static_cast<SPIRVWord>(VersionNumber::SPIRV_1_6));
       }
     }
   }

From ad36505a0ac1b23eafa3eb8d0a438e1fa3c75dfa Mon Sep 17 00:00:00 2001
From: "Sidorov, Dmitry" <dmitry.sidorov@intel.com>
Date: Thu, 7 Nov 2024 09:44:25 -0800
Subject: [PATCH 3/4] Use typed pointers in fast-math-opencl-builtins test

Signed-off-by: Sidorov, Dmitry <dmitry.sidorov@intel.com>
---
 test/transcoding/fast-math-opencl-builtins.ll | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/test/transcoding/fast-math-opencl-builtins.ll b/test/transcoding/fast-math-opencl-builtins.ll
index 7cec9434d6..3eee2f329a 100644
--- a/test/transcoding/fast-math-opencl-builtins.ll
+++ b/test/transcoding/fast-math-opencl-builtins.ll
@@ -25,16 +25,16 @@ target triple = "spir64-unknown-unknown"
 
 declare dso_local spir_func noundef float @_Z16__spirv_ocl_fmaxff(float noundef, float noundef) local_unnamed_addr
 
-define weak_odr dso_local spir_kernel void @nofpclass_fast(ptr addrspace(1) noundef align 4 %_arg_data, ptr addrspace(1) noundef align 4 %_arg_dat1, ptr addrspace(1) noundef align 4 %_arg_dat2) local_unnamed_addr  {
+define weak_odr dso_local spir_kernel void @nofpclass_fast(i32 addrspace(1)* noundef align 4 %_arg_data, i32 addrspace(1)* noundef align 4 %_arg_dat1, i32 addrspace(1)* noundef align 4 %_arg_dat2) local_unnamed_addr  {
 entry:
-  %0 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32
-  %arrayidx.i = getelementptr inbounds float, ptr addrspace(1) %_arg_data, i64 %0
-  %arrayidx3.i = getelementptr inbounds float, ptr addrspace(1) %_arg_dat1, i64 %0
+  %0 = load i64, i64 addrspace(1)* @__spirv_BuiltInGlobalInvocationId, align 32
+  %arrayidx.i = getelementptr inbounds float, i32 addrspace(1)* %_arg_data, i64 %0
+  %arrayidx3.i = getelementptr inbounds float, i32 addrspace(1)* %_arg_dat1, i64 %0
   %cmp.i = icmp ult i64 %0, 2147483648
-  %arrayidx5.i = getelementptr inbounds float, ptr addrspace(1) %_arg_dat2, i64 %0
-  %1 = load float, ptr addrspace(1) %arrayidx3.i, align 4
-  %2 = load float, ptr addrspace(1) %arrayidx5.i, align 4
+  %arrayidx5.i = getelementptr inbounds float, i32 addrspace(1)* %_arg_dat2, i64 %0
+  %1 = load float, float addrspace(1)* %arrayidx3.i, align 4
+  %2 = load float, float addrspace(1)* %arrayidx5.i, align 4
   %call.i.i = tail call fast spir_func noundef float @_Z16__spirv_ocl_fmaxff(float noundef %1, float noundef %2)
-  store float %call.i.i, ptr addrspace(1) %arrayidx.i, align 4
+  store float %call.i.i, float addrspace(1)* %arrayidx.i, align 4
   ret void
 }

From 683dd3cbdf0a63a864a462d901f792261da1bba6 Mon Sep 17 00:00:00 2001
From: "Sidorov, Dmitry" <dmitry.sidorov@intel.com>
Date: Thu, 7 Nov 2024 10:32:19 -0800
Subject: [PATCH 4/4] Fix pointer and vector types in fast-math-opencl-builtins test

Signed-off-by: Sidorov, Dmitry <dmitry.sidorov@intel.com>
---
 test/transcoding/fast-math-opencl-builtins.ll | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/test/transcoding/fast-math-opencl-builtins.ll b/test/transcoding/fast-math-opencl-builtins.ll
index 3eee2f329a..8bed5f640f 100644
--- a/test/transcoding/fast-math-opencl-builtins.ll
+++ b/test/transcoding/fast-math-opencl-builtins.ll
@@ -25,13 +25,14 @@ target triple = "spir64-unknown-unknown"
 
 declare dso_local spir_func noundef float @_Z16__spirv_ocl_fmaxff(float noundef, float noundef) local_unnamed_addr
 
-define weak_odr dso_local spir_kernel void @nofpclass_fast(i32 addrspace(1)* noundef align 4 %_arg_data, i32 addrspace(1)* noundef align 4 %_arg_dat1, i32 addrspace(1)* noundef align 4 %_arg_dat2) local_unnamed_addr  {
+define weak_odr dso_local spir_kernel void @nofpclass_fast(float addrspace(1)* noundef align 4 %_arg_data, float addrspace(1)* noundef align 4 %_arg_dat1, float addrspace(1)* noundef align 4 %_arg_dat2) local_unnamed_addr  {
 entry:
-  %0 = load i64, i64 addrspace(1)* @__spirv_BuiltInGlobalInvocationId, align 32
-  %arrayidx.i = getelementptr inbounds float, i32 addrspace(1)* %_arg_data, i64 %0
-  %arrayidx3.i = getelementptr inbounds float, i32 addrspace(1)* %_arg_dat1, i64 %0
-  %cmp.i = icmp ult i64 %0, 2147483648
-  %arrayidx5.i = getelementptr inbounds float, i32 addrspace(1)* %_arg_dat2, i64 %0
+  %0 = load <3 x i64>, <3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId, align 32
+  %elem = extractelement <3 x i64> %0, i32 2
+  %arrayidx.i = getelementptr inbounds float, float addrspace(1)* %_arg_data, i64 %elem
+  %arrayidx3.i = getelementptr inbounds float, float addrspace(1)* %_arg_dat1, i64 %elem
+  %cmp.i = icmp ult i64 %elem, 2147483648
+  %arrayidx5.i = getelementptr inbounds float, float addrspace(1)* %_arg_dat2, i64 %elem
   %1 = load float, float addrspace(1)* %arrayidx3.i, align 4
   %2 = load float, float addrspace(1)* %arrayidx5.i, align 4
   %call.i.i = tail call fast spir_func noundef float @_Z16__spirv_ocl_fmaxff(float noundef %1, float noundef %2)