Skip to content

Commit

Permalink
Enable denormal support for systolic operations in VC
Browse files Browse the repository at this point in the history
When the target device supports denormals for DPAS instructions, the
compiler should retain them. This change enables denormal support for
the following data types:
* bf16 - for Xe2
* tf32 - for Xe2
  • Loading branch information
vmustya authored and igcbot committed Aug 27, 2024
1 parent ed9da95 commit a8f65b1
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 23 deletions.
56 changes: 33 additions & 23 deletions IGC/VectorCompiler/lib/GenXCodeGen/GenXCisaBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,10 +374,11 @@ class GenXKernelBuilder final {
// The default float control from kernel attribute. Each subroutine may
// override this control mask, but it should revert back to the default float
// control mask before exiting from the subroutine.
uint32_t DefaultFloatControl = 0;
uint32_t FloatControlKernel = 0;
uint32_t FloatControlMask = 0;


uint32_t CRMask = 0;
// The hardware-initialization value for the float control register.
static constexpr uint32_t FloatControlDefault = 0x0;

// normally false, set to true if there is any SIMD CF in the func or this is
// (indirectly) called inside any SIMD CF.
Expand Down Expand Up @@ -1096,11 +1097,15 @@ bool GenXKernelBuilder::run() {
StackSurf = Subtarget->stackSurface();

using namespace visa;
CRMask = CRBits::RoundingBitMask | CRBits::DoublePrecisionDenorm |
CRBits::SinglePrecisionDenorm | CRBits::HalfPrecisionDenorm;
FloatControlMask = CRBits::DoublePrecisionDenorm |
CRBits::SinglePrecisionDenorm |
CRBits::HalfPrecisionDenorm | CRBits::RoundingBitMask;
FloatControlKernel = CRBits::RTNE;

// If the subtarget supports systolic denorm control, retain denormals for
// systolic operations.
if (Subtarget->hasSystolicDenormControl())
CRMask |= CRBits::SystolicDenorm;
FloatControlKernel |= CRBits::SystolicDenorm;

StackCallExecSize =
getExecSizeFromValue(BackendConfig->getInteropSubgroupSize());
Expand Down Expand Up @@ -1294,27 +1299,32 @@ void GenXKernelBuilder::buildInstructions() {
beginFunctionLight(Func);

// If a float control is specified, emit code to make that happen.
// Float control contains rounding mode, denorm behaviour and single
// precision float mode (ALT or IEEE) Relevant bits are already set as
// defined for VISA control reg in header definition on enums
// Float control contains rounding mode and denorm behaviour. Relevant bits
// are already set as defined for VISA control reg in header definition on
// enums.
uint32_t FloatControl = FloatControlKernel;

if (Func->hasFnAttribute(genx::FunctionMD::CMFloatControl)) {
uint32_t FloatControl = 0;
Func->getFnAttribute(genx::FunctionMD::CMFloatControl)
.getValueAsString()
.getAsInteger(0, FloatControl);

// Clear current float control bits to known zero state
buildControlRegUpdate(CRMask, true);

// Set rounding mode to required state if that isn't zero
FloatControl &= CRMask;
if (FloatControl) {
if (FG->getHead() == Func)
DefaultFloatControl = FloatControl;
buildControlRegUpdate(FloatControl, false);
FloatControl &= FloatControlMask;
FloatControl |= FloatControlKernel & ~FloatControlMask;
if (FloatControl != (FloatControlKernel & FloatControlMask) &&
vc::isKernel(Func)) {
FloatControlKernel &= ~FloatControlMask;
FloatControlKernel |= FloatControl;
}
}

if ((vc::isKernel(Func) && FloatControlKernel != 0) ||
FloatControl != (FloatControlKernel & FloatControlMask)) {
buildControlRegUpdate(FloatControlMask, true);
buildControlRegUpdate(FloatControl, false);
}

// Only output a label for the initial basic block if it is used from
// somewhere else.
bool NeedsLabel = !Func->front().use_empty();
Expand Down Expand Up @@ -5475,11 +5485,11 @@ void GenXKernelBuilder::buildRet(ReturnInst *RI) {
F->getFnAttribute(genx::FunctionMD::CMFloatControl)
.getValueAsString()
.getAsInteger(0, FloatControl);
FloatControl &= CRMask;
if (FloatControl != DefaultFloatControl) {
buildControlRegUpdate(CRMask, true);
if (DefaultFloatControl)
buildControlRegUpdate(DefaultFloatControl, false);
FloatControl &= FloatControlMask;
if (FloatControl != (FloatControlKernel & FloatControlMask)) {
buildControlRegUpdate(FloatControlMask, true);
if (FloatControlKernel & FloatControlMask)
buildControlRegUpdate(FloatControlKernel, false);
}
if (vc::requiresStackCall(Func)) {
appendVISACFFunctionRetInst(nullptr, vISA_EMASK_M1, StackCallExecSize);
Expand Down
48 changes: 48 additions & 0 deletions IGC/VectorCompiler/test/CisaBuilder/retain-denorm-systolic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================

target triple = "genx64-unknown-unknown"

; RUN: %opt %use_old_pass_manager% -GenXModule -GenXCategoryWrapper -GenXCisaBuilderPass -GenXFinalizer \
; RUN: -march=genx64 -mtriple=spir64-unknown-unknown -finalizer-opts="-dumpcommonisa -isaasmToConsole" \
; RUN: -mcpu=XeHPC < %s | FileCheck %s --check-prefix=FLUSH

; RUN: %opt %use_old_pass_manager% -GenXModule -GenXCategoryWrapper -GenXCisaBuilderPass -GenXFinalizer \
; RUN: -march=genx64 -mtriple=spir64-unknown-unknown -finalizer-opts="-dumpcommonisa -isaasmToConsole" \
; RUN: -mcpu=Xe2 < %s | FileCheck %s --check-prefix=RETAIN

; FLUSH-NOT: and (M1, 1) %cr0(0,0)<1> %cr0(0,0)<0;1,0>
; FLUSH-NOT: or (M1, 1) %cr0(0,0)<1> %cr0(0,0)<0;1,0>

; RETAIN: and (M1, 1) %cr0(0,0)<1> %cr0(0,0)<0;1,0> 0xfffffb0f:ud
; RETAIN: or (M1, 1) %cr0(0,0)<1> %cr0(0,0)<0;1,0> 0x40000000:ud

; Kernel under test. Its body is a lone `ret void`, so the cr0 and/or
; instructions that the checks above look for are not produced by anything in
; the kernel body. Attribute group #0 marks it "CMGenxMain" and sets
; "CMFloatControl"="0".
define dllexport spir_kernel void @the_test(i32 %0, i32 %1) #0 {
ret void
}

attributes #0 = { "CMGenxMain" "CMFloatControl"="0" }

!spirv.Source = !{!0}
!opencl.spir.version = !{!1}
!opencl.ocl.version = !{!0}
!opencl.used.extensions = !{!2}
!opencl.used.optional.core.features = !{!2}
!spirv.Generator = !{!3}
!genx.kernels = !{!4}
!genx.kernel.internal = !{!8}

!0 = !{i32 0, i32 0}
!1 = !{i32 1, i32 2}
!2 = !{}
!3 = !{i16 6, i16 14}
!4 = !{void (i32, i32)* @the_test, !"the_test", !5, i32 0, !6, !0, !7, i32 0}
!5 = !{i32 2, i32 2}
!6 = !{i32 64, i32 68}
!7 = !{!"buffer_t", !"buffer_t"}
!8 = !{void (i32, i32)* @the_test, null, null, null, null}

0 comments on commit a8f65b1

Please sign in to comment.