Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Add force range rounding option and introduce new compiler flag #12715

Merged
merged 8 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,9 @@ LANGOPT(
"SYCL compiler assumes value fits within MAX_INT for member function of "
"get/operator[], get_id/operator[] and get_global_id/get_global_linear_id "
"in SYCL class id, iterm and nd_iterm")
LANGOPT(SYCLDisableRangeRounding, 1, 0, "Disable parallel for range rounding")
ldrumm marked this conversation as resolved.
Show resolved Hide resolved
ENUM_LANGOPT(SYCLRangeRounding, SYCLRangeRoundingPreference, 2,
SYCLRangeRoundingPreference::On,
"Preference for SYCL parallel_for range rounding")
LANGOPT(SYCLEnableIntHeaderDiags, 1, 0, "Enable diagnostics that require the "
"SYCL integration header")
LANGOPT(SYCLAllowVirtualFunctions, 1, 0,
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/LangOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,12 @@ class LangOptionsBase {
undefined
};

/// User preference for SYCL parallel_for range rounding, selected with
/// -fsycl-range-rounding=<on|disable|force>.
enum class SYCLRangeRoundingPreference {
  /// Generate range rounded kernels as well as unrounded kernels.
  On = 0,
  /// Do not generate range rounded kernels.
  Disable = 1,
  /// Only generate range rounded kernels.
  Force = 2
};

enum HLSLLangStd {
HLSL_Unset = 0,
HLSL_2015 = 2015,
Expand Down
18 changes: 15 additions & 3 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -3997,6 +3997,21 @@ def fsycl_host_compiler_options_EQ : Joined<["-"], "fsycl-host-compiler-options=
Visibility<[ClangOption, CLOption, DXCOption]>, HelpText<"When performing the host compilation with "
"-fsycl-host-compiler specified, use the given options during that compile. "
"Options are expected to be a quoted list of space separated options.">;
// -fsycl-range-rounding=<value>: joined option accepting "on", "disable" or
// "force". The value is normalized to the matching
// LangOptions::SYCLRangeRoundingPreference enumerator and marshalled into
// LangOpts.SYCLRangeRounding; "On" is used when the option is absent.
def fsycl_range_rounding_EQ : Joined<["-"], "fsycl-range-rounding=">,
Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>,
Values<"on,disable,force">,
NormalizedValuesScope<"LangOptions::SYCLRangeRoundingPreference">,
NormalizedValues<["On", "Disable", "Force"]>,
MarshallingInfoEnum<LangOpts<"SYCLRangeRounding">, "On">,
HelpText<"Options for range rounding of SYCL range kernels: "
"disable (do not generate range rounded kernels) "
"force (only generate range rounded kernels) "
"on (generate range rounded kernels as well as unrounded kernels). Default is 'on'">;
// Legacy spelling, kept as a deprecated alias that expands to
// -fsycl-range-rounding=disable.
def fsycl_disable_range_rounding : Flag<["-"], "fsycl-disable-range-rounding">,
Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>,
Alias<fsycl_range_rounding_EQ>, AliasArgs<["disable"]>,
HelpText<"Deprecated: please use -fsycl-range-rounding=disable instead.">,
Flags<[Deprecated]>;
def fno_sycl_use_footer : Flag<["-"], "fno-sycl-use-footer">, Visibility<[ClangOption, CLOption, DXCOption]>,
HelpText<"Disable usage of the integration footer during SYCL enabled "
"compilations.">;
Expand Down Expand Up @@ -8256,9 +8271,6 @@ defm sycl_allow_func_ptr: BoolFOption<"sycl-allow-func-ptr",
def fenable_sycl_dae : Flag<["-"], "fenable-sycl-dae">,
HelpText<"Enable Dead Argument Elimination in SPIR kernels">,
MarshallingInfoFlag<LangOpts<"EnableDAEInSpirKernels">>;
def fsycl_disable_range_rounding : Flag<["-"], "fsycl-disable-range-rounding">,
HelpText<"Disable parallel for range rounding.">,
MarshallingInfoFlag<LangOpts<"SYCLDisableRangeRounding">>;
def fsycl_enable_int_header_diags: Flag<["-"], "fsycl-enable-int-header-diags">,
HelpText<"Enable diagnostics that require the SYCL integration header.">,
MarshallingInfoFlag<LangOpts<"SYCLEnableIntHeaderDiags">>;
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,10 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
checkSingleArgValidity(DeviceCodeSplit,
{"per_kernel", "per_source", "auto", "off"});

Arg *RangeRoundingPreference =
C.getInputArgs().getLastArg(options::OPT_fsycl_range_rounding_EQ);
checkSingleArgValidity(RangeRoundingPreference, {"disable", "force", "on"});

Arg *SYCLForceTarget =
getArgRequiringSYCLRuntime(options::OPT_fsycl_force_target_EQ);
if (SYCLForceTarget) {
Expand Down
10 changes: 8 additions & 2 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5427,6 +5427,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_sycl_esimd_force_stateless_mem, true))
CmdArgs.push_back("-fno-sycl-esimd-force-stateless-mem");

if (Arg *A = Args.getLastArg(options::OPT_fsycl_range_rounding_EQ))
A->render(Args, CmdArgs);

// Add the Unique ID prefix
StringRef UniqueID = D.getSYCLUniqueID(Input.getBaseInput());
if (!UniqueID.empty())
Expand All @@ -5451,10 +5454,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
bool DisableRangeRounding = false;
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O0))
DisableRangeRounding = true;
// If the user has set an explicit range rounding preference, let it
// override the default of disabling range rounding at -O0
if (!Args.getLastArg(options::OPT_fsycl_range_rounding_EQ))
DisableRangeRounding = true;
}
if (DisableRangeRounding || HasFPGA)
CmdArgs.push_back("-fsycl-disable-range-rounding");
CmdArgs.push_back("-fsycl-range-rounding=disable");
hdelan marked this conversation as resolved.
Show resolved Hide resolved
hdelan marked this conversation as resolved.
Show resolved Hide resolved

if (HasFPGA) {
// Pass -fintelfpga to both the host and device SYCL compilations if set.
Expand Down
10 changes: 9 additions & 1 deletion clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -579,8 +579,16 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
// Set the __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ or
// __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ macro for both host and device
// compilations when -fsycl-range-rounding=disable or
// -fsycl-range-rounding=force is used, respectively.
if (LangOpts.SYCLDisableRangeRounding)
switch (LangOpts.getSYCLRangeRounding()) {
case LangOptions::SYCLRangeRoundingPreference::Disable:
Builder.defineMacro("__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__");
break;
case LangOptions::SYCLRangeRoundingPreference::Force:
Builder.defineMacro("__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__");
hdelan marked this conversation as resolved.
Show resolved Hide resolved
break;
default:
break;
}
}

if (LangOpts.DeclareSPIRVBuiltins) {
Expand Down
11 changes: 10 additions & 1 deletion clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5172,10 +5172,19 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
O << "#endif //" << Macro.first << "\n\n";
}

if (S.getLangOpts().SYCLDisableRangeRounding) {
hdelan marked this conversation as resolved.
Show resolved Hide resolved
switch (S.getLangOpts().getSYCLRangeRounding()) {
case LangOptions::SYCLRangeRoundingPreference::Disable:
O << "#ifndef __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ \n";
O << "#define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1\n";
O << "#endif //__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__\n\n";
break;
case LangOptions::SYCLRangeRoundingPreference::Force:
O << "#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ \n";
O << "#define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1\n";
O << "#endif //__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__\n\n";
break;
default:
break;
}

hdelan marked this conversation as resolved.
Show resolved Hide resolved
if (SpecConsts.size() > 0) {
Expand Down
10 changes: 9 additions & 1 deletion clang/test/CodeGenSYCL/integration_header_ppmacros.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-SYCL2020
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -sycl-std=2017 -fsycl-int-header=%t.h %s
// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-SYCL2017
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-disable-range-rounding -fsycl-int-header=%t.h %s
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-range-rounding=disable -fsycl-int-header=%t.h %s
// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-RANGE
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-range-rounding=force -fsycl-int-header=%t.h %s
// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-FORCE-RANGE
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-int-header=%t.h %s
// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-NO-RANGE

Expand Down Expand Up @@ -33,4 +35,10 @@ int main() {
// CHECK-RANGE: #ifndef __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__
// CHECK-RANGE-NEXT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1
// CHECK-RANGE-NEXT: #endif //__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__

// CHECK-FORCE-RANGE: #ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
// CHECK-FORCE-RANGE-NEXT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1
// CHECK-FORCE-RANGE-NEXT: #endif //__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__

// CHECK-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1
// CHECK-NO-RANGE-NOT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1
14 changes: 7 additions & 7 deletions clang/test/Driver/sycl-offload-intelfpga.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@
// CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fsycl-is-device"{{.*}} "-fintelfpga"
// CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fintelfpga"{{.*}} "-fsycl-is-host"

/// FPGA target implies -fsycl-disable-range-rounding
/// FPGA target implies -fsycl-range-rounding=disable
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fintelfpga %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding"
// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-disable-range-rounding"{{.*}} "-fsycl-is-host"
// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable"
// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-range-rounding=disable"{{.*}} "-fsycl-is-host"

/// FPGA target implies -emit-only-kernels-as-entry-points in sycl-post-link
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fintelfpga %s 2>&1 \
Expand All @@ -41,12 +41,12 @@
// RUN: | FileCheck -check-prefix=CHK-NON-KERNEL-ENTRY-POINTS %s
// CHK-NON-KERNEL-ENTRY-POINTS: sycl-post-link{{.*}} "-emit-only-kernels-as-entry-points"

/// -fsycl-disable-range-rounding is applied to all compilations if fpga is used
/// -fsycl-range-rounding=disable is applied to all compilations if fpga is used
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown,spir64_gen-unknown-unknown %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING-MULTI %s
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding"
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-fsycl-disable-range-rounding"{{.*}} "-fsycl-is-host"
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_fpga-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding"
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable"
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-fsycl-range-rounding=disable"{{.*}} "-fsycl-is-host"
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_fpga-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable"

/// -fintelfpga with -reuse-exe=
// RUN: touch %t.cpp
Expand Down
26 changes: 24 additions & 2 deletions clang/test/Driver/sycl-offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -508,13 +508,33 @@
// RUN: | FileCheck -check-prefix=CHK-TOOLS-OPTS2 %s
// CHK-TOOLS-OPTS2: clang-offload-wrapper{{.*}} "-link-opts=-DFOO1 -DFOO2"

/// -fsycl-disable-range-rounding settings
/// -fsycl-range-rounding settings
///
/// Check that the driver flag is passed to cc1
// RUN: %clang -### -fsycl -fsycl-range-rounding=disable %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-DISABLE %s
// RUN: %clang -### -fsycl -fsycl-range-rounding=force %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-FORCE %s
// RUN: %clang -### -fsycl -fsycl-range-rounding=on %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-ON %s
// CHK-DRIVER-RANGE-ROUNDING-DISABLE: "-cc1{{.*}}-fsycl-range-rounding=disable"
// CHK-DRIVER-RANGE-ROUNDING-FORCE: "-cc1{{.*}}-fsycl-range-rounding=force"
// CHK-DRIVER-RANGE-ROUNDING-ON: "-cc1{{.*}}-fsycl-range-rounding=on"
///
///
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \
// RUN: -fsycl-targets=spir64 -O0 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-DISABLE-RANGE-ROUNDING %s
// RUN: %clang_cl -### -fsycl -fsycl-targets=spir64 -Od %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-DISABLE-RANGE-ROUNDING %s
// CHK-DISABLE-RANGE-ROUNDING: "-fsycl-disable-range-rounding"
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \
// RUN: -O0 -fsycl-range-rounding=force %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-OVERRIDE-RANGE-ROUNDING %s
/// clang-cl: an explicit -fsycl-range-rounding=force must take precedence over
/// the range rounding default that -Od would otherwise select. The input file
/// and the stderr redirection are each given exactly once.
// RUN: %clang_cl -### -fsycl -Od -fsycl-range-rounding=force %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-OVERRIDE-RANGE-ROUNDING %s
// CHK-DISABLE-RANGE-ROUNDING: "-fsycl-range-rounding=disable"
hdelan marked this conversation as resolved.
Show resolved Hide resolved
// CHK-OVERRIDE-RANGE-ROUNDING: "-fsycl-range-rounding=force"
// CHK-OVERRIDE-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=disable"

// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \
// RUN: -fsycl-targets=spir64 -O2 %s 2>&1 \
Expand All @@ -527,6 +547,8 @@
// RUN: %clang_cl -### -fsycl -fsycl-targets=spir64 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
// CHK-RANGE-ROUNDING-NOT: "-fsycl-disable-range-rounding"
// CHK-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=disable"
// CHK-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=force"

/// ###########################################################################

Expand Down
54 changes: 41 additions & 13 deletions clang/test/Preprocessor/predefined-macros.c
Original file line number Diff line number Diff line change
Expand Up @@ -284,32 +284,60 @@
// CHECK-RDC: #define __CLANG_RDC__ 1

// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \
// RUN: -triple spir64-unknown-unknown -fsycl-disable-range-rounding -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE
// RUN: -triple spir64-unknown-unknown -fsycl-range-rounding=disable -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE

// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \
// RUN: -triple spir64_fpga-unknown-unknown -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE

// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-disable-range-rounding \
// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-range-rounding=disable \
// RUN: -triple spir64_fpga-unknown-unknown -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE

// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE

// RUN: %clang_cc1 %s -E -dM -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE

// RUN: %clang_cc1 %s -E -dM -fsycl-is-host \
// RUN: -triple x86_64-unknown-linux-gnu -fsycl-disable-range-rounding -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE
// RUN: -triple x86_64-unknown-linux-gnu -fsycl-range-rounding=disable -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE

// RUN: %clang_cc1 %s -E -dM -fsycl-is-host -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE

// CHECK-RANGE: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1
// CHECK-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1
// CHECK-DISABLE-RANGE: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1
// CHECK-DISABLE-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1

// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \
// RUN: -triple spir64-unknown-unknown -fsycl-range-rounding=force -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE

// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \
// RUN: -triple spir64_fpga-unknown-unknown -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE

// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-range-rounding=force \
// RUN: -triple spir64_fpga-unknown-unknown -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE

// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE

// RUN: %clang_cc1 %s -E -dM -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE

// RUN: %clang_cc1 %s -E -dM -fsycl-is-host \
// RUN: -triple x86_64-unknown-linux-gnu -fsycl-range-rounding=force -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE

// RUN: %clang_cc1 %s -E -dM -fsycl-is-host -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE

// CHECK-FORCE-RANGE: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1
// CHECK-FORCE-NO-RANGE-NOT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1

// RUN: %clang_cc1 %s -E -dM -o - -x hip -triple x86_64-unknown-linux-gnu \
// RUN: -fgpu-default-stream=per-thread \
Expand All @@ -334,4 +362,4 @@
// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device | FileCheck -match-full-lines \
// RUN: %s --check-prefix=CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG: #define __HIPSTDPAR__ 1
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1
7 changes: 5 additions & 2 deletions sycl/doc/design/ParallelForRangeRounding.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,8 @@ rounding will only be used if the SYCL runtime X dimension exceeds some minimum
value, which can be configured using the
`SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS` environment variable.

Generation of range rounded kernels can be disabled by using the compiler flag
`-fsycl-disable-range-rounding`.
In order to reduce binary size, the user can tell the compiler to generate only
the range rounded kernel by using the flag `-fsycl-range-rounding=force`. The
user can also tell the SYCL implementation to produce only the unrounded kernel
by using the flag `-fsycl-range-rounding=disable`. By default both kernels are
generated, which is equivalent to `-fsycl-range-rounding=on`.
hdelan marked this conversation as resolved.
Show resolved Hide resolved
12 changes: 11 additions & 1 deletion sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1181,7 +1181,6 @@ class __SYCL_EXPORT handler {
// non-32-bit global range, we wrap the old kernel in a new kernel
// that has each work item perform multiple invocations of the old
// kernel in a 32-bit global range.
auto Dev = detail::getSyclObjImpl(detail::getDeviceFromHandler(*this));
id<Dims> MaxNWGs = [&] {
auto [MaxWGs, HasMaxWGs] = getMaxWorkGroups_v2();
if (!HasMaxWGs) {
Expand Down Expand Up @@ -1224,6 +1223,11 @@ class __SYCL_EXPORT handler {
// will yield a rounded-up value for the total range.
Adjust(0, ((RoundedRange[0] + GoodFactor - 1) / GoodFactor) * GoodFactor);
}
#ifdef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
// If we are forcing range rounding kernels to be used, we always want the
// rounded range kernel to be generated, even if rounding isn't needed
DidAdjust = true;
#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__

for (int i = 0; i < Dims; ++i)
if (RoundedRange[i] > MaxRange[i])
Expand Down Expand Up @@ -1330,6 +1334,9 @@ class __SYCL_EXPORT handler {
{
(void)UserRange;
(void)Props;
#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
// If parallel_for range rounding is forced then only the range rounded
// kernel is generated
kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
PropertiesT>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
Expand All @@ -1340,6 +1347,9 @@ class __SYCL_EXPORT handler {
std::move(KernelFunc));
setType(detail::CG::Kernel);
#endif
#else
(void)KernelFunc;
#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
}
}

Expand Down
Loading
Loading