Skip to content

Commit

Permalink
[SYCL] Add support for multiple filtered outputs in sycl-post-link (#…
Browse files Browse the repository at this point in the history
…12727)

This PR adds the required changes to `sycl-post-link` support optional
kernel features in AOT mode as described by the design doc in
#12252. Additionally, it also updates
the driver to invoke `sycl-post-link` with a device architecture when
Intel GPU targets are passed in `-fsycl-targets`.
  • Loading branch information
jzc committed Apr 8, 2024
1 parent 1bd076b commit c821dc9
Show file tree
Hide file tree
Showing 13 changed files with 554 additions and 200 deletions.
7 changes: 6 additions & 1 deletion clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10611,7 +10611,12 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,

// Add output file table file option
assert(Output.isFilename() && "output must be a filename");
addArgs(CmdArgs, TCArgs, {"-o", Output.getFilename()});
StringRef Device = JA.getOffloadingArch();
std::string OutputArg = Output.getFilename();
if (T.getSubArch() == llvm::Triple::SPIRSubArch_gen && Device.data())
OutputArg = ("intel_gpu_" + Device + "," + OutputArg).str();

addArgs(CmdArgs, TCArgs, {"-o", OutputArg});

const toolchains::SYCLToolChain &TC =
static_cast<const toolchains::SYCLToolChain &>(getToolChain());
Expand Down
10 changes: 8 additions & 2 deletions clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
/// -fsycl-targets=spir64_x86_64 should set a specific macro
// RUN: %clangxx -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=MACRO_X86_64
// RUN: %clang_cl -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \
// RUN: %clang_cl -c -fsycl -fsycl-targets=spir64_x86_64 -### -- %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=MACRO_X86_64
// MACRO_X86_64: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown"
// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__"
Expand All @@ -111,7 +111,7 @@
/// test for invalid intel arch
// RUN: not %clangxx -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=BAD_INPUT
// RUN: not %clang_cl -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \
// RUN: not %clang_cl -c -fsycl -fsycl-targets=intel_gpu_bad -### -- %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=BAD_INPUT
// BAD_INPUT: error: SYCL target is invalid: 'intel_gpu_bad'

Expand Down Expand Up @@ -233,3 +233,9 @@
// CHECK_TOOLS_BEOPTS_MIX: opencl-aot{{.*}} "-DCPU"
// CHECK_TOOLS_BEOPTS_MIX-NOT: "-DDG1"
// CHECK_TOOLS_BEOPTS_MIX: ocloc{{.*}} "-device" "skl"{{.*}}"-DSKL2"

/// Check that target is passed to sycl-post-link for filtering
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,intel_gpu_dg1 \
// RUN: -### %s 2>&1 | FileCheck %s --check-prefix=CHECK_TOOLS_FILTER
// CHECK_TOOLS_FILTER: sycl-post-link{{.*}} "-o" "intel_gpu_pvc,{{.*}}"
// CHECK_TOOLS_FILTER: sycl-post-link{{.*}} "-o" "intel_gpu_dg1,{{.*}}"
6 changes: 6 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,9 @@ def : TargetInfo<"x86_64", [], [], "", "", 1>;
//defvar AspectList = [AspectCpu] # AllUSMAspects;
//def : TargetInfo<"Test", AspectList, []>;
//def : TargetInfo<"Test2", [AspectCpu] # AllUSMAspects, []>;

// TODO: The aspects listed for the intel_gpu targets right now are incomplete;
// only the fp16/fp64/atomic64 aspects are listed.
def : TargetInfo<"intel_gpu_cfl", [AspectFp16, AspectFp64, AspectAtomic64], [8, 16, 32]>;
def : TargetInfo<"intel_gpu_tgllp", [AspectFp16, AspectAtomic64], [8, 16, 32]>;
def : TargetInfo<"intel_gpu_pvc", [AspectFp16, AspectFp64, AspectAtomic64], [16, 32]>;
9 changes: 9 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#ifndef LLVM_SYCLLOWERIR_MODULE_SPLITTER_H
#define LLVM_SYCLLOWERIR_MODULE_SPLITTER_H

#include "SYCLDeviceRequirements.h"

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
Expand Down Expand Up @@ -108,6 +110,7 @@ class ModuleDesc {
std::unique_ptr<Module> M;
EntryPointGroup EntryPoints;
bool IsTopLevel = false;
mutable std::optional<SYCLDeviceRequirements> Reqs;

public:
struct Properties {
Expand Down Expand Up @@ -193,6 +196,12 @@ class ModuleDesc {

ModuleDesc clone() const;

const SYCLDeviceRequirements &getOrComputeDeviceRequirements() const {
if (!Reqs.has_value())
Reqs = computeDeviceRequirements(*this);
return *Reqs;
}

#ifndef NDEBUG
void verifyESIMDProperty() const;
void dump() const;
Expand Down
46 changes: 46 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//===----- SYCLDeviceRequirements.h - collect data for used aspects ------=-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"

#include <cstdint>
#include <map>
#include <optional>
#include <set>
#include <vector>

namespace llvm {

class StringRef;

namespace module_split {
class ModuleDesc;
}
namespace util {
class PropertyValue;
}

struct SYCLDeviceRequirements {
std::set<uint32_t> Aspects;
std::set<uint32_t> FixedTarget;
std::optional<llvm::SmallVector<uint64_t, 3>> ReqdWorkGroupSize;
std::optional<llvm::SmallString<256>> JointMatrix;
std::optional<llvm::SmallString<256>> JointMatrixMad;
std::optional<uint32_t> SubGroupSize;

std::map<StringRef, util::PropertyValue> asMap() const;
};

SYCLDeviceRequirements
computeDeviceRequirements(const module_split::ModuleDesc &M);

} // namespace llvm
1 change: 1 addition & 0 deletions llvm/lib/SYCLLowerIR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ add_llvm_component_library(LLVMSYCLLowerIR
ModuleSplitter.cpp
MutatePrintfAddrspace.cpp
SYCLAddOptLevelAttribute.cpp
SYCLDeviceRequirements.cpp
SYCLPropagateAspectsUsage.cpp
SYCLPropagateJointMatrixUsage.cpp
SYCLUtils.cpp
Expand Down
137 changes: 137 additions & 0 deletions llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
//===----- SYCLDeviceRequirements.cpp - collect data for used aspects ----=-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/SYCLLowerIR/SYCLDeviceRequirements.h"

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Module.h"
#include "llvm/SYCLLowerIR/ModuleSplitter.h"
#include "llvm/Support/PropertySetIO.h"

#include <set>
#include <vector>

using namespace llvm;

static int64_t ExtractSignedIntegerFromMDNodeOperand(const MDNode *N,
unsigned OpNo) {
Constant *C = cast<ConstantAsMetadata>(N->getOperand(OpNo).get())->getValue();
return C->getUniqueInteger().getSExtValue();
}
static uint64_t ExtractUnsignedIntegerFromMDNodeOperand(const MDNode *N,
unsigned OpNo) {
Constant *C = cast<ConstantAsMetadata>(N->getOperand(OpNo).get())->getValue();
return C->getUniqueInteger().getZExtValue();
}
static llvm::StringRef ExtractStringFromMDNodeOperand(const MDNode *N,
unsigned OpNo) {
MDString *S = cast<llvm::MDString>(N->getOperand(OpNo).get());
return S->getString();
}

SYCLDeviceRequirements
llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) {
SYCLDeviceRequirements Reqs;
// Process all functions in the module
for (const Function &F : MD.getModule()) {
if (auto *MDN = F.getMetadata("sycl_used_aspects")) {
for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) {
auto Val = ExtractSignedIntegerFromMDNodeOperand(MDN, I);
// Don't put internal aspects (with negative integer value) into the
// requirements, they are used only for device image splitting.
if (Val >= 0)
Reqs.Aspects.insert(Val);
}
}

if (auto *MDN = F.getMetadata("sycl_fixed_targets")) {
for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) {
auto Val = ExtractUnsignedIntegerFromMDNodeOperand(MDN, I);
Reqs.FixedTarget.insert(Val);
}
}

if (auto *MDN = F.getMetadata("reqd_work_group_size")) {
llvm::SmallVector<uint64_t, 3> NewReqdWorkGroupSize;
for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I)
NewReqdWorkGroupSize.push_back(
ExtractUnsignedIntegerFromMDNodeOperand(MDN, I));
if (!Reqs.ReqdWorkGroupSize.has_value())
Reqs.ReqdWorkGroupSize = NewReqdWorkGroupSize;
}

if (auto *MDN = F.getMetadata("sycl_joint_matrix")) {
auto Val = ExtractStringFromMDNodeOperand(MDN, 0);
if (!Val.empty())
Reqs.JointMatrix = Val;
}

if (auto *MDN = F.getMetadata("sycl_joint_matrix_mad")) {
auto Val = ExtractStringFromMDNodeOperand(MDN, 0);
if (!Val.empty())
Reqs.JointMatrixMad = Val;
}
}

// Process just the entry points in the module
for (const Function *F : MD.entries()) {
if (auto *MDN = F->getMetadata("intel_reqd_sub_group_size")) {
// There should only be at most one function with
// intel_reqd_sub_group_size metadata when considering the entry
// points of a module, but not necessarily when considering all the
// functions of a module: an entry point with a
// intel_reqd_sub_group_size can call an ESIMD function through
// invoke_esimd, and that function has intel_reqd_sub_group_size=1,
// which is valid.
assert(
MDN->getNumOperands() == 1 &&
"intel_reqd_sub_group_size metadata expects exactly one argument!");
auto MDValue = ExtractUnsignedIntegerFromMDNodeOperand(MDN, 0);
if (!Reqs.SubGroupSize)
Reqs.SubGroupSize = MDValue;
else
assert(*Reqs.SubGroupSize == static_cast<uint32_t>(MDValue));
}
}
return Reqs;
}

std::map<StringRef, util::PropertyValue> SYCLDeviceRequirements::asMap() const {
std::map<StringRef, util::PropertyValue> Requirements;

// For all properties except for "aspects", we'll only add the
// value to the map if the corresponding value from
// SYCLDeviceRequirements has a value/is non-empty.
Requirements["aspects"] =
std::vector<uint32_t>(Aspects.begin(), Aspects.end());

if (!FixedTarget.empty())
Requirements["fixed_target"] =
std::vector<uint32_t>(FixedTarget.begin(), FixedTarget.end());

// TODO: Before intel/llvm#10620, the reqd_work_group_size attribute
// stores its values as uint32_t, but this needed to be expanded to
// uint64_t. However, this change did not happen in ABI-breaking
// window, so we attach the required work-group size as the
// reqd_work_group_size_uint64_t attribute. At the next ABI-breaking
// window, this can be changed back to reqd_work_group_size.
if (ReqdWorkGroupSize.has_value())
Requirements["reqd_work_group_size_uint64_t"] = *ReqdWorkGroupSize;

if (JointMatrix.has_value())
Requirements["joint_matrix"] = *JointMatrix;

if (JointMatrixMad.has_value())
Requirements["joint_matrix_mad"] = *JointMatrixMad;

if (SubGroupSize.has_value())
Requirements["reqd_sub_group_size"] = *SubGroupSize;

return Requirements;
}
2 changes: 1 addition & 1 deletion llvm/test/tools/sycl-post-link/help.test
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ CHECK: sycl-post-link options:
CHECK: --device-globals - Lower and generate information about device global variables
CHECK: -f - Enable binary output on terminals
CHECK: --ir-output-only - Output single IR file
CHECK: -o <filename> - Output filename
CHECK: -o <target filename pair> - Specifies an output file. Multiple output files can be specified. Additionally, a target may be specified alongside an output file, which has the effect that when module splitting is performed, the modules that are in that output table are filtered so those modules are compatible with the target.
CHECK: --out-dir=<dirname> - Directory where files listed in the result file table will be output
CHECK: --spec-const=<value> - lower and generate specialization constants information
CHECK: =native - lower spec constants to native spirv instructions so that these values could be set at runtime
Expand Down
Loading

0 comments on commit c821dc9

Please sign in to comment.