Skip to content

Commit

Permalink
Merge remote-tracking branch 'intel_llvm/sycl' into esimd_L1_L2_rewor…
Browse files Browse the repository at this point in the history
…k_L3_assert
  • Loading branch information
v-klochkov committed Mar 5, 2024
2 parents 5fc647c + f8abcb7 commit f44dd13
Show file tree
Hide file tree
Showing 11 changed files with 36 additions and 27 deletions.
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,8 @@ static llvm::SmallVector<StringRef, 16> SYCLDeviceLibList {
"bfloat16", "crt", "cmath", "cmath-fp64", "complex", "complex-fp64",
#if defined(_WIN32)
"msvc-math",
#else
"sanitizer",
#endif
"imf", "imf-fp64", "itt-compiler-wrappers", "itt-stubs",
"itt-user-wrappers", "fallback-cassert", "fallback-cstring",
Expand Down
2 changes: 2 additions & 0 deletions clang/test/Driver/sycl-device-lib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle"
// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle"
// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-bf16.o" "-output={{.*}}libsycl-fallback-imf-bf16-{{.*}}.o" "-unbundle"

/// ###########################################################################
/// test sycl fallback device libraries are not linked by default
// RUN: %clangxx -fsycl -fsycl-device-lib-jit-link %s --sysroot=%S/Inputs/SYCL -### 2>&1 \
Expand Down Expand Up @@ -251,3 +252,4 @@
// SYCL_DEVICE_ASAN_MACRO-SAME: "USE_SYCL_DEVICE_ASAN"
// SYCL_DEVICE_ASAN_MACRO-NEXT: llvm-link{{.*}}
// SYCL_DEVICE_ASAN_MACRO: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-sanitizer.o" "-output={{.*}}libsycl-sanitizer-{{.*}}.o" "-unbundle"
// SYCL_DEVICE_ASAN_MACRO: llvm-link{{.*}} "-only-needed" "{{.*}}" "-o" "{{.*}}.bc" "--suppress-warnings"
9 changes: 0 additions & 9 deletions libdevice/include/spir_global_var.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,6 @@
// Treat this header as system one to workaround frontend's restriction
#pragma clang system_header

#ifndef SPIR_GLOBAL_VAR
#ifdef __SYCL_DEVICE_ONLY__
#define SPIR_GLOBAL_VAR __attribute__((sycl_global_var))
#else
#warning "SPIR_GLOBAL_VAR not defined in host mode. Defining as empty macro."
#define SPIR_GLOBAL_VAR
#endif
#endif

#define __SYCL_GLOBAL__ __attribute__((opencl_global))
#define __SYCL_LOCAL__ __attribute__((opencl_local))
#define __SYCL_PRIVATE__ __attribute__((opencl_private))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ inline constexpr grf_size_key::value_t<Size> grf_size;
inline constexpr grf_size_automatic_key::value_t grf_size_automatic;

} // namespace ext::intel::experimental
namespace ext::oneapi::experimental {
namespace ext::oneapi::experimental::detail {
template <unsigned int Size>
struct PropertyMetaInfo<
sycl::ext::intel::experimental::grf_size_key::value_t<Size>> {
Expand Down Expand Up @@ -79,7 +79,6 @@ struct ConflictingProperties<sycl::detail::register_alloc_mode_key, Properties>
sycl::ext::intel::experimental::grf_size_automatic_key,
Properties>::value> {};

} // namespace detail
} // namespace ext::oneapi::experimental
} // namespace ext::oneapi::experimental::detail
} // namespace _V1
} // namespace sycl
4 changes: 4 additions & 0 deletions sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,10 @@ class event_impl {
return MEventFromSubmittedExecCommandBuffer;
}

const std::vector<EventImplPtr> &getPostCompleteEvents() const {
return MPostCompleteEvents;
}

protected:
// When instrumentation is enabled emits trace event for event wait begin and
// returns the telemetry event generated for the wait
Expand Down
16 changes: 12 additions & 4 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,9 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
sycl::detail::CG::StorageInitHelper CGData) {
WriteLock Lock(MMutex);

std::vector<sycl::detail::EventImplPtr> PartitionEvents;
// Map of the partitions to their execution events
std::unordered_map<std::shared_ptr<partition>, sycl::detail::EventImplPtr>
PartitionsExecutionEvents;

auto CreateNewEvent([&]() {
auto NewEvent = std::make_shared<sycl::detail::event_impl>(Queue);
Expand All @@ -787,7 +789,7 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
}

for (auto const &DepPartition : CurrentPartition->MPredecessors) {
CGData.MEvents.push_back(MPartitionsExecutionEvents[DepPartition]);
CGData.MEvents.push_back(PartitionsExecutionEvents[DepPartition]);
}

auto CommandBuffer =
Expand Down Expand Up @@ -819,7 +821,13 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
sycl::backend::ext_oneapi_level_zero) {
Event->wait(Event);
} else {
auto &AttachedEventsList = Event->getPostCompleteEvents();
CGData.MEvents.reserve(AttachedEventsList.size() + 1);
CGData.MEvents.push_back(Event);
// Add events of the previous execution of all graph partitions.
for (auto &AttachedEvent : AttachedEventsList) {
CGData.MEvents.push_back(AttachedEvent);
}
}
++It;
} else {
Expand Down Expand Up @@ -929,15 +937,15 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
NewEvent->setStateIncomplete();
NewEvent->getPreparedDepsEvents() = ScheduledEvents;
}
MPartitionsExecutionEvents[CurrentPartition] = NewEvent;
PartitionsExecutionEvents[CurrentPartition] = NewEvent;
}

// Keep track of this execution event so we can make sure it's completed in
// the destructor.
MExecutionEvents.push_back(NewEvent);
// Attach events of previous partitions to ensure that when the returned event
// is complete all execution associated with the graph have been completed.
for (auto const &Elem : MPartitionsExecutionEvents) {
for (auto const &Elem : PartitionsExecutionEvents) {
if (Elem.second != NewEvent) {
NewEvent->attachEventToComplete(Elem.second);
}
Expand Down
3 changes: 0 additions & 3 deletions sycl/source/detail/graph_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1190,9 +1190,6 @@ class exec_graph_impl {
std::vector<sycl::detail::EventImplPtr> MExecutionEvents;
/// List of the partitions that compose the exec graph.
std::vector<std::shared_ptr<partition>> MPartitions;
/// Map of the partitions to their execution events
std::unordered_map<std::shared_ptr<partition>, sycl::detail::EventImplPtr>
MPartitionsExecutionEvents;
/// Storage for copies of nodes from the original modifiable graph.
std::vector<std::shared_ptr<node_impl>> MNodeStorage;
};
Expand Down
7 changes: 5 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
// REQUIRES: matrix
// REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943

// RUN: %{build} -o %t.out -ffp-model=precise
// RUN: %{run} %t.out
// RUN: %{build} -o %t_gpu.out -ffp-model=precise
// RUN: %if gpu %{ %{run} %t_gpu.out %}

// RUN: %{build} -ffp-model=precise -o %t_cpu.out -DtM=16 -DtK=32 -DNCACHE1=32 -DKCACHE1=32
// RUN: %if cpu %{ %{run} %t_cpu.out %}

// -ffp-model=precise is added to not depend on compiler defaults.

Expand Down
7 changes: 5 additions & 2 deletions sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
//===----------------------------------------------------------------------===//
// REQUIRES: matrix

// RUN: %{build} -o %t.out -ffp-model=precise
// RUN: %{run} %t.out
// RUN: %{build} -o %t_gpu.out -ffp-model=precise
// RUN: %if gpu %{ %{run} %t_gpu.out %}

// RUN: %{build} -ffp-model=precise -o %t_cpu.out -DtM=16 -DtK=32 -DNCACHE1=32 -DKCACHE1=32
// RUN: %if cpu %{ %{run} %t_cpu.out %}

// -ffp-model=precise is added to not depend on compiler defaults.

Expand Down
7 changes: 4 additions & 3 deletions sycl/test-e2e/Matrix/joint_matrix_prefetch_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,16 @@ int main() {
.get_info<sycl::ext::oneapi::experimental::info::device::
matrix_combinations>();
bool support_p = false;
for (int i = 0; i < combinations.size(); i++) {
if (combinations[i].atype == matrix_type::tf32) {
// joint_matrix_prefetch is not supported on DG2
for (unsigned int i = 0; i < combinations.size(); i++) {
if (combinations[i].nsize == 0 || combinations[i].nsize == 16) {
support_p = true;
break;
}
}
if (!support_p) {
std::cout << "Prefetch not supported on this device" << std::endl;
// Once the test is not marke as XFAIL, this should change to return 0;
// Once the test is not marked as XFAIL, this should change to return 0;
return 1;
}
static constexpr size_t M = TM * 2;
Expand Down
1 change: 0 additions & 1 deletion sycl/test-e2e/Plugin/level_zero_usm_residency.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ int main(int argc, char *argv[]) {
// DEVICE: ---> piextUSMDeviceAlloc
// DEVICE: ZE ---> zeMemAllocDevice
// DEVICE: ZE ---> zeContextMakeMemoryResident
// DEVICE-NOT: ZE ---> zeContextMakeMemoryResident

auto ptr2 = malloc_shared<int>(1, Q);
// SHARED: ---> piextUSMSharedAlloc
Expand Down

0 comments on commit f44dd13

Please sign in to comment.