Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sycl-rel_5_2_0: [HIP][UR] Fix memory type detection in allocation info queries and USM copy2D (#13059) #13472

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
22e9785
[UR][L0] Support for urUsmP2PPeerAccessGetInfoExp to query p2p access…
nrspruit Mar 18, 2024
fa53fea
[CUDA][LIBCLC] Implement RC11 seq_cst for PTX6.0 (#12516)
JackAKirk Mar 18, 2024
0326cdc
[UR] Add urProgramGetGlobalVariablePointer entrypoint (#12496)
fabiomestre Mar 19, 2024
a486c12
[SYCL][Graph][UR] Update UR to support updating kernel commands in co…
againull Mar 19, 2024
0838aba
[UR] CI for UR PR refactor-guess-local-worksize (#12663)
hdelan Mar 20, 2024
257ac92
[SYCL][Graph][HIP] Set minimum ROCm version for graphs (#13035)
EwanC Mar 21, 2024
42919a9
[UR][L0] Fix Native Host memory usage on device with copy back sync (…
nrspruit Mar 21, 2024
1ba64e4
[UR][L0] Enable default support for L0 in-order lists (#13033)
raiyanla Mar 22, 2024
455c764
[SYCL][Graph][L0] Test Coverity fix (#13075)
EwanC Mar 25, 2024
1de8dbe
[UR][L0] fix a deadlock on a recursive event rwlock (#13112)
pbalcer Mar 27, 2024
4c54bfe
[UR] Refactor Device Initialisation (#12762)
hdelan Mar 28, 2024
6fd40bb
[UR] Pull in UR changes to add exec error status to events. (#13127)
aarongreig Apr 1, 2024
30c1495
[UR] Remove unused function prototypes (#13072)
hdelan Apr 1, 2024
a669736
[UR] Add DEVICE_NOT_AVAILABLE UR error code and PI translation for sa…
aarongreig Apr 2, 2024
4627abf
[UR][CL] Atomic order memory capability for Intel FPGA driver (#13041)
kbenzie Apr 5, 2024
40c2781
[UR][L0] Fix DeviceInfo global mem free to report unsupported given M…
nrspruit Apr 8, 2024
1985ba5
[SYCL][PI] Add PI_ERROR_UNSUPPORTED_FEATURE error code (#13036)
steffenlarsen Mar 20, 2024
e9ec31d
[Bindless][Exp][NFC] Remove Unnecessary 3D Array Image Helpers (#13022)
isaacault Mar 18, 2024
92e0423
[SYCL][Bindless][Doc] Add support for cubemaps (#12996)
Seanst98 Apr 9, 2024
b125ddf
[SYCL][Bindless][E2E] fix unsampled images test failure (#13007)
cppchedy Mar 29, 2024
74d213d
[SYCL][Bindless][E2E] fix missing scope when using equal_vec function…
cppchedy Apr 2, 2024
7ce8483
[HIP][UR] Fix memory type detection in allocation info queries and US…
GeorgeWeb Apr 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_add.cl
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ Memory order is stored in the lowest 5 bits */
ADDR_SPACE, ADDR_SPACE_NV) \
} \
break; \
case SequentiallyConsistent: \
if (__clc_nvvm_reflect_arch() >= 700) { \
__CLC_NVVM_FENCE_SC_SM70() \
__CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
ADDR_SPACE_NV, _acq_rel) \
break; \
} \
} \
__builtin_trap(); \
__builtin_unreachable(); \
Expand Down
8 changes: 8 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include <atomic_helpers.h>
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

Expand Down Expand Up @@ -120,6 +121,13 @@ Memory order is stored in the lowest 5 bits */ \
TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \
} \
break; \
case SequentiallyConsistent: \
if (__clc_nvvm_reflect_arch() >= 700) { \
__CLC_NVVM_FENCE_SC_SM70() \
__CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
break; \
} \
} \
__builtin_trap(); \
__builtin_unreachable(); \
Expand Down
16 changes: 16 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,15 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
} \
}

// Emits a sequentially consistent PTX fence. The fence scope suffix is chosen
// from the `scope` variable visible at the expansion site:
//   CrossDevice -> fence.sc.sys, Device -> fence.sc.gpu, otherwise fence.sc.cta
// The expansion is a complete statement, so call sites add no semicolon.
#define __CLC_NVVM_FENCE_SC_SM70()                                             \
  switch (scope) {                                                             \
  case CrossDevice:                                                            \
    __asm__ __volatile__("fence.sc.sys;");                                     \
    break;                                                                     \
  case Device:                                                                 \
    __asm__ __volatile__("fence.sc.gpu;");                                     \
    break;                                                                     \
  default:                                                                     \
    __asm__ __volatile__("fence.sc.cta;");                                     \
    break;                                                                     \
  }

#define __CLC_NVVM_ATOMIC_IMPL( \
TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, NAME_MANGLED, \
ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV, SUBSTITUTION) \
Expand Down Expand Up @@ -117,6 +126,13 @@ Memory order is stored in the lowest 5 bits */ \
OP, ADDR_SPACE, ADDR_SPACE_NV) \
} \
break; \
case SequentiallyConsistent: \
if (__clc_nvvm_reflect_arch() >= 700) { \
__CLC_NVVM_FENCE_SC_SM70() \
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
break; \
} \
} \
__builtin_trap(); \
__builtin_unreachable(); \
Expand Down
7 changes: 7 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include <atomic_helpers.h>
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

Expand Down Expand Up @@ -53,6 +54,12 @@ Memory order is stored in the lowest 5 bits */ \
case Acquire: \
__CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
break; \
case SequentiallyConsistent: \
__CLC_NVVM_FENCE_SC_SM70() \
__CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
break; \
} \
} else { \
TYPE_NV res = __nvvm_volatile_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \
Expand Down
8 changes: 8 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include <atomic_helpers.h>
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

Expand Down Expand Up @@ -54,6 +55,13 @@ Memory order is stored in the lowest 5 bits */ \
__CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
ADDR_SPACE, ADDR_SPACE_NV, \
_release) \
break; \
case SequentiallyConsistent: \
__CLC_NVVM_FENCE_SC_SM70() \
__CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
ADDR_SPACE, ADDR_SPACE_NV, \
_release) \
break; \
} \
} else { \
switch (order) { \
Expand Down
127 changes: 127 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/images/image.cl
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,11 @@ pixelf32 as_pixelf32(int4 v) { return as_float4(v); }
return (to_t##2)((to_t)from.x, (to_t)from.y); \
}

// Emits cast_<from_t>4_to_<to_t>: narrows a 4-component vector to a single
// value by converting its first component; the other components are dropped.
#define _DEFINE_VEC4_TO_SINGLE_CAST(from_t, to_t)                              \
  inline to_t cast_##from_t##4_to_##to_t(from_t##4 from) {                     \
    const from_t first = from.x;                                               \
    return (to_t)first;                                                        \
  }

// Emits cast_<from_t>_to_<to_t>: converts its argument with a C-style cast.
#define _DEFINE_CAST(from_t, to_t)                                             \
  inline to_t cast_##from_t##_to_##to_t(from_t from) {                         \
    const to_t converted = (to_t)from;                                         \
    return converted;                                                          \
  }

Expand Down Expand Up @@ -278,6 +283,17 @@ _DEFINE_VEC4_TO_VEC2_CAST(float, half)
_DEFINE_VEC4_TO_VEC2_CAST(int, uint)
_DEFINE_VEC4_TO_VEC2_CAST(short, ushort)

// Instantiate the vec4 -> single-value casts. Each line defines
// cast_<from>4_to_<to>, which converts the first component of a <from>4
// vector to <to>.
_DEFINE_VEC4_TO_SINGLE_CAST(int, int)
_DEFINE_VEC4_TO_SINGLE_CAST(uint, uint)
_DEFINE_VEC4_TO_SINGLE_CAST(float, float)
_DEFINE_VEC4_TO_SINGLE_CAST(short, short)
_DEFINE_VEC4_TO_SINGLE_CAST(short, char)
_DEFINE_VEC4_TO_SINGLE_CAST(int, short)
_DEFINE_VEC4_TO_SINGLE_CAST(int, char)
_DEFINE_VEC4_TO_SINGLE_CAST(uint, ushort)
_DEFINE_VEC4_TO_SINGLE_CAST(uint, uchar)
_DEFINE_VEC4_TO_SINGLE_CAST(float, half)

_DEFINE_VEC2_CAST(int, float)
_DEFINE_VEC2_CAST(short, char)
_DEFINE_VEC2_CAST(short, uchar)
Expand Down Expand Up @@ -332,6 +348,8 @@ _DEFINE_READ_3D_PIXELF(16, clamp)
#undef _DEFINE_VEC4_CAST
#undef _DEFINE_VEC2_CAST
#undef _DEFINE_CAST
#undef _DEFINE_VEC4_TO_VEC2_CAST
#undef _DEFINE_VEC4_TO_SINGLE_CAST
#undef _DEFINE_READ_1D_PIXELF
#undef _DEFINE_READ_2D_PIXELF
#undef _DEFINE_READ_3D_PIXELF
Expand Down Expand Up @@ -3645,3 +3663,112 @@ _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(half, DF16_, f, 16)
#undef _NVVM_FUNC
#undef NVVM_FUNC
#undef MANGLE_FUNC_IMG_HANDLE_HELPER


// <--- CUBEMAP --->
// Cubemap surfaces are handled through the layered images implementation

// Declarations of the 4-component cubemap fetch entry points. Each one is
// bound through an asm label to the matching __clc_llvm_nvvm_tex_cube_* symbol
// and takes an image handle followed by three float coordinates.
float4 __nvvm_tex_cube_v4f32_f32(unsigned long imageHandle, float x, float y,
                                 float z)
    __asm("__clc_llvm_nvvm_tex_cube_v4f32_f32");
int4 __nvvm_tex_cube_v4i32_f32(unsigned long imageHandle, float x, float y,
                               float z)
    __asm("__clc_llvm_nvvm_tex_cube_v4i32_f32");
uint4 __nvvm_tex_cube_v4j32_f32(unsigned long imageHandle, float x, float y,
                                float z)
    __asm("__clc_llvm_nvvm_tex_cube_v4j32_f32");

// Coordinate fragments shared by the cubemap macros:
//   COORD_INPUT        - parameter list of three scalar float coordinates
//   COORD_THUNK_PARAMS - those three scalars, forwarded as call arguments
//   COORD_PARAMS       - the x/y/z members of a variable named `coord`
#define COORD_INPUT float x, float y, float z
#define COORD_THUNK_PARAMS x, y, z
#define COORD_PARAMS coord.x, coord.y, coord.z

// Generates a cubemap fetch thunk named __nvvm_tex_cube_<vec_size>_f32.
// The thunk calls the fetch named by fetch_vec_size (which returns
// fetch_elem_t) and converts the result to elem_t with
// cast_<fetch_elem_t>_to_<elem_t>. The float4, int4 and uint4 fetches are
// already declared above, so no thunk is generated for them.
#define _CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(                      \
    elem_t, fetch_elem_t, vec_size, fetch_vec_size, coord_input, coord_params) \
  elem_t __nvvm_tex_cube_##vec_size##_f32(unsigned long imageHandle,           \
                                          coord_input) {                       \
    return cast_##fetch_elem_t##_to_##elem_t(                                  \
        __nvvm_tex_cube_##fetch_vec_size##_f32(imageHandle, coord_params));    \
  }

// Thunk instantiations. Each line defines __nvvm_tex_cube_<vec_size>_f32 for
// one result type, fetching through the 4-component float/int/uint entry point
// named in the fourth argument and converting to the result type.
//
// Float: 1- and 2-component results converted from a float4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(float, float4, f32, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(float2, float4, v2f32, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
// Int: 1- and 2-component results converted from an int4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(int, int4, i32, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(int2, int4, v2i32, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
// Uint: 1- and 2-component results converted from a uint4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uint, uint4, j32, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uint2, uint4, v2j32, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
// Short: 1-, 2- and 4-component results converted from an int4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short, int4, i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short2, int4, v2i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short4, int4, v4i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
// UShort: 1-, 2- and 4-component results converted from a uint4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort, uint4, t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort2, uint4, v2t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort4, uint4, v4t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
// Char: 1-, 2- and 4-component results converted from an int4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char, int4, i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char2, int4, v2i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char4, int4, v4i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
// UChar: 1-, 2- and 4-component results converted from a uint4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar, uint4, h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar2, uint4, v2h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar4, uint4, v4h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
// Half: 1-, 2- and 4-component results converted from a float4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half, float4, f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half2, float4, v2f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half4, float4, v4f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)

// Macro to generate the mangled-name entry points for cubemap fetches.
// Each expansion defines a _CLC_DEF function returning elem_t. Its name is
// built by MANGLE_FUNC_IMG_HANDLE from the identifier
// __spirv_ImageSampleCubemap (26 is that identifier's length), the prefix I,
// and the suffix elem_t_mangled ## coord_mangled ## ET0_T_T1_.
// The function takes the image handle as a ulong followed by coord_input, and
// forwards imageHandle and coord_params to __nvvm_tex_cube_<vec_size>_f32.
// NOTE(review): the exact mangled spelling depends on MANGLE_FUNC_IMG_HANDLE,
// which is defined outside this section - confirm against its definition.
#define _CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(elem_t, elem_t_mangled, \
vec_size, coord_mangled, \
coord_input, coord_params) \
_CLC_DEF elem_t MANGLE_FUNC_IMG_HANDLE( \
26, __spirv_ImageSampleCubemap, I, \
elem_t_mangled##coord_mangled##ET0_T_T1_)(ulong imageHandle, \
coord_input) { \
return __nvvm_tex_cube_##vec_size##_f32(imageHandle, coord_params); \
}

// Entry-point instantiations. Every variant takes a float3 coordinate
// (coord_mangled is Dv3_f) and passes coord.x, coord.y, coord.z to the
// __nvvm_tex_cube_<vec_size>_f32 function selected by the third argument.
//
// Float: float, float2, float4 results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float, f, f32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float2, Dv2_f, v2f32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float4, Dv4_f, v4f32, Dv3_f, float3 coord, COORD_PARAMS)
// Int: int, int2, int4 results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int, i, i32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int2, Dv2_i, v2i32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int4, Dv4_i, v4i32, Dv3_f, float3 coord, COORD_PARAMS)
// Uint: uint, uint2, uint4 results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint, j, j32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint2, Dv2_j, v2j32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint4, Dv4_j, v4j32, Dv3_f, float3 coord, COORD_PARAMS)
// Short: short, short2, short4 results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short, s, i16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short2, Dv2_s, v2i16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short4, Dv4_s, v4i16, Dv3_f, float3 coord, COORD_PARAMS)
// UShort: ushort, ushort2, ushort4 results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort, t, t16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort2, Dv2_t, v2t16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort4, Dv4_t, v4t16, Dv3_f, float3 coord, COORD_PARAMS)
// Char: char, char2, char4 results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char, a, i8, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char2, Dv2_a, v2i8, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char4, Dv4_a, v4i8, Dv3_f, float3 coord, COORD_PARAMS)
// UChar: uchar, uchar2, uchar4 results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar, h, h8, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar2, Dv2_h, v2h8, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar4, Dv4_h, v4h8, Dv3_f, float3 coord, COORD_PARAMS)
// Half: half, half2, half4 results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half, DF16_, f16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half2, Dv2_DF16_, v2f16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half4, Dv4_DF16_, v4f16, Dv3_f, float3 coord, COORD_PARAMS)


#undef _CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN
#undef COORD_INPUT
#undef COORD_THUNK_PARAMS
#undef COORD_PARAMS
#undef _CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN
Loading