Skip to content

Commit

Permalink
Merge branch 'sycl' into przemek/sampled-image-fetch
Browse files Browse the repository at this point in the history
  • Loading branch information
przemektmalon committed Feb 23, 2024
2 parents e7cdd43 + 5a92a19 commit c3ec14e
Show file tree
Hide file tree
Showing 29 changed files with 1,858 additions and 168 deletions.
28 changes: 22 additions & 6 deletions .github/workflows/sycl-linux-precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,21 +109,37 @@ jobs:
fail-fast: false
matrix:
include:
- name: Perf tests on Intel GEN12 Graphics system
- name: Intel GEN12 Graphics system
runner: '["Linux", "gen12"]'
- name: Perf tests on Intel Arc A-Series Graphics system
image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest
image_extra_opts: --device=/dev/dri
reset_gpu: true
- name: Intel Arc A-Series Graphics system
runner: '["Linux", "arc"]'
image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest
image_extra_opts: --device=/dev/dri
reset_gpu: true
- name: AMD system
runner: '["Linux", "amdgpu"]'
image: ghcr.io/intel/llvm/ubuntu2204_build:latest
image_extra_opts: --device=/dev/dri --device=/dev/kfd
extra_cmake_args: -DHIP_PLATFORM="AMD" -DAMD_ARCH="gfx1031"
- name: CUDA system
runner: '["Linux", "cuda"]'
image: ghcr.io/intel/llvm/ubuntu2204_build:latest
image_extra_opts: --gpus all
uses: ./.github/workflows/sycl-linux-run-tests.yml
with:
name: ${{ matrix.name }}
name: Perf tests on ${{ matrix.name }}
runner: ${{ matrix. runner }}
image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest
image_options: -u 1001 --device=/dev/dri --privileged --cap-add SYS_ADMIN
image: ${{ matrix.image }}
image_options: -u 1001 --privileged --cap-add SYS_ADMIN ${{ matrix.image_extra_opts }}
target_devices: all
reset_gpu: true
reset_gpu: ${{ matrix.reset_gpu }}

env: '{"LIT_FILTER":"PerformanceTests/"}'
extra_lit_opts: -a -j 1 --param enable-perf-tests=True
extra_cmake_args: ${{ matrix.extra_cmake_args }}

ref: ${{ github.sha }}
merge_ref: ''
Expand Down
13 changes: 5 additions & 8 deletions clang/lib/CodeGen/CGVTT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT,
llvm::GlobalVariable::LinkageTypes Linkage,
const CXXRecordDecl *RD) {
VTTBuilder Builder(CGM.getContext(), RD, /*GenerateDefinition=*/true);
llvm::ArrayType *ArrayType =
llvm::ArrayType::get(CGM.DefaultInt8PtrTy, Builder.getVTTComponents().size());
llvm::ArrayType *ArrayType = llvm::ArrayType::get(
CGM.GlobalsInt8PtrTy, Builder.getVTTComponents().size());

SmallVector<llvm::GlobalVariable *, 8> VTables;
SmallVector<VTableAddressPointsMapTy, 8> VTableAddressPoints;
Expand Down Expand Up @@ -81,9 +81,6 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT,
VTable->getValueType(), VTable, Idxs, /*InBounds=*/true,
/*InRangeIndex=*/1);

Init = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
Init, CGM.Int8PtrTy);

VTTComponents.push_back(Init);
}

Expand Down Expand Up @@ -117,9 +114,9 @@ llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) {

VTTBuilder Builder(CGM.getContext(), RD, /*GenerateDefinition=*/false);

llvm::ArrayType *ArrayType =
llvm::ArrayType::get(CGM.Int8PtrTy, Builder.getVTTComponents().size());
llvm::Align Align = CGM.getDataLayout().getABITypeAlign(CGM.Int8PtrTy);
llvm::ArrayType *ArrayType = llvm::ArrayType::get(
CGM.GlobalsInt8PtrTy, Builder.getVTTComponents().size());
llvm::Align Align = CGM.getDataLayout().getABITypeAlign(CGM.GlobalsInt8PtrTy);

llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
Name, ArrayType, llvm::GlobalValue::ExternalLinkage, Align);
Expand Down
32 changes: 22 additions & 10 deletions clang/lib/CodeGen/CGVTables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ bool CodeGenVTables::useRelativeLayout() const {
llvm::Type *CodeGenModule::getVTableComponentType() const {
if (UseRelativeLayout(*this))
return Int32Ty;
return Int8PtrTy;
return GlobalsInt8PtrTy;
}

llvm::Type *CodeGenVTables::getVTableComponentType() const {
Expand All @@ -704,7 +704,7 @@ static void AddPointerLayoutOffset(const CodeGenModule &CGM,
CharUnits offset) {
builder.add(llvm::ConstantExpr::getIntToPtr(
llvm::ConstantInt::get(CGM.PtrDiffTy, offset.getQuantity()),
CGM.Int8PtrTy));
CGM.GlobalsInt8PtrTy));
}

static void AddRelativeLayoutOffset(const CodeGenModule &CGM,
Expand Down Expand Up @@ -741,7 +741,7 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder,
vtableHasLocalLinkage,
/*isCompleteDtor=*/false);
else
return builder.add(llvm::ConstantExpr::getBitCast(rtti, CGM.Int8PtrTy));
return builder.add(rtti);

case VTableComponent::CK_FunctionPointer:
case VTableComponent::CK_CompleteDtorPointer:
Expand All @@ -760,7 +760,8 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder,
? MD->hasAttr<CUDADeviceAttr>()
: (MD->hasAttr<CUDAHostAttr>() || !MD->hasAttr<CUDADeviceAttr>());
if (!CanEmitMethod)
return builder.add(llvm::ConstantExpr::getNullValue(CGM.Int8PtrTy));
return builder.add(
llvm::ConstantExpr::getNullValue(CGM.GlobalsInt8PtrTy));
// Method is acceptable, continue processing as usual.
}

Expand All @@ -773,20 +774,20 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder,
// with the local symbol. As a temporary solution, fill these components
// with zero. We shouldn't be calling these in the first place anyway.
if (useRelativeLayout())
return llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
return llvm::ConstantPointerNull::get(CGM.GlobalsInt8PtrTy);

// For NVPTX devices in OpenMP emit special functions as null pointers,
// otherwise linking ends up with unresolved references.
if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPIsTargetDevice &&
CGM.getTriple().isNVPTX())
return llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
return llvm::ConstantPointerNull::get(CGM.GlobalsInt8PtrTy);
llvm::FunctionType *fnTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
llvm::Constant *fn = cast<llvm::Constant>(
CGM.CreateRuntimeFunction(fnTy, name).getCallee());
if (auto f = dyn_cast<llvm::Function>(fn))
f->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
return llvm::ConstantExpr::getBitCast(fn, CGM.Int8PtrTy);
return fn;
};

llvm::Constant *fnPtr;
Expand Down Expand Up @@ -824,15 +825,26 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder,
return addRelativeComponent(
builder, fnPtr, vtableAddressPoint, vtableHasLocalLinkage,
component.getKind() == VTableComponent::CK_CompleteDtorPointer);
} else
return builder.add(llvm::ConstantExpr::getBitCast(fnPtr, CGM.Int8PtrTy));
} else {
// TODO: this is icky and only exists due to functions being in the generic
// address space, rather than the global one, even though they are
// globals; fixing said issue might be intrusive, and will be done
// later.
unsigned FnAS = fnPtr->getType()->getPointerAddressSpace();
unsigned GVAS = CGM.GlobalsInt8PtrTy->getPointerAddressSpace();

if (FnAS != GVAS)
fnPtr =
llvm::ConstantExpr::getAddrSpaceCast(fnPtr, CGM.GlobalsInt8PtrTy);
return builder.add(fnPtr);
}
}

case VTableComponent::CK_UnusedFunctionPointer:
if (useRelativeLayout())
return builder.add(llvm::ConstantExpr::getNullValue(CGM.Int32Ty));
else
return builder.addNullPointer(CGM.Int8PtrTy);
return builder.addNullPointer(CGM.GlobalsInt8PtrTy);
}

llvm_unreachable("Unexpected vtable component kind");
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7982,7 +7982,7 @@ llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty,
// FIXME: should we even be calling this method if RTTI is disabled
// and it's not for EH?
if (!shouldEmitRTTI(ForEH))
return llvm::Constant::getNullValue(Int8PtrTy);
return llvm::Constant::getNullValue(GlobalsInt8PtrTy);

if (ForEH && Ty->isObjCObjectPointerType() &&
LangOpts.ObjCRuntime.isGNUFamily())
Expand Down
34 changes: 17 additions & 17 deletions clang/lib/CodeGen/ItaniumCXXABI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
CGF.EmitBlock(FnVirtual);

// Cast the adjusted this to a pointer to vtable pointer and load.
llvm::Type *VTableTy = Builder.getInt8PtrTy();
llvm::Type *VTableTy = CGF.CGM.GlobalsInt8PtrTy;
CharUnits VTablePtrAlign =
CGF.CGM.getDynamicOffsetAlignment(ThisAddr.getAlignment(), RD,
CGF.getPointerAlign());
Expand Down Expand Up @@ -1942,11 +1942,11 @@ llvm::Value *ItaniumCXXABI::getVTableAddressPointInStructorWithVTT(
/// Load the VTT.
llvm::Value *VTT = CGF.LoadCXXVTT();
if (VirtualPointerIndex)
VTT = CGF.Builder.CreateConstInBoundsGEP1_64(
CGF.VoidPtrTy, VTT, VirtualPointerIndex);
VTT = CGF.Builder.CreateConstInBoundsGEP1_64(CGF.GlobalsVoidPtrTy, VTT,
VirtualPointerIndex);

// And load the address point from the VTT.
return CGF.Builder.CreateAlignedLoad(CGF.VoidPtrTy, VTT,
return CGF.Builder.CreateAlignedLoad(CGF.GlobalsVoidPtrTy, VTT,
CGF.getPointerAlign());
}

Expand Down Expand Up @@ -1974,12 +1974,13 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
CGM.getItaniumVTableContext().getVTableLayout(RD);
llvm::Type *VTableType = CGM.getVTables().getVTableType(VTLayout);

// Use pointer alignment for the vtable. Otherwise we would align them based
// on the size of the initializer which doesn't make sense as only single
// values are read.
// Use pointer to global alignment for the vtable. Otherwise we would align
// them based on the size of the initializer which doesn't make sense as only
// single values are read.
LangAS AS = CGM.GetGlobalVarAddressSpace(nullptr);
unsigned PAlign = CGM.getItaniumVTableContext().isRelativeLayout()
? 32
: CGM.getTarget().getPointerAlign(LangAS::Default);
: CGM.getTarget().getPointerAlign(AS);

VTable = CGM.CreateOrReplaceCXXRuntimeVariable(
Name, VTableType, llvm::GlobalValue::ExternalLinkage,
Expand Down Expand Up @@ -3281,10 +3282,9 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) {
// Note for the future: If we would ever like to do deferred emission of
// RTTI, check if emitting vtables opportunistically need any adjustment.

GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
/*isConstant=*/true,
llvm::GlobalValue::ExternalLinkage, nullptr,
Name);
GV = new llvm::GlobalVariable(
CGM.getModule(), CGM.GlobalsInt8PtrTy,
/*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, Name);
const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
CGM.setGVProperties(GV, RD);
// Import the typeinfo symbol when all non-inline virtual methods are
Expand Down Expand Up @@ -3680,8 +3680,8 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
if (CGM.getItaniumVTableContext().isRelativeLayout())
VTable = CGM.getModule().getNamedAlias(VTableName);
if (!VTable) {
llvm::Type *Ty = llvm::ArrayType::get(CGM.DefaultInt8PtrTy, 0);
VTable = CGM.CreateRuntimeVariable(Ty, VTableName);
llvm::Type *Ty = llvm::ArrayType::get(CGM.GlobalsInt8PtrTy, 0);
VTable = CGM.getModule().getOrInsertGlobal(VTableName, Ty);
}

CGM.setDSOLocal(cast<llvm::GlobalValue>(VTable->stripPointerCasts()));
Expand All @@ -3698,7 +3698,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8Ty, VTable, Eight);
} else {
llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2);
VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.DefaultInt8PtrTy,
VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.GlobalsInt8PtrTy,
VTable, Two);
}

Expand Down Expand Up @@ -3835,7 +3835,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
llvm::ConstantInt::get(CGM.Int64Ty, ((uint64_t)1) << 63);
TypeNameField = llvm::ConstantExpr::getAdd(TypeNameField, flag);
TypeNameField =
llvm::ConstantExpr::getIntToPtr(TypeNameField, CGM.Int8PtrTy);
llvm::ConstantExpr::getIntToPtr(TypeNameField, CGM.GlobalsInt8PtrTy);
} else {
TypeNameField = TypeName;
}
Expand Down Expand Up @@ -3965,7 +3965,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
GV->setComdat(M.getOrInsertComdat(GV->getName()));

CharUnits Align = CGM.getContext().toCharUnitsFromBits(
CGM.getTarget().getPointerAlign(LangAS::Default));
CGM.getTarget().getPointerAlign(CGM.GetGlobalVarAddressSpace(nullptr)));
GV->setAlignment(Align.getAsAlign());

// The Itanium ABI specifies that type_info objects must be globally
Expand Down
2 changes: 0 additions & 2 deletions clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// XFAIL: *
//
// RUN: %clang_cc1 -I%S %s -triple amdgcn-amd-amdhsa -emit-llvm -fcxx-exceptions -fexceptions -o - | FileCheck %s
struct A { virtual void f(); };
struct B : A { };
Expand Down
13 changes: 13 additions & 0 deletions clang/test/CodeGenCXX/vtable-align-address-space.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -std=c++11 -emit-llvm -o - | FileCheck %s

// Polymorphic type under test: it declares three virtual member functions, so
// the FileCheck patterns below can match its vtable (_ZTV1A), type name
// string (_ZTS1A) and type info object (_ZTI1A).
struct A {
virtual void f();
virtual void g();
virtual void h();
};

// Out-of-line definition of A::f; g and h are only declared in this file.
// NOTE(review): presumably defining f here is what causes the vtable and type
// info for A to be emitted in this translation unit -- confirm.
void A::f() {}

// CHECK: @_ZTV1A ={{.*}} unnamed_addr addrspace(1) constant { [5 x ptr addrspace(1)] } { [5 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1A, ptr addrspace(1) addrspacecast (ptr @_ZN1A1fEv to ptr addrspace(1)), ptr addrspace(1) addrspacecast (ptr @_ZN1A1gEv to ptr addrspace(1)), ptr addrspace(1) addrspacecast (ptr @_ZN1A1hEv to ptr addrspace(1))]
// CHECK: @_ZTS1A ={{.*}} constant [3 x i8] c"1A\00", align 1
// CHECK: @_ZTI1A ={{.*}} addrspace(1) constant { ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1A }, align 8
Loading

0 comments on commit c3ec14e

Please sign in to comment.