Skip to content

Commit

Permalink
[InstCombine] Remove some of the complexity-based canonicalization (l…
Browse files Browse the repository at this point in the history
…lvm#91185)

The idea behind this canonicalization is that it allows us to handle less
patterns, because we know that some will be canonicalized away. This is
indeed very useful to e.g. know that constants are always on the right.

However, this is only useful if the canonicalization is actually
reliable. This is the case for constants, but not for arguments: Moving
these to the right makes it look like the "more complex" expression is
guaranteed to be on the left, but this is not actually the case in
practice. It fails as soon as you replace the argument with another
instruction.

The end result is that it looks like things correctly work in tests,
while they actually don't. We use the "thwart complexity-based
canonicalization" trick to handle this in tests, but it's often a
challenge for new contributors to get this right, and based on the
regressions this PR originally exposed, we clearly don't get this right
in many cases.

For this reason, I think that it's better to remove this complexity
canonicalization. It will make it much easier to write tests for
commuted cases and make sure that they are handled.
  • Loading branch information
nikic authored Aug 21, 2024
1 parent 297bb46 commit a105877
Show file tree
Hide file tree
Showing 289 changed files with 2,680 additions and 2,681 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP4]]
Expand All @@ -54,7 +54,7 @@ svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP4]]
Expand Down
44 changes: 22 additions & 22 deletions clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c

Large diffs are not rendered by default.

26 changes: 13 additions & 13 deletions clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ svuint64_t test_svld1sb_u64(svbool_t pg, const int8_t *base) MODE_ATTR
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
Expand All @@ -152,7 +152,7 @@ svuint64_t test_svld1sb_u64(svbool_t pg, const int8_t *base) MODE_ATTR
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
Expand All @@ -168,7 +168,7 @@ svint16_t test_svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum) M
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -179,7 +179,7 @@ svint16_t test_svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum) M
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -195,7 +195,7 @@ svint32_t test_svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum) M
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -206,7 +206,7 @@ svint32_t test_svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum) M
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -222,7 +222,7 @@ svint64_t test_svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum) M
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
Expand All @@ -233,7 +233,7 @@ svint64_t test_svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum) M
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
Expand All @@ -249,7 +249,7 @@ svuint16_t test_svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -260,7 +260,7 @@ svuint16_t test_svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -276,7 +276,7 @@ svuint32_t test_svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -287,7 +287,7 @@ svuint32_t test_svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -298,7 +298,7 @@ svuint64_t test_svld1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum)
return svld1sb_vnum_u64(pg, base, vnum);
}

#ifndef __ARM_FEATURE_SME
#ifndef __ARM_FEATURE_SME

// CHECK-LABEL: @test_svld1sb_gather_u32base_s32(
// CHECK-NEXT: entry:
Expand Down
18 changes: 9 additions & 9 deletions clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base) MODE_ATTR
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -114,7 +114,7 @@ svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base) MODE_ATTR
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -130,7 +130,7 @@ svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -141,7 +141,7 @@ svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -157,7 +157,7 @@ svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -168,7 +168,7 @@ svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -184,7 +184,7 @@ svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -195,7 +195,7 @@ svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -206,7 +206,7 @@ svuint64_t test_svld1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
return svld1sh_vnum_u64(pg, base, vnum);
}

#ifndef __ARM_FEATURE_SME
#ifndef __ARM_FEATURE_SME

// CHECK-LABEL: @test_svld1sh_gather_u32base_s32(
// CHECK-NEXT: entry:
Expand Down
Loading

0 comments on commit a105877

Please sign in to comment.