[RISCV] Add combines to form binop from tail insert idioms (llvm#72675)
This patch contains two related combines:
1) If we have a scalar insert into the result of a concat_vectors, sink
   the insert into the relevant operand of the concat.
2) If we have an insert of a scalar binop into a vector binop of the
   same opcode, where the RHS of each is constant, perform the insert
   first and then the binop.

The common theme of both is pushing inserts closer to the sources of
the computation graph. The goal is to enable forming vector binops from
idioms that insert scalar binop results at the tail of another vector.
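
As a minimal sketch of the tail-insert idiom the second combine targets
(the function and value names here are hypothetical, loosely modeled on
the fixed-vectors-buildvec-of-binop.ll test changed below):

define <4 x i32> @tail_insert_binop(<4 x i32> %vin, i32 %a) {
  ; vector add with a constant RHS, followed by an insert of a scalar
  ; add that also has a constant RHS
  %vadd = add <4 x i32> %vin, <i32 1, i32 2, i32 3, i32 4>
  %e = add i32 %a, 23
  %res = insertelement <4 x i32> %vadd, i32 %e, i32 3
  ret <4 x i32> %res
}

After the combine, the DAG instead inserts %a into %vin and 23 into the
constant vector, then performs a single vector add.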

For RISCV specifically, the concat_vectors transform pushes inserts
into smaller vectors. This reduces LMUL for the vslides and usually
doesn't require an additional vsetvli, since the source vectors are
already operating at the narrower VL. I tried this one as a
target-independent combine first, and it doesn't appear profitable on
all targets.
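
For the concat case, a rough sketch of the input pattern (hypothetical
names, loosely mirroring concat-vector-insert-elt.ll below); the concat
typically reaches the DAG as a concat_vectors node, so the insert at
index 2 is rewritten as an insert at index 0 of the second <2 x i8>
half, keeping the insert/slide work at the narrower type:

define void @concat_then_insert(ptr %a, ptr %b, i8 %x) {
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  ; concat of the two halves, then a scalar insert into the wide result
  %cat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %cat, i8 %x, i32 2
  store <4 x i8> %ins, ptr %a
  ret void
}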

This is only one approach to the problem. Another idea would be to
aggressively form build_vectors and subvector inserts from the
individual scalar inserts, and then have a transform that sinks a
subvector_insert down through the concat. The advantage of that
alternative is that it exposes parallelism in the insert sequence even
if the source vector isn't a concat_vectors. If reviewers are okay with
it, I'd like to start with this approach and then explore that
direction in a follow-up patch.
preames authored Nov 30, 2023
1 parent 0ef013c commit ff5e536
Showing 3 changed files with 130 additions and 75 deletions.
76 changes: 75 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1393,7 +1393,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                         ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
                         ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
                         ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
                         ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL});
                         ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
                         ISD::INSERT_VECTOR_ELT});
  if (Subtarget.hasVendorXTHeadMemPair())
    setTargetDAGCombine({ISD::LOAD, ISD::STORE});
  if (Subtarget.useRVVForFixedLengthVectors())
@@ -14342,6 +14343,75 @@ static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
                     DAG.getBuildVector(VT, DL, RHSOps));
}

static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
                                               const RISCVSubtarget &Subtarget,
                                               const RISCVTargetLowering &TLI) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  EVT VT = InVec.getValueType();
  if (VT.isScalableVector())
    return SDValue();

  if (!InVec.hasOneUse())
    return SDValue();

  // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
  // move the insert_vector_elts into the arms of the binop. Note that
  // the new RHS must be a constant.
  const unsigned InVecOpcode = InVec->getOpcode();
  if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
      InVal.hasOneUse()) {
    SDValue InVecLHS = InVec->getOperand(0);
    SDValue InVecRHS = InVec->getOperand(1);
    SDValue InValLHS = InVal->getOperand(0);
    SDValue InValRHS = InVal->getOperand(1);

    if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
      return SDValue();
    if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
      return SDValue();
    // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
    // have different LHS and RHS types.
    if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
      return SDValue();
    SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                              InVecLHS, InValLHS, EltNo);
    SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                              InVecRHS, InValRHS, EltNo);
    return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
  }

  // Given insert_vector_elt (concat_vectors ...), InVal, Elt
  // move the insert_vector_elt to the source operand of the concat_vector.
  if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
    return SDValue();

  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC)
    return SDValue();
  unsigned Elt = IndexC->getZExtValue();

  EVT ConcatVT = InVec.getOperand(0).getValueType();
  if (ConcatVT.getVectorElementType() != InVal.getValueType())
    return SDValue();
  unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
  SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
                                   EltNo.getValueType());

  unsigned ConcatOpIdx = Elt / ConcatNumElts;
  SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
  ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
                         ConcatOp, InVal, NewIdx);

  SmallVector<SDValue> ConcatOps;
  ConcatOps.append(InVec->op_begin(), InVec->op_end());
  ConcatOps[ConcatOpIdx] = ConcatOp;
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
}

// If we're concatenating a series of vector loads like
// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// Then we can turn this into a strided load by widening the vector elements
@@ -15407,6 +15477,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
    if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
      return V;
    break;
  case ISD::INSERT_VECTOR_ELT:
    if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
      return V;
    break;
  case RISCVISD::VFMV_V_F_VL: {
    const MVT VT = N->getSimpleValueType(0);
    SDValue Passthru = N->getOperand(0);
113 changes: 50 additions & 63 deletions llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll
@@ -10,10 +10,10 @@ define void @v4xi8_concat_vector_insert_idx0(ptr %a, ptr %b, i8 %x) {
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT: vmv.s.x v8, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma
; CHECK-NEXT: vmv.s.x v8, a2
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%v1 = load <2 x i8>, ptr %a
@@ -30,12 +30,10 @@ define void @v4xi8_concat_vector_insert_idx1(ptr %a, ptr %b, i8 %x) {
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%v1 = load <2 x i8>, ptr %a
@@ -50,15 +48,13 @@ define void @v4xi8_concat_vector_insert_idx2(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT: vmv.s.x v8, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: vslideup.vi v9, v8, 2
; CHECK-NEXT: vse8.v v9, (a0)
; CHECK-NEXT: ret
%v1 = load <2 x i8>, ptr %a
%v2 = load <2 x i8>, ptr %b
@@ -72,13 +68,13 @@ define void @v4xi8_concat_vector_insert_idx3(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vslideup.vi v8, v9, 3
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: vslideup.vi v9, v8, 2
; CHECK-NEXT: vse8.v v9, (a0)
; CHECK-NEXT: ret
%v1 = load <2 x i8>, ptr %a
%v2 = load <2 x i8>, ptr %b
@@ -94,12 +90,11 @@ define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) {
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vle64.v v10, (a1)
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
@@ -108,10 +103,10 @@ define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) {
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vle64.v v10, (a1)
; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT: vmv.s.x v8, a2
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vslideup.vi v8, v10, 2
; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, ma
; RV64-NEXT: vmv.s.x v8, a2
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
%v1 = load <2 x i64>, ptr %a
@@ -128,14 +123,13 @@ define void @v4xi64_concat_vector_insert_idx1(ptr %a, ptr %b, i64 %x) {
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vle64.v v10, (a1)
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT: vslide1down.vx v10, v8, a2
; RV32-NEXT: vslide1down.vx v10, v10, a3
; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma
; RV32-NEXT: vslideup.vi v8, v10, 1
; RV32-NEXT: vslide1down.vx v9, v8, a2
; RV32-NEXT: vslide1down.vx v9, v9, a3
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vslideup.vi v8, v9, 1
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
@@ -144,12 +138,10 @@ define void @v4xi64_concat_vector_insert_idx1(ptr %a, ptr %b, i64 %x) {
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vle64.v v10, (a1)
; RV64-NEXT: vmv.s.x v9, a2
; RV64-NEXT: vslideup.vi v8, v9, 1
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vslideup.vi v8, v10, 2
; RV64-NEXT: vmv.s.x v10, a2
; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma
; RV64-NEXT: vslideup.vi v8, v10, 1
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
%v1 = load <2 x i64>, ptr %a
@@ -164,31 +156,26 @@ define void @v4xi64_concat_vector_insert_idx2(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx2:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vle64.v v10, (a1)
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; RV32-NEXT: vslide1down.vx v10, v8, a2
; RV32-NEXT: vslide1down.vx v10, v10, a3
; RV32-NEXT: vsetivli zero, 3, e64, m2, tu, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vle64.v v8, (a1)
; RV32-NEXT: vle64.v v10, (a0)
; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: vslideup.vi v10, v8, 2
; RV32-NEXT: vse64.v v10, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx2:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vle64.v v10, (a1)
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vslideup.vi v8, v10, 2
; RV64-NEXT: vmv.s.x v10, a2
; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma
; RV64-NEXT: vslideup.vi v8, v10, 2
; RV64-NEXT: vle64.v v8, (a1)
; RV64-NEXT: vle64.v v10, (a0)
; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT: vmv.s.x v8, a2
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: vslideup.vi v10, v8, 2
; RV64-NEXT: vse64.v v10, (a0)
; RV64-NEXT: ret
%v1 = load <2 x i64>, ptr %a
%v2 = load <2 x i64>, ptr %b
@@ -204,26 +191,26 @@ define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) {
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vle64.v v10, (a1)
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT: vslide1down.vx v9, v8, a2
; RV32-NEXT: vslide1down.vx v9, v9, a3
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vslideup.vi v10, v9, 1
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; RV32-NEXT: vslide1down.vx v10, v8, a2
; RV32-NEXT: vslide1down.vx v10, v10, a3
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 3
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx3:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vle64.v v10, (a1)
; RV64-NEXT: vle64.v v8, (a1)
; RV64-NEXT: vle64.v v10, (a0)
; RV64-NEXT: vmv.s.x v9, a2
; RV64-NEXT: vslideup.vi v8, v9, 1
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vslideup.vi v8, v10, 2
; RV64-NEXT: vmv.s.x v10, a2
; RV64-NEXT: vslideup.vi v8, v10, 3
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: vslideup.vi v10, v8, 2
; RV64-NEXT: vse64.v v10, (a0)
; RV64-NEXT: ret
%v1 = load <2 x i64>, ptr %a
%v2 = load <2 x i64>, ptr %b
16 changes: 5 additions & 11 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -560,18 +560,8 @@ define <8 x i32> @add_constant_rhs_8xi32_vector_in3(<8 x i32> %vin, i32 %a, i32
define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_8xi32_partial:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a4, %hi(.LCPI19_0)
; CHECK-NEXT: addi a4, a4, %lo(.LCPI19_0)
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v10, (a4)
; CHECK-NEXT: vadd.vv v8, v8, v10
; CHECK-NEXT: addi a0, a0, 23
; CHECK-NEXT: addi a1, a1, 25
; CHECK-NEXT: addi a2, a2, 1
; CHECK-NEXT: addi a3, a3, 2047
; CHECK-NEXT: addi a3, a3, 308
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
@@ -581,7 +571,11 @@ define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b,
; CHECK-NEXT: vslideup.vi v8, v10, 6
; CHECK-NEXT: vmv.s.x v10, a3
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0)
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vslideup.vi v8, v10, 7
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: ret
%vadd = add <8 x i32> %vin, <i32 1, i32 2, i32 3, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
%e0 = add i32 %a, 23
