[C++ bridge] Switch from using arith ops to cc.cast (#1978)

* [C++ bridge] Switch from using arith ops to cc.cast The arith dialect provides a big menu of casting/conversion operations that we can use but which have their own rules and semantics. For example, the generation of a quantum circuit can involve aggressive constant propagation. By using an operation in a dialect we control (CC), we can implement folding and combining these operations in ways that are suitable to our domain and do not rely on decisions in the MLIR code that may be too conservative or too aggressive for our needs. [core] Add constant folding of cc.cast operations. These constant folding operations will eliminate cc.cast operations when they are converting the type of a constant input op. Update the tests. Add pattern to eliminate integer casts into an integer comparison. Add constant folding of complex.create. Apparently MLIR doesn't do this. Add cast complex to complex and various constant foldings. Add a new test. Add codegen expansion pattern. * Convert test to work with ARM.
NVIDIA · Jul 20, 2024 · 20e78b8 · 20e78b8
1 parent 2bf1cb6
commit 20e78b8
Show file tree

Hide file tree

Showing 29 changed files with 587 additions and 265 deletions.
diff --git a/include/cudaq/Optimizer/Builder/Factory.h b/include/cudaq/Optimizer/Builder/Factory.h
@@ -252,6 +252,7 @@ bool isAArch64(mlir::ModuleOp);
 bool structUsesTwoArguments(mlir::Type ty);
 
 std::optional<std::int64_t> getIntIfConstant(mlir::Value value);
+std::optional<llvm::APFloat> getDoubleIfConstant(mlir::Value value);
 
 /// Create a `cc.cast` operation, if it is needed.
 mlir::Value createCast(mlir::OpBuilder &builder, mlir::Location loc,

diff --git a/lib/Frontend/nvqpp/ConvertDecl.cpp b/lib/Frontend/nvqpp/ConvertDecl.cpp
@@ -749,14 +749,16 @@ bool QuakeBridgeVisitor::VisitVarDecl(clang::VarDecl *x) {
  if (initValue.getType().getIntOrFloatBitWidth() <
  type.getIntOrFloatBitWidth()) {
  // FIXME: Use zero-extend if this is unsigned!
- initValue = builder.create<arith::ExtSIOp>(loc, type, initValue);
+ initValue = builder.create<cudaq::cc::CastOp>(
+ loc, type, initValue, cudaq::cc::CastOpMode::Signed);
  } else if (initValue.getType().getIntOrFloatBitWidth() >
  type.getIntOrFloatBitWidth()) {
- initValue = builder.create<arith::TruncIOp>(loc, type, initValue);
+ initValue = builder.create<cudaq::cc::CastOp>(loc, type, initValue);
  }
  } else if (isa<IntegerType>(initValue.getType()) && isa<FloatType>(type)) {
  // FIXME: Use UIToFP if this is unsigned!
- initValue = builder.create<arith::SIToFPOp>(loc, type, initValue);
+ initValue = builder.create<cudaq::cc::CastOp>(
+ loc, type, initValue, cudaq::cc::CastOpMode::Signed);
  }
 
  if (auto initObject = initValue.getDefiningOp<cc::AllocaOp>()) {

diff --git a/lib/Frontend/nvqpp/ConvertExpr.cpp b/lib/Frontend/nvqpp/ConvertExpr.cpp
@@ -295,8 +295,9 @@ static Value toIntegerImpl(OpBuilder &builder, Location loc, Value bitVec) {
  Value bitElement = builder.create<cudaq::cc::LoadOp>(loc, eleAddr);
 
  // -bits[k]
- bitElement = builder.create<arith::ExtUIOp>(loc, builder.getI32Type(),
- bitElement);
+ bitElement = builder.create<cudaq::cc::CastOp>(
+ loc, builder.getI32Type(), bitElement,
+ cudaq::cc::CastOpMode::Unsigned);
  bitElement = builder.create<arith::MulIOp>(loc, negOne, bitElement);
 
  // -bits[k] ^ i
@@ -326,38 +327,38 @@ static void castToSameType(OpBuilder builder, Location loc,
  auto rhsTy = rhs.getType();
  if (lhsTy.isa<IntegerType>() && rhsTy.isa<IntegerType>()) {
  if (lhsTy.getIntOrFloatBitWidth() < rhsTy.getIntOrFloatBitWidth()) {
- if (lhsType && lhsType->isUnsignedIntegerOrEnumerationType())
- lhs = builder.create<arith::ExtUIOp>(loc, rhs.getType(), lhs);
- else
-  lhs = builder.create<arith::ExtSIOp>(loc, rhs.getType(), lhs);
+ auto mode = (lhsType && lhsType->isUnsignedIntegerOrEnumerationType())
+  ? cudaq::cc::CastOpMode::Unsigned
+  : cudaq::cc::CastOpMode::Signed;
+ lhs = builder.create<cudaq::cc::CastOp>(loc, rhs.getType(), lhs, mode);
  return;
  }
- if (rhsType && rhsType->isUnsignedIntegerOrEnumerationType())
- rhs = builder.create<arith::ExtUIOp>(loc, lhs.getType(), rhs);
- else
-  rhs = builder.create<arith::ExtSIOp>(loc, lhs.getType(), rhs);
+ auto mode = (rhsType && rhsType->isUnsignedIntegerOrEnumerationType())
+  ? cudaq::cc::CastOpMode::Unsigned
+  : cudaq::cc::CastOpMode::Signed;
+ rhs = builder.create<cudaq::cc::CastOp>(loc, lhs.getType(), rhs, mode);
  return;
  }
  if (lhsTy.isa<FloatType>() && rhsTy.isa<FloatType>()) {
  if (lhsTy.getIntOrFloatBitWidth() < rhsTy.getIntOrFloatBitWidth()) {
- lhs = builder.create<arith::ExtFOp>(loc, rhs.getType(), lhs);
+ lhs = builder.create<cudaq::cc::CastOp>(loc, rhs.getType(), lhs);
  return;
  }
- rhs = builder.create<arith::ExtFOp>(loc, lhs.getType(), rhs);
+ rhs = builder.create<cudaq::cc::CastOp>(loc, lhs.getType(), rhs);
  return;
  }
  if (lhsTy.isa<FloatType>() && rhsTy.isa<IntegerType>()) {
- if (rhsType && rhsType->isUnsignedIntegerOrEnumerationType())
- rhs = builder.create<arith::UIToFPOp>(loc, lhs.getType(), rhs);
- else
-  rhs = builder.create<arith::SIToFPOp>(loc, lhs.getType(), rhs);
+ auto mode = (rhsType && rhsType->isUnsignedIntegerOrEnumerationType())
+  ? cudaq::cc::CastOpMode::Unsigned
+  : cudaq::cc::CastOpMode::Signed;
+ rhs = builder.create<cudaq::cc::CastOp>(loc, lhs.getType(), rhs, mode);
  return;
  }
  if (lhsTy.isa<IntegerType>() && rhsTy.isa<FloatType>()) {
- if (lhsType && lhsType->isUnsignedIntegerOrEnumerationType())
- lhs = builder.create<arith::UIToFPOp>(loc, rhs.getType(), lhs);
- else
-  lhs = builder.create<arith::SIToFPOp>(loc, rhs.getType(), lhs);
+ auto mode = (lhsType && lhsType->isUnsignedIntegerOrEnumerationType())
+  ? cudaq::cc::CastOpMode::Unsigned
+  : cudaq::cc::CastOpMode::Signed;
+ lhs = builder.create<cudaq::cc::CastOp>(loc, rhs.getType(), lhs, mode);
  return;
  }
  TODO_loc(loc, "conversion of operands in binary expression");
@@ -579,14 +580,8 @@ Value QuakeBridgeVisitor::floatingPointCoercion(Location loc, Type toType,
  auto fromType = value.getType();
  if (toType == fromType)
  return value;
- if (fromType.isa<IntegerType>() && toType.isa<IntegerType>()) {
- if (fromType.getIntOrFloatBitWidth() < toType.getIntOrFloatBitWidth())
- return builder.create<arith::ExtFOp>(loc, toType, value);
- if (fromType.getIntOrFloatBitWidth() > toType.getIntOrFloatBitWidth())
- return builder.create<arith::TruncFOp>(loc, toType, value);
- TODO_loc(loc, "floating point types are distinct and same size");
- }
- TODO_loc(loc, "Float conversion but not floating point types");
+ assert(fromType.isa<FloatType>() && toType.isa<FloatType>());
+ return builder.create<cudaq::cc::CastOp>(loc, toType, value);
 }
 
 Value QuakeBridgeVisitor::integerCoercion(Location loc,
@@ -596,17 +591,15 @@ Value QuakeBridgeVisitor::integerCoercion(Location loc,
  if (dstTy == fromTy)
  return srcVal;
 
- if (fromTy.isa<IntegerType>() && dstTy.isa<IntegerType>()) {
- if (fromTy.getIntOrFloatBitWidth() < dstTy.getIntOrFloatBitWidth()) {
- if (clangTy->isUnsignedIntegerOrEnumerationType())
- return builder.create<arith::ExtUIOp>(loc, dstTy, srcVal);
- return builder.create<arith::ExtSIOp>(loc, dstTy, srcVal);
- }
- if (fromTy.getIntOrFloatBitWidth() > dstTy.getIntOrFloatBitWidth())
- return builder.create<arith::TruncIOp>(loc, dstTy, srcVal);
- TODO_loc(loc, "Types are not the same but have the same length");
+ assert(fromTy.isa<IntegerType>() && dstTy.isa<IntegerType>());
+ if (fromTy.getIntOrFloatBitWidth() < dstTy.getIntOrFloatBitWidth()) {
+ auto mode = (clangTy->isUnsignedIntegerOrEnumerationType())
+ ? cudaq::cc::CastOpMode::Unsigned
+ : cudaq::cc::CastOpMode::Signed;
+ return builder.create<cudaq::cc::CastOp>(loc, dstTy, srcVal, mode);
  }
- TODO_loc(loc, "Integer conversion but not integer types");
+ assert(fromTy.getIntOrFloatBitWidth() > dstTy.getIntOrFloatBitWidth());
+ return builder.create<cudaq::cc::CastOp>(loc, dstTy, srcVal);
 }
 
 bool QuakeBridgeVisitor::TraverseCastExpr(clang::CastExpr *x,
@@ -654,9 +647,7 @@ bool QuakeBridgeVisitor::VisitCastExpr(clang::CastExpr *x) {
  assert(toType && fromType);
  if (toType == fromType)
  return pushValue(value);
- if (fromType.getIntOrFloatBitWidth() < toType.getIntOrFloatBitWidth())
- return pushValue(builder.create<arith::ExtFOp>(loc, toType, value));
- return pushValue(builder.create<arith::TruncFOp>(loc, toType, value));
+ return pushValue(builder.create<cudaq::cc::CastOp>(loc, toType, value));
  }
  case clang::CastKind::CK_IntegralCast: {
  auto locSub = toLocation(x->getSubExpr());
@@ -672,18 +663,19 @@ bool QuakeBridgeVisitor::VisitCastExpr(clang::CastExpr *x) {
  return true;
  case clang::CastKind::CK_FloatingToIntegral: {
  auto qualTy = x->getType();
- if (qualTy->isUnsignedIntegerOrEnumerationType())
- return pushValue(
- builder.create<arith::FPToUIOp>(loc, castToTy, popValue()));
+ auto mode = qualTy->isUnsignedIntegerOrEnumerationType()
+  ? cudaq::cc::CastOpMode::Unsigned
+  : cudaq::cc::CastOpMode::Signed;
  return pushValue(
- builder.create<arith::FPToSIOp>(loc, castToTy, popValue()));
+ builder.create<cudaq::cc::CastOp>(loc, castToTy, popValue(), mode));
  }
  case clang::CastKind::CK_IntegralToFloating: {
- if (x->getSubExpr()->getType()->isUnsignedIntegerOrEnumerationType())
- return pushValue(
- builder.create<arith::UIToFPOp>(loc, castToTy, popValue()));
+ auto mode =
+ (x->getSubExpr()->getType()->isUnsignedIntegerOrEnumerationType())
+ ? cudaq::cc::CastOpMode::Unsigned
+ : cudaq::cc::CastOpMode::Signed;
  return pushValue(
- builder.create<arith::SIToFPOp>(loc, castToTy, popValue()));
+ builder.create<cudaq::cc::CastOp>(loc, castToTy, popValue(), mode));
  }
  case clang::CastKind::CK_IntegralToBoolean: {
  auto last = popValue();
@@ -1153,7 +1145,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) {
 
  // Get the values involved
  auto peelIntToFloat = [&](Value v) {
- if (auto op = v.getDefiningOp<arith::SIToFPOp>())
+ if (auto op = v.getDefiningOp<cudaq::cc::CastOp>())
  return op.getOperand();
  return v;
  };
@@ -1171,7 +1163,8 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) {
  base, x->getArg(1)->getType().getTypePtrOrNull(), power);
  auto ipow = builder.create<math::IPowIOp>(loc, base, power);
  if (isa<FloatType>(resTy))
- return pushValue(builder.create<arith::SIToFPOp>(loc, resTy, ipow));
+ return pushValue(builder.create<cudaq::cc::CastOp>(
+ loc, resTy, ipow, cudaq::cc::CastOpMode::Signed));
  assert(resTy == ipow.getType());
  return pushValue(ipow);
  }
@@ -2228,7 +2221,7 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr(
  auto eleAddr = builder.create<cc::ComputePtrOp>(loc, elePtrTy, vecPtr,
  ValueRange{indexVar});
  auto i1PtrTy = cc::PointerType::get(builder.getI1Type());
- auto i1Cast = builder.create<arith::TruncIOp>(loc, i1PtrTy, eleAddr);
+ auto i1Cast = builder.create<cudaq::cc::CastOp>(loc, i1PtrTy, eleAddr);
  return replaceTOSValue(i1Cast);
  }
  TODO_loc(loc, "unhandled operator call for quake conversion");

diff --git a/lib/Optimizer/CodeGen/QuakeToCodegen.cpp b/lib/Optimizer/CodeGen/QuakeToCodegen.cpp
@@ -12,6 +12,7 @@
 #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Dialect/Complex/IR/Complex.h"
 
 using namespace mlir;
 
@@ -39,10 +40,32 @@ class CodeGenRAIIPattern : public OpRewritePattern<quake::InitializeStateOp> {
  return success();
  }
 };
+
+/// Expand a `cc.cast` that produces a complex value into element-wise casts.
+///
+/// The complex operand is split into its real and imaginary parts with
+/// `complex.re` / `complex.im`, each part is converted to the result's element
+/// type with its own `cc.cast`, and the two converted parts are recombined
+/// with `complex.create`, which replaces the original cast.
+class ExpandComplexCast : public OpRewritePattern<cudaq::cc::CastOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  /// Returns `failure()` (leaving the op untouched) unless both the result
+  /// and the operand of \p castOp are complex types.
+  LogicalResult matchAndRewrite(cudaq::cc::CastOp castOp,
+                                PatternRewriter &rewriter) const override {
+    // Only casts whose result is a complex value are expanded here.
+    auto complexTy = dyn_cast<ComplexType>(castOp.getType());
+    if (!complexTy)
+      return failure();
+    // Decline, rather than assert inside `cast<>`, when the operand is not a
+    // complex value: `complex.re` / `complex.im` below require one.
+    auto fromComplexTy = dyn_cast<ComplexType>(castOp.getValue().getType());
+    if (!fromComplexTy)
+      return failure();
+    auto loc = castOp.getLoc();
+    auto ty = fromComplexTy.getElementType();
+    Value rePart = rewriter.create<complex::ReOp>(loc, ty, castOp.getValue());
+    Value imPart = rewriter.create<complex::ImOp>(loc, ty, castOp.getValue());
+    // Convert each scalar part to the element type of the result.
+    auto eleTy = complexTy.getElementType();
+    auto reCast = rewriter.create<cudaq::cc::CastOp>(loc, eleTy, rePart);
+    auto imCast = rewriter.create<cudaq::cc::CastOp>(loc, eleTy, imPart);
+    rewriter.replaceOpWithNewOp<complex::CreateOp>(castOp, complexTy, reCast,
+                                                   imCast);
+    return success();
+  }
+};
 } // namespace
 
 void cudaq::codegen::populateQuakeToCodegenPatterns(
  mlir::RewritePatternSet &patterns) {
  auto *ctx = patterns.getContext();
- patterns.insert<CodeGenRAIIPattern>(ctx);
+ patterns.insert<CodeGenRAIIPattern, ExpandComplexCast>(ctx);
 }