From a6d5f4e7ec30a7c6e3391c8647f41e966ef7f1ab Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Mon, 17 Jun 2024 10:08:03 -0700 Subject: [PATCH 01/50] Add a pass for state preparation from vectors --- include/cudaq/Optimizer/Transforms/Passes.h | 2 + include/cudaq/Optimizer/Transforms/Passes.td | 11 ++ lib/Optimizer/Transforms/CMakeLists.txt | 1 + .../Transforms/GenKernelExecution.cpp | 34 +++- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 34 ++++ lib/Optimizer/Transforms/StatePreparation.cpp | 119 +++++++++++++ program.py | 35 ++++ .../cudaq/platform/py_alt_launch_kernel.cpp | 1 + runtime/common/BaseRemoteRESTQPU.h | 1 + runtime/common/BaseRestRemoteClient.h | 1 + targettests/execution/from_state.cpp | 30 ++++ targettests/execution/from_state_complex.cpp | 27 +++ targettests/execution/program.cpp | 167 ++++++++++++++++++ 13 files changed, 457 insertions(+), 6 deletions(-) create mode 100644 lib/Optimizer/Transforms/StatePreparation.cpp create mode 100644 program.py create mode 100644 targettests/execution/from_state.cpp create mode 100644 targettests/execution/from_state_complex.cpp create mode 100644 targettests/execution/program.cpp diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 996b6e56a7..422032326c 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -40,6 +40,8 @@ std::unique_ptr createLowerToCFGPass(); std::unique_ptr createObserveAnsatzPass(std::vector &); std::unique_ptr createQuakeAddMetadata(); std::unique_ptr createQuakeAddDeallocs(); +std::unique_ptr createStatePreparation(); +std::unique_ptr createStatePreparation(std::string_view, void *); std::unique_ptr createQuakeSynthesizer(); std::unique_ptr createQuakeSynthesizer(std::string_view, void *); std::unique_ptr createRaiseToAffinePass(); diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 8d2f0c1821..e5e15a8776 100644 
--- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -512,6 +512,17 @@ def PruneCtrlRelations : Pass<"pruned-ctrl-form", "mlir::func::FuncOp"> { }]; } +def PrepareState : Pass<"state-prep", "mlir::ModuleOp"> { + let summary = + "Convert state vector data into gates"; + let description = [{ + Convert quake representation that includes qubit initialization + from data into qubit initialization using gates. + }]; + + let constructor = "cudaq::opt::createStatePreparation()"; +} + def QuakeSynthesize : Pass<"quake-synth", "mlir::ModuleOp"> { let summary = "Synthesize concrete quantum program from Quake code plus runtime values."; diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index 7600efe276..6a51057bd3 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -39,6 +39,7 @@ add_cudaq_library(OptTransforms ObserveAnsatz.cpp PruneCtrlRelations.cpp QuakeAddMetadata.cpp + StatePreparation.cpp QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp diff --git a/lib/Optimizer/Transforms/GenKernelExecution.cpp b/lib/Optimizer/Transforms/GenKernelExecution.cpp index c16a4af7dd..68ef5b21b7 100644 --- a/lib/Optimizer/Transforms/GenKernelExecution.cpp +++ b/lib/Optimizer/Transforms/GenKernelExecution.cpp @@ -434,8 +434,18 @@ class GenerateKernelExecution hasTrailingData = true; continue; } - if (isa(currEleTy) && - !isStatePointerType(currEleTy)) { + //if (isa(currEleTy) && + // !isStatePointerType(currEleTy)) { + if (auto ptrTy = dyn_cast(currEleTy)) { + if (isa(ptrTy.getElementType())) { + // Special case: if the argument is a `cudaq::state*`, then just pass + // the pointer. We can do that in this case because the synthesis step + // (which will receive the argument data) is assumed to run in the + // same memory space. 
+ argPtr = builder.create(loc, currEleTy, argPtr); + stVal = builder.create(loc, stVal.getType(), + stVal, argPtr, idx); + } continue; } @@ -941,8 +951,8 @@ class GenerateKernelExecution cudaq::cc::numberOfHiddenArgs(hasThisPointer, hiddenSRet); if (count > 0 && args.size() >= count && std::all_of(args.begin(), args.begin() + count, [](auto i) { - return isa(i.getType()) && - !isStatePointerType(i.getType()); + return isa(i.getType());// && + // !isStatePointerType(i.getType()); })) return args.drop_front(count); return args; @@ -1208,9 +1218,21 @@ class GenerateKernelExecution hasTrailingData = true; continue; } - if (isa(inTy) && !isStatePointerType(inTy)) + //if (isa(inTy) && !isStatePointerType(inTy)) + // continue; + if (auto ptrTy = dyn_cast(inTy)) { + if (isa(ptrTy.getElementType())) { + // Special case: if the argument is a `cudaq::state*`, then just pass + // the pointer. We can do that in this case because the synthesis step + // (which will receive the argument data) is assumed to run in the + // same memory space. + Value argPtr = builder.create(loc, inTy, arg); + stVal = builder.create(loc, stVal.getType(), + stVal, argPtr, idx); + } continue; - + } + stVal = builder.create(loc, stVal.getType(), stVal, arg, idx); } diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index f371a8b9cd..dbb2b00cc8 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -23,10 +23,19 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" +#include + #define DEBUG_TYPE "quake-synthesizer" using namespace mlir; +// cudaq::state is defined in the runtime. The compiler will never need to know +// about its implementation and there should not be a circular build/library +// dependence because of it. Simply forward declare it, as it is notional. 
+namespace cudaq { +class state; +} + /// Replace a BlockArgument of a specific type with a concrete instantiation of /// that type, and add the generation of that constant as an MLIR Op to the /// beginning of the function. For example @@ -366,7 +375,9 @@ class QuakeSynthesizer } void runOnOperation() override final { + std::cout << "Module before synthesis " << std::endl; auto module = getModule(); + module.dump(); if (args == nullptr || kernelName.empty()) { module.emitOpError("Synthesis requires a kernel and the values of the " "arguments passed when it is called."); @@ -472,6 +483,27 @@ class QuakeSynthesizer continue; } + if (auto ptrTy = dyn_cast(type)) { + if (isa(ptrTy.getElementType())) { + // Special case of a `cudaq::state*` which must be in the same address + // space. This references a container to a set of simulation + // amplitudes. + synthesizeRuntimeArgument( + builder, argument, args, offset, sizeof(void *), + [=](OpBuilder &builder, cudaq::state **concrete) { + Value rawPtr = builder.create( + loc, reinterpret_cast(*concrete), + sizeof(void *) * 8); + auto stateTy = cudaq::cc::StateType::get(builder.getContext()); + return builder.create( + loc, cudaq::cc::PointerType::get(stateTy), rawPtr); + }); + continue; + } + // N.B. Other pointers will not be materialized and may be in a + // different address space. + } + // If std::vector type, add it to the list of vector info. // These will be processed when we reach the buffer's appendix. 
if (auto vecTy = dyn_cast(type)) { @@ -601,6 +633,8 @@ class QuakeSynthesizer } } funcOp.eraseArguments(argsToErase); + std::cout << "Module after synthesis " << std::endl; + module.dump(); } }; diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp new file mode 100644 index 0000000000..d7868b46ef --- /dev/null +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -0,0 +1,119 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Runtime.h" +#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" +#include "cudaq/Optimizer/Transforms/Passes.h" +#include "llvm/Support/Debug.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Target/LLVMIR/TypeToLLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/RegionUtils.h" + +#include + +#define DEBUG_TYPE "state-preparation" + +using namespace mlir; + +/// Replace a qubit initialization from vectors with quantum gates. 
+/// For example: +/// +/// func.func @foo(%arg0 : !cc.stdvec>) { +/// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 +/// %1 = math.cttz %0 : i64 +/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> +/// %3 = quake.alloca !quake.veq[%1 : i64] +/// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq +/// return +/// } +/// +/// on call that passes std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2} as arg0 +/// will be updated to: +/// +/// func.func @foo(%arg0 : !cc.stdvec>) { +/// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 +/// %c4_i64 = arith.constant 4 : i64 +/// %3 = math.cttz %c4_i64 : i64 +/// %5 = quake.alloca !quake.veq[%3 : i64] +/// %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref +/// quake.h %6 : (!quake.ref) -> () +/// %7 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref +/// %8 = quake.extract_ref %5[1] : (!quake.veq) -> !quake.ref +/// quake.x [%7] %8 : (!quake.ref, !quake.ref) -> () +/// } +/// +/// Note: we rely on the later synthesis and const prop stages to replace +/// the argument by a constant and propagate the values and vector size +/// through those and other instructions. + +namespace { +class StatePreparation + : public cudaq::opt::StatePreparationBase { +protected: + // The name of the kernel to be synthesized + std::string kernelName; + + // The raw pointer to the runtime arguments. 
+ void *args; + +public: + StatePreparation() = default; + StatePreparation(std::string_view kernel, void *a) + : kernelName(kernel), args(a) {} + + mlir::ModuleOp getModule() { return getOperation(); } + + + void runOnOperation() override final { + std::cout << "Module before state prep " << std::endl; + auto module = getModule(); + module.dump(); + if (args == nullptr || kernelName.empty()) { + module.emitOpError("Synthesis requires a kernel and the values of the " + "arguments passed when it is called."); + signalPassFailure(); + return; + } + + auto kernelNameInQuake = cudaq::runtime::cudaqGenPrefixName + kernelName; + // Get the function we care about (the one with kernelName) + auto funcOp = module.lookupSymbol(kernelNameInQuake); + if (!funcOp) { + module.emitOpError("The kernel '" + kernelName + + "' was not found in the module."); + signalPassFailure(); + return; + } + + // Create the builder. + auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); + + std::cout << "Module after synthesis " << std::endl; + module.dump(); + } +}; + +} // namespace + +std::unique_ptr cudaq::opt::createStatePreparation() { + return std::make_unique(); +} + +std::unique_ptr +cudaq::opt::createStatePreparation(std::string_view kernelName, void *a) { + return std::make_unique(kernelName, a); +} diff --git a/program.py b/program.py new file mode 100644 index 0000000000..e282d8cd5d --- /dev/null +++ b/program.py @@ -0,0 +1,35 @@ +# ============================================================================ # +# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # +# All rights reserved. # +# # +# This source code and the accompanying materials are made available under # +# the terms of the Apache License 2.0 which accompanies this distribution. 
# +# ============================================================================ # + +import numpy as np +import cudaq + +import cudaq +import numpy as np + +cudaq.reset_target() + +cudaq.set_target('nvidia') +#cudaq.set_target('nvidia-mqpu') +# cudaq.set_target('density-matrix-cpu') + + +c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], + dtype=np.complex128) +state = cudaq.State.from_data(c) + +@cudaq.kernel(verbose=True) +def kernel(vec: cudaq.State): + q = cudaq.qvector(vec) + +print(kernel) +print(cudaq.to_qir(kernel)) + +#print(cudaq.get_target()) +#counts = cudaq.sample(kernel, state) +#print(counts) \ No newline at end of file diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 51f86ce15f..ff0c0ce477 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -474,6 +474,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, PassManager pm(context); pm.addPass(createCanonicalizerPass()); + pm.addPass(cudaq::opt::createStatePreparation(name, rawArgs)); pm.addPass(cudaq::opt::createQuakeSynthesizer(name, rawArgs)); pm.addPass(cudaq::opt::createExpandMeasurementsPass()); pm.addNestedPass(cudaq::opt::createClassicalMemToReg()); diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index aa36a0c62d..08f41e60ec 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -401,6 +401,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&context); + pm.addPass(cudaq::opt::createStatePreparation(kernelName, updatedArgs)); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); if (disableMLIRthreading || enablePrintMLIREachPass) moduleOp.getContext()->disableMultithreading(); diff --git a/runtime/common/BaseRestRemoteClient.h 
b/runtime/common/BaseRestRemoteClient.h index 17c235a76b..9325d0345d 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -153,6 +153,7 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { if (args) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&mlirContext); + pm.addPass(cudaq::opt::createStatePreparation(name, args)); pm.addPass(cudaq::opt::createQuakeSynthesizer(name, args)); if (failed(pm.run(moduleOp))) throw std::runtime_error("Could not successfully apply quake-synth."); diff --git a/targettests/execution/from_state.cpp b/targettests/execution/from_state.cpp new file mode 100644 index 0000000000..55438848cb --- /dev/null +++ b/targettests/execution/from_state.cpp @@ -0,0 +1,30 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s + +#include +#include "cudaq/builder/kernels.h" +#include + +__qpu__ void test(cudaq::state *inState) { + cudaq::qvector q(inState); +} + +// CHECK: size 2 + +int main() { + std::vector> vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; + auto state = cudaq::state::from_data(vec); + auto counts = cudaq::sample(test, &state); + counts.dump(); + + printf("size %zu\n", counts.size()); + return !(counts.size() == 2); +} diff --git a/targettests/execution/from_state_complex.cpp b/targettests/execution/from_state_complex.cpp new file mode 100644 index 0000000000..5ca8813393 --- /dev/null +++ b/targettests/execution/from_state_complex.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s + +#include + +__qpu__ void test(std::vector inState) { + cudaq::qvector q = inState; +} + +// CHECK: size 2 + +int main() { + std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; + auto counts = cudaq::sample(test, vec); + counts.dump(); + + printf("size %zu\n", counts.size()); + return !(counts.size() == 2); +} diff --git a/targettests/execution/program.cpp b/targettests/execution/program.cpp new file mode 100644 index 0000000000..b6a12ebb57 --- /dev/null +++ b/targettests/execution/program.cpp @@ -0,0 +1,167 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s + +#include +#include "cudaq/builder/kernels.h" +#include + +__qpu__ void test1(std::vector inState) { + cudaq::qvector q1 = inState; + h(q1[0]); + cx(q1[0], q1[1]); + +} + +// __qpu__ void test2(cudaq::state *inState) { +// cudaq::qvector q2(inState); +// cudaq::x(q2); +// } + +// __qpu__ void test3() { +// auto q3 = cudaq::qvector({M_SQRT1_2, 0., 0., M_SQRT1_2}); +// } + +// error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:1938: not yet implemented: unknown function, get_state, in cudaq namespace +// __qpu__ void test4() { +// cudaq::qvector q(cudaq::get_state(test3)); +// } + +// error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:392: not yet implemented: argument type conversion +// __qpu__ void test5(cudaq::state *inState) { +// test2(inState); +// } + + + +int main() { + std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; + + { + // Passing state data as argument (vector) + + // Before synthesis: + + // func.func @__nvqpp__mlirgen__function_test1._Z5test1St6vectorISt7complexIfESaIS1_EE(%arg0: !cc.stdvec>) attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + // %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 + // %1 = math.cttz %0 : i64 + // %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> + // %3 = quake.alloca !quake.veq[%1 : i64] + // %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq + // return + // } + + // After synthesis + + // func.func @__nvqpp__mlirgen__function_test1._Z5test1St6vectorISt7complexIfESaIS1_EE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + // %0 = cc.const_array [0.707106769 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.707106769 : f32, 0.000000e+00 : f32] : !cc.array x 4> + // %1 = cc.alloca 
!cc.array x 4> + // cc.store %0, %1 : !cc.ptr x 4>> + // %2 = cc.cast %1 : (!cc.ptr x 4>>) -> !cc.ptr> + // %c4_i64 = arith.constant 4 : i64 + // %3 = math.cttz %c4_i64 : i64 // (TODO: replace by a const) + // %4 = quake.alloca !quake.veq[%3 : i64] + // %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq // TODO: replace by gates + // return + // } + + // TODO: in StatePreparation pass + // input - vector, qubits + // output - MLIR replacing alloca+state_init instructions with gates on qubits + + // %3 = math.cttz %c4_i64 : i64 + // %4 = quake.alloca !quake.veq[%3 : i64] + // %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq + + // => (something like) + + // create a function that does the following and call it on qubits + // %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref + // quake.ry (%cst) %6 : (f64, !quake.ref) -> () + // ... + + // TODO: Run state preparation pass before synthesis + + std::cout << "test1(vec): " << "\n"; + auto counts = cudaq::sample(test1, vec); + counts.dump(); + } + + // { + // // Passing state ptr as argument - no support for from_data + + // // "func.func"() ({ + // // ^bb0(%arg0: !cc.ptr): + // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 + // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq + // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq + // // "func.return"() : () -> () + // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () + + // std::cout << "test2(state): " << "\n"; + // auto state = cudaq::state::from_data(vec); + + // // 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function + // //auto counts = cudaq::sample(test2, &state); + // //counts.dump(); + // } + + // { + // // Passing a state from another kernel as argument + + // // "func.func"() ({ + // // 
^bb0(%arg0: !cc.ptr): + // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 + // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq + // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq + // // "func.return"() : () -> () + // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () + + // std::cout << "test2(test3): " << "\n"; + // auto state = cudaq::get_state(test3); + + // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function + // auto counts = cudaq::sample(test2, &state); + // counts.dump(); + // } + + // { + // // Passing a state to another kernel as argument + // std::cout << "test4(state): " << "\n"; + // //auto state = cudaq::state::from_data(vec); + // //auto counts = cudaq::sample(test4, &state); + // } + + // { + // // Creating a kernel from state and passing its state to another kernel + + // // "func.func"() ({ + // // ^bb0(%arg0: !cc.ptr): + // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 + // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq + // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq + // // "func.return"() : () -> () + // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () + + // std::cout << "test2(kernel): " << "\n"; + // std::vector> vec{.70710678, 0., 0., 0.70710678}; + // auto kernel = cudaq::make_kernel(); + // auto qubits = kernel.qalloc(2); + + // cudaq::from_state(kernel, qubits, vec); + + // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function + // //auto state = cudaq::get_state(kernel); + // //auto counts = cudaq::sample(test2, &state); + + // //counts.dump(); + // } + +} \ No 
newline at end of file From 93dd8d7f4ba31cc3869fd7fbaa399631c1cdaa97 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 25 Jun 2024 11:23:37 -0700 Subject: [PATCH 02/50] Implement state preparation --- lib/Optimizer/Transforms/CMakeLists.txt | 1 + lib/Optimizer/Transforms/StateDecomposer.cpp | 128 ++++++++++++++ lib/Optimizer/Transforms/StateDecomposer.h | 163 ++++++++++++++++++ lib/Optimizer/Transforms/StatePreparation.cpp | 151 ++++++++++------ targettests/execution/program.cpp | 118 +++---------- 5 files changed, 415 insertions(+), 146 deletions(-) create mode 100644 lib/Optimizer/Transforms/StateDecomposer.cpp create mode 100644 lib/Optimizer/Transforms/StateDecomposer.h diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index 6a51057bd3..b0a13571ec 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -39,6 +39,7 @@ add_cudaq_library(OptTransforms ObserveAnsatz.cpp PruneCtrlRelations.cpp QuakeAddMetadata.cpp + StateDecomposer.cpp StatePreparation.cpp QuakeSynthesizer.cpp RefToVeqAlloc.cpp diff --git a/lib/Optimizer/Transforms/StateDecomposer.cpp b/lib/Optimizer/Transforms/StateDecomposer.cpp new file mode 100644 index 0000000000..3105fad707 --- /dev/null +++ b/lib/Optimizer/Transforms/StateDecomposer.cpp @@ -0,0 +1,128 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include "StateDecomposer.h" + +namespace cudaq::details { + +std::vector grayCode(std::size_t numBits) { + std::vector result(1ULL << numBits); + for (std::size_t i = 0; i < (1ULL << numBits); ++i) + result[i] = ((i >> 1) ^ i); + return result; +} + +std::vector getControlIndices(std::size_t numBits) { + auto code = grayCode(numBits); + std::vector indices; + for (auto i = 0u; i < code.size(); ++i) { + // The position of the control in the lth CNOT gate is set to match + // the position where the lth and (l + 1)th bit strings g[l] and g[l+1] of + // the binary reflected Gray code differ. + auto position = std::log2(code[i] ^ code[(i + 1) % code.size()]); + // N.B: In CUDA Quantum we write the least significant bit (LSb) on the left + // + // lsb -v + // 001 + // ^- msb + // + // Meaning that the bitstring 001 represents the number four instead of one. + // The above position calculation uses the 'normal' convention of writing + // numbers with the LSb on the left. + // + // Now, what we need to find out is the position of the 1 in the bitstring. + // If we take LSb as being position 0, then for the normal convention its + // position will be 0. Using CUDA Quantum convention it will be 2. Hence, + // we need to convert the position we find using: + // + // numBits - position - 1 + // + // The extra -1 is to account for indices starting at 0. Using the above + // examples: + // + // bitstring: 001 + // numBits: 3 + // position: 0 + // + // We have the converted position: 2, which is what we need. + indices.emplace_back(numBits - position - 1); + } + return indices; +} + +std::vector convertAngles(const std::span alphas) { + // Implements Eq. (3) from https://arxiv.org/pdf/quant-ph/0407010.pdf + // + // N.B: The paper does fails to explicitly define what is the dot operator in + // the exponent of -1. Ref. 3 solves the mystery: its the bitwise inner + // product. 
+ auto bitwiseInnerProduct = [](std::size_t a, std::size_t b) { + auto product = a & b; + auto sumOfProducts = 0; + while (product) { + sumOfProducts += product & 0b1 ? 1 : 0; + product = product >> 1; + } + return sumOfProducts; + }; + std::vector thetas(alphas.size(), 0); + for (std::size_t i = 0u; i < alphas.size(); ++i) { + for (std::size_t j = 0u; j < alphas.size(); ++j) + thetas[i] += + bitwiseInnerProduct(j, ((i >> 1) ^ i)) & 0b1 ? -alphas[j] : alphas[j]; + thetas[i] /= alphas.size(); + } + return thetas; +} + +std::vector getAlphaZ(const std::span data, + std::size_t numQubits, std::size_t k) { + // Implements Eq. (5) from https://arxiv.org/pdf/quant-ph/0407010.pdf + std::vector angles; + double divisor = static_cast(1ULL << (k - 1)); + for (std::size_t j = 1; j <= (1ULL << (numQubits - k)); ++j) { + double angle = 0.0; + for (std::size_t l = 1; l <= (1ULL << (k - 1)); ++l) + // N.B: There is an extra '-1' on these indices computations to account + // for the fact that our indices start at 0. + angle += data[(2 * j - 1) * (1 << (k - 1)) + l - 1] - + data[(2 * j - 2) * (1 << (k - 1)) + l - 1]; + angles.push_back(angle / divisor); + } + return angles; +} + +std::vector getAlphaY(const std::span data, + std::size_t numQubits, std::size_t k) { + // Implements Eq. (8) from https://arxiv.org/pdf/quant-ph/0407010.pdf + // N.B: There is an extra '-1' on these indices computations to account for + // the fact that our indices start at 0. 
+ std::vector angles; + for (std::size_t j = 1; j <= (1ULL << (numQubits - k)); ++j) { + double numerator = 0; + for (std::size_t l = 1; l <= (1ULL << (k - 1)); ++l) { + numerator += + std::pow(std::abs(data[(2 * j - 1) * (1 << (k - 1)) + l - 1]), 2); + } + + double denominator = 0; + for (std::size_t l = 1; l <= (1ULL << k); ++l) { + denominator += std::pow(std::abs(data[(j - 1) * (1 << k) + l - 1]), 2); + } + + if (denominator == 0.0) { + assert(numerator == 0.0 && + "If the denominator is zero, the numerator must also be zero."); + angles.push_back(0.0); + continue; + } + angles.push_back(2.0 * std::asin(std::sqrt(numerator / denominator))); + } + return angles; +} +} // namespace cudaq::details \ No newline at end of file diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h new file mode 100644 index 0000000000..bac6909708 --- /dev/null +++ b/lib/Optimizer/Transforms/StateDecomposer.h @@ -0,0 +1,163 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Runtime.h" +#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" +#include "cudaq/Optimizer/Transforms/Passes.h" +#include "llvm/Support/Debug.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Target/LLVMIR/TypeToLLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/RegionUtils.h" +#include + +#include + +namespace cudaq::details { + + /// @brief Converts angles of a uniformly controlled rotation to angles of + /// non-controlled rotations. + std::vector convertAngles(const std::span alphas); + + /// @brief Return the control indices dictated by the gray code implementation. + /// + /// Here, numBits is the number of controls. + std::vector getControlIndices(std::size_t numBits); + + /// @brief Return angles required to implement a uniformly controlled z-rotation + /// on the `kth` qubit. + std::vector getAlphaZ(const std::span data, + std::size_t numQubits, std::size_t k); + + /// @brief Return angles required to implement a uniformly controlled y-rotation + /// on the `kth` qubit. 
+ std::vector getAlphaY(const std::span data, + std::size_t numQubits, std::size_t k); +} // namespace cudaq::details + +class StateGateBuilder { +public: + StateGateBuilder(mlir::OpBuilder& b, mlir::Location& l, mlir::Value& q): builder(b), loc(l), qubits(q) {} + + template + void applyRotationOp(double theta, std::size_t target) { + auto qubit = createQubitRef(target); + auto thetaValue = createAngleValue(theta); + builder.create(loc, thetaValue, mlir::ValueRange{}, qubit); + }; + + void applyX(std::size_t control, std::size_t target) { + auto qubitC = createQubitRef(control); + auto qubitT = createQubitRef(target); + builder.create(loc, qubitC, qubitT); + }; + +private: + mlir::Value createQubitRef(std::size_t index) { + if (qubitRefs.contains(index)) { + return qubitRefs[index]; + } + + auto indexValue = builder.create(loc, index, builder.getIntegerType(64)); + auto ref = builder.create(loc, qubits, indexValue); + qubitRefs[index] = ref; + return ref; + } + + mlir::Value createAngleValue(double angle) { + return builder.create(loc, llvm::APFloat{angle}, builder.getF64Type()); + } + + mlir::OpBuilder& builder; + mlir::Location& loc; + mlir::Value& qubits; + + std::unordered_map qubitRefs = std::unordered_map(); +}; + +class StateDecomposer { +public: + StateDecomposer(StateGateBuilder& b, std::vector>& a): builder(b), amplitudes(a), numQubits(log2(a.size())) {} + + /// @brief Decompose the input state vector data to a set of controlled + /// operations and rotations. This function takes as input a `OpBuilder` + /// and appends the operations of the decomposition to its internal + /// representation. This implementation follows the algorithm defined in + /// `https://arxiv.org/pdf/quant-ph/0407010.pdf`. + void decompose() { + + // Decompose the state into phases and magnitudes. 
+ bool needsPhaseEqualization = false; + std::vector phases; + std::vector magnitudes; + for (const auto &a : amplitudes) { + phases.push_back(std::arg(a)); + magnitudes.push_back(std::abs(a)); + // FIXME: remove magic number. + needsPhaseEqualization |= std::abs(phases.back()) > 1e-10; + } + + // N.B: The algorithm, as described in the paper, creates a circuit that + // begins with a target state and brings it to the all zero state. Hence, this + // implementation do the two steps described in Section III in reverse order. + + // Apply uniformly controlled y-rotations, the construction in Eq. (4). + for (std::size_t j = 1; j <= numQubits; ++j) { + auto k = numQubits - j + 1; + auto numControls = j - 1; + auto target = j - 1; + auto alphaYk = cudaq::details::getAlphaY(magnitudes, numQubits, k); + applyRotation(alphaYk, numControls, target); + } + + if (!needsPhaseEqualization) + return; + + // Apply uniformly controlled z-rotations, the construction in Eq. (4). + for (std::size_t j = 1; j <= numQubits; ++j) { + auto k = numQubits - j + 1; + auto numControls = j - 1; + auto target = j - 1; + auto alphaZk = cudaq::details::getAlphaZ(phases, numQubits, k); + if (alphaZk.empty()) + continue; + applyRotation(alphaZk, numControls, target); + } + } + +private: + /// @brief Apply a uniformly controlled rotation on the target qubit. 
+ template + void applyRotation(const std::span alphas, std::size_t numControls, std::size_t target) { + auto thetas = cudaq::details::convertAngles(alphas); + if (numControls == 0) { + builder.applyRotationOp(thetas[0], target); + return; + } + + auto controlIndices = cudaq::details::getControlIndices(numControls); + assert(thetas.size() == controlIndices.size()); + for (auto [i, c] : llvm::enumerate(controlIndices)) { + builder.applyRotationOp(thetas[i], target); + builder.applyX(c, target); + } + } + + StateGateBuilder& builder; + std::span> amplitudes; + std::size_t numQubits; +}; diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index ce46efecc0..86bb911a3a 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -18,10 +18,13 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Pass/Pass.h" #include "mlir/Target/LLVMIR/TypeToLLVM.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" +#include +#include "StateDecomposer.h" #include @@ -35,33 +38,44 @@ using namespace mlir; /// func.func @foo(%arg0 : !cc.stdvec>) { /// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 /// %1 = math.cttz %0 : i64 -/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> -/// !cc.ptr> %3 = quake.alloca !quake.veq[%1 : i64] %4 = -/// quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> -/// !quake.veq return +/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> +/// %3 = quake.alloca !quake.veq[%1 : i64] +/// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq +/// return /// } /// -/// on call that passes std::vector vec{M_SQRT1_2, 0., 0., -/// M_SQRT1_2} as arg0 will be updated to: +/// On a call that passes std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2} as arg0: /// /// func.func 
@foo(%arg0 : !cc.stdvec>) { -/// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 -/// %c4_i64 = arith.constant 4 : i64 -/// %3 = math.cttz %c4_i64 : i64 -/// %5 = quake.alloca !quake.veq[%3 : i64] -/// %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref -/// quake.h %6 : (!quake.ref) -> () -/// %7 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref -/// %8 = quake.extract_ref %5[1] : (!quake.veq) -> !quake.ref -/// quake.x [%7] %8 : (!quake.ref, !quake.ref) -> () +/// %0 = quake.alloca !quake.veq<2> +/// %c0_i64 = arith.constant 0 : i64 +/// %1 = quake.extract_ref %0[%c0_i64] : (!quake.veq<2>, i64) -> !quake.ref +/// %cst = arith.constant 1.5707963267948968 : f64 +/// quake.ry (%cst) %1 : (f64, !quake.ref) -> () +/// %c1_i64 = arith.constant 1 : i64 +/// %2 = quake.extract_ref %0[%c1_i64] : (!quake.veq<2>, i64) -> !quake.ref +/// %cst_0 = arith.constant 1.5707963267948966 : f64 +/// quake.ry (%cst_0) %2 : (f64, !quake.ref) -> () +/// quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () +/// %cst_1 = arith.constant -1.5707963267948966 : f64 +/// quake.ry (%cst_1) %2 : (f64, !quake.ref) -> () +/// quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () +/// return /// } /// -/// Note: we rely on the later synthesis and const prop stages to replace +/// Note: the following synthesis and const prop passes will replace /// the argument by a constant and propagate the values and vector size -/// through those and other instructions. +/// through other instructions. namespace { +template +concept IntegralType = std::is_same::value + || std::is_same::value + || std::is_same::value + || std::is_same::value + || std::is_same::value; + template concept FloatingType = std::is_same::value; @@ -69,12 +83,11 @@ template concept DoubleType = std::is_same::value; template -concept ComplexDataType = FloatingType || DoubleType; +concept ComplexDataType = FloatingType || DoubleType || IntegralType; /// Input was complex/complex but we prefer /// complex/complex. 
Make a copy, extending or truncating the /// values. -/// TODO: dont convert if not needed template std::vector> convertToComplex(std::complex *data, std::uint64_t size) { auto convertData = std::vector>(size); @@ -86,7 +99,7 @@ std::vector> convertToComplex(std::complex *data, std template std::vector> convertToComplex(std::complex *data, std::uint64_t size) { - return std::vector>(data, size); + return std::vector>(data, data+size); } /// Input was float/double but we prefer complex/complex. @@ -104,7 +117,7 @@ LogicalResult prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector> &vec) { - // auto *ctx = builder.getContext(); + auto *ctx = builder.getContext(); // builder.setInsertionPointToStart(argument.getOwner()); auto argLoc = argument.getLoc(); @@ -132,30 +145,67 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, /// %8 = quake.extract_ref %5[1] : (!quake.veq) -> !quake.ref /// quake.x [%7] %8 : (!quake.ref, !quake.ref) -> () + auto toErase = std::vector(); + for (auto *argUser : argument.getUsers()) { + // Handle the `StdvecSize` and `quake.alloca` use case: + // - Replace a `vec.size()` with the vector length. + // - Replace the number of qubits calculation with the vector length logarithm. + // - Replace `quake.alloca` with a constant size qvector allocation. 
+ if (auto stdvecSizeOp = dyn_cast(argUser)) { + builder.setInsertionPointAfter(stdvecSizeOp); + Value length = builder.create( + argLoc, vec.size(), stdvecSizeOp.getType()); + + Value numQubits = builder.create( + argLoc, log2(vec.size()), stdvecSizeOp.getType()); + + for (auto *sizeUser: argUser->getUsers()) { + if (auto countZeroesOp = dyn_cast(sizeUser)) { + for (auto *numQubitsUser: sizeUser->getUsers()) { + if (auto quakeAllocaOp = dyn_cast(numQubitsUser)) { + builder.setInsertionPointAfter(quakeAllocaOp); + auto veqTy = quake::VeqType::get(ctx, log2(vec.size())); + Value newAlloc = builder.create(argLoc, veqTy); + quakeAllocaOp.replaceAllUsesWith(newAlloc); + toErase.push_back(quakeAllocaOp); + } + } + countZeroesOp.replaceAllUsesWith(numQubits); + toErase.push_back(countZeroesOp); + } + } + + stdvecSizeOp.replaceAllUsesWith(length); + toErase.push_back(stdvecSizeOp); + continue; + } + + // Handle the `StdvecDataOp` and `quake.init_state` use case: + // - Replace a `quake.init_state` with gates preparing the state. if (auto stdvecDataOp = dyn_cast(argUser)) { for (auto *dataUser : stdvecDataOp->getUsers()) { if (auto initOp = dyn_cast(dataUser)) { builder.setInsertionPointAfter(initOp); // Find the qvector alloc instruction - auto qvector = initOp.getOperand(0); - - // Replace! - auto zero = builder.create( - argLoc, 0, builder.getIntegerType(64)); - auto one = builder.create( - argLoc, 1, builder.getIntegerType(64)); - Value q0 = builder.create(argLoc, qvector, zero); - Value q1 = builder.create(argLoc, qvector, one); - /*auto hval =*/ builder.create(argLoc, q0); - /*auto xval =*/ builder.create(argLoc, q0, q1); - - initOp.replaceAllUsesWith(qvector); + auto qubits = initOp.getOperand(0); + + // Prepare state from vector data. 
+ auto gateBuilder = StateGateBuilder(builder, argLoc, qubits); + auto decomposer = StateDecomposer(gateBuilder, vec); + decomposer.decompose(); + + initOp.replaceAllUsesWith(qubits); + toErase.push_back(initOp); } } } } + for (auto& op: toErase) { + op->erase(); + } + return success(); } @@ -294,20 +344,20 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { }; if (auto ty = dyn_cast(eleTy)) { switch (ty.getIntOrFloatBitWidth()) { - // case 1: - // doVector(false); - // break; - // case 8: - // doVector(std::int8_t{}); - // break; - // case 16: - // doVector(std::int16_t{}); - // break; - // case 32: - // doVector(std::int32_t{}); - // break; - // case 64: - // doVector(std::int64_t{}); + case 1: + doVector(false); + break; + case 8: + doVector(std::int8_t{}); + break; + case 16: + doVector(std::int16_t{}); + break; + case 32: + doVector(std::int32_t{}); + break; + case 64: + doVector(std::int64_t{}); break; default: bufferAppendix += vecLength * cudaq::opt::convertBitsToBytes( @@ -334,10 +384,9 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { doVector(std::complex{}); continue; } - - std::cout << "Module after state preparation " << std::endl; - module.dump(); } + std::cout << "Module after state preparation " << std::endl; + module.dump(); } }; diff --git a/targettests/execution/program.cpp b/targettests/execution/program.cpp index 055084455c..be4855e3de 100644 --- a/targettests/execution/program.cpp +++ b/targettests/execution/program.cpp @@ -15,9 +15,6 @@ __qpu__ void test1(std::vector inState) { cudaq::qvector q1 = inState; - // Should synthesize to - // h(q1[0]); - // cx(q1[0], q1[1]); } // __qpu__ void test2(cudaq::state *inState) { @@ -29,10 +26,6 @@ __qpu__ void test1(std::vector inState) { // auto q3 = cudaq::qvector({M_SQRT1_2, 0., 0., M_SQRT1_2}); // } -// error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:1938: not yet implemented: unknown function, get_state, in cudaq namespace -// __qpu__ void test4() { 
-// cudaq::qvector q(cudaq::get_state(test3)); -// } // error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:392: not yet implemented: argument type conversion // __qpu__ void test5(cudaq::state *inState) { @@ -42,53 +35,9 @@ __qpu__ void test1(std::vector inState) { int main() { - std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; - + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; { // Passing state data as argument (vector) - - // Before synthesis: - - // func.func @__nvqpp__mlirgen__function_test1._Z5test1St6vectorISt7complexIfESaIS1_EE(%arg0: !cc.stdvec>) attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - // %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 - // %1 = math.cttz %0 : i64 - // %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> - // %3 = quake.alloca !quake.veq[%1 : i64] - // %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq - // return - // } - - // After synthesis - - // func.func @__nvqpp__mlirgen__function_test1._Z5test1St6vectorISt7complexIfESaIS1_EE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - // %0 = cc.const_array [0.707106769 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.707106769 : f32, 0.000000e+00 : f32] : !cc.array x 4> - // %1 = cc.alloca !cc.array x 4> - // cc.store %0, %1 : !cc.ptr x 4>> - // %2 = cc.cast %1 : (!cc.ptr x 4>>) -> !cc.ptr> - // %c4_i64 = arith.constant 4 : i64 - // %3 = math.cttz %c4_i64 : i64 // (TODO: replace by a const) - // %4 = quake.alloca !quake.veq[%3 : i64] - // %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq // TODO: replace by gates - // return - // } - - // TODO: in StatePreparation pass - // input - vector, qubits - // output - MLIR replacing alloca+state_init instructions with gates on qubits - - // %3 = math.cttz %c4_i64 : i64 - // %4 = quake.alloca !quake.veq[%3 : i64] - // %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq - - // => (something 
like) - - // create a function that does the following and call it on qubits - // %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref - // quake.ry (%cst) %6 : (f64, !quake.ref) -> () - // ... - - // TODO: Run state preparation pass before synthesis - std::cout << "test1(vec): " << "\n"; auto counts = cudaq::sample(test1, vec); counts.dump(); @@ -96,37 +45,21 @@ int main() { // { // // Passing state ptr as argument - no support for from_data - - // // "func.func"() ({ - // // ^bb0(%arg0: !cc.ptr): - // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 - // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq - // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq - // // "func.return"() : () -> () - // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () - + // // std::cout << "test2(state): " << "\n"; // auto state = cudaq::state::from_data(vec); - + // // // 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // //auto counts = cudaq::sample(test2, &state); - // //counts.dump(); + // auto counts = cudaq::sample(test2, &state); + // counts.dump(); // } // { // // Passing a state from another kernel as argument - - // // "func.func"() ({ - // // ^bb0(%arg0: !cc.ptr): - // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 - // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq - // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq - // // "func.return"() : () -> () - // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () - + // // std::cout << "test2(test3): " << "\n"; // auto state = cudaq::get_state(test3); - + // // // error: 'func.call' op 
'__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function // auto counts = cudaq::sample(test2, &state); // counts.dump(); @@ -134,34 +67,29 @@ int main() { // { // // Passing a state to another kernel as argument + // // std::cout << "test4(state): " << "\n"; - // //auto state = cudaq::state::from_data(vec); - // //auto counts = cudaq::sample(test4, &state); + // + // auto state = cudaq::state::from_data(vec); + // auto counts = cudaq::sample(test4, &state); // } // { - // // Creating a kernel from state and passing its state to another kernel - - // // "func.func"() ({ - // // ^bb0(%arg0: !cc.ptr): - // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 - // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq - // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq - // // "func.return"() : () -> () - // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () - - // std::cout << "test2(kernel): " << "\n"; - // std::vector> vec{.70710678, 0., 0., 0.70710678}; - // auto kernel = cudaq::make_kernel(); - // auto qubits = kernel.qalloc(2); - - // cudaq::from_state(kernel, qubits, vec); - + // // Creating a kernel from state and passing its state to another kernel - is it deprecated? 
+ // + std::cout << "test2(kernel): " << "\n"; + std::vector> vec{.70710678, 0., 0., 0.70710678}; + auto kernel = cudaq::make_kernel(); + auto qubits = kernel.qalloc(2); + + cudaq::from_state(kernel, qubits, vec); + auto counts = cudaq::sample(kernel); + // // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function // //auto state = cudaq::get_state(kernel); // //auto counts = cudaq::sample(test2, &state); - - // //counts.dump(); + // + counts.dump(); // } } \ No newline at end of file From 1cd5cbe8ee8a196aa7bc364b77b03d1060ee2b58 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 25 Jun 2024 15:29:40 -0700 Subject: [PATCH 03/50] Cleanup --- lib/Optimizer/Transforms/CMakeLists.txt | 4 +- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 5 - lib/Optimizer/Transforms/StateDecomposer.h | 69 ++++---- lib/Optimizer/Transforms/StatePreparation.cpp | 108 ++++-------- program.py | 35 ---- .../tests/kernel/test_kernel_qvector_init.py | 162 ++---------------- targettests/execution/from_state.cpp | 30 ---- targettests/execution/program.cpp | 95 ---------- .../execution/state_preparation_vector.cpp | 57 ++++++ 9 files changed, 148 insertions(+), 417 deletions(-) delete mode 100644 program.py delete mode 100644 targettests/execution/from_state.cpp delete mode 100644 targettests/execution/program.cpp create mode 100644 targettests/execution/state_preparation_vector.cpp diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index b0a13571ec..173cec4538 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -39,11 +39,11 @@ add_cudaq_library(OptTransforms ObserveAnsatz.cpp PruneCtrlRelations.cpp QuakeAddMetadata.cpp - StateDecomposer.cpp - StatePreparation.cpp QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp + StateDecomposer.cpp + StatePreparation.cpp PySynthCallableBlockArgs.cpp DEPENDS diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp 
b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 0fa859f175..cc9279c79c 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -23,8 +23,6 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" -#include - #define DEBUG_TYPE "quake-synthesizer" using namespace mlir; @@ -419,9 +417,7 @@ class QuakeSynthesizer } void runOnOperation() override final { - std::cout << "Module before synthesis " << std::endl; auto module = getModule(); - // module.dump(); unsigned counter = 0; if (args == nullptr || kernelName.empty()) { @@ -680,7 +676,6 @@ class QuakeSynthesizer } } funcOp.eraseArguments(argsToErase); - // std::cout << "Module after synthesis " << std::endl; module.dump(); } }; diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h index bac6909708..2d17edb768 100644 --- a/lib/Optimizer/Transforms/StateDecomposer.h +++ b/lib/Optimizer/Transforms/StateDecomposer.h @@ -29,31 +29,32 @@ namespace cudaq::details { - /// @brief Converts angles of a uniformly controlled rotation to angles of - /// non-controlled rotations. - std::vector convertAngles(const std::span alphas); - - /// @brief Return the control indices dictated by the gray code implementation. - /// - /// Here, numBits is the number of controls. - std::vector getControlIndices(std::size_t numBits); - - /// @brief Return angles required to implement a uniformly controlled z-rotation - /// on the `kth` qubit. - std::vector getAlphaZ(const std::span data, - std::size_t numQubits, std::size_t k); - - /// @brief Return angles required to implement a uniformly controlled y-rotation - /// on the `kth` qubit. - std::vector getAlphaY(const std::span data, - std::size_t numQubits, std::size_t k); +/// @brief Converts angles of a uniformly controlled rotation to angles of +/// non-controlled rotations. 
+std::vector convertAngles(const std::span alphas); + +/// @brief Return the control indices dictated by the gray code implementation. +/// +/// Here, numBits is the number of controls. +std::vector getControlIndices(std::size_t numBits); + +/// @brief Return angles required to implement a uniformly controlled z-rotation +/// on the `kth` qubit. +std::vector getAlphaZ(const std::span data, + std::size_t numQubits, std::size_t k); + +/// @brief Return angles required to implement a uniformly controlled y-rotation +/// on the `kth` qubit. +std::vector getAlphaY(const std::span data, + std::size_t numQubits, std::size_t k); } // namespace cudaq::details class StateGateBuilder { public: - StateGateBuilder(mlir::OpBuilder& b, mlir::Location& l, mlir::Value& q): builder(b), loc(l), qubits(q) {} + StateGateBuilder(mlir::OpBuilder &b, mlir::Location &l, mlir::Value &q) + : builder(b), loc(l), qubits(q) {} - template + template void applyRotationOp(double theta, std::size_t target) { auto qubit = createQubitRef(target); auto thetaValue = createAngleValue(theta); @@ -72,26 +73,30 @@ class StateGateBuilder { return qubitRefs[index]; } - auto indexValue = builder.create(loc, index, builder.getIntegerType(64)); + auto indexValue = builder.create( + loc, index, builder.getIntegerType(64)); auto ref = builder.create(loc, qubits, indexValue); qubitRefs[index] = ref; return ref; } mlir::Value createAngleValue(double angle) { - return builder.create(loc, llvm::APFloat{angle}, builder.getF64Type()); + return builder.create( + loc, llvm::APFloat{angle}, builder.getF64Type()); } - mlir::OpBuilder& builder; - mlir::Location& loc; - mlir::Value& qubits; + mlir::OpBuilder &builder; + mlir::Location &loc; + mlir::Value &qubits; - std::unordered_map qubitRefs = std::unordered_map(); + std::unordered_map qubitRefs = + std::unordered_map(); }; class StateDecomposer { public: - StateDecomposer(StateGateBuilder& b, std::vector>& a): builder(b), amplitudes(a), numQubits(log2(a.size())) {} + 
StateDecomposer(StateGateBuilder &b, std::vector> &a) + : builder(b), amplitudes(a), numQubits(log2(a.size())) {} /// @brief Decompose the input state vector data to a set of controlled /// operations and rotations. This function takes as input a `OpBuilder` @@ -112,8 +117,9 @@ class StateDecomposer { } // N.B: The algorithm, as described in the paper, creates a circuit that - // begins with a target state and brings it to the all zero state. Hence, this - // implementation do the two steps described in Section III in reverse order. + // begins with a target state and brings it to the all zero state. Hence, + // this implementation do the two steps described in Section III in reverse + // order. // Apply uniformly controlled y-rotations, the construction in Eq. (4). for (std::size_t j = 1; j <= numQubits; ++j) { @@ -142,7 +148,8 @@ class StateDecomposer { private: /// @brief Apply a uniformly controlled rotation on the target qubit. template - void applyRotation(const std::span alphas, std::size_t numControls, std::size_t target) { + void applyRotation(const std::span alphas, std::size_t numControls, + std::size_t target) { auto thetas = cudaq::details::convertAngles(alphas); if (numControls == 0) { builder.applyRotationOp(thetas[0], target); @@ -157,7 +164,7 @@ class StateDecomposer { } } - StateGateBuilder& builder; + StateGateBuilder &builder; std::span> amplitudes; std::size_t numQubits; }; diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index 86bb911a3a..785e70b3f8 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -7,6 +7,7 @@ ******************************************************************************/ #include "PassDetails.h" +#include "StateDecomposer.h" #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" @@ -24,9 +25,6 @@ #include 
"mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" #include -#include "StateDecomposer.h" - -#include #define DEBUG_TYPE "state-preparation" @@ -38,13 +36,14 @@ using namespace mlir; /// func.func @foo(%arg0 : !cc.stdvec>) { /// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 /// %1 = math.cttz %0 : i64 -/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> -/// %3 = quake.alloca !quake.veq[%1 : i64] -/// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq -/// return +/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> +/// !cc.ptr> %3 = quake.alloca !quake.veq[%1 : i64] %4 = +/// quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> +/// !quake.veq return /// } /// -/// On a call that passes std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2} as arg0: +/// On a call that passes std::vector vec{M_SQRT1_2, 0., 0., +/// M_SQRT1_2} as arg0: /// /// func.func @foo(%arg0 : !cc.stdvec>) { /// %0 = quake.alloca !quake.veq<2> @@ -70,11 +69,11 @@ using namespace mlir; namespace { template -concept IntegralType = std::is_same::value - || std::is_same::value - || std::is_same::value - || std::is_same::value - || std::is_same::value; +concept IntegralType = + std::is_same::value || std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; template concept FloatingType = std::is_same::value; @@ -85,31 +84,33 @@ concept DoubleType = std::is_same::value; template concept ComplexDataType = FloatingType || DoubleType || IntegralType; -/// Input was complex/complex but we prefer -/// complex/complex. Make a copy, extending or truncating the -/// values. +/// Input was complex but we prefer +/// complex. Make a copy, extending the values. 
template -std::vector> convertToComplex(std::complex *data, std::uint64_t size) { +std::vector> convertToComplex(std::complex *data, + std::uint64_t size) { auto convertData = std::vector>(size); for (std::size_t i = 0; i < size; ++i) convertData[i] = std::complex{static_cast(data[i].real()), - static_cast(data[i].imag())}; + static_cast(data[i].imag())}; return convertData; } template -std::vector> convertToComplex(std::complex *data, std::uint64_t size) { - return std::vector>(data, data+size); +std::vector> convertToComplex(std::complex *data, + std::uint64_t size) { + return std::vector>(data, data + size); } -/// Input was float/double but we prefer complex/complex. +/// Input was float/double but we prefer complex. /// Make a copy, extending or truncating the values. template -std::vector> convertToComplex(From *data, std::uint64_t size) { +std::vector> convertToComplex(From *data, + std::uint64_t size) { auto convertData = std::vector>(size); for (std::size_t i = 0; i < size; ++i) - convertData[i] = - std::complex{static_cast(data[i]), static_cast(0.0)}; + convertData[i] = std::complex{static_cast(data[i]), + static_cast(0.0)}; return convertData; } @@ -118,39 +119,15 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector> &vec) { auto *ctx = builder.getContext(); - // builder.setInsertionPointToStart(argument.getOwner()); auto argLoc = argument.getLoc(); - // TODO: look at quake.init_state instructions from vector data and track them - // to the argument vector, then replace the instruction by gates preparing the - // state (or a call to a kernel with gates) - - /// func.func @foo(%arg0 : !cc.stdvec>) { - /// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 - /// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> - /// !cc.ptr> - /// - /// %3 = quake.alloca !quake.veq[%1 : i64] - /// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) - /// -> !quake.veq return - /// } - - /// => - - /// ... 
- /// %5 = quake.alloca !quake.veq[%3 : i64] - /// %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref - /// quake.h %6 : (!quake.ref) -> () - /// %7 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref - /// %8 = quake.extract_ref %5[1] : (!quake.veq) -> !quake.ref - /// quake.x [%7] %8 : (!quake.ref, !quake.ref) -> () - - auto toErase = std::vector(); + auto toErase = std::vector(); for (auto *argUser : argument.getUsers()) { // Handle the `StdvecSize` and `quake.alloca` use case: // - Replace a `vec.size()` with the vector length. - // - Replace the number of qubits calculation with the vector length logarithm. + // - Replace the number of qubits calculation with the vector length + // logarithm. // - Replace `quake.alloca` with a constant size qvector allocation. if (auto stdvecSizeOp = dyn_cast(argUser)) { builder.setInsertionPointAfter(stdvecSizeOp); @@ -160,9 +137,10 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, Value numQubits = builder.create( argLoc, log2(vec.size()), stdvecSizeOp.getType()); - for (auto *sizeUser: argUser->getUsers()) { - if (auto countZeroesOp = dyn_cast(sizeUser)) { - for (auto *numQubitsUser: sizeUser->getUsers()) { + for (auto *sizeUser : argUser->getUsers()) { + if (auto countZeroesOp = + dyn_cast(sizeUser)) { + for (auto *numQubitsUser : sizeUser->getUsers()) { if (auto quakeAllocaOp = dyn_cast(numQubitsUser)) { builder.setInsertionPointAfter(quakeAllocaOp); auto veqTy = quake::VeqType::get(ctx, log2(vec.size())); @@ -175,7 +153,7 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, toErase.push_back(countZeroesOp); } } - + stdvecSizeOp.replaceAllUsesWith(length); toErase.push_back(stdvecSizeOp); continue; @@ -202,7 +180,7 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, } } - for (auto& op: toErase) { + for (auto &op : toErase) { op->erase(); } @@ -249,9 +227,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { } void runOnOperation() override 
final { - std::cout << "Module before state prep " << std::endl; auto module = getModule(); - module.dump(); unsigned counter = 0; if (args == nullptr || kernelName.empty()) { @@ -286,13 +262,12 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { // Get the argument type auto type = argument.getType(); - // auto loc = argument.getLoc(); if (auto ptrTy = dyn_cast(type)) { if (isa(ptrTy.getElementType())) { - std::cout << "State pointer found, TODO: call a kernel that created " - "the state" - << std::endl; + funcOp.emitOpError( + "State preparation from cudaq::state is not supported."); + return; } } @@ -301,9 +276,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { if (auto vecTy = dyn_cast(type)) { auto eleTy = vecTy.getElementType(); if (!isa(eleTy)) { - funcOp.emitOpError("synthesis: unsupported argument type"); - signalPassFailure(); - return; + continue; } char *ptrToSizeInBuffer = static_cast(args) + offset; auto sizeFromBuffer = @@ -328,10 +301,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { char *bufferAppendix = static_cast(args) + structSize; for (auto [idx, eleTy, vecLength] : stdVecInfo) { if (!eleTy) { - // FIXME: Skip struct values. bufferAppendix += vecLength; - funcOp.emitOpError( - "argument to kernel may be a struct and was not synthesized"); continue; } auto doVector = [&](T) { @@ -385,8 +355,6 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { continue; } } - std::cout << "Module after state preparation " << std::endl; - module.dump(); } }; diff --git a/program.py b/program.py deleted file mode 100644 index e282d8cd5d..0000000000 --- a/program.py +++ /dev/null @@ -1,35 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # -# All rights reserved. 
# -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # - -import numpy as np -import cudaq - -import cudaq -import numpy as np - -cudaq.reset_target() - -cudaq.set_target('nvidia') -#cudaq.set_target('nvidia-mqpu') -# cudaq.set_target('density-matrix-cpu') - - -c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], - dtype=np.complex128) -state = cudaq.State.from_data(c) - -@cudaq.kernel(verbose=True) -def kernel(vec: cudaq.State): - q = cudaq.qvector(vec) - -print(kernel) -print(cudaq.to_qir(kernel)) - -#print(cudaq.get_target()) -#counts = cudaq.sample(kernel, state) -#print(counts) \ No newline at end of file diff --git a/python/tests/kernel/test_kernel_qvector_init.py b/python/tests/kernel/test_kernel_qvector_init.py index ddaeb6cc4d..f998a82dd1 100644 --- a/python/tests/kernel/test_kernel_qvector_init.py +++ b/python/tests/kernel/test_kernel_qvector_init.py @@ -20,28 +20,8 @@ # float -@skipIfNvidiaFP64NotInstalled -def test_kernel_float_params_f64(): - +def test_kernel_float_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - f = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[float]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, f) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_float_params_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') f = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] @@ -156,10 +136,8 @@ def kernel(): # complex -@skipIfNvidiaFP64NotInstalled -def test_kernel_complex_params_rotate_f64(): +def test_kernel_complex_params_rotate(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [0. + 0j, 0., 0., 1.] 
@@ -179,50 +157,8 @@ def kernel(vec: list[complex]): assert '10' in counts -@skipIfNvidiaNotInstalled -def test_kernel_complex_params_rotate_f32(): +def test_kernel_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia') - - c = [0. + 0j, 0., 0., 1.] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - x(q.front()) - y(q.back()) - h(q) - mz(q) - - counts = cudaq.sample(kernel, c) - print(f'rotate: {counts}') - assert '11' in counts - assert '00' in counts - assert '01' in counts - assert '10' in counts - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_complex_params_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_complex_params_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -337,10 +273,8 @@ def kernel(): # np arrays -@skipIfNvidiaFP64NotInstalled -def test_kernel_dtype_complex_params_f64(): +def test_kernel_dtype_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -354,10 +288,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_dtype_complex128_params_f64(): +def test_kernel_dtype_complex128_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -425,10 +357,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_amplitudes_complex_params_f64(): +def test_kernel_amplitudes_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = cudaq.amplitudes([1. / np.sqrt(2.), 0., 0., 1. 
/ np.sqrt(2.)]) @@ -442,27 +372,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaNotInstalled -def test_kernel_amplitudes_complex_params_f32(): +def test_kernel_amplitudes_complex_from_capture(): cudaq.reset_target() - cudaq.set_target('nvidia') - - c = cudaq.amplitudes([1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)]) - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_amplitudes_complex_from_capture_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] @@ -476,23 +387,6 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaNotInstalled -def test_kernel_amplitudes_complex_from_capture_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') - - c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(cudaq.amplitudes(vec)) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - @skipIfNvidiaFP64NotInstalled def test_kernel_simulation_dtype_np_array_from_capture_f64(): cudaq.reset_target() @@ -568,40 +462,8 @@ def kernel(): # test errors -@skipIfNvidiaFP64NotInstalled -def test_kernel_error_invalid_array_size_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - @cudaq.kernel - def kernel(): - qubits = cudaq.qvector(np.array([1., 0., 0.], dtype=complex)) - - with pytest.raises(RuntimeError) as e: - counts = cudaq.sample(kernel) - assert 'Invalid input state size for qvector init (not a power of 2)' in repr( - e) - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_error_invalid_list_size_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - @cudaq.kernel - def kernel(): - qubits = cudaq.qvector([1., 0., 0.]) - - with pytest.raises(RuntimeError) as e: - 
counts = cudaq.sample(kernel) - assert 'Invalid input state size for qvector init (not a power of 2)' in repr( - e) - - -@skipIfNvidiaNotInstalled -def test_kernel_error_invalid_array_size_f32(): +def test_kernel_error_invalid_array_size_(): cudaq.reset_target() - cudaq.set_target('nvidia') @cudaq.kernel def kernel(): @@ -613,10 +475,8 @@ def kernel(): e) -@skipIfNvidiaNotInstalled -def test_kernel_error_invalid_list_size_f32(): +def test_kernel_error_invalid_list_size(): cudaq.reset_target() - cudaq.set_target('nvidia') @cudaq.kernel def kernel(): @@ -629,6 +489,7 @@ def kernel(): def test_kernel_qvector_init_from_param_int(): + cudaq.reset_target() @cudaq.kernel def kernel(n: int): @@ -643,6 +504,8 @@ def kernel(n: int): def test_kernel_qvector_init_from_capture_int(): + cudaq.reset_target() + n = 2 @cudaq.kernel @@ -658,6 +521,7 @@ def kernel(): def test_kernel_qvector_init_from_int(): + cudaq.reset_target() @cudaq.kernel def kernel(): diff --git a/targettests/execution/from_state.cpp b/targettests/execution/from_state.cpp deleted file mode 100644 index 55438848cb..0000000000 --- a/targettests/execution/from_state.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. 
* - ******************************************************************************/ - -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s - -#include -#include "cudaq/builder/kernels.h" -#include - -__qpu__ void test(cudaq::state *inState) { - cudaq::qvector q(inState); -} - -// CHECK: size 2 - -int main() { - std::vector> vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; - auto state = cudaq::state::from_data(vec); - auto counts = cudaq::sample(test, &state); - counts.dump(); - - printf("size %zu\n", counts.size()); - return !(counts.size() == 2); -} diff --git a/targettests/execution/program.cpp b/targettests/execution/program.cpp deleted file mode 100644 index be4855e3de..0000000000 --- a/targettests/execution/program.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. 
* - ******************************************************************************/ - -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s - -#include -#include "cudaq/builder/kernels.h" -#include - -__qpu__ void test1(std::vector inState) { - cudaq::qvector q1 = inState; -} - -// __qpu__ void test2(cudaq::state *inState) { -// cudaq::qvector q2(inState); -// cudaq::x(q2); -// } - -// __qpu__ void test3() { -// auto q3 = cudaq::qvector({M_SQRT1_2, 0., 0., M_SQRT1_2}); -// } - - -// error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:392: not yet implemented: argument type conversion -// __qpu__ void test5(cudaq::state *inState) { -// test2(inState); -// } - - - -int main() { - std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - { - // Passing state data as argument (vector) - std::cout << "test1(vec): " << "\n"; - auto counts = cudaq::sample(test1, vec); - counts.dump(); - } - - // { - // // Passing state ptr as argument - no support for from_data - // - // std::cout << "test2(state): " << "\n"; - // auto state = cudaq::state::from_data(vec); - // - // // 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // auto counts = cudaq::sample(test2, &state); - // counts.dump(); - // } - - // { - // // Passing a state from another kernel as argument - // - // std::cout << "test2(test3): " << "\n"; - // auto state = cudaq::get_state(test3); - // - // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // auto counts = cudaq::sample(test2, &state); - // counts.dump(); - // } - - // { - // // Passing a state to another kernel as argument - // - // std::cout << "test4(state): " << "\n"; - // - // auto state = cudaq::state::from_data(vec); - // auto counts = cudaq::sample(test4, &state); - // } - - // { - // // Creating a kernel from state and passing its state to another kernel - is it deprecated? 
- // - std::cout << "test2(kernel): " << "\n"; - std::vector> vec{.70710678, 0., 0., 0.70710678}; - auto kernel = cudaq::make_kernel(); - auto qubits = kernel.qalloc(2); - - cudaq::from_state(kernel, qubits, vec); - auto counts = cudaq::sample(kernel); - // - // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // //auto state = cudaq::get_state(kernel); - // //auto counts = cudaq::sample(test2, &state); - // - counts.dump(); - // } - -} \ No newline at end of file diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp new file mode 100644 index 0000000000..dbe9b15d86 --- /dev/null +++ b/targettests/execution/state_preparation_vector.cpp @@ -0,0 +1,57 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s + +#include +#include "cudaq/builder/kernels.h" +#include + +__qpu__ void test(std::vector inState) { + cudaq::qvector q1 = inState; +} + +__qpu__ void test2() { + cudaq::qvector q1({M_SQRT1_2, M_SQRT1_2, 0., 0.}); +} + +void printCounts(cudaq::sample_result& result) { + for (auto &&[bits, counts] : result) { + std::cout << bits << '\n'; + } +} + +int main() { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test, vec); + printCounts(counts); + } + + { + // Using state data inside kernel (kernel mode) - not implemented yet. 
+ // auto counts = cudaq::sample(test2); + // printCounts(counts); + } + + { + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); + + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); + } +} + +// CHECK: 01 +// CHECK: 00 + +// CHECK: 01 +// CHECK: 00 \ No newline at end of file From 0a04d33ce4c7b734348784df2d14d3958827a592 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 25 Jun 2024 15:29:40 -0700 Subject: [PATCH 04/50] Cleanup --- lib/Optimizer/Transforms/CMakeLists.txt | 4 +- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 6 - lib/Optimizer/Transforms/StateDecomposer.h | 69 ++++---- lib/Optimizer/Transforms/StatePreparation.cpp | 108 ++++-------- program.py | 35 ---- .../tests/kernel/test_kernel_qvector_init.py | 162 ++---------------- targettests/execution/from_state.cpp | 30 ---- targettests/execution/program.cpp | 95 ---------- .../execution/state_preparation_vector.cpp | 57 ++++++ 9 files changed, 148 insertions(+), 418 deletions(-) delete mode 100644 program.py delete mode 100644 targettests/execution/from_state.cpp delete mode 100644 targettests/execution/program.cpp create mode 100644 targettests/execution/state_preparation_vector.cpp diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index b0a13571ec..173cec4538 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -39,11 +39,11 @@ add_cudaq_library(OptTransforms ObserveAnsatz.cpp PruneCtrlRelations.cpp QuakeAddMetadata.cpp - StateDecomposer.cpp - StatePreparation.cpp QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp + StateDecomposer.cpp + StatePreparation.cpp PySynthCallableBlockArgs.cpp DEPENDS diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 0fa859f175..7d83c152dd 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ 
b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -23,8 +23,6 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" -#include - #define DEBUG_TYPE "quake-synthesizer" using namespace mlir; @@ -419,9 +417,7 @@ class QuakeSynthesizer } void runOnOperation() override final { - std::cout << "Module before synthesis " << std::endl; auto module = getModule(); - // module.dump(); unsigned counter = 0; if (args == nullptr || kernelName.empty()) { @@ -680,8 +676,6 @@ class QuakeSynthesizer } } funcOp.eraseArguments(argsToErase); - // std::cout << "Module after synthesis " << std::endl; - module.dump(); } }; diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h index bac6909708..2d17edb768 100644 --- a/lib/Optimizer/Transforms/StateDecomposer.h +++ b/lib/Optimizer/Transforms/StateDecomposer.h @@ -29,31 +29,32 @@ namespace cudaq::details { - /// @brief Converts angles of a uniformly controlled rotation to angles of - /// non-controlled rotations. - std::vector convertAngles(const std::span alphas); - - /// @brief Return the control indices dictated by the gray code implementation. - /// - /// Here, numBits is the number of controls. - std::vector getControlIndices(std::size_t numBits); - - /// @brief Return angles required to implement a uniformly controlled z-rotation - /// on the `kth` qubit. - std::vector getAlphaZ(const std::span data, - std::size_t numQubits, std::size_t k); - - /// @brief Return angles required to implement a uniformly controlled y-rotation - /// on the `kth` qubit. - std::vector getAlphaY(const std::span data, - std::size_t numQubits, std::size_t k); +/// @brief Converts angles of a uniformly controlled rotation to angles of +/// non-controlled rotations. +std::vector convertAngles(const std::span alphas); + +/// @brief Return the control indices dictated by the gray code implementation. +/// +/// Here, numBits is the number of controls. 
+std::vector getControlIndices(std::size_t numBits); + +/// @brief Return angles required to implement a uniformly controlled z-rotation +/// on the `kth` qubit. +std::vector getAlphaZ(const std::span data, + std::size_t numQubits, std::size_t k); + +/// @brief Return angles required to implement a uniformly controlled y-rotation +/// on the `kth` qubit. +std::vector getAlphaY(const std::span data, + std::size_t numQubits, std::size_t k); } // namespace cudaq::details class StateGateBuilder { public: - StateGateBuilder(mlir::OpBuilder& b, mlir::Location& l, mlir::Value& q): builder(b), loc(l), qubits(q) {} + StateGateBuilder(mlir::OpBuilder &b, mlir::Location &l, mlir::Value &q) + : builder(b), loc(l), qubits(q) {} - template + template void applyRotationOp(double theta, std::size_t target) { auto qubit = createQubitRef(target); auto thetaValue = createAngleValue(theta); @@ -72,26 +73,30 @@ class StateGateBuilder { return qubitRefs[index]; } - auto indexValue = builder.create(loc, index, builder.getIntegerType(64)); + auto indexValue = builder.create( + loc, index, builder.getIntegerType(64)); auto ref = builder.create(loc, qubits, indexValue); qubitRefs[index] = ref; return ref; } mlir::Value createAngleValue(double angle) { - return builder.create(loc, llvm::APFloat{angle}, builder.getF64Type()); + return builder.create( + loc, llvm::APFloat{angle}, builder.getF64Type()); } - mlir::OpBuilder& builder; - mlir::Location& loc; - mlir::Value& qubits; + mlir::OpBuilder &builder; + mlir::Location &loc; + mlir::Value &qubits; - std::unordered_map qubitRefs = std::unordered_map(); + std::unordered_map qubitRefs = + std::unordered_map(); }; class StateDecomposer { public: - StateDecomposer(StateGateBuilder& b, std::vector>& a): builder(b), amplitudes(a), numQubits(log2(a.size())) {} + StateDecomposer(StateGateBuilder &b, std::vector> &a) + : builder(b), amplitudes(a), numQubits(log2(a.size())) {} /// @brief Decompose the input state vector data to a set of controlled /// 
operations and rotations. This function takes as input a `OpBuilder` @@ -112,8 +117,9 @@ class StateDecomposer { } // N.B: The algorithm, as described in the paper, creates a circuit that - // begins with a target state and brings it to the all zero state. Hence, this - // implementation do the two steps described in Section III in reverse order. + // begins with a target state and brings it to the all zero state. Hence, + // this implementation do the two steps described in Section III in reverse + // order. // Apply uniformly controlled y-rotations, the construction in Eq. (4). for (std::size_t j = 1; j <= numQubits; ++j) { @@ -142,7 +148,8 @@ class StateDecomposer { private: /// @brief Apply a uniformly controlled rotation on the target qubit. template - void applyRotation(const std::span alphas, std::size_t numControls, std::size_t target) { + void applyRotation(const std::span alphas, std::size_t numControls, + std::size_t target) { auto thetas = cudaq::details::convertAngles(alphas); if (numControls == 0) { builder.applyRotationOp(thetas[0], target); @@ -157,7 +164,7 @@ class StateDecomposer { } } - StateGateBuilder& builder; + StateGateBuilder &builder; std::span> amplitudes; std::size_t numQubits; }; diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index 86bb911a3a..785e70b3f8 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -7,6 +7,7 @@ ******************************************************************************/ #include "PassDetails.h" +#include "StateDecomposer.h" #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" @@ -24,9 +25,6 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" #include -#include "StateDecomposer.h" - -#include #define DEBUG_TYPE "state-preparation" @@ -38,13 +36,14 @@ using 
namespace mlir; /// func.func @foo(%arg0 : !cc.stdvec>) { /// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 /// %1 = math.cttz %0 : i64 -/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> -/// %3 = quake.alloca !quake.veq[%1 : i64] -/// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq -/// return +/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> +/// !cc.ptr> %3 = quake.alloca !quake.veq[%1 : i64] %4 = +/// quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> +/// !quake.veq return /// } /// -/// On a call that passes std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2} as arg0: +/// On a call that passes std::vector vec{M_SQRT1_2, 0., 0., +/// M_SQRT1_2} as arg0: /// /// func.func @foo(%arg0 : !cc.stdvec>) { /// %0 = quake.alloca !quake.veq<2> @@ -70,11 +69,11 @@ using namespace mlir; namespace { template -concept IntegralType = std::is_same::value - || std::is_same::value - || std::is_same::value - || std::is_same::value - || std::is_same::value; +concept IntegralType = + std::is_same::value || std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; template concept FloatingType = std::is_same::value; @@ -85,31 +84,33 @@ concept DoubleType = std::is_same::value; template concept ComplexDataType = FloatingType || DoubleType || IntegralType; -/// Input was complex/complex but we prefer -/// complex/complex. Make a copy, extending or truncating the -/// values. +/// Input was complex but we prefer +/// complex. Make a copy, extending the values. 
template -std::vector> convertToComplex(std::complex *data, std::uint64_t size) { +std::vector> convertToComplex(std::complex *data, + std::uint64_t size) { auto convertData = std::vector>(size); for (std::size_t i = 0; i < size; ++i) convertData[i] = std::complex{static_cast(data[i].real()), - static_cast(data[i].imag())}; + static_cast(data[i].imag())}; return convertData; } template -std::vector> convertToComplex(std::complex *data, std::uint64_t size) { - return std::vector>(data, data+size); +std::vector> convertToComplex(std::complex *data, + std::uint64_t size) { + return std::vector>(data, data + size); } -/// Input was float/double but we prefer complex/complex. +/// Input was float/double but we prefer complex. /// Make a copy, extending or truncating the values. template -std::vector> convertToComplex(From *data, std::uint64_t size) { +std::vector> convertToComplex(From *data, + std::uint64_t size) { auto convertData = std::vector>(size); for (std::size_t i = 0; i < size; ++i) - convertData[i] = - std::complex{static_cast(data[i]), static_cast(0.0)}; + convertData[i] = std::complex{static_cast(data[i]), + static_cast(0.0)}; return convertData; } @@ -118,39 +119,15 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector> &vec) { auto *ctx = builder.getContext(); - // builder.setInsertionPointToStart(argument.getOwner()); auto argLoc = argument.getLoc(); - // TODO: look at quake.init_state instructions from vector data and track them - // to the argument vector, then replace the instruction by gates preparing the - // state (or a call to a kernel with gates) - - /// func.func @foo(%arg0 : !cc.stdvec>) { - /// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 - /// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> - /// !cc.ptr> - /// - /// %3 = quake.alloca !quake.veq[%1 : i64] - /// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) - /// -> !quake.veq return - /// } - - /// => - - /// ... 
- /// %5 = quake.alloca !quake.veq[%3 : i64] - /// %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref - /// quake.h %6 : (!quake.ref) -> () - /// %7 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref - /// %8 = quake.extract_ref %5[1] : (!quake.veq) -> !quake.ref - /// quake.x [%7] %8 : (!quake.ref, !quake.ref) -> () - - auto toErase = std::vector(); + auto toErase = std::vector(); for (auto *argUser : argument.getUsers()) { // Handle the `StdvecSize` and `quake.alloca` use case: // - Replace a `vec.size()` with the vector length. - // - Replace the number of qubits calculation with the vector length logarithm. + // - Replace the number of qubits calculation with the vector length + // logarithm. // - Replace `quake.alloca` with a constant size qvector allocation. if (auto stdvecSizeOp = dyn_cast(argUser)) { builder.setInsertionPointAfter(stdvecSizeOp); @@ -160,9 +137,10 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, Value numQubits = builder.create( argLoc, log2(vec.size()), stdvecSizeOp.getType()); - for (auto *sizeUser: argUser->getUsers()) { - if (auto countZeroesOp = dyn_cast(sizeUser)) { - for (auto *numQubitsUser: sizeUser->getUsers()) { + for (auto *sizeUser : argUser->getUsers()) { + if (auto countZeroesOp = + dyn_cast(sizeUser)) { + for (auto *numQubitsUser : sizeUser->getUsers()) { if (auto quakeAllocaOp = dyn_cast(numQubitsUser)) { builder.setInsertionPointAfter(quakeAllocaOp); auto veqTy = quake::VeqType::get(ctx, log2(vec.size())); @@ -175,7 +153,7 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, toErase.push_back(countZeroesOp); } } - + stdvecSizeOp.replaceAllUsesWith(length); toErase.push_back(stdvecSizeOp); continue; @@ -202,7 +180,7 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, } } - for (auto& op: toErase) { + for (auto &op : toErase) { op->erase(); } @@ -249,9 +227,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { } void runOnOperation() override 
final { - std::cout << "Module before state prep " << std::endl; auto module = getModule(); - module.dump(); unsigned counter = 0; if (args == nullptr || kernelName.empty()) { @@ -286,13 +262,12 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { // Get the argument type auto type = argument.getType(); - // auto loc = argument.getLoc(); if (auto ptrTy = dyn_cast(type)) { if (isa(ptrTy.getElementType())) { - std::cout << "State pointer found, TODO: call a kernel that created " - "the state" - << std::endl; + funcOp.emitOpError( + "State preparation from cudaq::state is not supported."); + return; } } @@ -301,9 +276,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { if (auto vecTy = dyn_cast(type)) { auto eleTy = vecTy.getElementType(); if (!isa(eleTy)) { - funcOp.emitOpError("synthesis: unsupported argument type"); - signalPassFailure(); - return; + continue; } char *ptrToSizeInBuffer = static_cast(args) + offset; auto sizeFromBuffer = @@ -328,10 +301,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { char *bufferAppendix = static_cast(args) + structSize; for (auto [idx, eleTy, vecLength] : stdVecInfo) { if (!eleTy) { - // FIXME: Skip struct values. bufferAppendix += vecLength; - funcOp.emitOpError( - "argument to kernel may be a struct and was not synthesized"); continue; } auto doVector = [&](T) { @@ -385,8 +355,6 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { continue; } } - std::cout << "Module after state preparation " << std::endl; - module.dump(); } }; diff --git a/program.py b/program.py deleted file mode 100644 index e282d8cd5d..0000000000 --- a/program.py +++ /dev/null @@ -1,35 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # -# All rights reserved. 
# -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # - -import numpy as np -import cudaq - -import cudaq -import numpy as np - -cudaq.reset_target() - -cudaq.set_target('nvidia') -#cudaq.set_target('nvidia-mqpu') -# cudaq.set_target('density-matrix-cpu') - - -c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], - dtype=np.complex128) -state = cudaq.State.from_data(c) - -@cudaq.kernel(verbose=True) -def kernel(vec: cudaq.State): - q = cudaq.qvector(vec) - -print(kernel) -print(cudaq.to_qir(kernel)) - -#print(cudaq.get_target()) -#counts = cudaq.sample(kernel, state) -#print(counts) \ No newline at end of file diff --git a/python/tests/kernel/test_kernel_qvector_init.py b/python/tests/kernel/test_kernel_qvector_init.py index ddaeb6cc4d..f998a82dd1 100644 --- a/python/tests/kernel/test_kernel_qvector_init.py +++ b/python/tests/kernel/test_kernel_qvector_init.py @@ -20,28 +20,8 @@ # float -@skipIfNvidiaFP64NotInstalled -def test_kernel_float_params_f64(): - +def test_kernel_float_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - f = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[float]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, f) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_float_params_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') f = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] @@ -156,10 +136,8 @@ def kernel(): # complex -@skipIfNvidiaFP64NotInstalled -def test_kernel_complex_params_rotate_f64(): +def test_kernel_complex_params_rotate(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [0. + 0j, 0., 0., 1.] 
@@ -179,50 +157,8 @@ def kernel(vec: list[complex]): assert '10' in counts -@skipIfNvidiaNotInstalled -def test_kernel_complex_params_rotate_f32(): +def test_kernel_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia') - - c = [0. + 0j, 0., 0., 1.] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - x(q.front()) - y(q.back()) - h(q) - mz(q) - - counts = cudaq.sample(kernel, c) - print(f'rotate: {counts}') - assert '11' in counts - assert '00' in counts - assert '01' in counts - assert '10' in counts - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_complex_params_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_complex_params_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -337,10 +273,8 @@ def kernel(): # np arrays -@skipIfNvidiaFP64NotInstalled -def test_kernel_dtype_complex_params_f64(): +def test_kernel_dtype_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -354,10 +288,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_dtype_complex128_params_f64(): +def test_kernel_dtype_complex128_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -425,10 +357,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_amplitudes_complex_params_f64(): +def test_kernel_amplitudes_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = cudaq.amplitudes([1. / np.sqrt(2.), 0., 0., 1. 
/ np.sqrt(2.)]) @@ -442,27 +372,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaNotInstalled -def test_kernel_amplitudes_complex_params_f32(): +def test_kernel_amplitudes_complex_from_capture(): cudaq.reset_target() - cudaq.set_target('nvidia') - - c = cudaq.amplitudes([1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)]) - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_amplitudes_complex_from_capture_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] @@ -476,23 +387,6 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaNotInstalled -def test_kernel_amplitudes_complex_from_capture_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') - - c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(cudaq.amplitudes(vec)) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - @skipIfNvidiaFP64NotInstalled def test_kernel_simulation_dtype_np_array_from_capture_f64(): cudaq.reset_target() @@ -568,40 +462,8 @@ def kernel(): # test errors -@skipIfNvidiaFP64NotInstalled -def test_kernel_error_invalid_array_size_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - @cudaq.kernel - def kernel(): - qubits = cudaq.qvector(np.array([1., 0., 0.], dtype=complex)) - - with pytest.raises(RuntimeError) as e: - counts = cudaq.sample(kernel) - assert 'Invalid input state size for qvector init (not a power of 2)' in repr( - e) - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_error_invalid_list_size_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - @cudaq.kernel - def kernel(): - qubits = cudaq.qvector([1., 0., 0.]) - - with pytest.raises(RuntimeError) as e: - 
counts = cudaq.sample(kernel) - assert 'Invalid input state size for qvector init (not a power of 2)' in repr( - e) - - -@skipIfNvidiaNotInstalled -def test_kernel_error_invalid_array_size_f32(): +def test_kernel_error_invalid_array_size_(): cudaq.reset_target() - cudaq.set_target('nvidia') @cudaq.kernel def kernel(): @@ -613,10 +475,8 @@ def kernel(): e) -@skipIfNvidiaNotInstalled -def test_kernel_error_invalid_list_size_f32(): +def test_kernel_error_invalid_list_size(): cudaq.reset_target() - cudaq.set_target('nvidia') @cudaq.kernel def kernel(): @@ -629,6 +489,7 @@ def kernel(): def test_kernel_qvector_init_from_param_int(): + cudaq.reset_target() @cudaq.kernel def kernel(n: int): @@ -643,6 +504,8 @@ def kernel(n: int): def test_kernel_qvector_init_from_capture_int(): + cudaq.reset_target() + n = 2 @cudaq.kernel @@ -658,6 +521,7 @@ def kernel(): def test_kernel_qvector_init_from_int(): + cudaq.reset_target() @cudaq.kernel def kernel(): diff --git a/targettests/execution/from_state.cpp b/targettests/execution/from_state.cpp deleted file mode 100644 index 55438848cb..0000000000 --- a/targettests/execution/from_state.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. 
* - ******************************************************************************/ - -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s - -#include -#include "cudaq/builder/kernels.h" -#include - -__qpu__ void test(cudaq::state *inState) { - cudaq::qvector q(inState); -} - -// CHECK: size 2 - -int main() { - std::vector> vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; - auto state = cudaq::state::from_data(vec); - auto counts = cudaq::sample(test, &state); - counts.dump(); - - printf("size %zu\n", counts.size()); - return !(counts.size() == 2); -} diff --git a/targettests/execution/program.cpp b/targettests/execution/program.cpp deleted file mode 100644 index be4855e3de..0000000000 --- a/targettests/execution/program.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. 
* - ******************************************************************************/ - -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s - -#include -#include "cudaq/builder/kernels.h" -#include - -__qpu__ void test1(std::vector inState) { - cudaq::qvector q1 = inState; -} - -// __qpu__ void test2(cudaq::state *inState) { -// cudaq::qvector q2(inState); -// cudaq::x(q2); -// } - -// __qpu__ void test3() { -// auto q3 = cudaq::qvector({M_SQRT1_2, 0., 0., M_SQRT1_2}); -// } - - -// error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:392: not yet implemented: argument type conversion -// __qpu__ void test5(cudaq::state *inState) { -// test2(inState); -// } - - - -int main() { - std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - { - // Passing state data as argument (vector) - std::cout << "test1(vec): " << "\n"; - auto counts = cudaq::sample(test1, vec); - counts.dump(); - } - - // { - // // Passing state ptr as argument - no support for from_data - // - // std::cout << "test2(state): " << "\n"; - // auto state = cudaq::state::from_data(vec); - // - // // 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // auto counts = cudaq::sample(test2, &state); - // counts.dump(); - // } - - // { - // // Passing a state from another kernel as argument - // - // std::cout << "test2(test3): " << "\n"; - // auto state = cudaq::get_state(test3); - // - // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // auto counts = cudaq::sample(test2, &state); - // counts.dump(); - // } - - // { - // // Passing a state to another kernel as argument - // - // std::cout << "test4(state): " << "\n"; - // - // auto state = cudaq::state::from_data(vec); - // auto counts = cudaq::sample(test4, &state); - // } - - // { - // // Creating a kernel from state and passing its state to another kernel - is it deprecated? 
- // - std::cout << "test2(kernel): " << "\n"; - std::vector> vec{.70710678, 0., 0., 0.70710678}; - auto kernel = cudaq::make_kernel(); - auto qubits = kernel.qalloc(2); - - cudaq::from_state(kernel, qubits, vec); - auto counts = cudaq::sample(kernel); - // - // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // //auto state = cudaq::get_state(kernel); - // //auto counts = cudaq::sample(test2, &state); - // - counts.dump(); - // } - -} \ No newline at end of file diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp new file mode 100644 index 0000000000..dbe9b15d86 --- /dev/null +++ b/targettests/execution/state_preparation_vector.cpp @@ -0,0 +1,57 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s + +#include +#include "cudaq/builder/kernels.h" +#include + +__qpu__ void test(std::vector inState) { + cudaq::qvector q1 = inState; +} + +__qpu__ void test2() { + cudaq::qvector q1({M_SQRT1_2, M_SQRT1_2, 0., 0.}); +} + +void printCounts(cudaq::sample_result& result) { + for (auto &&[bits, counts] : result) { + std::cout << bits << '\n'; + } +} + +int main() { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test, vec); + printCounts(counts); + } + + { + // Using state data inside kernel (kernel mode) - not implemented yet. 
+ // auto counts = cudaq::sample(test2); + // printCounts(counts); + } + + { + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); + + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); + } +} + +// CHECK: 01 +// CHECK: 00 + +// CHECK: 01 +// CHECK: 00 \ No newline at end of file From 3660e278407719c7aa7ba82f93f08261dc936635 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 26 Jun 2024 09:51:42 -0700 Subject: [PATCH 05/50] Updated test --- .../execution/state_preparation_vector.cpp | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index dbe9b15d86..d415072ce7 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -16,10 +16,6 @@ __qpu__ void test(std::vector inState) { cudaq::qvector q1 = inState; } -__qpu__ void test2() { - cudaq::qvector q1({M_SQRT1_2, M_SQRT1_2, 0., 0.}); -} - void printCounts(cudaq::sample_result& result) { for (auto &&[bits, counts] : result) { std::cout << bits << '\n'; @@ -28,20 +24,18 @@ void printCounts(cudaq::sample_result& result) { int main() { std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; { // Passing state data as argument (kernel mode) auto counts = cudaq::sample(test, vec); printCounts(counts); - } - - { - // Using state data inside kernel (kernel mode) - not implemented yet. 
- // auto counts = cudaq::sample(test2); - // printCounts(counts); + + counts = cudaq::sample(test, vec1); + printCounts(counts); } { - // Passing state data as argument (builder mode) + // Passing state data as argument (builder mode) auto [kernel, v] = cudaq::make_kernel>(); auto qubits = kernel.qalloc(v); @@ -53,5 +47,8 @@ int main() { // CHECK: 01 // CHECK: 00 +// CHECK: 10 +// CHECK: 10 + // CHECK: 01 // CHECK: 00 \ No newline at end of file From 8cbc1f6905babbfe1e123840d9d1b6e1a00747fa Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 26 Jun 2024 12:37:52 -0700 Subject: [PATCH 06/50] Fix test failures --- .../tests/kernel/test_kernel_qvector_init.py | 21 +++++++++++++++++-- .../execution/state_preparation_vector.cpp | 16 ++++++++------ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/python/tests/kernel/test_kernel_qvector_init.py b/python/tests/kernel/test_kernel_qvector_init.py index f998a82dd1..6f2fd07152 100644 --- a/python/tests/kernel/test_kernel_qvector_init.py +++ b/python/tests/kernel/test_kernel_qvector_init.py @@ -5,11 +5,18 @@ # This source code and the accompanying materials are made available under # # the terms of the Apache License 2.0 which accompanies this distribution. 
# # ============================================================================ # + +import os, sys import pytest import cudaq import numpy as np +## [PYTHON_VERSION_FIX] +skipIfPythonLessThan39 = pytest.mark.skipif( + sys.version_info < (3, 9), + reason="built-in collection types such as `list` not supported") + skipIfNvidiaFP64NotInstalled = pytest.mark.skipif( not (cudaq.num_available_gpus() > 0 and cudaq.has_target('nvidia-fp64')), reason='Could not find nvidia-fp64 in installation') @@ -18,8 +25,10 @@ not (cudaq.num_available_gpus() > 0 and cudaq.has_target('nvidia')), reason='Could not find nvidia in installation') - # float + + +@skipIfPythonLessThan39 def test_kernel_float_params(): cudaq.reset_target() @@ -136,6 +145,7 @@ def kernel(): # complex +@skipIfPythonLessThan39 def test_kernel_complex_params_rotate(): cudaq.reset_target() @@ -157,6 +167,7 @@ def kernel(vec: list[complex]): assert '10' in counts +@skipIfPythonLessThan39 def test_kernel_complex_params(): cudaq.reset_target() @@ -273,6 +284,7 @@ def kernel(): # np arrays +@skipIfPythonLessThan39 def test_kernel_dtype_complex_params(): cudaq.reset_target() @@ -288,6 +300,7 @@ def kernel(vec: list[complex]): assert '00' in counts +@skipIfPythonLessThan39 def test_kernel_dtype_complex128_params(): cudaq.reset_target() @@ -357,6 +370,7 @@ def kernel(vec: list[complex]): assert '00' in counts +@skipIfPythonLessThan39 def test_kernel_amplitudes_complex_params(): cudaq.reset_target() @@ -372,6 +386,7 @@ def kernel(vec: list[complex]): assert '00' in counts +@skipIfPythonLessThan39 def test_kernel_amplitudes_complex_from_capture(): cudaq.reset_target() @@ -462,7 +477,8 @@ def kernel(): # test errors -def test_kernel_error_invalid_array_size_(): +@skipIfPythonLessThan39 +def test_kernel_error_invalid_array_size(): cudaq.reset_target() @cudaq.kernel @@ -475,6 +491,7 @@ def kernel(): e) +@skipIfPythonLessThan39 def test_kernel_error_invalid_list_size(): cudaq.reset_target() diff --git 
a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index d415072ce7..ef4ea69b92 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -17,7 +17,13 @@ __qpu__ void test(std::vector inState) { } void printCounts(cudaq::sample_result& result) { + std::vector values{}; for (auto &&[bits, counts] : result) { + values.push_back(bits); + } + + std::sort(values.begin(), values.end()); + for (auto &&bits : values) { std::cout << bits << '\n'; } } @@ -44,11 +50,9 @@ int main() { } } -// CHECK: 01 // CHECK: 00 - -// CHECK: 10 -// CHECK: 10 - // CHECK: 01 -// CHECK: 00 \ No newline at end of file +// CHECK: 10 +// CHECK: 11 +// CHECK: 00 +// CHECK: 01 \ No newline at end of file From 6d4433d5cf40835dfb42c67f180062b8aac7d601 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 27 Jun 2024 09:46:13 -0700 Subject: [PATCH 07/50] Revert the order of qubits in state prep --- lib/Optimizer/Transforms/StateDecomposer.h | 13 ++++++++--- .../tests/backends/test_Quantinuum_kernel.py | 15 +++++++++++++ .../tests/kernel/test_kernel_qvector_init.py | 22 +++++++++++++++++++ .../execution/state_preparation_vector.cpp | 4 ++-- 4 files changed, 49 insertions(+), 5 deletions(-) diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h index 2d17edb768..b433089258 100644 --- a/lib/Optimizer/Transforms/StateDecomposer.h +++ b/lib/Optimizer/Transforms/StateDecomposer.h @@ -150,17 +150,24 @@ class StateDecomposer { template void applyRotation(const std::span alphas, std::size_t numControls, std::size_t target) { + + // In our model the index 1 (i.e. |01>) in quantum state data + // corresponds to qubits[0]=1 and qubits[1] = 0. + // Revert the order of qubits as the state preparation algorithm + // we use assumes the opposite. 
+ auto qubitIndex = [&](std::size_t i) { return numQubits - i - 1; }; + auto thetas = cudaq::details::convertAngles(alphas); if (numControls == 0) { - builder.applyRotationOp(thetas[0], target); + builder.applyRotationOp(thetas[0], qubitIndex(target)); return; } auto controlIndices = cudaq::details::getControlIndices(numControls); assert(thetas.size() == controlIndices.size()); for (auto [i, c] : llvm::enumerate(controlIndices)) { - builder.applyRotationOp(thetas[i], target); - builder.applyX(c, target); + builder.applyRotationOp(thetas[i], qubitIndex(target)); + builder.applyX(qubitIndex(c), qubitIndex(target)); } } diff --git a/python/tests/backends/test_Quantinuum_kernel.py b/python/tests/backends/test_Quantinuum_kernel.py index de072335bf..b0ca043060 100644 --- a/python/tests/backends/test_Quantinuum_kernel.py +++ b/python/tests/backends/test_Quantinuum_kernel.py @@ -7,6 +7,7 @@ # ============================================================================ # import cudaq, pytest, os, time +import numpy as np from cudaq import spin from multiprocessing import Process try: @@ -169,6 +170,20 @@ def kernel(): result = cudaq.sample(kernel) +def test_quantinuum_state_preparation(): + + @cudaq.kernel + def kernel(vec: list[complex]): + qubits = cudaq.qvector(vec) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
+ counts = cudaq.sample(kernel, state) + assert '11' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/kernel/test_kernel_qvector_init.py b/python/tests/kernel/test_kernel_qvector_init.py index 6f2fd07152..28260dcb4d 100644 --- a/python/tests/kernel/test_kernel_qvector_init.py +++ b/python/tests/kernel/test_kernel_qvector_init.py @@ -25,6 +25,28 @@ not (cudaq.num_available_gpus() > 0 and cudaq.has_target('nvidia')), reason='Could not find nvidia in installation') +# state preparation and synthesis + + +@skipIfPythonLessThan39 +def test_kernel_state_preparation(): + cudaq.reset_target() + + c = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] + + @cudaq.kernel + def kernel(vec: list[complex]): + q = cudaq.qvector(vec) + + synthesized = cudaq.synthesize(kernel, c) + assert 'quake.init_state' in kernel.__str__() + assert not 'quake.init_state' in synthesized.__str__() + + counts = cudaq.sample(synthesized) + assert '00' in counts + assert '10' in counts + + # float diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index ef4ea69b92..fccf6d872c 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -51,8 +51,8 @@ int main() { } // CHECK: 00 -// CHECK: 01 // CHECK: 10 +// CHECK: 01 // CHECK: 11 // CHECK: 00 -// CHECK: 01 \ No newline at end of file +// CHECK: 10 From 46f247728cf2ca22cda3bbf417007c63db1a1bed Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 27 Jun 2024 10:20:09 -0700 Subject: [PATCH 08/50] Fixed failing tests --- python/tests/backends/test_Quantinuum_kernel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/backends/test_Quantinuum_kernel.py b/python/tests/backends/test_Quantinuum_kernel.py index b0ca043060..fc11224f5e 100644 --- 
a/python/tests/backends/test_Quantinuum_kernel.py +++ b/python/tests/backends/test_Quantinuum_kernel.py @@ -178,7 +178,7 @@ def kernel(vec: list[complex]): state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] counts = cudaq.sample(kernel, state) - assert '11' in counts + assert '00' in counts assert '10' in counts assert not '01' in counts assert not '11' in counts From fb0994f8cb2c8459d715a27c208cef19c58542cb Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 27 Jun 2024 11:27:27 -0700 Subject: [PATCH 09/50] Fix test faiure --- lib/Optimizer/Transforms/StateDecomposer.h | 2 +- targettests/execution/state_preparation_vector.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h index b433089258..a698ac83c2 100644 --- a/lib/Optimizer/Transforms/StateDecomposer.h +++ b/lib/Optimizer/Transforms/StateDecomposer.h @@ -95,7 +95,7 @@ class StateGateBuilder { class StateDecomposer { public: - StateDecomposer(StateGateBuilder &b, std::vector> &a) + StateDecomposer(StateGateBuilder &b, std::span> a) : builder(b), amplitudes(a), numQubits(log2(a.size())) {} /// @brief Decompose the input state vector data to a set of controlled diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index fccf6d872c..35a628c06a 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -9,7 +9,6 @@ // RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s #include -#include "cudaq/builder/kernels.h" #include __qpu__ void test(std::vector inState) { From 0abf40aa2ebc13a314500ac5ca7955cbe4510181 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 27 Jun 2024 13:22:55 -0700 Subject: [PATCH 10/50] Cleanup --- lib/Optimizer/Transforms/StateDecomposer.cpp | 2 +- lib/Optimizer/Transforms/StateDecomposer.h | 4 +--- 2 files changed, 2 insertions(+), 4 
deletions(-) diff --git a/lib/Optimizer/Transforms/StateDecomposer.cpp b/lib/Optimizer/Transforms/StateDecomposer.cpp index 3105fad707..62ca8a9d73 100644 --- a/lib/Optimizer/Transforms/StateDecomposer.cpp +++ b/lib/Optimizer/Transforms/StateDecomposer.cpp @@ -125,4 +125,4 @@ std::vector getAlphaY(const std::span data, } return angles; } -} // namespace cudaq::details \ No newline at end of file +} // namespace cudaq::details diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h index a698ac83c2..a09b8a64e9 100644 --- a/lib/Optimizer/Transforms/StateDecomposer.h +++ b/lib/Optimizer/Transforms/StateDecomposer.h @@ -25,8 +25,6 @@ #include "mlir/Transforms/RegionUtils.h" #include -#include - namespace cudaq::details { /// @brief Converts angles of a uniformly controlled rotation to angles of @@ -152,7 +150,7 @@ class StateDecomposer { std::size_t target) { // In our model the index 1 (i.e. |01>) in quantum state data - // corresponds to qubits[0]=1 and qubits[1] = 0. + // corresponds to qubits[0] = 1 and qubits[1] = 0. // Revert the order of qubits as the state preparation algorithm // we use assumes the opposite. 
auto qubitIndex = [&](std::size_t i) { return numQubits - i - 1; }; From b62bb526f1bcfa24f66867ca6fcf7001ea39e790 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Mon, 1 Jul 2024 14:08:00 -0700 Subject: [PATCH 11/50] Move state prep to after synthesis --- include/cudaq/Optimizer/Transforms/Passes.h | 2 + include/cudaq/Optimizer/Transforms/Passes.td | 11 + lib/Frontend/nvqpp/ConvertExpr.cpp | 20 +- lib/Optimizer/CodeGen/ConvertToQIR.cpp | 12 + .../Transforms/ApplyControlNegations.cpp | 4 + lib/Optimizer/Transforms/BasisConversion.cpp | 14 +- lib/Optimizer/Transforms/CMakeLists.txt | 1 + .../Transforms/GenDeviceCodeLoader.cpp | 7 + lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 2 +- .../Transforms/StatePreparation2.cpp | 304 ++++++++++++++++++ program.py | 23 ++ runtime/common/BaseRemoteRESTQPU.h | 53 ++- runtime/common/RuntimeMLIRCommonImpl.h | 10 + .../platform/default/rest/RemoteRESTQPU.cpp | 2 + .../execution/state_preparation_vector.cpp | 47 +-- 15 files changed, 488 insertions(+), 24 deletions(-) create mode 100644 lib/Optimizer/Transforms/StatePreparation2.cpp create mode 100644 program.py diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 422032326c..d0759cac85 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -42,6 +42,8 @@ std::unique_ptr createQuakeAddMetadata(); std::unique_ptr createQuakeAddDeallocs(); std::unique_ptr createStatePreparation(); std::unique_ptr createStatePreparation(std::string_view, void *); +std::unique_ptr createStatePreparation2(); +std::unique_ptr createStatePreparation2(std::string_view, void *); std::unique_ptr createQuakeSynthesizer(); std::unique_ptr createQuakeSynthesizer(std::string_view, void *); std::unique_ptr createRaiseToAffinePass(); diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index e5e15a8776..2a342e63c3 100644 --- 
a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -523,6 +523,17 @@ def PrepareState : Pass<"state-prep", "mlir::ModuleOp"> { let constructor = "cudaq::opt::createStatePreparation()"; } +def PrepareState2 : Pass<"state-prep2", "mlir::ModuleOp"> { + let summary = + "Convert state vector data into gates"; + let description = [{ + Convert quake representation that includes qubit initialization + from data into qubit initialization using gates. + }]; + + let constructor = "cudaq::opt::createStatePreparation2()"; +} + def QuakeSynthesize : Pass<"quake-synth", "mlir::ModuleOp"> { let summary = "Synthesize concrete quantum program from Quake code plus runtime values."; diff --git a/lib/Frontend/nvqpp/ConvertExpr.cpp b/lib/Frontend/nvqpp/ConvertExpr.cpp index e5deb2e023..7c73faa2d5 100644 --- a/lib/Frontend/nvqpp/ConvertExpr.cpp +++ b/lib/Frontend/nvqpp/ConvertExpr.cpp @@ -15,6 +15,8 @@ #include "llvm/Support/Debug.h" #include "mlir/Dialect/SCF/IR/SCF.h" +#include + #define DEBUG_TYPE "lower-ast-expr" using namespace mlir; @@ -2569,12 +2571,28 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { } } return false; - }(); + }(); if (isVectorOfQubitRefs) return true; if (ctorName == "complex") { Value imag = popValue(); Value real = popValue(); + + std::cout << "Real and Imag values" << std::endl; + real.dump(); + imag.dump(); + if (auto realOp = real.getDefiningOp()) { + if (auto imagOp = imag.getDefiningOp()) { + std::cout << "Creating const complex" << std::endl; + auto realConst = realOp.value().convertToDouble(); + auto imagConst = imagOp.value().convertToDouble(); + auto attr = (real.getType() == builder.getF64Type())? 
+ builder.getF64ArrayAttr({realConst, imagConst}): + builder.getF32ArrayAttr({static_cast(realConst), static_cast(imagConst)}); + return pushValue(builder.create(loc, ComplexType::get(real.getType()), attr)); + } + } + std::cout << "Creating non-const complex" << std::endl; return pushValue(builder.create( loc, ComplexType::get(real.getType()), real, imag)); } diff --git a/lib/Optimizer/CodeGen/ConvertToQIR.cpp b/lib/Optimizer/CodeGen/ConvertToQIR.cpp index 245a887a2e..731bc0e5bc 100644 --- a/lib/Optimizer/CodeGen/ConvertToQIR.cpp +++ b/lib/Optimizer/CodeGen/ConvertToQIR.cpp @@ -36,6 +36,8 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include + #define DEBUG_TYPE "convert-to-qir" namespace cudaq::opt { @@ -95,6 +97,7 @@ class ConvertToQIR : public cudaq::opt::impl::ConvertToQIRBase { // buffer of constants. LogicalResult eraseConstantArrayOps() { bool ok = true; + SmallVector cleanUps; getOperation().walk([&](cudaq::cc::ConstantArrayOp carr) { // If there is a constant array, then we expect that it is involved in @@ -169,6 +172,9 @@ class ConvertToQIR : public cudaq::opt::impl::ConvertToQIRBase { /// ops. This step makes converting a DAG of nodes in the conversion step /// simpler. 
void runOnOperation() override final { + std::cout << "Before ConvertToQIR" << std::endl; + getOperation().dump(); + auto *context = &getContext(); if (failed(fuseSubgraphPatterns(context, getOperation()))) { signalPassFailure(); @@ -204,11 +210,17 @@ class ConvertToQIR : public cudaq::opt::impl::ConvertToQIRBase { target.addLegalDialect(); target.addLegalOp(); + if (failed( applyFullConversion(getOperation(), target, std::move(patterns)))) { LLVM_DEBUG(getOperation().dump()); + std::cout << "Filed ConvertToQIR" << std::endl; + getOperation().dump(); signalPassFailure(); } + + std::cout << "Succeded ConvertToQIR" << std::endl; + getOperation().dump(); } }; diff --git a/lib/Optimizer/Transforms/ApplyControlNegations.cpp b/lib/Optimizer/Transforms/ApplyControlNegations.cpp index c88f80e6a1..e10df9bd7c 100644 --- a/lib/Optimizer/Transforms/ApplyControlNegations.cpp +++ b/lib/Optimizer/Transforms/ApplyControlNegations.cpp @@ -16,6 +16,7 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" +#include namespace cudaq::opt { #define GEN_PASS_DEF_APPLYCONTROLNEGATIONS #include "cudaq/Optimizer/Transforms/Passes.h.inc" @@ -67,6 +68,9 @@ struct ApplyControlNegationsPass void runOnOperation() override { auto funcOp = getOperation(); + std::cout << " >>>> ApplyControlNegations *** " << std::endl; + funcOp.dump(); + std::cout << " <<< ApplyControlNegations *** " << std::endl; auto *ctx = &getContext(); RewritePatternSet patterns(ctx); patterns.insert< diff --git a/lib/Optimizer/Transforms/BasisConversion.cpp b/lib/Optimizer/Transforms/BasisConversion.cpp index 326feb87f2..816e7c354d 100644 --- a/lib/Optimizer/Transforms/BasisConversion.cpp +++ b/lib/Optimizer/Transforms/BasisConversion.cpp @@ -18,6 +18,8 @@ #include "mlir/Rewrite/FrozenRewritePatternSet.h" #include "mlir/Transforms/DialectConversion.h" +#include + using namespace mlir; //===----------------------------------------------------------------------===// @@ -103,6 +105,10 @@ struct 
BasisConversion void runOnOperation() override { auto module = getOperation(); + + std::cout << "Before BasisConversion" << std::endl; + getOperation().dump(); + if (basis.empty()) { module.emitError("Basis conversion requires a target basis"); signalPassFailure(); @@ -161,8 +167,14 @@ struct BasisConversion return applyFullConversion(op, target, patterns); }); - if (failed(rewriteResult)) + if (failed(rewriteResult)) { signalPassFailure(); + std::cout << "Failed BasisConversion" << std::endl; + getOperation().dump(); + } else { + std::cout << "Succeeded BasisConversion" << std::endl; + getOperation().dump(); + } } }; diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index 173cec4538..fc547f41bd 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -44,6 +44,7 @@ add_cudaq_library(OptTransforms RegToMem.cpp StateDecomposer.cpp StatePreparation.cpp + StatePreparation2.cpp PySynthCallableBlockArgs.cpp DEPENDS diff --git a/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp b/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp index 96e3dcce70..c9dd468376 100644 --- a/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp +++ b/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp @@ -9,6 +9,7 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Factory.h" #include "cudaq/Optimizer/Builder/Runtime.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Support/Debug.h" @@ -93,6 +94,7 @@ class GenerateDeviceCodeLoader // declarations are just thrown away when the code is JIT compiled. 
SmallVector declarations; for (auto &op : *module.getBody()) { + llvm::errs() << "**ADDING OP ***: " << op; if (auto funcOp = dyn_cast(op)) { if (funcOp.empty()) { LLVM_DEBUG(llvm::dbgs() << "adding declaration: " << op); @@ -103,6 +105,11 @@ class GenerateDeviceCodeLoader LLVM_DEBUG(llvm::dbgs() << "adding declaration: " << op); declarations.push_back(&op); } + } + // cc.global constant @__nvqpp__rodata_init_0 (dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<4xf64>) : !cc.array + else if (auto globalOp = dyn_cast(op)) { + LLVM_DEBUG(llvm::dbgs() << "adding global: " << op); + declarations.push_back(&op); } } diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 7d83c152dd..1b1dddc028 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -142,7 +142,7 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, // Stick global at end of Module. builder.setInsertionPointToEnd(module.getBody()); std::string symbol = - "__nvqpp_rodata_init_state." + std::to_string(counter++); + "__nvqpp_rodata_init_state_qs." + std::to_string(counter++); builder.create(argLoc, arrTy, symbol, arrayAttr, /*isConstant=*/true, /*isExternal=*/false); diff --git a/lib/Optimizer/Transforms/StatePreparation2.cpp b/lib/Optimizer/Transforms/StatePreparation2.cpp new file mode 100644 index 0000000000..a8047821a0 --- /dev/null +++ b/lib/Optimizer/Transforms/StatePreparation2.cpp @@ -0,0 +1,304 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include "PassDetails.h" +#include "StateDecomposer.h" +#include "cudaq/Optimizer/Builder/Runtime.h" +#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" +#include "cudaq/Optimizer/Transforms/Passes.h" +#include "llvm/Support/Debug.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Target/LLVMIR/TypeToLLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/RegionUtils.h" +#include + +#include + +#define DEBUG_TYPE "state-preparation2" + +using namespace mlir; + +/// Replace a qubit initialization from vectors with quantum gates. 
+/// For example: +/// +/// func.func @__nvqpp__mlirgen__function_test._Z4testSt6vectorISt7complexIfESaIS1_EE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +/// %0 = cc.address_of @__nvqpp_rodata_init_state.0 : !cc.ptr x 4>> +/// %1 = cc.cast %0 : (!cc.ptr x 4>>) -> !cc.ptr> +/// %2 = quake.alloca !quake.veq<2> +/// %3 = quake.init_state %2, %1 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> +/// return +/// } +/// +/// is converted to: +/// +/// func.func @foo(%arg0 : !cc.stdvec>) { +/// %0 = quake.alloca !quake.veq<2> +/// %c0_i64 = arith.constant 0 : i64 +/// %1 = quake.extract_ref %0[%c0_i64] : (!quake.veq<2>, i64) -> !quake.ref +/// %cst = arith.constant 1.5707963267948968 : f64 +/// quake.ry (%cst) %1 : (f64, !quake.ref) -> () +/// %c1_i64 = arith.constant 1 : i64 +/// %2 = quake.extract_ref %0[%c1_i64] : (!quake.veq<2>, i64) -> !quake.ref +/// %cst_0 = arith.constant 1.5707963267948966 : f64 +/// quake.ry (%cst_0) %2 : (f64, !quake.ref) -> () +/// quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () +/// %cst_1 = arith.constant -1.5707963267948966 : f64 +/// quake.ry (%cst_1) %2 : (f64, !quake.ref) -> () +/// quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () +/// return +/// } +/// +/// Note: the following synthesis and const prop passes will replace +/// the argument by a constant and propagate the values and vector size +/// through other instructions. + +namespace { + +template +concept IntegralType = + std::is_same::value || std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; + +template +concept FloatingType = std::is_same::value; + +template +concept DoubleType = std::is_same::value; + +template +concept ComplexDataType = FloatingType || DoubleType || IntegralType; + +/// Input was complex but we prefer +/// complex. Make a copy, extending the values. 
+template +std::vector> convertToComplex(std::complex *data, + std::uint64_t size) { + auto convertData = std::vector>(size); + for (std::size_t i = 0; i < size; ++i) + convertData[i] = std::complex{static_cast(data[i].real()), + static_cast(data[i].imag())}; + return convertData; +} + +template +std::vector> convertToComplex(std::complex *data, + std::uint64_t size) { + return std::vector>(data, data + size); +} + +/// Input was float/double but we prefer complex. +/// Make a copy, extending or truncating the values. +template +std::vector> convertToComplex(From *data, + std::uint64_t size) { + auto convertData = std::vector>(size); + for (std::size_t i = 0; i < size; ++i) + convertData[i] = std::complex{static_cast(data[i]), + static_cast(0.0)}; + return convertData; +} + +std::vector> readConstantArray(mlir::OpBuilder &builder, cudaq::cc::GlobalOp &global) { + std::vector> result{}; + + auto attr = global.getValue(); + auto type = global.getType().getElementType(); + + if (auto arrayTy = dyn_cast(type)) { + auto eleTy = arrayTy.getElementType(); + std::cout << "Attribute element type:" << std::endl; + eleTy.dump(); + + if (attr.has_value()) { + // auto tensorTy = RankedTensorType::get(size, eleTy); + // auto f64Attr = DenseElementsAttr::get(tensorTy, values); + if (auto elementsAttr = dyn_cast(attr.value())) { + auto values = elementsAttr.getValues(); + for (auto it = values.begin(); it != values.end(); ++it) { + result.push_back({*it, 0.0}); + } + } + + else if (auto values = dyn_cast(attr.value())) { + for (auto it = values.begin(); it != values.end(); ++it) { + auto real = *it; + // for (std::size_t idx = 0; idx < numConstants; idx += isComplex ? 
2 : 1) { + auto v = [&]() -> std::complex { + //auto val = constantValues[idx]; + + if (isa(eleTy)) + return { + cast(real).getValue().convertToDouble(), + static_cast(0.0) + }; + if (isa(eleTy)) + return { + static_cast(cast(real).getInt()), + static_cast(0.0) + }; + assert(isa(eleTy)); + it++; + auto imag = *it; + return { + cast(real).getValue().convertToDouble(), + cast(imag).getValue().convertToDouble() + }; + }(); + + result.push_back(v); + } + } + } + } + + std::cout << "Results (" << result.size() << "):" << std::endl; + for (auto &r: result) { + std::cout << r << ", " << std::endl; + } + return result; +} + +LogicalResult +transform(OpBuilder &builder, ModuleOp module) { + //auto *ctx = builder.getContext(); + + auto toErase = std::vector(); + +// Module after everything +// module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.triple = "x86_64-unknown-linux-gnu", quake.mangled_name_map = {__nvqpp__mlirgen__function_test._Z4testSt6vectorISt7complexIfESaIS1_EE = "_Z4testSt6vectorISt7complexIfESaIS1_EE"}} { +// func.func @__nvqpp__mlirgen__function_test._Z4testSt6vectorISt7complexIfESaIS1_EE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// %0 = cc.address_of @__nvqpp_rodata_init_state.0 : !cc.ptr x 4>> +// %1 = cc.cast %0 : (!cc.ptr x 4>>) -> !cc.ptr> +// %2 = quake.alloca !quake.veq<2> +// %3 = quake.init_state %2, %1 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> +// return +// } +// cc.global constant @__nvqpp_rodata_init_state.0 ([0.707106769 : f32, 0.000000e+00 : f32, 0.707106769 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32]) : !cc.array x 4> +// } + +// func.func @__nvqpp__mlirgen__function_f._Z1fv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// %0 = cc.address_of @__nvqpp__rodata_init_0 : !cc.ptr> +// %1 = quake.alloca !quake.veq<2> +// %2 = quake.init_state %1, %0 : (!quake.veq<2>, !cc.ptr>) -> 
!quake.veq<2> +// quake.dealloc %2 : !quake.veq<2> +// return +// } + + + module->walk([&](Operation *op) { + if (auto initOp = dyn_cast(op)) { + toErase.push_back(initOp); + auto loc = op->getLoc(); + builder.setInsertionPointAfter(initOp); + // Find the qvector alloc. + auto qubits = initOp.getOperand(0); + if (auto alloc = dyn_cast(qubits.getDefiningOp())) { + + // Find vector data. + auto data = initOp.getOperand(1); + if (auto cast = dyn_cast(data.getDefiningOp())) { + data = cast.getOperand(); + toErase.push_back(cast); + } + if (auto addr = dyn_cast(data.getDefiningOp())) { + + auto globalName = addr.getGlobalName(); + auto symbol = module.lookupSymbol(globalName); + if (auto global = dyn_cast(symbol)) { + // Read state initialization data from the global array. + auto vec = readConstantArray(builder, global); + + // Prepare state from vector data. + auto gateBuilder = StateGateBuilder(builder, loc, qubits); + auto decomposer = StateDecomposer(gateBuilder, vec); + decomposer.decompose(); + + initOp.replaceAllUsesWith(qubits); + toErase.push_back(addr); + toErase.push_back(global); + } + } + } + } + }); + + for (auto &op : toErase) { + op->erase(); + } + + return success(); +} + +class StatePreparation2 : public cudaq::opt::PrepareState2Base { +protected: + // The name of the kernel to be synthesized + std::string kernelName; + + // The raw pointer to the runtime arguments. 
+ void *args; + +public: + StatePreparation2() = default; + StatePreparation2(std::string_view kernel, void *a) + : kernelName(kernel), args(a) {} + + mlir::ModuleOp getModule() { return getOperation(); } + + + void runOnOperation() override final { + auto module = getModule(); + + std::cout << "Module before state prep2" << std::endl; + module.dump(); + + auto kernelNameInQuake = cudaq::runtime::cudaqGenPrefixName + kernelName; + // Get the function we care about (the one with kernelName) + auto funcOp = module.lookupSymbol(kernelNameInQuake); + if (!funcOp) { + module.emitOpError("The kernel '" + kernelName + + "' was not found in the module."); + signalPassFailure(); + return; + } + + // Create the builder. + auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); + + auto result = transform(builder, module); + if (result.failed()) { + module.emitOpError("Failed to prepare state for '" + kernelName); + signalPassFailure(); + return; + } + + std::cout << "Module after state prep2" << std::endl; + module.dump(); + } +}; + +} // namespace + +std::unique_ptr cudaq::opt::createStatePreparation2() { + return std::make_unique(); +} + +std::unique_ptr +cudaq::opt::createStatePreparation2(std::string_view kernelName, void *a) { + return std::make_unique(kernelName, a); +} diff --git a/program.py b/program.py new file mode 100644 index 0000000000..92321a755a --- /dev/null +++ b/program.py @@ -0,0 +1,23 @@ +# ============================================================================ # +# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # +# All rights reserved. # +# # +# This source code and the accompanying materials are made available under # +# the terms of the Apache License 2.0 which accompanies this distribution. # +# ============================================================================ # + +import cudaq +import numpy as np + +c = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
+ +@cudaq.kernel +def kernel(vec: list[complex]): + q = cudaq.qvector(vec) + +synthesized = cudaq.synthesize(kernel, c) +print(synthesized) + +counts = cudaq.sample(synthesized) +assert '00' in counts +assert '10' in counts \ No newline at end of file diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 08f41e60ec..c469d543ee 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -18,6 +18,7 @@ #include "cudaq/Optimizer/CodeGen/OpenQASMEmitter.h" #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Support/Plugin.h" @@ -378,6 +379,18 @@ class BaseRemoteRESTQPU : public cudaq::QPU { auto moduleOp = builder.create(); moduleOp.push_back(func.clone()); moduleOp->setAttrs(m_module->getAttrDictionary()); + for (auto &op: m_module.getOps()) { + // Add globals referenced in the func. 
+ if (auto globalOp = dyn_cast(op)) { + //for (auto *use: globalOp->getUsers()) { + // auto parent = use->getParentOfType(); + // std::cout << "Global " << globalOp.getName().str() << " is used in " << parent.getName().str() <disableMultithreading(); @@ -411,9 +427,42 @@ class BaseRemoteRESTQPU : public cudaq::QPU { throw std::runtime_error("Could not successfully apply quake-synth."); } + std::cout << "Module after synthesis" << std::endl; + moduleOp.dump(); + // runPassPipeline("canonicalize,cse", moduleOp); + // std::cout << "Module after synthesis and cse" << std::endl; + // moduleOp.dump(); + + // Run the config-specified pass pipeline + //runPassPipeline(passPipelineConfig, moduleOp); + //runPassPipeline("cc-loop-unroll{allow-early-exit=1},canonicalize,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),quantinuum-gate-set-mapping", moduleOp); + //if (updatedArgs) { + cudaq::info("Run State Prep.\n"); + mlir::PassManager pm(&context); + pm.addPass(cudaq::opt::createStatePreparation2(kernelName, updatedArgs)); + if (disableMLIRthreading || enablePrintMLIREachPass) + moduleOp.getContext()->disableMultithreading(); + if (enablePrintMLIREachPass) + pm.enableIRPrinting(); + if (failed(pm.run(moduleOp))) + throw std::runtime_error("Could not successfully apply state prep."); + //} + + std::cout << "Module after state prep" << std::endl; + moduleOp.dump(); + + runPassPipeline("canonicalize,cse", moduleOp); + std::cout << "Module after state prep and cse" << std::endl; + moduleOp.dump(); + // Run the config-specified pass pipeline + //runPassPipeline("cc-loop-unroll{allow-early-exit=1},canonicalize,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition)", moduleOp); + // 
runPassPipeline("cc-loop-unroll{allow-early-exit=1},canonicalize,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),quantinuum-gate-set-mapping", moduleOp); runPassPipeline(passPipelineConfig, moduleOp); + std::cout << "Module after state prep and pipeline" << std::endl; + moduleOp.dump(); + auto entryPointFunc = moduleOp.lookupSymbol( std::string("__nvqpp__mlirgen__") + kernelName); std::vector mapping_reorder_idx; @@ -479,6 +528,8 @@ class BaseRemoteRESTQPU : public cudaq::QPU { // and use that for execution for (auto &[name, module] : modules) { auto clonedModule = module.clone(); + std::cout << "Module after everything" << std::endl; + clonedModule.dump(); jitEngines.emplace_back( cudaq::createQIRJITEngine(clonedModule, codegenTranslation)); } diff --git a/runtime/common/RuntimeMLIRCommonImpl.h b/runtime/common/RuntimeMLIRCommonImpl.h index df06f750a0..91722e1751 100644 --- a/runtime/common/RuntimeMLIRCommonImpl.h +++ b/runtime/common/RuntimeMLIRCommonImpl.h @@ -40,6 +40,8 @@ #include "mlir/Target/LLVMIR/Export.h" #include "mlir/Tools/ParseUtilities.h" +#include + namespace cudaq { bool setupTargetTriple(llvm::Module *llvmModule) { @@ -369,6 +371,11 @@ qirProfileTranslationFunction(const char *qirProfile, mlir::Operation *op, mlir::PassManager pm(context); if (printIntermediateMLIR) pm.enableIRPrinting(); + + std::cout << "qirProfileTranslationFunction" << std::endl; + pm.enableIRPrinting(); + context->disableMultithreading(); + std::string errMsg; llvm::raw_string_ostream errOs(errMsg); cudaq::opt::addPipelineConvertToQIR(pm, qirProfile); @@ -575,6 +582,9 @@ mlir::ExecutionEngine *createQIRJITEngine(mlir::ModuleOp &moduleOp, tm.setEnabled(cudaq::isTimingTagEnabled(cudaq::TIMING_JIT_PASSES)); auto timingScope = tm.getRootScope(); // starts the timer pm.enableTiming(timingScope); // do this right before pm.run + std::cout << "Common IR" << std::endl; + 
context->disableMultithreading(); + pm.enableIRPrinting(); if (failed(pm.run(module))) throw std::runtime_error( "[createQIRJITEngine] Lowering to QIR for remote emulation failed."); diff --git a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp index f8318e1dec..39602a6fba 100644 --- a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp +++ b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp @@ -36,6 +36,8 @@ class RemoteRESTQPU : public cudaq::BaseRemoteRESTQPU { // Get the quake representation of the kernel auto quakeCode = cudaq::get_quake_by_name(kernelName); + std::cout << "extractQuakeCodeAndContext" << quakeCode << std::endl; + auto m_module = parseSourceString(quakeCode, &context); if (!m_module) throw std::runtime_error("module cannot be parsed"); diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index 35a628c06a..ddc8e6e265 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -11,10 +11,14 @@ #include #include -__qpu__ void test(std::vector inState) { - cudaq::qvector q1 = inState; +__qpu__ void f() { + cudaq::qvector v = {1.0, 2.0, 3.0, 4.0}; } +// __qpu__ void test(std::vector inState) { +// cudaq::qvector q1 = inState; +// } + void printCounts(cudaq::sample_result& result) { std::vector values{}; for (auto &&[bits, counts] : result) { @@ -28,25 +32,28 @@ void printCounts(cudaq::sample_result& result) { } int main() { - std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; - { - // Passing state data as argument (kernel mode) - auto counts = cudaq::sample(test, vec); - printCounts(counts); - - counts = cudaq::sample(test, vec1); - printCounts(counts); - } - - { - // Passing state data as argument (builder mode) - auto [kernel, v] = cudaq::make_kernel>(); - auto qubits = kernel.qalloc(v); + auto counts = 
cudaq::sample(f); + printCounts(counts); + + // std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + // std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + // { + // // Passing state data as argument (kernel mode) + // auto counts = cudaq::sample(test, vec); + // printCounts(counts); + + // counts = cudaq::sample(test, vec1); + // printCounts(counts); + // } + + // { + // // Passing state data as argument (builder mode) + // auto [kernel, v] = cudaq::make_kernel>(); + // auto qubits = kernel.qalloc(v); - auto counts = cudaq::sample(kernel, vec); - printCounts(counts); - } + // auto counts = cudaq::sample(kernel, vec); + // printCounts(counts); + // } } // CHECK: 00 From 53d2644b745a4e1e8b69912c55bd4b17ac53ebaa Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 3 Jul 2024 09:11:09 -0700 Subject: [PATCH 12/50] Read complex numbers --- lib/Frontend/nvqpp/ConvertExpr.cpp | 14 ++++++++++++++ targettests/execution/state_preparation_vector.cpp | 9 +++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/lib/Frontend/nvqpp/ConvertExpr.cpp b/lib/Frontend/nvqpp/ConvertExpr.cpp index 7c73faa2d5..8ec06e58af 100644 --- a/lib/Frontend/nvqpp/ConvertExpr.cpp +++ b/lib/Frontend/nvqpp/ConvertExpr.cpp @@ -2417,6 +2417,18 @@ static Type getEleTyFromVectorCtor(Type ctorTy) { return ctorTy; } +mlir::Operation* constProp(OpBuilder &builder, Location &loc, Operation* op) { + if (auto &constOp = dyn_cast(op)) { + return op; + } + if (auto &truncOp = dyn_cast(op)) { + auto truncated = truncOp->getOperand(0); + auto fTy = op->getType(); + builder.create(loc, cast(val).getValue(), fTy); + } + +} + bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { auto loc = toLocation(x); auto *ctor = x->getConstructor(); @@ -2579,6 +2591,8 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { Value real = popValue(); std::cout << "Real and Imag values" << std::endl; + real = constProp(builder, loc, real); + imag = constProp(builder, loc, 
imag); real.dump(); imag.dump(); if (auto realOp = real.getDefiningOp()) { diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index ddc8e6e265..1a96b3e881 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -12,12 +12,13 @@ #include __qpu__ void f() { - cudaq::qvector v = {1.0, 2.0, 3.0, 4.0}; + cudaq::qvector v = { static_cast(1.0), static_cast(2.0), static_cast(3.0), static_cast(4.0)}; + // cudaq::qvector v = { 1.0, 2.0, 3.0, 4.0}; } -// __qpu__ void test(std::vector inState) { -// cudaq::qvector q1 = inState; -// } +__qpu__ void test(std::vector inState) { + cudaq::qvector q1 = inState; +} void printCounts(cudaq::sample_result& result) { std::vector values{}; From 7e4523f7c7500498eff31a45b7550f6b8ab81018 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 3 Jul 2024 09:32:06 -0700 Subject: [PATCH 13/50] Merge with main --- lib/Frontend/nvqpp/ConvertExpr.cpp | 103 ++++++++++++++--------------- 1 file changed, 49 insertions(+), 54 deletions(-) diff --git a/lib/Frontend/nvqpp/ConvertExpr.cpp b/lib/Frontend/nvqpp/ConvertExpr.cpp index 8ec06e58af..6051d0794a 100644 --- a/lib/Frontend/nvqpp/ConvertExpr.cpp +++ b/lib/Frontend/nvqpp/ConvertExpr.cpp @@ -15,8 +15,6 @@ #include "llvm/Support/Debug.h" #include "mlir/Dialect/SCF/IR/SCF.h" -#include - #define DEBUG_TYPE "lower-ast-expr" using namespace mlir; @@ -280,8 +278,10 @@ static Value toIntegerImpl(OpBuilder &builder, Location loc, Value bitVec) { auto eleTy = cast(bitVec.getType()).getElementType(); auto elePtrTy = cudaq::cc::PointerType::get(eleTy); + auto eleArrTy = + cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(eleTy)); auto vecPtr = - builder.create(loc, elePtrTy, bitVec); + builder.create(loc, eleArrTy, bitVec); auto eleAddr = builder.create( loc, elePtrTy, vecPtr, ValueRange{kIter}); Value bitElement = builder.create(loc, eleAddr); @@ -700,7 +700,7 @@ bool 
QuakeBridgeVisitor::VisitCastExpr(clang::CastExpr *x) { assert(result && "integer conversion failed"); return result; } - TODO_loc(loc, "unhandled user defined implicit conversion"); + TODO_loc(loc, "unhandled user-defined implicit conversion"); } case clang::CastKind::CK_ConstructorConversion: { // Enable implicit conversion of surface types, which both map to VeqType. @@ -1109,11 +1109,16 @@ bool QuakeBridgeVisitor::VisitMemberExpr(clang::MemberExpr *x) { if (auto *field = dyn_cast(x->getMemberDecl())) { auto loc = toLocation(x->getSourceRange()); auto object = popValue(); // DeclRefExpr + auto eleTy = cast(object.getType()).getElementType(); + SmallVector offsets; + if (auto arrTy = dyn_cast(eleTy)) + if (arrTy.isUnknownSize()) + offsets.push_back(0); std::int32_t offset = field->getFieldIndex(); + offsets.push_back(offset); auto ty = popType(); return pushValue(builder.create( - loc, cc::PointerType::get(ty), object, - SmallVector{0, offset})); + loc, cc::PointerType::get(ty), object, offsets)); } return true; } @@ -1214,10 +1219,11 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { assert(isa(calleeTy)); auto negativeOneIndex = getConstantInt(builder, loc, -1, 64); auto eleTy = cast(svec.getType()).getElementType(); + auto eleArrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); auto elePtrTy = cc::PointerType::get(eleTy); auto *ctx = eleTy.getContext(); auto i64Ty = mlir::IntegerType::get(ctx, 64); - auto vecPtr = builder.create(loc, elePtrTy, svec); + auto vecPtr = builder.create(loc, eleArrTy, svec); auto vecLen = builder.create(loc, i64Ty, svec); Value vecLenMinusOne = builder.create(loc, vecLen, negativeOneIndex); @@ -1231,9 +1237,10 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { assert(isa(calleeTy)); auto eleTy = cast(svec.getType()).getElementType(); auto elePtrTy = cc::PointerType::get(eleTy); + auto eleArrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); auto *ctx = eleTy.getContext(); auto i64Ty = 
mlir::IntegerType::get(ctx, 64); - auto vecPtr = builder.create(loc, elePtrTy, svec); + auto vecPtr = builder.create(loc, eleArrTy, svec); Value vecLen = builder.create(loc, i64Ty, svec); return pushValue(builder.create( loc, elePtrTy, vecPtr, ValueRange{vecLen})); @@ -1247,7 +1254,8 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { builder.create(loc, -1, 64); auto eleTy = cast(svec.getType()).getElementType(); auto elePtrTy = cc::PointerType::get(eleTy); - auto vecPtr = builder.create(loc, elePtrTy, svec); + auto eleArrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); + auto vecPtr = builder.create(loc, eleArrTy, svec); return pushValue(builder.create( loc, elePtrTy, vecPtr, ValueRange{negativeOneIndex})); } @@ -1890,7 +1898,8 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { offset = builder.create(loc, scale, args[1]); } else { ptrTy = cc::PointerType::get(eleTy); - vecPtr = builder.create(loc, ptrTy, args[0]); + auto arrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); + vecPtr = builder.create(loc, arrTy, args[0]); } auto ptr = builder.create(loc, ptrTy, vecPtr, ArrayRef{offset}); @@ -1950,7 +1959,13 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto loInt = builder.create(loc, i64Ty, args[0]); auto ptrTy = cast(args[0].getType()); auto eleTy = ptrTy.getElementType(); - auto arrTy = cc::ArrayType::get(eleTy); + auto arrTy = dyn_cast(eleTy); + if (arrTy) { + eleTy = arrTy.getElementType(); + ptrTy = cc::PointerType::get(eleTy); + } else { + arrTy = cc::ArrayType::get(eleTy); + } auto eleSize = eleTy.getIntOrFloatBitWidth(); auto adjust = getConstantInt(builder, loc, eleSize / 4, i64Ty); auto dist = builder.create(loc, hiInt, loInt); @@ -1988,9 +2003,8 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { if (specArgs[0].getKind() == clang::TemplateArgument::ArgKind::Integral) { auto ptr = builder.create( loc, resultTy, args[0], - ArrayRef{ - 0, static_cast( - 
specArgs[0].getAsIntegral().getExtValue())}); + ArrayRef{static_cast( + specArgs[0].getAsIntegral().getExtValue())}); return pushValue(builder.create(loc, ptr)); } auto *selectTy = specArgs[0].getAsType().getTypePtr(); @@ -1999,7 +2013,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { for (auto &templateArg : specArgs[1].pack_elements()) { if (templateArg.getAsType().getTypePtr() == selectTy) { auto ptr = builder.create( - loc, resultTy, args[0], ArrayRef{0, i}); + loc, resultTy, args[0], ArrayRef{i}); return pushValue(builder.create(loc, ptr)); } ++i; @@ -2112,7 +2126,8 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( } auto eleTy = cast(svec.getType()).getElementType(); auto elePtrTy = cc::PointerType::get(eleTy); - auto vecPtr = builder.create(loc, elePtrTy, svec); + auto eleArrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); + auto vecPtr = builder.create(loc, eleArrTy, svec); auto eleAddr = builder.create(loc, elePtrTy, vecPtr, ValueRange{indexVar}); return replaceTOSValue(eleAddr); @@ -2124,8 +2139,10 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( auto indexVar = popValue(); auto svec = popValue(); assert(svec.getType().isa()); - auto elePtrTy = cc::PointerType::get(builder.getI8Type()); - auto vecPtr = builder.create(loc, elePtrTy, svec); + auto i8Ty = builder.getI8Type(); + auto elePtrTy = cc::PointerType::get(i8Ty); + auto eleArrTy = cc::PointerType::get(cc::ArrayType::get(i8Ty)); + auto vecPtr = builder.create(loc, eleArrTy, svec); auto eleAddr = builder.create(loc, elePtrTy, vecPtr, ValueRange{indexVar}); auto i1PtrTy = cc::PointerType::get(builder.getI1Type()); @@ -2353,13 +2370,21 @@ bool QuakeBridgeVisitor::VisitInitListExpr(clang::InitListExpr *x) { ArrayRef{i / structMems, i % structMems}); } else { auto ptrTy = cc::PointerType::get(structTy.getMembers()[i]); - ptr = builder.create( - loc, ptrTy, alloca, ArrayRef{0, i}); + ptr = builder.create(loc, ptrTy, alloca, + ArrayRef{i}); } } else { - auto ptrTy = 
cc::PointerType::get(eleTy); - ptr = builder.create(loc, ptrTy, alloca, - ArrayRef{i}); + if (numEles > 1) { + auto ptrTy = cc::PointerType::get(eleTy); + ptr = builder.create(loc, ptrTy, alloca, + ArrayRef{i}); + } else { + auto arrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); + auto cast = builder.create(loc, arrTy, alloca); + auto ptrTy = cc::PointerType::get(eleTy); + ptr = builder.create(loc, ptrTy, cast, + ArrayRef{i}); + } } assert(ptr && (v.getType() == @@ -2417,18 +2442,6 @@ static Type getEleTyFromVectorCtor(Type ctorTy) { return ctorTy; } -mlir::Operation* constProp(OpBuilder &builder, Location &loc, Operation* op) { - if (auto &constOp = dyn_cast(op)) { - return op; - } - if (auto &truncOp = dyn_cast(op)) { - auto truncated = truncOp->getOperand(0); - auto fTy = op->getType(); - builder.create(loc, cast(val).getValue(), fTy); - } - -} - bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { auto loc = toLocation(x); auto *ctor = x->getConstructor(); @@ -2583,30 +2596,12 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { } } return false; - }(); + }(); if (isVectorOfQubitRefs) return true; if (ctorName == "complex") { Value imag = popValue(); Value real = popValue(); - - std::cout << "Real and Imag values" << std::endl; - real = constProp(builder, loc, real); - imag = constProp(builder, loc, imag); - real.dump(); - imag.dump(); - if (auto realOp = real.getDefiningOp()) { - if (auto imagOp = imag.getDefiningOp()) { - std::cout << "Creating const complex" << std::endl; - auto realConst = realOp.value().convertToDouble(); - auto imagConst = imagOp.value().convertToDouble(); - auto attr = (real.getType() == builder.getF64Type())? 
- builder.getF64ArrayAttr({realConst, imagConst}): - builder.getF32ArrayAttr({static_cast(realConst), static_cast(imagConst)}); - return pushValue(builder.create(loc, ComplexType::get(real.getType()), attr)); - } - } - std::cout << "Creating non-const complex" << std::endl; return pushValue(builder.create( loc, ComplexType::get(real.getType()), real, imag)); } From 3b315937a7259856e2f9b84e96cdddc4e39fd5cb Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 5 Jul 2024 09:49:26 -0700 Subject: [PATCH 14/50] Cleanup --- include/cudaq/Optimizer/Transforms/Passes.h | 5 +- include/cudaq/Optimizer/Transforms/Passes.td | 11 - lib/Optimizer/CodeGen/ConvertToQIR.cpp | 8 - .../Transforms/ApplyControlNegations.cpp | 4 - lib/Optimizer/Transforms/BasisConversion.cpp | 14 +- lib/Optimizer/Transforms/CMakeLists.txt | 1 - .../Transforms/GenDeviceCodeLoader.cpp | 5 +- .../Transforms/GenKernelExecution.cpp | 1 + lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 133 ++++++- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 2 +- lib/Optimizer/Transforms/StatePreparation.cpp | 375 ++++++------------ .../Transforms/StatePreparation2.cpp | 304 -------------- program.py | 23 -- python/cudaq/kernel/ast_bridge.py | 10 +- .../cudaq/platform/py_alt_launch_kernel.cpp | 5 +- .../tests/kernel/test_kernel_qvector_init.py | 152 +------ runtime/common/BaseRemoteRESTQPU.h | 50 +-- runtime/common/BaseRestRemoteClient.h | 13 +- runtime/common/RuntimeMLIRCommonImpl.h | 9 - .../platform/default/rest/RemoteRESTQPU.cpp | 2 - .../execution/state_preparation_vector.cpp | 136 +++++-- 21 files changed, 394 insertions(+), 869 deletions(-) delete mode 100644 lib/Optimizer/Transforms/StatePreparation2.cpp delete mode 100644 program.py diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index d0759cac85..9431b3da0d 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -36,14 +36,13 @@ 
createApplyOpSpecializationPass(bool computeActionOpt); std::unique_ptr createDelayMeasurementsPass(); std::unique_ptr createExpandMeasurementsPass(); std::unique_ptr createLambdaLiftingPass(); +std::unique_ptr createLiftArrayAllocPass(); std::unique_ptr createLowerToCFGPass(); std::unique_ptr createObserveAnsatzPass(std::vector &); std::unique_ptr createQuakeAddMetadata(); std::unique_ptr createQuakeAddDeallocs(); std::unique_ptr createStatePreparation(); -std::unique_ptr createStatePreparation(std::string_view, void *); -std::unique_ptr createStatePreparation2(); -std::unique_ptr createStatePreparation2(std::string_view, void *); +std::unique_ptr createStatePreparation(std::string_view); std::unique_ptr createQuakeSynthesizer(); std::unique_ptr createQuakeSynthesizer(std::string_view, void *); std::unique_ptr createRaiseToAffinePass(); diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 7b0be15dc2..b00bf1f1e6 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -565,17 +565,6 @@ def PrepareState : Pass<"state-prep", "mlir::ModuleOp"> { let constructor = "cudaq::opt::createStatePreparation()"; } -def PrepareState2 : Pass<"state-prep2", "mlir::ModuleOp"> { - let summary = - "Convert state vector data into gates"; - let description = [{ - Convert quake representation that includes qubit initialization - from data into qubit initialization using gates. 
- }]; - - let constructor = "cudaq::opt::createStatePreparation2()"; -} - def QuakeSynthesize : Pass<"quake-synth", "mlir::ModuleOp"> { let summary = "Synthesize concrete quantum program from Quake code plus runtime values."; diff --git a/lib/Optimizer/CodeGen/ConvertToQIR.cpp b/lib/Optimizer/CodeGen/ConvertToQIR.cpp index 6c34b568ac..e4f370876e 100644 --- a/lib/Optimizer/CodeGen/ConvertToQIR.cpp +++ b/lib/Optimizer/CodeGen/ConvertToQIR.cpp @@ -37,8 +37,6 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include - #define DEBUG_TYPE "convert-to-qir" namespace cudaq::opt { @@ -74,7 +72,6 @@ class ConvertToQIR : public cudaq::opt::impl::ConvertToQIRBase { // buffer of constants. LogicalResult eraseConstantArrayOps() { bool ok = true; - SmallVector cleanUps; getOperation().walk([&](cudaq::cc::ConstantArrayOp carr) { // If there is a constant array, then we expect that it is involved in @@ -149,9 +146,6 @@ class ConvertToQIR : public cudaq::opt::impl::ConvertToQIRBase { /// ops. This step makes converting a DAG of nodes in the conversion step /// simpler. 
void runOnOperation() override final { - std::cout << "Before ConvertToQIR" << std::endl; - getOperation().dump(); - auto *context = &getContext(); if (failed(fuseSubgraphPatterns(context, getOperation()))) { signalPassFailure(); @@ -191,8 +185,6 @@ class ConvertToQIR : public cudaq::opt::impl::ConvertToQIRBase { LLVM_DEBUG(llvm::dbgs() << "Before conversion to QIR:\n"; op.dump()); if (failed(applyFullConversion(op, target, std::move(patterns)))) { LLVM_DEBUG(getOperation().dump()); - std::cout << "Filed ConvertToQIR" << std::endl; - getOperation().dump(); signalPassFailure(); } LLVM_DEBUG(llvm::dbgs() << "After conversion to QIR:\n"; op.dump()); diff --git a/lib/Optimizer/Transforms/ApplyControlNegations.cpp b/lib/Optimizer/Transforms/ApplyControlNegations.cpp index e10df9bd7c..c88f80e6a1 100644 --- a/lib/Optimizer/Transforms/ApplyControlNegations.cpp +++ b/lib/Optimizer/Transforms/ApplyControlNegations.cpp @@ -16,7 +16,6 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" -#include namespace cudaq::opt { #define GEN_PASS_DEF_APPLYCONTROLNEGATIONS #include "cudaq/Optimizer/Transforms/Passes.h.inc" @@ -68,9 +67,6 @@ struct ApplyControlNegationsPass void runOnOperation() override { auto funcOp = getOperation(); - std::cout << " >>>> ApplyControlNegations *** " << std::endl; - funcOp.dump(); - std::cout << " <<< ApplyControlNegations *** " << std::endl; auto *ctx = &getContext(); RewritePatternSet patterns(ctx); patterns.insert< diff --git a/lib/Optimizer/Transforms/BasisConversion.cpp b/lib/Optimizer/Transforms/BasisConversion.cpp index 816e7c354d..326feb87f2 100644 --- a/lib/Optimizer/Transforms/BasisConversion.cpp +++ b/lib/Optimizer/Transforms/BasisConversion.cpp @@ -18,8 +18,6 @@ #include "mlir/Rewrite/FrozenRewritePatternSet.h" #include "mlir/Transforms/DialectConversion.h" -#include - using namespace mlir; //===----------------------------------------------------------------------===// @@ -105,10 +103,6 @@ struct 
BasisConversion void runOnOperation() override { auto module = getOperation(); - - std::cout << "Before BasisConversion" << std::endl; - getOperation().dump(); - if (basis.empty()) { module.emitError("Basis conversion requires a target basis"); signalPassFailure(); @@ -167,14 +161,8 @@ struct BasisConversion return applyFullConversion(op, target, patterns); }); - if (failed(rewriteResult)) { + if (failed(rewriteResult)) signalPassFailure(); - std::cout << "Failed BasisConversion" << std::endl; - getOperation().dump(); - } else { - std::cout << "Succeeded BasisConversion" << std::endl; - getOperation().dump(); - } } }; diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index d3e15d1382..717e379ef4 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -45,7 +45,6 @@ add_cudaq_library(OptTransforms RegToMem.cpp StateDecomposer.cpp StatePreparation.cpp - StatePreparation2.cpp PySynthCallableBlockArgs.cpp DEPENDS diff --git a/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp b/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp index c9dd468376..74f4306654 100644 --- a/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp +++ b/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp @@ -94,7 +94,6 @@ class GenerateDeviceCodeLoader // declarations are just thrown away when the code is JIT compiled. 
SmallVector declarations; for (auto &op : *module.getBody()) { - llvm::errs() << "**ADDING OP ***: " << op; if (auto funcOp = dyn_cast(op)) { if (funcOp.empty()) { LLVM_DEBUG(llvm::dbgs() << "adding declaration: " << op); @@ -105,9 +104,7 @@ class GenerateDeviceCodeLoader LLVM_DEBUG(llvm::dbgs() << "adding declaration: " << op); declarations.push_back(&op); } - } - // cc.global constant @__nvqpp__rodata_init_0 (dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<4xf64>) : !cc.array - else if (auto globalOp = dyn_cast(op)) { + } else if (auto globalOp = dyn_cast(op)) { LLVM_DEBUG(llvm::dbgs() << "adding global: " << op); declarations.push_back(&op); } diff --git a/lib/Optimizer/Transforms/GenKernelExecution.cpp b/lib/Optimizer/Transforms/GenKernelExecution.cpp index e0877d4c3d..3c76d9e197 100644 --- a/lib/Optimizer/Transforms/GenKernelExecution.cpp +++ b/lib/Optimizer/Transforms/GenKernelExecution.cpp @@ -1243,6 +1243,7 @@ class GenerateKernelExecution } continue; } + stVal = builder.create(loc, stVal.getType(), stVal, arg, idx); } diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index 8093a477e6..d4c0c335ed 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -38,11 +38,12 @@ class AllocaPattern : public OpRewritePattern { PatternRewriter &rewriter) const override { SmallVector stores; bool toGlobal = false; - if (!isGoodCandidate(alloc, stores, dom, toGlobal)) + if (!isGoodCandidate(alloc, stores, dom, toGlobal)) { return failure(); + } LLVM_DEBUG(llvm::dbgs() << "Candidate was found\n"); - auto arrTy = cast(alloc.getElementType()); + auto arrTy = cast(alloc.getType().getElementType()); SmallVector values; // Every element of `stores` must be a cc::StoreOp with a ConstantOp as the @@ -65,6 +66,7 @@ class AllocaPattern : public OpRewritePattern { Value conArr; Value conGlobal; if (toGlobal) { + auto ip = rewriter.saveInsertionPoint(); 
static unsigned counter = 0; auto ptrTy = cudaq::cc::PointerType::get(arrTy); // Build a new name based on the kernel name. @@ -110,6 +112,7 @@ class AllocaPattern : public OpRewritePattern { /*isExternal=*/false); } } + rewriter.restoreInsertionPoint(ip); conGlobal = rewriter.create(loc, ptrTy, name); conArr = rewriter.create(loc, arrTy, conGlobal); } else { @@ -117,6 +120,8 @@ class AllocaPattern : public OpRewritePattern { rewriter.create(loc, arrTy, valuesAttr); } + std::vector toErase; + // Rewalk all the uses of alloc, u, which must be cc.cast or cc.compute_ptr. // For each,u, remove a store and replace a load with a cc.extract_value. for (auto &use : alloc->getUses()) { @@ -128,6 +133,7 @@ class AllocaPattern : public OpRewritePattern { for (auto &useuse : user->getUses()) { auto *useuser = useuse.getOwner(); if (auto ist = dyn_cast(useuser)) { + rewriter.setInsertionPointAfter(useuser); LLVM_DEBUG(llvm::dbgs() << "replaced init_state\n"); assert(conGlobal && "global must be defined"); rewriter.replaceOpWithNewOp( @@ -135,23 +141,31 @@ class AllocaPattern : public OpRewritePattern { continue; } if (auto load = dyn_cast(useuser)) { + rewriter.setInsertionPointAfter(useuser); LLVM_DEBUG(llvm::dbgs() << "replaced load\n"); rewriter.replaceOpWithNewOp( load, eleTy, conArr, ArrayRef{offset}); continue; } - if (isa(useuser)) - rewriter.eraseOp(useuser); + if (isa(useuser)) { + toErase.push_back(useuser); + continue; + } isLive = true; } if (!isLive) - rewriter.eraseOp(user); + toErase.push_back(user); } if (toGlobal) { + rewriter.setInsertionPointAfter(alloc); rewriter.replaceOp(alloc, conGlobal); } else { - rewriter.eraseOp(alloc); + toErase.push_back(alloc); + } + + for (auto *op : toErase) { + rewriter.eraseOp(op); } return success(); } @@ -182,8 +196,8 @@ class AllocaPattern : public OpRewritePattern { if (std::distance(alloc->getUses().begin(), alloc->getUses().end()) < size) return false; - // Keep a scoreboard for every element in the array. 
Every element *must* be - // stored to with a constant exactly one time. + // Keep a scoreboard for every element in the array. Every element *must* + // be stored to with a constant exactly one time. scoreboard.resize(size); for (int i = 0; i < size; i++) scoreboard[i] = nullptr; @@ -249,11 +263,18 @@ class AllocaPattern : public OpRewritePattern { scoreboard[0] = w; continue; } - return false; + // can be a cast only used for a quake.init_state) + continue; + } else { + if (getWriteOp(cast, 0)) { + LLVM_DEBUG(llvm::dbgs() + << "another cast used in store: " << *op << '\n'); + return false; + } + // can be a cast only used for a quake.init_state) + continue; } LLVM_DEBUG(llvm::dbgs() << "unexpected cast: " << *op << '\n'); - toGlobalUses.push_back(op); - toGlobal = true; continue; } LLVM_DEBUG(llvm::dbgs() << "unexpected use: " << *op << '\n'); @@ -321,6 +342,88 @@ class ComplexCreatePattern : public OpRewritePattern { } }; +// Fold arith.trunc ops if the argument is constant. +class FloatTruncatePattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(arith::TruncFOp truncate, + PatternRewriter &rewriter) const override { + auto val = truncate.getOperand(); + auto valCon = val.getDefiningOp(); + if (valCon) { + auto v = valCon.value().convertToDouble(); + auto fTy = dyn_cast(truncate.getType()); + rewriter.replaceOpWithNewOp( + truncate, APFloat{static_cast(v)}, fTy); + return success(); + } + return failure(); + } +}; + +// Fold arith.ext ops if the argument is constant. 
+class FloatExtendPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(arith::ExtFOp extend, + PatternRewriter &rewriter) const override { + auto val = extend.getOperand(); + auto valCon = val.getDefiningOp(); + if (valCon) { + auto v = valCon.value().convertToFloat(); + auto fTy = dyn_cast(extend.getType()); + rewriter.replaceOpWithNewOp( + extend, APFloat{static_cast(v)}, fTy); + return success(); + } + return failure(); + } +}; + +// Fold complex.re ops if the argument is constant. +class ComplexRePattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(complex::ReOp re, + PatternRewriter &rewriter) const override { + auto val = re.getOperand(); + auto valCon = val.getDefiningOp(); + if (valCon) { + auto attr = valCon.getValue(); + auto real = cast(attr[0]).getValue(); + auto fTy = dyn_cast(re.getType()); + auto v = fTy.isF64() ? real.convertToDouble() : real.convertToFloat(); + rewriter.replaceOpWithNewOp(re, APFloat{v}, fTy); + return success(); + } + return failure(); + } +}; + +// Fold complex.im ops if the argument is constant. +class ComplexImPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(complex::ImOp im, + PatternRewriter &rewriter) const override { + auto val = im.getOperand(); + auto valCon = val.getDefiningOp(); + if (valCon) { + auto attr = valCon.getValue(); + auto real = cast(attr[0]).getValue(); + auto fTy = dyn_cast(im.getType()); + auto v = fTy.isF64() ? 
real.convertToDouble() : real.convertToFloat(); + rewriter.replaceOpWithNewOp(im, APFloat{v}, fTy); + return success(); + } + return failure(); + } +}; + class LiftArrayAllocPass : public cudaq::opt::impl::LiftArrayAllocBase { public: @@ -338,6 +441,10 @@ class LiftArrayAllocPass RewritePatternSet patterns(ctx); patterns.insert(ctx, domInfo, funcName, module); patterns.insert(ctx); + patterns.insert(ctx); + patterns.insert(ctx); + patterns.insert(ctx); + patterns.insert(ctx); LLVM_DEBUG(llvm::dbgs() << "Before lifting constant array: " << func << '\n'); @@ -352,3 +459,7 @@ class LiftArrayAllocPass } }; } // namespace + +std::unique_ptr cudaq::opt::createLiftArrayAllocPass() { + return std::make_unique(); +} diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 0545a4e296..d81fdd04c8 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -142,7 +142,7 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, // Stick global at end of Module. builder.setInsertionPointToEnd(module.getBody()); std::string symbol = - "__nvqpp_rodata_init_state_qs." + std::to_string(counter++); + "__nvqpp_rodata_init_state." + std::to_string(counter++); builder.create(argLoc, arrTy, symbol, arrayAttr, /*isConstant=*/true, /*isExternal=*/false); diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index 785e70b3f8..564a121f83 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -33,17 +33,17 @@ using namespace mlir; /// Replace a qubit initialization from vectors with quantum gates. 
/// For example: /// -/// func.func @foo(%arg0 : !cc.stdvec>) { -/// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 -/// %1 = math.cttz %0 : i64 -/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> -/// !cc.ptr> %3 = quake.alloca !quake.veq[%1 : i64] %4 = -/// quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> -/// !quake.veq return -/// } +/// func.func +/// @__nvqpp__mlirgen__function_test._Z4testSt6vectorISt7complexIfESaIS1_EE() +/// attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +/// %0 = cc.address_of @__nvqpp_rodata_init_state.0 : +/// !cc.ptr x 4>> %1 = cc.cast %0 : +/// (!cc.ptr x 4>>) -> !cc.ptr> %2 = +/// quake.alloca !quake.veq<2> %3 = quake.init_state %2, %1 : (!quake.veq<2>, +/// !cc.ptr>) -> !quake.veq<2> return +/// } /// -/// On a call that passes std::vector vec{M_SQRT1_2, 0., 0., -/// M_SQRT1_2} as arg0: +/// is converted to: /// /// func.func @foo(%arg0 : !cc.stdvec>) { /// %0 = quake.alloca !quake.veq<2> @@ -61,127 +61,114 @@ using namespace mlir; /// quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () /// return /// } -/// -/// Note: the following synthesis and const prop passes will replace -/// the argument by a constant and propagate the values and vector size -/// through other instructions. namespace { -template -concept IntegralType = - std::is_same::value || std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value; - -template -concept FloatingType = std::is_same::value; - -template -concept DoubleType = std::is_same::value; - -template -concept ComplexDataType = FloatingType || DoubleType || IntegralType; - -/// Input was complex but we prefer -/// complex. Make a copy, extending the values. 
-template -std::vector> convertToComplex(std::complex *data, - std::uint64_t size) { - auto convertData = std::vector>(size); - for (std::size_t i = 0; i < size; ++i) - convertData[i] = std::complex{static_cast(data[i].real()), - static_cast(data[i].imag())}; - return convertData; -} - -template -std::vector> convertToComplex(std::complex *data, - std::uint64_t size) { - return std::vector>(data, data + size); -} - -/// Input was float/double but we prefer complex. -/// Make a copy, extending or truncating the values. -template -std::vector> convertToComplex(From *data, - std::uint64_t size) { - auto convertData = std::vector>(size); - for (std::size_t i = 0; i < size; ++i) - convertData[i] = std::complex{static_cast(data[i]), - static_cast(0.0)}; - return convertData; -} - -LogicalResult -prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, - unsigned &counter, BlockArgument argument, - std::vector> &vec) { - auto *ctx = builder.getContext(); - auto argLoc = argument.getLoc(); - - auto toErase = std::vector(); - - for (auto *argUser : argument.getUsers()) { - // Handle the `StdvecSize` and `quake.alloca` use case: - // - Replace a `vec.size()` with the vector length. - // - Replace the number of qubits calculation with the vector length - // logarithm. - // - Replace `quake.alloca` with a constant size qvector allocation. 
- if (auto stdvecSizeOp = dyn_cast(argUser)) { - builder.setInsertionPointAfter(stdvecSizeOp); - Value length = builder.create( - argLoc, vec.size(), stdvecSizeOp.getType()); - - Value numQubits = builder.create( - argLoc, log2(vec.size()), stdvecSizeOp.getType()); - - for (auto *sizeUser : argUser->getUsers()) { - if (auto countZeroesOp = - dyn_cast(sizeUser)) { - for (auto *numQubitsUser : sizeUser->getUsers()) { - if (auto quakeAllocaOp = dyn_cast(numQubitsUser)) { - builder.setInsertionPointAfter(quakeAllocaOp); - auto veqTy = quake::VeqType::get(ctx, log2(vec.size())); - Value newAlloc = builder.create(argLoc, veqTy); - quakeAllocaOp.replaceAllUsesWith(newAlloc); - toErase.push_back(quakeAllocaOp); - } +std::vector> +readConstantArray(mlir::OpBuilder &builder, cudaq::cc::GlobalOp &global) { + std::vector> result{}; + + auto attr = global.getValue(); + auto type = global.getType().getElementType(); + + if (auto arrayTy = dyn_cast(type)) { + auto eleTy = arrayTy.getElementType(); + + if (attr.has_value()) { + if (auto elementsAttr = dyn_cast(attr.value())) { + auto eleTy = elementsAttr.getElementType(); + if (isa(eleTy)) { + auto values = elementsAttr.getValues(); + for (auto it = values.begin(); it != values.end(); ++it) { + auto valueAttr = *it; + auto real = + cast(valueAttr[0]).getValue().convertToDouble(); + auto imag = + cast(valueAttr[1]).getValue().convertToDouble(); + result.push_back({real, imag}); } - countZeroesOp.replaceAllUsesWith(numQubits); - toErase.push_back(countZeroesOp); + } else { + auto values = elementsAttr.getValues(); + for (auto it = values.begin(); it != values.end(); ++it) { + result.push_back({*it, 0.0}); + } + } + } else if (auto values = dyn_cast(attr.value())) { + for (auto it = values.begin(); it != values.end(); ++it) { + auto real = *it; + // for (std::size_t idx = 0; idx < numConstants; idx += isComplex ? 
2 + // : 1) { + auto v = [&]() -> std::complex { + if (isa(eleTy)) + return {cast(real).getValue().convertToDouble(), + static_cast(0.0)}; + if (isa(eleTy)) + return {static_cast(cast(real).getInt()), + static_cast(0.0)}; + assert(isa(eleTy)); + it++; + auto imag = *it; + return {cast(real).getValue().convertToDouble(), + cast(imag).getValue().convertToDouble()}; + }(); + + result.push_back(v); } } - - stdvecSizeOp.replaceAllUsesWith(length); - toErase.push_back(stdvecSizeOp); - continue; } + } - // Handle the `StdvecDataOp` and `quake.init_state` use case: - // - Replace a `quake.init_state` with gates preparing the state. - if (auto stdvecDataOp = dyn_cast(argUser)) { - for (auto *dataUser : stdvecDataOp->getUsers()) { - if (auto initOp = dyn_cast(dataUser)) { - builder.setInsertionPointAfter(initOp); - // Find the qvector alloc instruction - auto qubits = initOp.getOperand(0); - - // Prepare state from vector data. - auto gateBuilder = StateGateBuilder(builder, argLoc, qubits); - auto decomposer = StateDecomposer(gateBuilder, vec); - decomposer.decompose(); + return result; +} - initOp.replaceAllUsesWith(qubits); - toErase.push_back(initOp); +LogicalResult transform(OpBuilder &builder, ModuleOp module) { + auto toErase = std::vector(); + module->walk([&](Operation *op) { + if (auto initOp = dyn_cast(op)) { + toErase.push_back(initOp); + auto loc = op->getLoc(); + builder.setInsertionPointAfter(initOp); + // Find the qvector alloc. + auto qubits = initOp.getOperand(0); + if (auto alloc = dyn_cast(qubits.getDefiningOp())) { + + // Find vector data. + auto data = initOp.getOperand(1); + if (auto cast = dyn_cast(data.getDefiningOp())) { + data = cast.getOperand(); + toErase.push_back(cast); + } + if (auto addr = + dyn_cast(data.getDefiningOp())) { + + auto globalName = addr.getGlobalName(); + auto symbol = module.lookupSymbol(globalName); + if (auto global = dyn_cast(symbol)) { + // Read state initialization data from the global array. 
+ auto vec = readConstantArray(builder, global); + + // Prepare state from vector data. + auto gateBuilder = StateGateBuilder(builder, loc, qubits); + auto decomposer = StateDecomposer(gateBuilder, vec); + decomposer.decompose(); + + initOp.replaceAllUsesWith(qubits); + toErase.push_back(addr); + toErase.push_back(global); + } } } } - } + }); for (auto &op : toErase) { - op->erase(); + if (op->getUses().empty()) { + op->erase(); + } else { + module.emitOpError("StatePreparation failed to remove quake.init_state " + "or its dependencies."); + return failure(); + } } return success(); @@ -192,52 +179,14 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { // The name of the kernel to be synthesized std::string kernelName; - // The raw pointer to the runtime arguments. - void *args; - public: StatePreparation() = default; - StatePreparation(std::string_view kernel, void *a) - : kernelName(kernel), args(a) {} + StatePreparation(std::string_view kernel) : kernelName(kernel) {} mlir::ModuleOp getModule() { return getOperation(); } - std::pair> - getTargetLayout(FunctionType funcTy) { - auto bufferTy = cudaq::opt::factory::buildInvokeStructType(funcTy); - StringRef dataLayoutSpec = ""; - if (auto attr = - getModule()->getAttr(cudaq::opt::factory::targetDataLayoutAttrName)) - dataLayoutSpec = cast(attr); - auto dataLayout = llvm::DataLayout(dataLayoutSpec); - // Convert bufferTy to llvm. 
- llvm::LLVMContext context; - LLVMTypeConverter converter(funcTy.getContext()); - cudaq::opt::initializeTypeConversions(converter); - auto llvmDialectTy = converter.convertType(bufferTy); - LLVM::TypeToLLVMIRTranslator translator(context); - auto *llvmStructTy = - cast(translator.translateType(llvmDialectTy)); - auto *layout = dataLayout.getStructLayout(llvmStructTy); - auto strSize = layout->getSizeInBytes(); - std::vector fieldOffsets; - for (std::size_t i = 0, I = bufferTy.getMembers().size(); i != I; ++i) - fieldOffsets.emplace_back(layout->getElementOffset(i)); - return {strSize, fieldOffsets}; - } - void runOnOperation() override final { auto module = getModule(); - unsigned counter = 0; - - if (args == nullptr || kernelName.empty()) { - module.emitOpError( - "State preparation requires a kernel and the values of the " - "arguments passed when it is called."); - signalPassFailure(); - return; - } - auto kernelNameInQuake = cudaq::runtime::cudaqGenPrefixName + kernelName; // Get the function we care about (the one with kernelName) auto funcOp = module.lookupSymbol(kernelNameInQuake); @@ -248,112 +197,12 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { return; } - // Create the builder. auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); - auto arguments = funcOp.getArguments(); - auto structLayout = getTargetLayout(funcOp.getFunctionType()); - // Keep track of the stdVec sizes. - std::vector> stdVecInfo; - - for (auto iter : llvm::enumerate(arguments)) { - auto argNum = iter.index(); - auto argument = iter.value(); - std::size_t offset = structLayout.second[argNum]; - - // Get the argument type - auto type = argument.getType(); - - if (auto ptrTy = dyn_cast(type)) { - if (isa(ptrTy.getElementType())) { - funcOp.emitOpError( - "State preparation from cudaq::state is not supported."); - return; - } - } - - // If std::vector type, add it to the list of vector info. - // These will be processed when we reach the buffer's appendix. 
- if (auto vecTy = dyn_cast(type)) { - auto eleTy = vecTy.getElementType(); - if (!isa(eleTy)) { - continue; - } - char *ptrToSizeInBuffer = static_cast(args) + offset; - auto sizeFromBuffer = - *reinterpret_cast(ptrToSizeInBuffer); - unsigned bytesInType = [&eleTy]() { - if (auto complexTy = dyn_cast(eleTy)) - return 2 * cudaq::opt::convertBitsToBytes( - complexTy.getElementType().getIntOrFloatBitWidth()); - return cudaq::opt::convertBitsToBytes(eleTy.getIntOrFloatBitWidth()); - }(); - assert(bytesInType > 0 && "element must have a size"); - auto vectorSize = sizeFromBuffer / bytesInType; - stdVecInfo.emplace_back(argNum, eleTy, vectorSize); - continue; - } - } - - // For any `std::vector` arguments, we now know the sizes so let's replace - // the block arg with the actual vector element data. First get the pointer - // to the start of the buffer's appendix. - auto structSize = structLayout.first; - char *bufferAppendix = static_cast(args) + structSize; - for (auto [idx, eleTy, vecLength] : stdVecInfo) { - if (!eleTy) { - bufferAppendix += vecLength; - continue; - } - auto doVector = [&](T) { - auto *ptr = reinterpret_cast(bufferAppendix); - auto v = convertToComplex(ptr, vecLength); - if (failed(prepareStateFromVectorArgument(builder, module, counter, - arguments[idx], v))) - funcOp.emitOpError("state preparation failed for vector"); - bufferAppendix += vecLength * sizeof(T); - }; - if (auto ty = dyn_cast(eleTy)) { - switch (ty.getIntOrFloatBitWidth()) { - case 1: - doVector(false); - break; - case 8: - doVector(std::int8_t{}); - break; - case 16: - doVector(std::int16_t{}); - break; - case 32: - doVector(std::int32_t{}); - break; - case 64: - doVector(std::int64_t{}); - break; - default: - bufferAppendix += vecLength * cudaq::opt::convertBitsToBytes( - ty.getIntOrFloatBitWidth()); - funcOp.emitOpError( - "state preparation failed for vector."); - break; - } - continue; - } - if (eleTy == builder.getF32Type()) { - doVector(float{}); - continue; - } - if (eleTy == 
builder.getF64Type()) { - doVector(double{}); - continue; - } - if (eleTy == ComplexType::get(builder.getF32Type())) { - doVector(std::complex{}); - continue; - } - if (eleTy == ComplexType::get(builder.getF64Type())) { - doVector(std::complex{}); - continue; - } + auto result = transform(builder, module); + if (result.failed()) { + module.emitOpError("Failed to prepare state for '" + kernelName); + signalPassFailure(); + return; } } }; @@ -365,6 +214,6 @@ std::unique_ptr cudaq::opt::createStatePreparation() { } std::unique_ptr -cudaq::opt::createStatePreparation(std::string_view kernelName, void *a) { - return std::make_unique(kernelName, a); +cudaq::opt::createStatePreparation(std::string_view kernelName) { + return std::make_unique(kernelName); } diff --git a/lib/Optimizer/Transforms/StatePreparation2.cpp b/lib/Optimizer/Transforms/StatePreparation2.cpp deleted file mode 100644 index a8047821a0..0000000000 --- a/lib/Optimizer/Transforms/StatePreparation2.cpp +++ /dev/null @@ -1,304 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. 
* - ******************************************************************************/ - -#include "PassDetails.h" -#include "StateDecomposer.h" -#include "cudaq/Optimizer/Builder/Runtime.h" -#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" -#include "cudaq/Optimizer/Transforms/Passes.h" -#include "llvm/Support/Debug.h" -#include "mlir/Conversion/LLVMCommon/TypeConverter.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Complex/IR/Complex.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/Math/IR/Math.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Target/LLVMIR/TypeToLLVM.h" -#include "mlir/Transforms/DialectConversion.h" -#include "mlir/Transforms/RegionUtils.h" -#include - -#include - -#define DEBUG_TYPE "state-preparation2" - -using namespace mlir; - -/// Replace a qubit initialization from vectors with quantum gates. 
-/// For example: -/// -/// func.func @__nvqpp__mlirgen__function_test._Z4testSt6vectorISt7complexIfESaIS1_EE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { -/// %0 = cc.address_of @__nvqpp_rodata_init_state.0 : !cc.ptr x 4>> -/// %1 = cc.cast %0 : (!cc.ptr x 4>>) -> !cc.ptr> -/// %2 = quake.alloca !quake.veq<2> -/// %3 = quake.init_state %2, %1 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> -/// return -/// } -/// -/// is converted to: -/// -/// func.func @foo(%arg0 : !cc.stdvec>) { -/// %0 = quake.alloca !quake.veq<2> -/// %c0_i64 = arith.constant 0 : i64 -/// %1 = quake.extract_ref %0[%c0_i64] : (!quake.veq<2>, i64) -> !quake.ref -/// %cst = arith.constant 1.5707963267948968 : f64 -/// quake.ry (%cst) %1 : (f64, !quake.ref) -> () -/// %c1_i64 = arith.constant 1 : i64 -/// %2 = quake.extract_ref %0[%c1_i64] : (!quake.veq<2>, i64) -> !quake.ref -/// %cst_0 = arith.constant 1.5707963267948966 : f64 -/// quake.ry (%cst_0) %2 : (f64, !quake.ref) -> () -/// quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () -/// %cst_1 = arith.constant -1.5707963267948966 : f64 -/// quake.ry (%cst_1) %2 : (f64, !quake.ref) -> () -/// quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () -/// return -/// } -/// -/// Note: the following synthesis and const prop passes will replace -/// the argument by a constant and propagate the values and vector size -/// through other instructions. - -namespace { - -template -concept IntegralType = - std::is_same::value || std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value; - -template -concept FloatingType = std::is_same::value; - -template -concept DoubleType = std::is_same::value; - -template -concept ComplexDataType = FloatingType || DoubleType || IntegralType; - -/// Input was complex but we prefer -/// complex. Make a copy, extending the values. 
-template -std::vector> convertToComplex(std::complex *data, - std::uint64_t size) { - auto convertData = std::vector>(size); - for (std::size_t i = 0; i < size; ++i) - convertData[i] = std::complex{static_cast(data[i].real()), - static_cast(data[i].imag())}; - return convertData; -} - -template -std::vector> convertToComplex(std::complex *data, - std::uint64_t size) { - return std::vector>(data, data + size); -} - -/// Input was float/double but we prefer complex. -/// Make a copy, extending or truncating the values. -template -std::vector> convertToComplex(From *data, - std::uint64_t size) { - auto convertData = std::vector>(size); - for (std::size_t i = 0; i < size; ++i) - convertData[i] = std::complex{static_cast(data[i]), - static_cast(0.0)}; - return convertData; -} - -std::vector> readConstantArray(mlir::OpBuilder &builder, cudaq::cc::GlobalOp &global) { - std::vector> result{}; - - auto attr = global.getValue(); - auto type = global.getType().getElementType(); - - if (auto arrayTy = dyn_cast(type)) { - auto eleTy = arrayTy.getElementType(); - std::cout << "Attribute element type:" << std::endl; - eleTy.dump(); - - if (attr.has_value()) { - // auto tensorTy = RankedTensorType::get(size, eleTy); - // auto f64Attr = DenseElementsAttr::get(tensorTy, values); - if (auto elementsAttr = dyn_cast(attr.value())) { - auto values = elementsAttr.getValues(); - for (auto it = values.begin(); it != values.end(); ++it) { - result.push_back({*it, 0.0}); - } - } - - else if (auto values = dyn_cast(attr.value())) { - for (auto it = values.begin(); it != values.end(); ++it) { - auto real = *it; - // for (std::size_t idx = 0; idx < numConstants; idx += isComplex ? 
2 : 1) { - auto v = [&]() -> std::complex { - //auto val = constantValues[idx]; - - if (isa(eleTy)) - return { - cast(real).getValue().convertToDouble(), - static_cast(0.0) - }; - if (isa(eleTy)) - return { - static_cast(cast(real).getInt()), - static_cast(0.0) - }; - assert(isa(eleTy)); - it++; - auto imag = *it; - return { - cast(real).getValue().convertToDouble(), - cast(imag).getValue().convertToDouble() - }; - }(); - - result.push_back(v); - } - } - } - } - - std::cout << "Results (" << result.size() << "):" << std::endl; - for (auto &r: result) { - std::cout << r << ", " << std::endl; - } - return result; -} - -LogicalResult -transform(OpBuilder &builder, ModuleOp module) { - //auto *ctx = builder.getContext(); - - auto toErase = std::vector(); - -// Module after everything -// module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.triple = "x86_64-unknown-linux-gnu", quake.mangled_name_map = {__nvqpp__mlirgen__function_test._Z4testSt6vectorISt7complexIfESaIS1_EE = "_Z4testSt6vectorISt7complexIfESaIS1_EE"}} { -// func.func @__nvqpp__mlirgen__function_test._Z4testSt6vectorISt7complexIfESaIS1_EE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { -// %0 = cc.address_of @__nvqpp_rodata_init_state.0 : !cc.ptr x 4>> -// %1 = cc.cast %0 : (!cc.ptr x 4>>) -> !cc.ptr> -// %2 = quake.alloca !quake.veq<2> -// %3 = quake.init_state %2, %1 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> -// return -// } -// cc.global constant @__nvqpp_rodata_init_state.0 ([0.707106769 : f32, 0.000000e+00 : f32, 0.707106769 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32]) : !cc.array x 4> -// } - -// func.func @__nvqpp__mlirgen__function_f._Z1fv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { -// %0 = cc.address_of @__nvqpp__rodata_init_0 : !cc.ptr> -// %1 = quake.alloca !quake.veq<2> -// %2 = quake.init_state %1, %0 : (!quake.veq<2>, !cc.ptr>) -> 
!quake.veq<2> -// quake.dealloc %2 : !quake.veq<2> -// return -// } - - - module->walk([&](Operation *op) { - if (auto initOp = dyn_cast(op)) { - toErase.push_back(initOp); - auto loc = op->getLoc(); - builder.setInsertionPointAfter(initOp); - // Find the qvector alloc. - auto qubits = initOp.getOperand(0); - if (auto alloc = dyn_cast(qubits.getDefiningOp())) { - - // Find vector data. - auto data = initOp.getOperand(1); - if (auto cast = dyn_cast(data.getDefiningOp())) { - data = cast.getOperand(); - toErase.push_back(cast); - } - if (auto addr = dyn_cast(data.getDefiningOp())) { - - auto globalName = addr.getGlobalName(); - auto symbol = module.lookupSymbol(globalName); - if (auto global = dyn_cast(symbol)) { - // Read state initialization data from the global array. - auto vec = readConstantArray(builder, global); - - // Prepare state from vector data. - auto gateBuilder = StateGateBuilder(builder, loc, qubits); - auto decomposer = StateDecomposer(gateBuilder, vec); - decomposer.decompose(); - - initOp.replaceAllUsesWith(qubits); - toErase.push_back(addr); - toErase.push_back(global); - } - } - } - } - }); - - for (auto &op : toErase) { - op->erase(); - } - - return success(); -} - -class StatePreparation2 : public cudaq::opt::PrepareState2Base { -protected: - // The name of the kernel to be synthesized - std::string kernelName; - - // The raw pointer to the runtime arguments. 
- void *args; - -public: - StatePreparation2() = default; - StatePreparation2(std::string_view kernel, void *a) - : kernelName(kernel), args(a) {} - - mlir::ModuleOp getModule() { return getOperation(); } - - - void runOnOperation() override final { - auto module = getModule(); - - std::cout << "Module before state prep2" << std::endl; - module.dump(); - - auto kernelNameInQuake = cudaq::runtime::cudaqGenPrefixName + kernelName; - // Get the function we care about (the one with kernelName) - auto funcOp = module.lookupSymbol(kernelNameInQuake); - if (!funcOp) { - module.emitOpError("The kernel '" + kernelName + - "' was not found in the module."); - signalPassFailure(); - return; - } - - // Create the builder. - auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); - - auto result = transform(builder, module); - if (result.failed()) { - module.emitOpError("Failed to prepare state for '" + kernelName); - signalPassFailure(); - return; - } - - std::cout << "Module after state prep2" << std::endl; - module.dump(); - } -}; - -} // namespace - -std::unique_ptr cudaq::opt::createStatePreparation2() { - return std::make_unique(); -} - -std::unique_ptr -cudaq::opt::createStatePreparation2(std::string_view kernelName, void *a) { - return std::make_unique(kernelName, a); -} diff --git a/program.py b/program.py deleted file mode 100644 index 92321a755a..0000000000 --- a/program.py +++ /dev/null @@ -1,23 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # -# All rights reserved. # -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # - -import cudaq -import numpy as np - -c = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
- -@cudaq.kernel -def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - -synthesized = cudaq.synthesize(kernel, c) -print(synthesized) - -counts = cudaq.sample(synthesized) -assert '00' in counts -assert '10' in counts \ No newline at end of file diff --git a/python/cudaq/kernel/ast_bridge.py b/python/cudaq/kernel/ast_bridge.py index 1b1fed2a83..3b35b4dd0d 100644 --- a/python/cudaq/kernel/ast_bridge.py +++ b/python/cudaq/kernel/ast_bridge.py @@ -539,18 +539,18 @@ def __copyVectorAndCastElements(self, source, targetEleType): if (sourceEleType == targetEleType): return sourcePtr - sourceArrEleTy = cc.ArrayType.get(self.ctx, sourceEleType) + sourceArrTy = cc.ArrayType.get(self.ctx, sourceEleType) sourceElePtrTy = cc.PointerType.get(self.ctx, sourceEleType) - sourceArrElePtrTy = cc.PointerType.get(self.ctx, sourceArrType) + sourceArrPtrTy = cc.PointerType.get(self.ctx, sourceArrTy) sourceValue = self.ifPointerThenLoad(sourcePtr) - sourceDataPtr = cc.StdvecDataOp(sourceArrElePtrTy, sourceValue).result + sourceDataPtr = cc.StdvecDataOp(sourceArrPtrTy, sourceValue).result sourceSize = cc.StdvecSizeOp(self.getIntegerType(), sourceValue).result targetElePtrType = cc.PointerType.get(self.ctx, targetEleType) targetTy = cc.ArrayType.get(self.ctx, targetEleType) - targetArrElePtrTy = cc.PointerType.get(self.ctx, targetTy) + targetArrPtrTy = cc.PointerType.get(self.ctx, targetTy) targetVecTy = cc.StdvecType.get(self.ctx, targetEleType) - targetPtr = cc.AllocaOp(targetArrElePtr, + targetPtr = cc.AllocaOp(targetArrPtrTy, TypeAttr.get(targetEleType), seqSize=sourceSize).result diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 93bfd8a295..a937b4acc8 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -474,12 +474,11 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, 
registerLLVMDialectTranslation(*context); PassManager pm(context); - //pm.addPass(createCanonicalizerPass()); - //pm.addPass(cudaq::opt::createStatePreparation(name, rawArgs)); pm.addPass(cudaq::opt::createQuakeSynthesizer(name, rawArgs)); pm.addPass(createCanonicalizerPass()); pm.addPass(createCSEPass()); - pm.addPass(cudaq::opt::createStatePreparation2(name, rawArgs)); + pm.addPass(cudaq::opt::createLiftArrayAllocPass()); + pm.addPass(cudaq::opt::createStatePreparation(name)); pm.addPass(createCanonicalizerPass()); pm.addPass(cudaq::opt::createExpandMeasurementsPass()); pm.addNestedPass(cudaq::opt::createClassicalMemToReg()); diff --git a/python/tests/kernel/test_kernel_qvector_init.py b/python/tests/kernel/test_kernel_qvector_init.py index 28260dcb4d..0323d13f99 100644 --- a/python/tests/kernel/test_kernel_qvector_init.py +++ b/python/tests/kernel/test_kernel_qvector_init.py @@ -66,27 +66,9 @@ def kernel(vec: list[float]): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_float_capture_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - f = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(): - q = cudaq.qvector(f) - - counts = cudaq.sample(kernel) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_float_capture_f32(): +@skipIfPythonLessThan39 +def test_kernel_float_capture(): cudaq.reset_target() - cudaq.set_target('nvidia') f = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] @@ -100,27 +82,9 @@ def kernel(): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_float_np_array_from_capture_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - f = [1. / np.sqrt(2.), 0., 0., 1. 
/ np.sqrt(2.)] - - @cudaq.kernel - def kernel(): - q = cudaq.qvector(np.array(f)) - - counts = cudaq.sample(kernel) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_float_np_array_from_capture_f32(): +@skipIfPythonLessThan39 +def test_kernel_float_np_array_from_capture(): cudaq.reset_target() - cudaq.set_target('nvidia') f = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] @@ -134,25 +98,9 @@ def kernel(): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_float_definition_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - @cudaq.kernel - def kernel(): - q = cudaq.qvector([1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)]) - - counts = cudaq.sample(kernel) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_float_definition_f32(): +@skipIfPythonLessThan39 +def test_kernel_float_definition(): cudaq.reset_target() - cudaq.set_target('nvidia') @cudaq.kernel def kernel(): @@ -205,27 +153,9 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_complex_capture_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(): - q = cudaq.qvector(c) - - counts = cudaq.sample(kernel) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_complex_capture_f32(): +@skipIfPythonLessThan39 +def test_kernel_complex_capture(): cudaq.reset_target() - cudaq.set_target('nvidia') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -239,27 +169,10 @@ def kernel(): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_complex_np_array_from_capture_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. 
/ np.sqrt(2.)] - - @cudaq.kernel - def kernel(): - q = cudaq.qvector(np.array(c)) - counts = cudaq.sample(kernel) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_complex_np_array_from_capture_f32(): +@skipIfPythonLessThan39 +def test_kernel_complex_np_array_from_capture(): cudaq.reset_target() - cudaq.set_target('nvidia') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -273,25 +186,10 @@ def kernel(): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_complex_definition_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - @cudaq.kernel - def kernel(): - q = cudaq.qvector([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)]) - counts = cudaq.sample(kernel) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_complex_definition_f32(): +@skipIfPythonLessThan39 +def test_kernel_complex_definition(): cudaq.reset_target() - cudaq.set_target('nvidia') @cudaq.kernel def kernel(): @@ -362,7 +260,7 @@ def kernel(vec: list[complex]): def test_kernel_simulation_dtype_complex_params_f64(): cudaq.reset_target() cudaq.set_target('nvidia-fp64') - + c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @cudaq.kernel @@ -428,7 +326,7 @@ def kernel(vec: list[complex]): def test_kernel_simulation_dtype_np_array_from_capture_f64(): cudaq.reset_target() cudaq.set_target('nvidia-fp64') - + c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @cudaq.kernel @@ -458,29 +356,9 @@ def kernel(): assert '00' in counts -@skipIfNvidiaFP64NotInstalled +@skipIfPythonLessThan39 def test_kernel_simulation_dtype_np_array_capture_f64(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. 
/ np.sqrt(2.)] - - state = np.array(c, dtype=cudaq.complex()) - - @cudaq.kernel - def kernel(): - q = cudaq.qvector(state) - - counts = cudaq.sample(kernel) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_simulation_dtype_np_array_capture_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index e05a32bf13..b598da28ed 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -381,16 +381,10 @@ class BaseRemoteRESTQPU : public cudaq::QPU { auto moduleOp = builder.create(); moduleOp.push_back(func.clone()); moduleOp->setAttrs(m_module->getAttrDictionary()); - for (auto &op: m_module.getOps()) { - // Add globals referenced in the func. + + for (auto &op : m_module.getOps()) { if (auto globalOp = dyn_cast(op)) { - //for (auto *use: globalOp->getUsers()) { - // auto parent = use->getParentOfType(); - // std::cout << "Global " << globalOp.getName().str() << " is used in " << parent.getName().str() <disableMultithreading(); if (enablePrintMLIREachPass) pm.enableIRPrinting(); if (failed(pm.run(moduleOp))) throw std::runtime_error("Could not successfully apply state prep."); - //} - - std::cout << "Module after state prep" << std::endl; - moduleOp.dump(); - - runPassPipeline("canonicalize,cse", moduleOp); - std::cout << "Module after state prep and cse" << std::endl; - moduleOp.dump(); + } - // Run the config-specified pass pipeline - //runPassPipeline("cc-loop-unroll{allow-early-exit=1},canonicalize,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition)", moduleOp); - // 
runPassPipeline("cc-loop-unroll{allow-early-exit=1},canonicalize,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),quantinuum-gate-set-mapping", moduleOp); runPassPipeline(passPipelineConfig, moduleOp); - std::cout << "Module after state prep and pipeline" << std::endl; - moduleOp.dump(); - auto entryPointFunc = moduleOp.lookupSymbol( std::string("__nvqpp__mlirgen__") + kernelName); std::vector mapping_reorder_idx; @@ -531,8 +503,6 @@ class BaseRemoteRESTQPU : public cudaq::QPU { // and use that for execution for (auto &[name, module] : modules) { auto clonedModule = module.clone(); - std::cout << "Module after everything" << std::endl; - clonedModule.dump(); jitEngines.emplace_back( cudaq::createQIRJITEngine(clonedModule, codegenTranslation)); } diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index 1d6f40e8a7..db1288caca 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -153,13 +153,24 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { if (args) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&mlirContext); - pm.addPass(cudaq::opt::createStatePreparation(name, args)); pm.addPass(cudaq::opt::createQuakeSynthesizer(name, args)); pm.addPass(mlir::createCanonicalizerPass()); if (failed(pm.run(moduleOp))) throw std::runtime_error("Could not successfully apply quake-synth."); } + { + cudaq::info("Run State Prep.\n"); + mlir::PassManager pm(&mlirContext); + pm.addPass(mlir::createCanonicalizerPass()); + pm.addPass(mlir::createCSEPass()); + pm.addPass(cudaq::opt::createLiftArrayAllocPass()); + pm.addPass(cudaq::opt::createStatePreparation(name)); + pm.addPass(mlir::createCanonicalizerPass()); + if (failed(pm.run(moduleOp))) + throw std::runtime_error("Could not successfully apply state-prep."); + } + // Run client-side passes. 
`clientPasses` is empty right now, but the code // below accommodates putting passes into it. mlir::PassManager pm(&mlirContext); diff --git a/runtime/common/RuntimeMLIRCommonImpl.h b/runtime/common/RuntimeMLIRCommonImpl.h index 91722e1751..e3661744f3 100644 --- a/runtime/common/RuntimeMLIRCommonImpl.h +++ b/runtime/common/RuntimeMLIRCommonImpl.h @@ -40,8 +40,6 @@ #include "mlir/Target/LLVMIR/Export.h" #include "mlir/Tools/ParseUtilities.h" -#include - namespace cudaq { bool setupTargetTriple(llvm::Module *llvmModule) { @@ -372,10 +370,6 @@ qirProfileTranslationFunction(const char *qirProfile, mlir::Operation *op, if (printIntermediateMLIR) pm.enableIRPrinting(); - std::cout << "qirProfileTranslationFunction" << std::endl; - pm.enableIRPrinting(); - context->disableMultithreading(); - std::string errMsg; llvm::raw_string_ostream errOs(errMsg); cudaq::opt::addPipelineConvertToQIR(pm, qirProfile); @@ -582,9 +576,6 @@ mlir::ExecutionEngine *createQIRJITEngine(mlir::ModuleOp &moduleOp, tm.setEnabled(cudaq::isTimingTagEnabled(cudaq::TIMING_JIT_PASSES)); auto timingScope = tm.getRootScope(); // starts the timer pm.enableTiming(timingScope); // do this right before pm.run - std::cout << "Common IR" << std::endl; - context->disableMultithreading(); - pm.enableIRPrinting(); if (failed(pm.run(module))) throw std::runtime_error( "[createQIRJITEngine] Lowering to QIR for remote emulation failed."); diff --git a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp index 39602a6fba..f8318e1dec 100644 --- a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp +++ b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp @@ -36,8 +36,6 @@ class RemoteRESTQPU : public cudaq::BaseRemoteRESTQPU { // Get the quake representation of the kernel auto quakeCode = cudaq::get_quake_by_name(kernelName); - std::cout << "extractQuakeCodeAndContext" << quakeCode << std::endl; - auto m_module = parseSourceString(quakeCode, &context); if 
(!m_module) throw std::runtime_error("module cannot be parsed"); diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index 1a96b3e881..886c3d92b8 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -11,12 +11,28 @@ #include #include -__qpu__ void f() { - cudaq::qvector v = { static_cast(1.0), static_cast(2.0), static_cast(3.0), static_cast(4.0)}; - // cudaq::qvector v = { 1.0, 2.0, 3.0, 4.0}; +__qpu__ void test_complex_constant_array() { + cudaq::qvector v(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); } -__qpu__ void test(std::vector inState) { +__qpu__ void test_complex_constant_array2() { + cudaq::qvector v({ + cudaq::complex(M_SQRT1_2), + cudaq::complex(M_SQRT1_2), + cudaq::complex(0.0), + cudaq::complex(0.0) + }); +} + +__qpu__ void test_real_constant_array() { + cudaq::qvector v({ M_SQRT1_2, M_SQRT1_2, 0., 0.}); +} + +__qpu__ void test_complex_array_param(std::vector inState) { + cudaq::qvector q1 = inState; +} + +__qpu__ void test_real_array_param(std::vector inState) { cudaq::qvector q1 = inState; } @@ -33,33 +49,101 @@ void printCounts(cudaq::sample_result& result) { } int main() { - auto counts = cudaq::sample(f); - printCounts(counts); - - // std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - // std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; - // { - // // Passing state data as argument (kernel mode) - // auto counts = cudaq::sample(test, vec); - // printCounts(counts); - - // counts = cudaq::sample(test, vec1); - // printCounts(counts); - // } - - // { - // // Passing state data as argument (builder mode) - // auto [kernel, v] = cudaq::make_kernel>(); - // auto qubits = kernel.qalloc(v); - - // auto counts = cudaq::sample(kernel, vec); - // printCounts(counts); - // } + { + auto counts = cudaq::sample(test_complex_constant_array); + printCounts(counts); + } + + { + auto counts = 
cudaq::sample(test_complex_constant_array2); + printCounts(counts); + } + + { + auto counts = cudaq::sample(test_real_constant_array); + printCounts(counts); + } + + { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_complex_array_param, vec); + printCounts(counts); + + counts = cudaq::sample(test_complex_array_param, vec1); + printCounts(counts); + } + + { + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); + + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); + + counts = cudaq::sample(kernel, vec1); + printCounts(counts); + } + } + + { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_real_array_param, vec); + printCounts(counts); + + counts = cudaq::sample(test_real_array_param, vec1); + printCounts(counts); + } + + { + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); + + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); + + counts = cudaq::sample(kernel, vec1); + printCounts(counts); + } + } } +// CHECK: 00 +// CHECK: 10 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 00 +// CHECK: 10 + + +// CHECK: 00 +// CHECK: 10 +// CHECK: 01 +// CHECK: 11 + // CHECK: 00 // CHECK: 10 // CHECK: 01 // CHECK: 11 + // CHECK: 00 // CHECK: 10 +// CHECK: 01 +// CHECK: 11 + +// CHECK: 00 +// CHECK: 10 +// CHECK: 01 +// CHECK: 11 From c2431d5a17ffd216162ee9f0eb3ee1a3eae4be85 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 5 Jul 2024 09:51:22 -0700 Subject: [PATCH 15/50] Format --- python/tests/kernel/test_kernel_qvector_init.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff 
--git a/python/tests/kernel/test_kernel_qvector_init.py b/python/tests/kernel/test_kernel_qvector_init.py index 0323d13f99..3edb5ca951 100644 --- a/python/tests/kernel/test_kernel_qvector_init.py +++ b/python/tests/kernel/test_kernel_qvector_init.py @@ -169,7 +169,6 @@ def kernel(): assert '00' in counts - @skipIfPythonLessThan39 def test_kernel_complex_np_array_from_capture(): cudaq.reset_target() @@ -186,7 +185,6 @@ def kernel(): assert '00' in counts - @skipIfPythonLessThan39 def test_kernel_complex_definition(): cudaq.reset_target() @@ -260,7 +258,7 @@ def kernel(vec: list[complex]): def test_kernel_simulation_dtype_complex_params_f64(): cudaq.reset_target() cudaq.set_target('nvidia-fp64') - + c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @cudaq.kernel @@ -326,7 +324,7 @@ def kernel(vec: list[complex]): def test_kernel_simulation_dtype_np_array_from_capture_f64(): cudaq.reset_target() cudaq.set_target('nvidia-fp64') - + c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @cudaq.kernel From 8b18c672ed9f110b2bd057d25c1bb620e3c18b7f Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 10 Jul 2024 16:37:06 -0700 Subject: [PATCH 16/50] Unified common code and added tests --- include/cudaq/Optimizer/Builder/Intrinsics.h | 39 ++++- include/cudaq/Optimizer/Transforms/Passes.h | 1 - include/cudaq/Optimizer/Transforms/Passes.td | 57 +++++-- lib/Optimizer/Builder/Intrinsics.cpp | 99 +++++++++-- lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 118 +++++++------ lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 125 ++++++-------- lib/Optimizer/Transforms/StatePreparation.cpp | 157 ++++++++---------- .../cudaq/platform/py_alt_launch_kernel.cpp | 34 +++- python/tests/backends/test_IQM.py | 27 +++ python/tests/backends/test_IonQ.py | 27 +++ python/tests/backends/test_OQC.py | 27 +++ .../test_Quantinuum_LocalEmulation_builder.py | 13 ++ .../test_Quantinuum_LocalEmulation_kernel.py | 15 ++ .../tests/backends/test_Quantinuum_builder.py | 13 ++ 
.../tests/kernel/test_kernel_qvector_init.py | 19 ++- python/tests/remote/test_remote_code_exec.py | 26 +++ runtime/common/BaseRemoteRESTQPU.h | 16 -- runtime/common/BaseRestRemoteClient.h | 13 +- .../default/rest/helpers/ionq/ionq.config | 2 +- .../default/rest/helpers/iqm/iqm.config | 2 +- .../default/rest/helpers/oqc/oqc.config | 2 +- .../rest/helpers/quantinuum/quantinuum.config | 2 +- .../execution/state_preparation_vector.cpp | 35 ++-- test/Quake/lift_array.qke | 35 +++- test/Quake/lift_array_temp.qke | 50 ++++++ test/Quake/state_prep.qke | 114 +++++++++++++ 26 files changed, 764 insertions(+), 304 deletions(-) create mode 100644 test/Quake/lift_array_temp.qke create mode 100644 test/Quake/state_prep.qke diff --git a/include/cudaq/Optimizer/Builder/Intrinsics.h b/include/cudaq/Optimizer/Builder/Intrinsics.h index 4d9130504c..2413c935c7 100644 --- a/include/cudaq/Optimizer/Builder/Intrinsics.h +++ b/include/cudaq/Optimizer/Builder/Intrinsics.h @@ -86,13 +86,40 @@ class IRBuilder : public mlir::OpBuilder { } cc::GlobalOp - genVectorOfComplexConstant(mlir::Location loc, mlir::ModuleOp module, - mlir::StringRef name, - const std::vector> &values); + genVectorOfConstants(mlir::Location loc, mlir::ModuleOp module, + mlir::StringRef name, + const std::vector> &values); cc::GlobalOp - genVectorOfComplexConstant(mlir::Location loc, mlir::ModuleOp module, - mlir::StringRef name, - const std::vector> &values); + genVectorOfConstants(mlir::Location loc, mlir::ModuleOp module, + mlir::StringRef name, + const std::vector> &values); + + cc::GlobalOp genVectorOfConstants(mlir::Location loc, mlir::ModuleOp module, + mlir::StringRef name, + const std::vector &values); + cc::GlobalOp genVectorOfConstants(mlir::Location loc, mlir::ModuleOp module, + mlir::StringRef name, + const std::vector &values); + + cc::GlobalOp genVectorOfConstants(mlir::Location loc, mlir::ModuleOp module, + mlir::StringRef name, + const std::vector &values); + + cc::GlobalOp 
genVectorOfConstants(mlir::Location loc, mlir::ModuleOp module, + mlir::StringRef name, + const std::vector &values); + + cc::GlobalOp genVectorOfConstants(mlir::Location loc, mlir::ModuleOp module, + mlir::StringRef name, + const std::vector &values); + + cc::GlobalOp genVectorOfConstants(mlir::Location loc, mlir::ModuleOp module, + mlir::StringRef name, + const std::vector &values); + + cc::GlobalOp genVectorOfConstants(mlir::Location loc, mlir::ModuleOp module, + mlir::StringRef name, + const std::vector &values); /// Load an intrinsic into \p module. The intrinsic to load has name \p name. /// This will automatically load any intrinsics that \p name depends upon. diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 9431b3da0d..57b79cdec2 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -42,7 +42,6 @@ std::unique_ptr createObserveAnsatzPass(std::vector &); std::unique_ptr createQuakeAddMetadata(); std::unique_ptr createQuakeAddDeallocs(); std::unique_ptr createStatePreparation(); -std::unique_ptr createStatePreparation(std::string_view); std::unique_ptr createQuakeSynthesizer(); std::unique_ptr createQuakeSynthesizer(std::string_view, void *); std::unique_ptr createRaiseToAffinePass(); diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 0b20662af6..1a2675d482 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -532,6 +532,52 @@ def ObserveAnsatz : Pass<"observe-ansatz", "mlir::func::FuncOp"> { ]; } +def PrepareState : Pass<"state-prep", "mlir::ModuleOp"> { + let summary = + "Convert state vector data into gates"; + let description = [{ + Convert quake representation that includes qubit initialization + from data into qubit initialization using gates. 
+ + For example: + + ```mlir + module { + func.func @foo() attributes { + %0 = cc.address_of @foo.rodata_0 : !cc.ptr x 4>> + %1 = quake.alloca !quake.veq<2> + %2 = quake.init_state %1, %0 : (!quake.veq<2>, !cc.ptr x 4>>) -> !quake.veq<2> + return + } + cc.global constant @foo.rodata_0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<4xcomplex>) : !cc.array x 4> + } + ``` + Will be rewritten to: + ```mlir + module { + func.func @foo() attributes { + %0 = quake.alloca !quake.veq<2> + %c1_i64 = arith.constant 1 : i64 + %1 = quake.extract_ref %0[%c1_i64] : (!quake.veq<2>, i64) -> !quake.ref + %cst = arith.constant 0.000000e+00 : f64 + quake.ry (%cst) %1 : (f64, !quake.ref) -> () + %c0_i64 = arith.constant 0 : i64 + %2 = quake.extract_ref %0[%c0_i64] : (!quake.veq<2>, i64) -> !quake.ref + %cst_0 = arith.constant 0.78539816339744839 : f64 + quake.ry (%cst_0) %2 : (f64, !quake.ref) -> () + quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () + %cst_1 = arith.constant 0.78539816339744839 : f64 + quake.ry (%cst_1) %2 : (f64, !quake.ref) -> () + quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () + return + } + } + ``` + }]; + + let constructor = "cudaq::opt::createStatePreparation()"; +} + def PromoteRefToVeqAlloc : Pass<"promote-qubit-allocation"> { let summary = "Promote single qubit allocations."; let description = [{ @@ -572,17 +618,6 @@ def PruneCtrlRelations : Pass<"pruned-ctrl-form", "mlir::func::FuncOp"> { }]; } -def PrepareState : Pass<"state-prep", "mlir::ModuleOp"> { - let summary = - "Convert state vector data into gates"; - let description = [{ - Convert quake representation that includes qubit initialization - from data into qubit initialization using gates. 
- }]; - - let constructor = "cudaq::opt::createStatePreparation()"; -} - def QuakeSynthesize : Pass<"quake-synth", "mlir::ModuleOp"> { let summary = "Synthesize concrete quantum program from Quake code plus runtime values."; diff --git a/lib/Optimizer/Builder/Intrinsics.cpp b/lib/Optimizer/Builder/Intrinsics.cpp index e801a123f9..2b8be438fd 100644 --- a/lib/Optimizer/Builder/Intrinsics.cpp +++ b/lib/Optimizer/Builder/Intrinsics.cpp @@ -388,39 +388,102 @@ LogicalResult IRBuilder::loadIntrinsic(ModuleOp module, StringRef intrinName) { } template -cc::GlobalOp -buildVectorOfComplexConstant(Location loc, ModuleOp module, StringRef name, - const std::vector> &values, - IRBuilder &builder, Type ty) { +static std::vector asI32(const std::vector &v) { + std::vector result(v.size()); + for (auto iter : llvm::enumerate(v)) + result[iter.index()] = static_cast(iter.value()); + return result; +} + +template +DenseElementsAttr createArrayAttr(const std::vector &values, Type eleTy) { + auto newValues = ArrayRef(values.data(), values.size()); + auto tensorTy = RankedTensorType::get(values.size(), eleTy); + return DenseElementsAttr::get(tensorTy, newValues); +} + +template +cc::GlobalOp buildVectorOfConstantElements(Location loc, ModuleOp module, + StringRef name, + const std::vector &values, + IRBuilder &builder, Type eleTy) { if (auto glob = module.lookupSymbol(name)) return glob; auto *ctx = builder.getContext(); OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToEnd(module.getBody()); - auto complexTy = ComplexType::get(ty); - auto globalTy = cc::ArrayType::get(ctx, complexTy, values.size()); - SmallVector> newValues; - for (auto c : values) - newValues.emplace_back(APFloat{c.real()}, APFloat{c.imag()}); - auto tensorTy = RankedTensorType::get(values.size(), complexTy); - auto denseEleAttr = DenseElementsAttr::get(tensorTy, newValues); - return builder.create(loc, globalTy, name, denseEleAttr, + auto globalTy = cc::ArrayType::get(ctx, eleTy, values.size()); + 
+ auto arrayAttr = createArrayAttr(values, eleTy); + return builder.create(loc, globalTy, name, arrayAttr, /*constant=*/true, /*external=*/false); } -cc::GlobalOp IRBuilder::genVectorOfComplexConstant( +cc::GlobalOp IRBuilder::genVectorOfConstants( Location loc, ModuleOp module, StringRef name, const std::vector> &values) { - return buildVectorOfComplexConstant(loc, module, name, values, *this, - getF64Type()); + return buildVectorOfConstantElements(loc, module, name, values, *this, + ComplexType::get(getF64Type())); } -cc::GlobalOp IRBuilder::genVectorOfComplexConstant( +cc::GlobalOp IRBuilder::genVectorOfConstants( Location loc, ModuleOp module, StringRef name, const std::vector> &values) { - return buildVectorOfComplexConstant(loc, module, name, values, *this, - getF32Type()); + return buildVectorOfConstantElements(loc, module, name, values, *this, + ComplexType::get(getF32Type())); +} + +cc::GlobalOp +IRBuilder::genVectorOfConstants(Location loc, ModuleOp module, StringRef name, + const std::vector &values) { + return buildVectorOfConstantElements(loc, module, name, values, *this, + getF64Type()); +} + +cc::GlobalOp IRBuilder::genVectorOfConstants(Location loc, ModuleOp module, + StringRef name, + const std::vector &values) { + return buildVectorOfConstantElements(loc, module, name, values, *this, + getF32Type()); +} + +cc::GlobalOp +IRBuilder::genVectorOfConstants(Location loc, ModuleOp module, StringRef name, + const std::vector &values) { + return buildVectorOfConstantElements(loc, module, name, values, *this, + getI64Type()); +} + +cc::GlobalOp +IRBuilder::genVectorOfConstants(Location loc, ModuleOp module, StringRef name, + const std::vector &values) { + return buildVectorOfConstantElements(loc, module, name, values, *this, + getI32Type()); +} + +cc::GlobalOp +IRBuilder::genVectorOfConstants(Location loc, ModuleOp module, StringRef name, + const std::vector &values) { + auto converted = asI32(values); + return buildVectorOfConstantElements(loc, module, 
name, values, *this, + getI32Type()); +} + +cc::GlobalOp +IRBuilder::genVectorOfConstants(Location loc, ModuleOp module, StringRef name, + const std::vector &values) { + auto converted = asI32(values); + return buildVectorOfConstantElements(loc, module, name, values, *this, + getI32Type()); +} + +cc::GlobalOp IRBuilder::genVectorOfConstants(Location loc, ModuleOp module, + StringRef name, + const std::vector &values) { + auto converted = asI32(values); + return buildVectorOfConstantElements(loc, module, name, converted, *this, + getI32Type()); } Value IRBuilder::getByteSizeOfType(Location loc, Type ty) { diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index d4c0c335ed..d541edcacb 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -27,6 +27,64 @@ namespace cudaq::opt { using namespace mlir; +namespace { +template +std::vector readConstantValues(SmallVectorImpl &vec, Type eleTy) { + std::vector result; + for (auto a : vec) { + if constexpr (std::is_same_v>) { + auto v = cast(a); + result.emplace_back(cast(v[0]).getValue().convertToDouble(), + cast(v[1]).getValue().convertToDouble()); + } else if constexpr (std::is_same_v>) { + auto v = cast(a); + result.emplace_back(cast(v[0]).getValue().convertToFloat(), + cast(v[1]).getValue().convertToFloat()); + } else if constexpr (std::is_same_v) { + auto v = cast(a); + result.emplace_back(v.getValue().convertToDouble()); + } else if constexpr (std::is_same_v) { + auto v = cast(a); + result.emplace_back(v.getValue().convertToFloat()); + } else { + assert(false && "unexpected type in constant array"); + } + } + return result; +} + +void genVectorOfConstantsFromAttributes(cudaq::IRBuilder irBuilder, + Location loc, ModuleOp module, + StringRef name, + SmallVector &values, + Type eleTy) { + + if (auto cTy = dyn_cast(eleTy)) { + auto floatTy = cTy.getElementType(); + if (floatTy == irBuilder.getF64Type()) { + auto vals 
= readConstantValues>(values, cTy); + irBuilder.genVectorOfConstants(loc, module, name, vals); + return; + } else if (floatTy == irBuilder.getF32Type()) { + auto vals = readConstantValues>(values, cTy); + irBuilder.genVectorOfConstants(loc, module, name, vals); + return; + } + } else if (auto floatTy = dyn_cast(eleTy)) { + if (floatTy == irBuilder.getF64Type()) { + auto vals = readConstantValues(values, floatTy); + irBuilder.genVectorOfConstants(loc, module, name, vals); + return; + } else if (floatTy == irBuilder.getF32Type()) { + auto vals = readConstantValues(values, floatTy); + irBuilder.genVectorOfConstants(loc, module, name, vals); + return; + } + } + assert(false && "unexpected element type in constant array"); +} +} // namespace + namespace { class AllocaPattern : public OpRewritePattern { public: @@ -66,53 +124,13 @@ class AllocaPattern : public OpRewritePattern { Value conArr; Value conGlobal; if (toGlobal) { - auto ip = rewriter.saveInsertionPoint(); static unsigned counter = 0; auto ptrTy = cudaq::cc::PointerType::get(arrTy); // Build a new name based on the kernel name. std::string name = funcName + ".rodata_" + std::to_string(counter++); - { - OpBuilder::InsertionGuard guard(rewriter); - if (auto complexTy = dyn_cast(eleTy)) { - // Transforming complex vectors is a bit more labor intensive. Use the - // IRBuilder to create the object since we have to thread the needle - // for the LLVM-IR to be lowered to LLVM correctly. 
- auto transform = [&](SmallVectorImpl &vec) - -> std::vector> { - std::vector> result; - for (auto a : vec) { - auto v = cast(a); - if constexpr (std::is_same_v) { - result.emplace_back( - cast(v[0]).getValue().convertToDouble(), - cast(v[1]).getValue().convertToDouble()); - } else { - result.emplace_back( - cast(v[0]).getValue().convertToFloat(), - cast(v[1]).getValue().convertToFloat()); - } - } - return result; - }; - cudaq::IRBuilder irBuilder(rewriter.getContext()); - if (complexTy.getElementType() == rewriter.getF64Type()) { - std::vector> vals = - transform.template operator()(values); - irBuilder.genVectorOfComplexConstant(loc, module, name, vals); - } else { - std::vector> vals = - transform.template operator()(values); - irBuilder.genVectorOfComplexConstant(loc, module, name, vals); - } - } else { - OpBuilder::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToEnd(module.getBody()); - rewriter.create(loc, arrTy, name, valuesAttr, - /*isConstant=*/true, - /*isExternal=*/false); - } - } - rewriter.restoreInsertionPoint(ip); + cudaq::IRBuilder irBuilder(rewriter.getContext()); + genVectorOfConstantsFromAttributes(irBuilder, loc, module, name, values, + eleTy); conGlobal = rewriter.create(loc, ptrTy, name); conArr = rewriter.create(loc, arrTy, conGlobal); } else { @@ -150,7 +168,6 @@ class AllocaPattern : public OpRewritePattern { } if (isa(useuser)) { toErase.push_back(useuser); - continue; } isLive = true; } @@ -165,7 +182,13 @@ class AllocaPattern : public OpRewritePattern { } for (auto *op : toErase) { - rewriter.eraseOp(op); + if (op->getUses().empty()) { + rewriter.eraseOp(op); + } else { + module.emitOpError("LiftArrayAlloc failed to remove quake.init_state " + "or its dependencies."); + return failure(); + } } return success(); } @@ -263,7 +286,7 @@ class AllocaPattern : public OpRewritePattern { scoreboard[0] = w; continue; } - // can be a cast only used for a quake.init_state) + // can be a cast only used for a quake.init_state or vector 
init continue; } else { if (getWriteOp(cast, 0)) { @@ -271,7 +294,8 @@ class AllocaPattern : public OpRewritePattern { << "another cast used in store: " << *op << '\n'); return false; } - // can be a cast only used for a quake.init_state) + // can be a cast only used for a quake.init_state or vector init + toGlobal = true; continue; } LLVM_DEBUG(llvm::dbgs() << "unexpected cast: " << *op << '\n'); diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index d81fdd04c8..58a5f4a3f9 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -7,6 +7,7 @@ ******************************************************************************/ #include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" @@ -102,22 +103,31 @@ Value makeComplexElement(OpBuilder &builder, Location argLoc, return builder.create(argLoc, eleTy, complexVal); } -/// returns true if and only if \p argument is used by a `quake.init_state` -/// operation. -static bool hasInitStateUse(BlockArgument argument) { - for (auto *argUser : argument.getUsers()) - if (auto stdvecDataOp = dyn_cast(argUser)) - for (auto *dataUser : stdvecDataOp->getUsers()) - if (isa(dataUser)) - return true; - return false; +template +std::tuple +createArrayInMemory(OpBuilder &builder, ModuleOp module, unsigned &counter, + BlockArgument argument, std::vector &vec, + cudaq::cc::ArrayType arrTy) { + auto argLoc = argument.getLoc(); + + // Stick global at end of Module. + std::string symbol = "__nvqpp_rodata_init_state." 
+ std::to_string(counter++); + + cudaq::IRBuilder irBuilder(builder); + irBuilder.genVectorOfConstants(argLoc, module, symbol, vec); + + builder.setInsertionPointToStart(argument.getOwner()); + auto buffer = builder.create( + argLoc, cudaq::cc::PointerType::get(arrTy), symbol); + auto data = builder.create(argLoc, arrTy, buffer); + return {buffer, data}; } -template +template LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector &vec, - ATTR arrayAttr, MAKER makeElementValue) { + MAKER makeElementValue) { auto *ctx = builder.getContext(); auto argTy = argument.getType(); assert(isa(argTy)); @@ -125,40 +135,26 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, auto eleTy = cast(strTy.getElementType()); builder.setInsertionPointToStart(argument.getOwner()); auto argLoc = argument.getLoc(); - auto conArray = builder.create( - argLoc, cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()), arrayAttr); + auto arrTy = cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()); std::optional arrayInMemory; + std::optional conArray; auto ptrEleTy = cudaq::cc::PointerType::get(eleTy); bool generateNewValue = false; // Helper function that materializes the array in memory. - auto getArrayInMemory = [&]() -> Value { + auto getArrayInMemory = [&]() -> std::tuple { if (arrayInMemory) - return *arrayInMemory; + return {*arrayInMemory, *conArray}; OpBuilder::InsertionGuard guard(builder); - Value buffer; - if (hasInitStateUse(argument)) { - // Stick global at end of Module. - builder.setInsertionPointToEnd(module.getBody()); - std::string symbol = - "__nvqpp_rodata_init_state." 
+ std::to_string(counter++); - builder.create(argLoc, arrTy, symbol, arrayAttr, - /*isConstant=*/true, - /*isExternal=*/false); - builder.setInsertionPointAfter(conArray); - buffer = builder.create( - argLoc, cudaq::cc::PointerType::get(arrTy), symbol); - } else { - builder.setInsertionPointAfter(conArray); - buffer = builder.create(argLoc, arrTy); - builder.create(argLoc, conArray, buffer); - } + auto [buffer, data] = + createArrayInMemory(builder, module, counter, argument, vec, arrTy); auto ptrArrEleTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(eleTy)); Value res = builder.create(argLoc, ptrArrEleTy, buffer); arrayInMemory = res; - return res; + conArray = data; + return {res, data}; }; auto replaceLoads = [&](cudaq::cc::ComputePtrOp elePtrOp, @@ -211,11 +207,11 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, if (index == cudaq::cc::ComputePtrOp::kDynamicIndex) { OpBuilder::InsertionGuard guard(builder); builder.setInsertionPoint(elePtrOp); + auto [memArr, conArray] = getArrayInMemory(); Value getEle = builder.create( elePtrOp.getLoc(), eleTy, conArray, elePtrOp.getDynamicIndices()[0]); if (failed(replaceLoads(elePtrOp, getEle))) { - Value memArr = getArrayInMemory(); builder.setInsertionPoint(elePtrOp); Value newComputedPtr = builder.create( argLoc, ptrEleTy, memArr, @@ -228,7 +224,7 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, Value runtimeParam = makeElementValue(builder, argLoc, vec[index], eleTy); if (failed(replaceLoads(elePtrOp, runtimeParam))) { - Value memArr = getArrayInMemory(); + auto [memArr, _] = getArrayInMemory(); OpBuilder::InsertionGuard guard(builder); builder.setInsertionPoint(elePtrOp); Value newComputedPtr = builder.create( @@ -243,7 +239,7 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, // Check if there were other uses of `vec.data()` and simply forward the // constant array as materialized in memory. 
if (replaceOtherUses) { - Value memArr = getArrayInMemory(); + auto [memArr, _] = getArrayInMemory(); stdvecDataOp.replaceAllUsesWith(memArr); } continue; @@ -255,9 +251,9 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, generateNewValue = true; } if (generateNewValue) { - auto memArr = getArrayInMemory(); + auto [memArr, data] = getArrayInMemory(); OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointAfter(memArr.getDefiningOp()); + builder.setInsertionPointAfter(data.getDefiningOp()); Value size = builder.create(argLoc, vec.size(), 64); Value newVec = builder.create(argLoc, strTy, memArr, size); @@ -274,15 +270,11 @@ std::vector asI32(const std::vector &v) { return result; } -// TODO: consider using DenseArrayAttr here instead. NB: such a change may alter -// the output of the constant array op. static LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector &vec) { - auto arrayAttr = builder.getI32ArrayAttr(asI32(vec)); - return synthesizeVectorArgument(builder, module, counter, - argument, vec, arrayAttr, - makeIntegerElement); + return synthesizeVectorArgument( + builder, module, counter, argument, vec, makeIntegerElement); } static LogicalResult synthesizeVectorArgument(OpBuilder &builder, @@ -290,10 +282,8 @@ static LogicalResult synthesizeVectorArgument(OpBuilder &builder, unsigned &counter, BlockArgument argument, std::vector &vec) { - auto arrayAttr = builder.getI32ArrayAttr(asI32(vec)); - return synthesizeVectorArgument(builder, module, counter, - argument, vec, arrayAttr, - makeIntegerElement); + return synthesizeVectorArgument( + builder, module, counter, argument, vec, makeIntegerElement); } static LogicalResult synthesizeVectorArgument(OpBuilder &builder, @@ -301,9 +291,8 @@ static LogicalResult synthesizeVectorArgument(OpBuilder &builder, unsigned &counter, BlockArgument argument, std::vector &vec) { - auto arrayAttr = 
builder.getI32ArrayAttr(asI32(vec)); return synthesizeVectorArgument( - builder, module, counter, argument, vec, arrayAttr, + builder, module, counter, argument, vec, makeIntegerElement); } @@ -312,9 +301,8 @@ static LogicalResult synthesizeVectorArgument(OpBuilder &builder, unsigned &counter, BlockArgument argument, std::vector &vec) { - auto arrayAttr = builder.getI32ArrayAttr(vec); return synthesizeVectorArgument( - builder, module, counter, argument, vec, arrayAttr, + builder, module, counter, argument, vec, makeIntegerElement); } @@ -323,58 +311,39 @@ static LogicalResult synthesizeVectorArgument(OpBuilder &builder, unsigned &counter, BlockArgument argument, std::vector &vec) { - auto arrayAttr = builder.getI64ArrayAttr(vec); return synthesizeVectorArgument( - builder, module, counter, argument, vec, arrayAttr, + builder, module, counter, argument, vec, makeIntegerElement); } static LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector &vec) { - auto arrayAttr = builder.getF32ArrayAttr(vec); return synthesizeVectorArgument(builder, module, counter, argument, - vec, arrayAttr, - makeFloatElement); + vec, makeFloatElement); } static LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector &vec) { - auto arrayAttr = builder.getF64ArrayAttr(vec); return synthesizeVectorArgument(builder, module, counter, argument, - vec, arrayAttr, - makeFloatElement); + vec, makeFloatElement); } static LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector> &vec) { - std::vector vec2; - for (auto c : vec) { - vec2.push_back(c.real()); - vec2.push_back(c.imag()); - } - auto arrayAttr = builder.getF32ArrayAttr(vec2); - return synthesizeVectorArgument(builder, module, counter, - argument, vec, arrayAttr, - makeComplexElement); + return synthesizeVectorArgument( 
+ builder, module, counter, argument, vec, makeComplexElement); } static LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector> &vec) { - std::vector vec2; - for (auto c : vec) { - vec2.push_back(c.real()); - vec2.push_back(c.imag()); - } - auto arrayAttr = builder.getF64ArrayAttr(vec2); - return synthesizeVectorArgument(builder, module, counter, - argument, vec, arrayAttr, - makeComplexElement); + return synthesizeVectorArgument( + builder, module, counter, argument, vec, makeComplexElement); } namespace { diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index 564a121f83..8f84623a29 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -33,34 +33,42 @@ using namespace mlir; /// Replace a qubit initialization from vectors with quantum gates. /// For example: /// -/// func.func -/// @__nvqpp__mlirgen__function_test._Z4testSt6vectorISt7complexIfESaIS1_EE() -/// attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { -/// %0 = cc.address_of @__nvqpp_rodata_init_state.0 : -/// !cc.ptr x 4>> %1 = cc.cast %0 : -/// (!cc.ptr x 4>>) -> !cc.ptr> %2 = -/// quake.alloca !quake.veq<2> %3 = quake.init_state %2, %1 : (!quake.veq<2>, -/// !cc.ptr>) -> !quake.veq<2> return +/// +/// Before PrepareState (state-prep): +/// +/// module { +/// func.func @foo() attributes { +/// %0 = cc.address_of @foo.rodata_0 : !cc.ptr x 4>> +/// %1 = quake.alloca !quake.veq<2> +/// %2 = quake.init_state %1, %0 : (!quake.veq<2>, +/// !cc.ptr x 4>>) -> !quake.veq<2> return +/// } +/// cc.global constant @foo.rodata_0 (dense<[(0.707106769,0.000000e+00), +/// (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), +/// (0.000000e+00,0.000000e+00)]> : tensor<4xcomplex>) : +/// !cc.array x 4> /// } /// -/// is converted to: +/// After PrepareState (state-prep): /// -/// func.func @foo(%arg0 : !cc.stdvec>) { 
+/// module { +/// func.func @foo() attributes { /// %0 = quake.alloca !quake.veq<2> -/// %c0_i64 = arith.constant 0 : i64 -/// %1 = quake.extract_ref %0[%c0_i64] : (!quake.veq<2>, i64) -> !quake.ref -/// %cst = arith.constant 1.5707963267948968 : f64 -/// quake.ry (%cst) %1 : (f64, !quake.ref) -> () /// %c1_i64 = arith.constant 1 : i64 -/// %2 = quake.extract_ref %0[%c1_i64] : (!quake.veq<2>, i64) -> !quake.ref -/// %cst_0 = arith.constant 1.5707963267948966 : f64 +/// %1 = quake.extract_ref %0[%c1_i64] : (!quake.veq<2>, i64) -> !quake.ref +/// %cst = arith.constant 0.000000e+00 : f64 +/// quake.ry (%cst) %1 : (f64, !quake.ref) -> () +/// %c0_i64 = arith.constant 0 : i64 +/// %2 = quake.extract_ref %0[%c0_i64] : (!quake.veq<2>, i64) -> !quake.ref +/// %cst_0 = arith.constant 0.78539816339744839 : f64 /// quake.ry (%cst_0) %2 : (f64, !quake.ref) -> () /// quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () -/// %cst_1 = arith.constant -1.5707963267948966 : f64 +/// %cst_1 = arith.constant 0.78539816339744839 : f64 /// quake.ry (%cst_1) %2 : (f64, !quake.ref) -> () /// quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () /// return /// } +/// } namespace { @@ -71,59 +79,41 @@ readConstantArray(mlir::OpBuilder &builder, cudaq::cc::GlobalOp &global) { auto attr = global.getValue(); auto type = global.getType().getElementType(); - if (auto arrayTy = dyn_cast(type)) { - auto eleTy = arrayTy.getElementType(); - - if (attr.has_value()) { - if (auto elementsAttr = dyn_cast(attr.value())) { - auto eleTy = elementsAttr.getElementType(); - if (isa(eleTy)) { - auto values = elementsAttr.getValues(); - for (auto it = values.begin(); it != values.end(); ++it) { - auto valueAttr = *it; - auto real = - cast(valueAttr[0]).getValue().convertToDouble(); - auto imag = - cast(valueAttr[1]).getValue().convertToDouble(); - result.push_back({real, imag}); - } - } else { - auto values = elementsAttr.getValues(); - for (auto it = values.begin(); it != values.end(); ++it) { - result.push_back({*it, 
0.0}); - } - } - } else if (auto values = dyn_cast(attr.value())) { - for (auto it = values.begin(); it != values.end(); ++it) { - auto real = *it; - // for (std::size_t idx = 0; idx < numConstants; idx += isComplex ? 2 - // : 1) { - auto v = [&]() -> std::complex { - if (isa(eleTy)) - return {cast(real).getValue().convertToDouble(), - static_cast(0.0)}; - if (isa(eleTy)) - return {static_cast(cast(real).getInt()), - static_cast(0.0)}; - assert(isa(eleTy)); - it++; - auto imag = *it; - return {cast(real).getValue().convertToDouble(), - cast(imag).getValue().convertToDouble()}; - }(); - - result.push_back(v); - } - } - } + auto arrayTy = dyn_cast(type); + assert(arrayTy); + assert(attr.has_value()); + + auto elementsAttr = dyn_cast(attr.value()); + assert(elementsAttr); + auto eleTy = elementsAttr.getElementType(); + auto values = elementsAttr.getValues(); + + for (auto it = values.begin(); it != values.end(); ++it) { + auto valAttr = *it; + + auto v = [&]() -> std::complex { + if (isa(eleTy)) + return {cast(valAttr).getValue().convertToDouble(), + static_cast(0.0)}; + if (isa(eleTy)) + return {static_cast(cast(valAttr).getInt()), + static_cast(0.0)}; + assert(isa(eleTy)); + auto arrayAttr = cast(valAttr); + auto real = cast(arrayAttr[0]).getValue().convertToDouble(); + auto imag = cast(arrayAttr[1]).getValue().convertToDouble(); + return {real, imag}; + }(); + + result.push_back(v); } - return result; } -LogicalResult transform(OpBuilder &builder, ModuleOp module) { +LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { + auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); auto toErase = std::vector(); - module->walk([&](Operation *op) { + funcOp->walk([&](Operation *op) { if (auto initOp = dyn_cast(op)) { toErase.push_back(initOp); auto loc = op->getLoc(); @@ -176,33 +166,25 @@ LogicalResult transform(OpBuilder &builder, ModuleOp module) { class StatePreparation : public cudaq::opt::PrepareStateBase { protected: - // The name of the kernel 
to be synthesized - std::string kernelName; - public: StatePreparation() = default; - StatePreparation(std::string_view kernel) : kernelName(kernel) {} mlir::ModuleOp getModule() { return getOperation(); } void runOnOperation() override final { auto module = getModule(); - auto kernelNameInQuake = cudaq::runtime::cudaqGenPrefixName + kernelName; - // Get the function we care about (the one with kernelName) - auto funcOp = module.lookupSymbol(kernelNameInQuake); - if (!funcOp) { - module.emitOpError("The kernel '" + kernelName + - "' was not found in the module."); - signalPassFailure(); - return; - } - - auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); - auto result = transform(builder, module); - if (result.failed()) { - module.emitOpError("Failed to prepare state for '" + kernelName); - signalPassFailure(); - return; + for (Operation &op : *module.getBody()) { + auto funcOp = dyn_cast(op); + if (!funcOp) + continue; + std::string kernelName = funcOp.getName().str(); + + auto result = transform(module, funcOp); + if (result.failed()) { + funcOp.emitOpError("Failed to prepare state for '" + kernelName); + signalPassFailure(); + return; + } } } }; @@ -212,8 +194,3 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { std::unique_ptr cudaq::opt::createStatePreparation() { return std::make_unique(); } - -std::unique_ptr -cudaq::opt::createStatePreparation(std::string_view kernelName) { - return std::make_unique(kernelName); -} diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index a937b4acc8..6d238509ec 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -462,6 +462,18 @@ py::object pyAltLaunchKernelR(const std::string &name, MlirModule module, return returnValue; } +/// @brief Helper function to get boolean environment variable +bool getEnvBool(const char *envName, bool defaultVal = 
false) { + if (auto envVal = std::getenv(envName)) { + std::string tmp(envVal); + std::transform(tmp.begin(), tmp.end(), tmp.begin(), + [](unsigned char c) { return std::tolower(c); }); + if (tmp == "1" || tmp == "on" || tmp == "true" || tmp == "yes") + return true; + } + return defaultVal; +} + MlirModule synthesizeKernel(const std::string &name, MlirModule module, cudaq::OpaqueArguments &runtimeArgs) { ScopedTraceWithContext(cudaq::TIMING_JIT, "synthesizeKernel", name); @@ -473,12 +485,24 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, auto context = cloned.getContext(); registerLLVMDialectTranslation(*context); + // Get additional debug values + auto disableMLIRthreading = getEnvBool("CUDAQ_MLIR_DISABLE_THREADING", false); + auto enablePrintMLIREachPass = + getEnvBool("CUDAQ_MLIR_PRINT_EACH_PASS", false); + PassManager pm(context); pm.addPass(cudaq::opt::createQuakeSynthesizer(name, rawArgs)); pm.addPass(createCanonicalizerPass()); - pm.addPass(createCSEPass()); - pm.addPass(cudaq::opt::createLiftArrayAllocPass()); - pm.addPass(cudaq::opt::createStatePreparation(name)); + + // Run state preparation for quantum devices only. + // Simulators have direct implementation of state initialization + // in their runtime. 
+ auto &platform = cudaq::get_platform(); + if (!platform.is_simulator() || platform.is_emulated()) { + pm.addPass(createCSEPass()); + pm.addPass(cudaq::opt::createLiftArrayAllocPass()); + pm.addPass(cudaq::opt::createStatePreparation()); + } pm.addPass(createCanonicalizerPass()); pm.addPass(cudaq::opt::createExpandMeasurementsPass()); pm.addNestedPass(cudaq::opt::createClassicalMemToReg()); @@ -490,6 +514,10 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, tm.setEnabled(cudaq::isTimingTagEnabled(cudaq::TIMING_JIT_PASSES)); auto timingScope = tm.getRootScope(); // starts the timer pm.enableTiming(timingScope); // do this right before pm.run + if (disableMLIRthreading || enablePrintMLIREachPass) + context->disableMultithreading(); + if (enablePrintMLIREachPass) + pm.enableIRPrinting(); if (failed(pm.run(cloned))) throw std::runtime_error( "cudaq::builder failed to JIT compile the Quake representation."); diff --git a/python/tests/backends/test_IQM.py b/python/tests/backends/test_IQM.py index 200d078fcc..76bb1190a3 100644 --- a/python/tests/backends/test_IQM.py +++ b/python/tests/backends/test_IQM.py @@ -10,6 +10,7 @@ import tempfile import time from multiprocessing import Process +import numpy as np import cudaq from cudaq import spin @@ -160,6 +161,32 @@ def kernel(): result = cudaq.sample(kernel) +def test_IQM_state_preparation(): + + @cudaq.kernel + def kernel(vec: list[complex]): + qubits = cudaq.qvector(vec) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] + counts = cudaq.sample(kernel, state) + assert '00' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + + +def test_IQM_state_preparation_builder(): + kernel, state = cudaq.make_kernel(list[complex]) + qubits = kernel.qalloc(state) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
+ counts = cudaq.sample(kernel, state) + assert '00' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/backends/test_IonQ.py b/python/tests/backends/test_IonQ.py index be93445359..92a7ac8a5d 100644 --- a/python/tests/backends/test_IonQ.py +++ b/python/tests/backends/test_IonQ.py @@ -8,6 +8,7 @@ import cudaq, pytest, os, time from cudaq import spin +import numpy as np from multiprocessing import Process try: from utils.mock_qpu.ionq import startServer @@ -156,6 +157,32 @@ def kernel(): result = cudaq.sample(kernel) +def test_ionq_state_preparation(): + + @cudaq.kernel + def kernel(vec: list[complex]): + qubits = cudaq.qvector(vec) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] + counts = cudaq.sample(kernel, state) + assert '00' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + + +def test_ionq_state_preparation_builder(): + kernel, state = cudaq.make_kernel(list[complex]) + qubits = kernel.qalloc(state) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] + counts = cudaq.sample(kernel, state) + assert '00' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/backends/test_OQC.py b/python/tests/backends/test_OQC.py index 70779e975f..0dc40e4bec 100644 --- a/python/tests/backends/test_OQC.py +++ b/python/tests/backends/test_OQC.py @@ -15,6 +15,7 @@ import cudaq from cudaq import spin +import numpy as np try: from utils.mock_qpu.oqc import startServer @@ -158,6 +159,32 @@ def test_OQC_observe(): assert assert_close(res.expectation()) +def test_OQC_state_preparation(): + + @cudaq.kernel + def kernel(vec: list[complex]): + qubits = cudaq.qvector(vec) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
+ counts = cudaq.sample(kernel, state) + assert '00' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + + +def test_OQC_state_preparation_builder(): + kernel, state = cudaq.make_kernel(list[complex]) + qubits = kernel.qalloc(state) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] + counts = cudaq.sample(kernel, state) + assert '00' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py b/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py index b368cefdb0..70a1d29aa9 100644 --- a/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py +++ b/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py @@ -8,6 +8,7 @@ import cudaq, pytest, os, time from cudaq import spin +import numpy as np from multiprocessing import Process @@ -111,6 +112,18 @@ def test_quantinuum_exp_pauli(): assert assert_close(res.expectation()) +def test_quantinuum_state_preparation(): + kernel, state = cudaq.make_kernel(list[complex]) + qubits = kernel.qalloc(state) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
+ counts = cudaq.sample(kernel, state) + assert '00' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py b/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py index 3bad589111..5576f46597 100644 --- a/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py +++ b/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py @@ -8,6 +8,7 @@ import cudaq, pytest, os, time from cudaq import spin +import numpy as np from multiprocessing import Process @@ -138,6 +139,20 @@ def kernel(): result = cudaq.sample(kernel) +def test_quantinuum_state_preparation(): + + @cudaq.kernel + def kernel(vec: list[complex]): + qubits = cudaq.qvector(vec) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] + counts = cudaq.sample(kernel, state) + assert '00' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/backends/test_Quantinuum_builder.py b/python/tests/backends/test_Quantinuum_builder.py index 1d82c6abb0..d20cb0d499 100644 --- a/python/tests/backends/test_Quantinuum_builder.py +++ b/python/tests/backends/test_Quantinuum_builder.py @@ -7,6 +7,7 @@ # ============================================================================ # import cudaq, pytest, os, time +import numpy as np from cudaq import spin from multiprocessing import Process try: @@ -145,6 +146,18 @@ def test_quantinuum_observe(): assert assert_close(res.expectation()) +def test_quantinuum_state_preparation(): + kernel, state = cudaq.make_kernel(list[complex]) + qubits = kernel.qalloc(state) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
+ counts = cudaq.sample(kernel, state) + assert '00' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/kernel/test_kernel_qvector_init.py b/python/tests/kernel/test_kernel_qvector_init.py index 3edb5ca951..e892be2dc2 100644 --- a/python/tests/kernel/test_kernel_qvector_init.py +++ b/python/tests/kernel/test_kernel_qvector_init.py @@ -29,7 +29,7 @@ @skipIfPythonLessThan39 -def test_kernel_state_preparation(): +def test_kernel_complex_synthesize(): cudaq.reset_target() c = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] @@ -39,9 +39,22 @@ def kernel(vec: list[complex]): q = cudaq.qvector(vec) synthesized = cudaq.synthesize(kernel, c) - assert 'quake.init_state' in kernel.__str__() - assert not 'quake.init_state' in synthesized.__str__() + counts = cudaq.sample(synthesized) + assert '00' in counts + assert '10' in counts + + +@skipIfPythonLessThan39 +def test_kernel_float_synthesize(): + cudaq.reset_target() + c = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
+ + @cudaq.kernel + def kernel(vec: list[float]): + q = cudaq.qvector(vec) + + synthesized = cudaq.synthesize(kernel, c) counts = cudaq.sample(synthesized) assert '00' in counts assert '10' in counts diff --git a/python/tests/remote/test_remote_code_exec.py b/python/tests/remote/test_remote_code_exec.py index 67541fddc9..5b0f869f87 100644 --- a/python/tests/remote/test_remote_code_exec.py +++ b/python/tests/remote/test_remote_code_exec.py @@ -15,6 +15,7 @@ import cudaq from cudaq import spin +import numpy as np ## [PYTHON_VERSION_FIX] skipIfPythonLessThan39 = pytest.mark.skipif( @@ -349,6 +350,31 @@ def test_complex_vqe_named_lambda_sweep_opt(optimizer): def test_complex_vqe_named_lambda_sweep_grad(gradient): test_complex_vqe_named_lambda(cudaq.optimizers.Adam(), gradient) +@skipIfPythonLessThan39 +def test_state_preparation(): + + @cudaq.kernel + def kernel(vec: list[complex]): + qubits = cudaq.qvector(vec) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] + counts = cudaq.sample(kernel, state) + assert '00' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + +@skipIfPythonLessThan39 +def test_state_preparation_builder(): + kernel, state = cudaq.make_kernel(list[complex]) + qubits = kernel.qalloc(state) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
+ counts = cudaq.sample(kernel, state) + assert '00' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts # leave for gdb debugging if __name__ == "__main__": diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index b598da28ed..9b8a80ba88 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -420,22 +420,6 @@ class BaseRemoteRESTQPU : public cudaq::QPU { throw std::runtime_error("Could not successfully apply quake-synth."); } - { - cudaq::info("Run State Prep.\n"); - mlir::PassManager pm(&context); - - pm.addPass(mlir::createCanonicalizerPass()); - pm.addPass(mlir::createCSEPass()); - pm.addPass(cudaq::opt::createLiftArrayAllocPass()); - pm.addPass(cudaq::opt::createStatePreparation(kernelName)); - if (disableMLIRthreading || enablePrintMLIREachPass) - moduleOp.getContext()->disableMultithreading(); - if (enablePrintMLIREachPass) - pm.enableIRPrinting(); - if (failed(pm.run(moduleOp))) - throw std::runtime_error("Could not successfully apply state prep."); - } - runPassPipeline(passPipelineConfig, moduleOp); auto entryPointFunc = moduleOp.lookupSymbol( diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index db1288caca..4c593f8085 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -159,17 +159,8 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { throw std::runtime_error("Could not successfully apply quake-synth."); } - { - cudaq::info("Run State Prep.\n"); - mlir::PassManager pm(&mlirContext); - pm.addPass(mlir::createCanonicalizerPass()); - pm.addPass(mlir::createCSEPass()); - pm.addPass(cudaq::opt::createLiftArrayAllocPass()); - pm.addPass(cudaq::opt::createStatePreparation(name)); - pm.addPass(mlir::createCanonicalizerPass()); - if (failed(pm.run(moduleOp))) - throw std::runtime_error("Could not successfully apply state-prep."); - } + // Note: do 
not run state preparation pass here since we are always + // using simulators. // Run client-side passes. `clientPasses` is empty right now, but the code // below accommodates putting passes into it. diff --git a/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.config b/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.config index 0b0555a3b3..c78a2b3e1e 100644 --- a/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.config +++ b/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.config @@ -16,7 +16,7 @@ GEN_TARGET_BACKEND=true LINKLIBS="${LINKLIBS} -lcudaq-rest-qpu" # Define the lowering pipeline -PLATFORM_LOWERING_CONFIG="expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),ionq-gate-set-mapping" +PLATFORM_LOWERING_CONFIG="canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),ionq-gate-set-mapping" # Tell the rest-qpu that we are generating QIR. 
CODEGEN_EMISSION=qir-base diff --git a/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.config b/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.config index 433658ec48..2db0f2b235 100644 --- a/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.config +++ b/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.config @@ -18,7 +18,7 @@ LINKLIBS="${LINKLIBS} -lcudaq-rest-qpu" # Define the lowering pipeline, here we lower to Base QIR # Note: the runtime will dynamically substitute %QPU_ARCH% based on # qpu-architecture -PLATFORM_LOWERING_CONFIG="expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),iqm-gate-set-mapping,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg,qubit-mapping{device=file(%QPU_ARCH%)},delay-measurements,regtomem),iqm-gate-set-mapping" +PLATFORM_LOWERING_CONFIG="canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),iqm-gate-set-mapping,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg,qubit-mapping{device=file(%QPU_ARCH%)},delay-measurements,regtomem),iqm-gate-set-mapping" # Tell the rest-qpu that we are generating IQM JSON. CODEGEN_EMISSION=iqm diff --git a/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.config b/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.config index 3f157cb80d..042fb8dd8d 100644 --- a/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.config +++ b/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.config @@ -18,7 +18,7 @@ LINKLIBS="${LINKLIBS} -lcudaq-rest-qpu" # Define the lowering pipeline. Lucy has an 8-qubit ring topology, so mapping # uses ring(8). 
# Toshiko uses a Kagome lattice with 2-3 connectivity per qubit -PLATFORM_LOWERING_CONFIG="expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),oqc-gate-set-mapping,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg,qubit-mapping{device=file(%QPU_ARCH%)},regtomem)" +PLATFORM_LOWERING_CONFIG="canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),oqc-gate-set-mapping,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg,qubit-mapping{device=file(%QPU_ARCH%)},regtomem)" # Tell the rest-qpu that we are generating QIR. diff --git a/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.config b/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.config index 17696630be..bed7159b28 100644 --- a/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.config +++ b/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.config @@ -16,7 +16,7 @@ GEN_TARGET_BACKEND=true LINKLIBS="${LINKLIBS} -lcudaq-rest-qpu" # Define the lowering pipeline, here we lower to Adaptive QIR -PLATFORM_LOWERING_CONFIG="expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),quantinuum-gate-set-mapping" +PLATFORM_LOWERING_CONFIG="canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),quantinuum-gate-set-mapping" # Tell the rest-qpu that we are generating QIR. 
CODEGEN_EMISSION=qir-adaptive diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index 886c3d92b8..994390cde3 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -7,6 +7,7 @@ ******************************************************************************/ // RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --enable-mlir --target quantinuum --emulate %s -o %t && %t | FileCheck %s #include #include @@ -16,6 +17,11 @@ __qpu__ void test_complex_constant_array() { } __qpu__ void test_complex_constant_array2() { + cudaq::qvector v1(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); + cudaq::qvector v2(std::vector({ 0., 0., M_SQRT1_2, M_SQRT1_2})); +} + +__qpu__ void test_complex_constant_array3() { cudaq::qvector v({ cudaq::complex(M_SQRT1_2), cudaq::complex(M_SQRT1_2), @@ -24,14 +30,14 @@ __qpu__ void test_complex_constant_array2() { }); } -__qpu__ void test_real_constant_array() { - cudaq::qvector v({ M_SQRT1_2, M_SQRT1_2, 0., 0.}); -} - __qpu__ void test_complex_array_param(std::vector inState) { cudaq::qvector q1 = inState; } +__qpu__ void test_real_constant_array() { + cudaq::qvector v({ M_SQRT1_2, M_SQRT1_2, 0., 0.}); +} + __qpu__ void test_real_array_param(std::vector inState) { cudaq::qvector q1 = inState; } @@ -49,6 +55,7 @@ void printCounts(cudaq::sample_result& result) { } int main() { + { auto counts = cudaq::sample(test_complex_constant_array); printCounts(counts); @@ -59,6 +66,11 @@ int main() { printCounts(counts); } + { + auto counts = cudaq::sample(test_complex_constant_array3); + printCounts(counts); + } + { auto counts = cudaq::sample(test_real_constant_array); printCounts(counts); @@ -118,8 +130,10 @@ int main() { // CHECK: 00 // CHECK: 10 -// CHECK: 00 -// CHECK: 10 +// CHECK: 0001 +// CHECK: 0011 +// CHECK: 1001 +// CHECK: 1011 // CHECK: 00 // CHECK: 10 @@ -127,23 +141,20 @@ int 
main() { // CHECK: 00 // CHECK: 10 - // CHECK: 00 // CHECK: 10 -// CHECK: 01 -// CHECK: 11 -// CHECK: 00 -// CHECK: 10 // CHECK: 01 // CHECK: 11 // CHECK: 00 // CHECK: 10 + // CHECK: 01 // CHECK: 11 // CHECK: 00 // CHECK: 10 + // CHECK: 01 -// CHECK: 11 +// CHECK: 11 \ No newline at end of file diff --git a/test/Quake/lift_array.qke b/test/Quake/lift_array.qke index b12196793d..a8b9b337b2 100644 --- a/test/Quake/lift_array.qke +++ b/test/Quake/lift_array.qke @@ -8,6 +8,34 @@ // RUN: cudaq-opt -lift-array-value %s | FileCheck %s +func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %cst = arith.constant 0.000000e+00 : f32 + %cst_0 = arith.constant 0.70710678118654757 : f64 + %0 = arith.truncf %cst_0 : f64 to f32 + %1 = complex.create %0, %cst : complex + %2 = complex.create %cst, %cst : complex + %3 = cc.alloca !cc.array x 4> + %4 = cc.cast %3 : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %1, %4 : !cc.ptr> + %5 = cc.compute_ptr %3[1] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %1, %5 : !cc.ptr> + %6 = cc.compute_ptr %3[2] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %2, %6 : !cc.ptr> + %7 = cc.compute_ptr %3[3] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %2, %7 : !cc.ptr> + %8 = quake.alloca !quake.veq<2> + %9 = quake.init_state %8, %4 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> + return + } + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = cc.address_of @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv.rodata_{{[0-9]+}} : !cc.ptr x 4>> +// CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_2:.*]] = quake.init_state %[[VAL_1]], %[[VAL_0]] : (!quake.veq<2>, !cc.ptr x 4>>) -> !quake.veq<2> +// CHECK: return +// CHECK: } + + func.func private 
@__nvqpp_vectorCopyCtor(%0: !cc.ptr, %1: i64, %2: i64) -> !cc.ptr func.func @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v() -> !cc.stdvec> attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { @@ -71,7 +99,6 @@ func.func @test2() -> !quake.veq<2> { // CHECK: return %[[VAL_2]] : !quake.veq<2> // CHECK: } -// CHECK-DAG: cc.global constant @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v.rodata_{{[0-9]+}} (dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (-0.70710678118654757,0.000000e+00)]> : tensor<4xcomplex>) : !cc.array x 4> - -// CHECK-DAG: cc.global constant @test2.rodata_{{[0-9]+}} ([1.000000e+00, 2.000000e+00, 6.000000e+00, 9.000000e+00]) : !cc.array - +// CHECK-DAG: cc.global constant @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv.rodata_{{[0-9]+}} (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<4xcomplex>) : !cc.array x 4> +// CHECK-DAG: cc.global constant @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v.rodata_{{[0-9]+}} (dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (-0.70710678118654757,0.000000e+00)]> : tensor<4xcomplex>) : !cc.array x 4> +// CHECK-DAG: cc.global constant @test2.rodata_{{[0-9]+}} (dense<[1.000000e+00, 2.000000e+00, 6.000000e+00, 9.000000e+00]> : tensor<4xf64>) : !cc.array diff --git a/test/Quake/lift_array_temp.qke b/test/Quake/lift_array_temp.qke new file mode 100644 index 0000000000..b3500d9e2a --- /dev/null +++ b/test/Quake/lift_array_temp.qke @@ -0,0 +1,50 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. 
// +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +// RUN: cudaq-opt -lift-array-value %s | FileCheck %s + + +func.func private @__nvqpp_vectorCopyCtor(%0: !cc.ptr, %1: i64, %2: i64) -> !cc.ptr + +func.func @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v() -> !cc.stdvec> attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %cst = arith.constant -0.70710678118654757 : f64 + %c16_i64 = arith.constant 16 : i64 + %c4_i64 = arith.constant 4 : i64 + %cst_0 = arith.constant 0.70710678118654757 : f64 + %cst_1 = arith.constant 0.000000e+00 : f64 + %0 = complex.create %cst_0, %cst_1 : complex + %1 = complex.create %cst_0, %cst_1 : complex + %2 = complex.create %cst_0, %cst_1 : complex + %3 = complex.create %cst, %cst_1 : complex + %4 = cc.alloca !cc.array x 4> + %5 = cc.cast %4 : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %0, %5 : !cc.ptr> + %6 = cc.compute_ptr %4[1] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %1, %6 : !cc.ptr> + %7 = cc.compute_ptr %4[2] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %2, %7 : !cc.ptr> + %8 = cc.compute_ptr %4[3] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %3, %8 : !cc.ptr> + %9 = cc.cast %4 : (!cc.ptr x 4>>) -> !cc.ptr + %10 = call @__nvqpp_vectorCopyCtor(%9, %c4_i64, %c16_i64) : (!cc.ptr, i64, i64) -> !cc.ptr + %11 = cc.stdvec_init %10, %c4_i64 : (!cc.ptr, i64) -> !cc.stdvec> + return %11 : !cc.stdvec> +} + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v() -> !cc.stdvec> attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = arith.constant 16 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 4 : i64 +// CHECK: %[[VAL_2:.*]] = cc.address_of @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v.rodata_{{[0-9]+}} : 
!cc.ptr x 4>> +// CHECK: %[[VAL_3:.*]] = cc.cast %[[VAL_2]] : (!cc.ptr x 4>>) -> !cc.ptr +// CHECK: %[[VAL_4:.*]] = call @__nvqpp_vectorCopyCtor(%[[VAL_3]], %[[VAL_1]], %[[VAL_0]]) : (!cc.ptr, i64, i64) -> !cc.ptr +// CHECK: %[[VAL_5:.*]] = cc.stdvec_init %[[VAL_4]], %[[VAL_1]] : (!cc.ptr, i64) -> !cc.stdvec> +// CHECK: return %[[VAL_5]] : !cc.stdvec> +// CHECK: } + +// CHECK-DAG: cc.global constant @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v.rodata_{{[0-9]+}} (dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (-0.70710678118654757,0.000000e+00)]> : tensor<4xcomplex>) : !cc.array x 4> + diff --git a/test/Quake/state_prep.qke b/test/Quake/state_prep.qke new file mode 100644 index 0000000000..3ba6d077bb --- /dev/null +++ b/test/Quake/state_prep.qke @@ -0,0 +1,114 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. 
// +// ========================================================================== // + +// RUN: cudaq-opt -state-prep %s | FileCheck %s + +module { + func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %0 = cc.address_of @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv.rodata_0 : !cc.ptr x 4>> + %1 = quake.alloca !quake.veq<2> + %2 = quake.init_state %1, %0 : (!quake.veq<2>, !cc.ptr x 4>>) -> !quake.veq<2> + return + } + cc.global constant @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv.rodata_0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<4xcomplex>) : !cc.array x 4> + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_1]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: quake.ry (%[[VAL_3]]) %[[VAL_2]] : (f64, !quake.ref) -> () +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_4]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: %[[VAL_6:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: quake.ry (%[[VAL_6]]) %[[VAL_5]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: %[[VAL_7:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: quake.ry (%[[VAL_7]]) %[[VAL_5]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: return +// CHECK: } + + + 
func.func @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %0 = cc.address_of @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv.rodata_0 : !cc.ptr> + %1 = quake.alloca !quake.veq<2> + %2 = quake.init_state %1, %0 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> + return + } + cc.global constant @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv.rodata_0 (dense<[0.70710678118654757, 0.70710678118654757, 0.000000e+00, 0.000000e+00]> : tensor<4xf64>) : !cc.array + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_1]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: quake.ry (%[[VAL_3]]) %[[VAL_2]] : (f64, !quake.ref) -> () +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_4]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: %[[VAL_6:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: quake.ry (%[[VAL_6]]) %[[VAL_5]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: %[[VAL_7:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: quake.ry (%[[VAL_7]]) %[[VAL_5]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: return +// CHECK: } + + func.func @__nvqpp__mlirgen__function_test_complex_array_param._Z24test_complex_array_paramSt6vectorISt7complexIfESaIS1_EE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %0 = cc.address_of @__nvqpp_rodata_init_state.0 : !cc.ptr x 
4>> + %1 = cc.cast %0 : (!cc.ptr x 4>>) -> !cc.ptr x ?>> + %2 = quake.alloca !quake.veq<2> + %3 = quake.init_state %2, %1 : (!quake.veq<2>, !cc.ptr x ?>>) -> !quake.veq<2> + return + } + cc.global constant @__nvqpp_rodata_init_state.0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<4xcomplex>) : !cc.array x 4> + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_complex_array_param._Z24test_complex_array_paramSt6vectorISt7complexIfESaIS1_EE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_1]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: quake.ry (%[[VAL_3]]) %[[VAL_2]] : (f64, !quake.ref) -> () +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_4]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: %[[VAL_6:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: quake.ry (%[[VAL_6]]) %[[VAL_5]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: %[[VAL_7:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: quake.ry (%[[VAL_7]]) %[[VAL_5]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: return +// CHECK: } + + func.func @__nvqpp__mlirgen__function_test_real_array_param._Z21test_real_array_paramSt6vectorIfSaIfEE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %0 = cc.address_of @__nvqpp_rodata_init_state.1 : !cc.ptr> + %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr> + %2 = quake.alloca !quake.veq<2> + %3 = quake.init_state %2, %1 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> + return + } + cc.global constant @__nvqpp_rodata_init_state.1 
(dense<[0.707106769, 0.707106769, 0.000000e+00, 0.000000e+00]> : tensor<4xf32>) : !cc.array + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_real_array_param._Z21test_real_array_paramSt6vectorIfSaIfEE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_1]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: quake.ry (%[[VAL_3]]) %[[VAL_2]] : (f64, !quake.ref) -> () +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_4]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: %[[VAL_6:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: quake.ry (%[[VAL_6]]) %[[VAL_5]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: %[[VAL_7:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: quake.ry (%[[VAL_7]]) %[[VAL_5]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: return +// CHECK: } +} From 4828dbba3d08066bcbed1cc1972beef78cd3b56e Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 11 Jul 2024 09:16:16 -0700 Subject: [PATCH 17/50] Fix failing tests --- program.py | 24 +++++++++++++++++++ python/tests/backends/test_IQM.py | 11 ++++----- python/tests/backends/test_IonQ.py | 5 ++-- python/tests/backends/test_OQC.py | 6 ++--- .../test_Quantinuum_LocalEmulation_builder.py | 3 ++- .../test_Quantinuum_LocalEmulation_kernel.py | 3 ++- .../tests/backends/test_Quantinuum_builder.py | 3 ++- .../tests/backends/test_Quantinuum_kernel.py | 3 ++- 8 files changed, 43 insertions(+), 15 deletions(-) create mode 100644 program.py diff --git a/program.py b/program.py new file mode 100644 index 0000000000..0c5b92f20e --- /dev/null +++ b/program.py @@ 
-0,0 +1,24 @@ +# ============================================================================ # +# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # +# All rights reserved. # +# # +# This source code and the accompanying materials are made available under # +# the terms of the Apache License 2.0 which accompanies this distribution. # +# ============================================================================ # + +import cudaq +import numpy as np + +cudaq.set_target('iqm', url="http://localhost/cocos", **{"qpu-architecture": "Adonis"}) + +@cudaq.kernel +def kernel(vec: list[complex]): + qubits = cudaq.qvector(vec) + +state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] +counts = cudaq.sample(kernel, state) +print(counts) +assert '00' in counts +assert '10' in counts +assert not '01' in counts +assert not '11' in counts \ No newline at end of file diff --git a/python/tests/backends/test_IQM.py b/python/tests/backends/test_IQM.py index d91f8575e9..bf3746bce4 100644 --- a/python/tests/backends/test_IQM.py +++ b/python/tests/backends/test_IQM.py @@ -9,6 +9,7 @@ import os import tempfile import time +from typing import List from multiprocessing import Process import numpy as np @@ -162,27 +163,25 @@ def kernel(): def test_IQM_state_preparation(): @cudaq.kernel - def kernel(vec: list[complex]): + def kernel(vec: List[complex]): qubits = cudaq.qvector(vec) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] counts = cudaq.sample(kernel, state) + counts.dump() assert '00' in counts assert '10' in counts - assert not '01' in counts - assert not '11' in counts def test_IQM_state_preparation_builder(): - kernel, state = cudaq.make_kernel(list[complex]) + kernel, state = cudaq.make_kernel(List[complex]) qubits = kernel.qalloc(state) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
counts = cudaq.sample(kernel, state) + counts.dump() assert '00' in counts assert '10' in counts - assert not '01' in counts - assert not '11' in counts # leave for gdb debugging diff --git a/python/tests/backends/test_IonQ.py b/python/tests/backends/test_IonQ.py index 92a7ac8a5d..f468a1d9c8 100644 --- a/python/tests/backends/test_IonQ.py +++ b/python/tests/backends/test_IonQ.py @@ -9,6 +9,7 @@ import cudaq, pytest, os, time from cudaq import spin import numpy as np +from typing import List from multiprocessing import Process try: from utils.mock_qpu.ionq import startServer @@ -160,7 +161,7 @@ def kernel(): def test_ionq_state_preparation(): @cudaq.kernel - def kernel(vec: list[complex]): + def kernel(vec: List[complex]): qubits = cudaq.qvector(vec) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] @@ -172,7 +173,7 @@ def kernel(vec: list[complex]): def test_ionq_state_preparation_builder(): - kernel, state = cudaq.make_kernel(list[complex]) + kernel, state = cudaq.make_kernel(List[complex]) qubits = kernel.qalloc(state) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] diff --git a/python/tests/backends/test_OQC.py b/python/tests/backends/test_OQC.py index 0dc40e4bec..1ff86c535c 100644 --- a/python/tests/backends/test_OQC.py +++ b/python/tests/backends/test_OQC.py @@ -9,7 +9,7 @@ import os import sys import time - +from typing import List import pytest from multiprocessing import Process @@ -162,7 +162,7 @@ def test_OQC_observe(): def test_OQC_state_preparation(): @cudaq.kernel - def kernel(vec: list[complex]): + def kernel(vec: List[complex]): qubits = cudaq.qvector(vec) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] @@ -174,7 +174,7 @@ def kernel(vec: list[complex]): def test_OQC_state_preparation_builder(): - kernel, state = cudaq.make_kernel(list[complex]) + kernel, state = cudaq.make_kernel(List[complex]) qubits = kernel.qalloc(state) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
diff --git a/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py b/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py index 70a1d29aa9..58176b4e32 100644 --- a/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py +++ b/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py @@ -9,6 +9,7 @@ import cudaq, pytest, os, time from cudaq import spin import numpy as np +from typing import List from multiprocessing import Process @@ -113,7 +114,7 @@ def test_quantinuum_exp_pauli(): def test_quantinuum_state_preparation(): - kernel, state = cudaq.make_kernel(list[complex]) + kernel, state = cudaq.make_kernel(List[complex]) qubits = kernel.qalloc(state) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] diff --git a/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py b/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py index 5576f46597..8471c10286 100644 --- a/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py +++ b/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py @@ -9,6 +9,7 @@ import cudaq, pytest, os, time from cudaq import spin import numpy as np +from typing import List from multiprocessing import Process @@ -142,7 +143,7 @@ def kernel(): def test_quantinuum_state_preparation(): @cudaq.kernel - def kernel(vec: list[complex]): + def kernel(vec: List[complex]): qubits = cudaq.qvector(vec) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
diff --git a/python/tests/backends/test_Quantinuum_builder.py b/python/tests/backends/test_Quantinuum_builder.py index d20cb0d499..48d50b7419 100644 --- a/python/tests/backends/test_Quantinuum_builder.py +++ b/python/tests/backends/test_Quantinuum_builder.py @@ -8,6 +8,7 @@ import cudaq, pytest, os, time import numpy as np +from typing import List from cudaq import spin from multiprocessing import Process try: @@ -147,7 +148,7 @@ def test_quantinuum_observe(): def test_quantinuum_state_preparation(): - kernel, state = cudaq.make_kernel(list[complex]) + kernel, state = cudaq.make_kernel(List[complex]) qubits = kernel.qalloc(state) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] diff --git a/python/tests/backends/test_Quantinuum_kernel.py b/python/tests/backends/test_Quantinuum_kernel.py index fc11224f5e..646f9cc787 100644 --- a/python/tests/backends/test_Quantinuum_kernel.py +++ b/python/tests/backends/test_Quantinuum_kernel.py @@ -10,6 +10,7 @@ import numpy as np from cudaq import spin from multiprocessing import Process +from typing import List try: from utils.mock_qpu.quantinuum import startServer except: @@ -173,7 +174,7 @@ def kernel(): def test_quantinuum_state_preparation(): @cudaq.kernel - def kernel(vec: list[complex]): + def kernel(vec: List[complex]): qubits = cudaq.qvector(vec) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
From 77dbe449b2dfeb67aa0765b81b92be3d29d1fc7d Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 11 Jul 2024 13:43:34 -0700 Subject: [PATCH 18/50] Fix failing tests --- dictionary.dic | Bin 0 -> 9936 bytes lib/Optimizer/Builder/Intrinsics.cpp | 30 +++++++++--------- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 4 +-- python/tests/backends/test_IQM.py | 20 +++++++----- runtime/common/BaseRestRemoteClient.h | 4 +++ .../execution/state_preparation_vector.cpp | 3 +- 6 files changed, 34 insertions(+), 27 deletions(-) create mode 100644 dictionary.dic diff --git a/dictionary.dic b/dictionary.dic new file mode 100644 index 0000000000000000000000000000000000000000..d5b1930b6ba86d2e1409b65c5d6b65c39e30fa80 GIT binary patch literal 9936 zcmai)4UlA4Rmbm6_v^2j>G|yW+S$!wAc4h@K={x|h0Mp!&ZIx4x8K{{*;K^zbidi{ zOi%Z0zwXUWL?B=hh)N_tETw{=f&no^1PZ926kjT)FhLY4wTz*PhzdzzSph8zfB*Yl z?@mz5uA2S7cka38oO|xM=bn4tR)bx?**sGBx2nBn_lSM-J4ZV0JDNvc`jVGkXa1jG zsrtzO;CL9yk*h-T=LU?qEoICThmE=VfH8l;pMHWEe@Xu0{EhI}{~hsLX7^mX<#!Dx z^r;~pjb4wggffoijX{MuS_owwEgF;7d&!uLux!k@uwu-#&^2a8=ovEyE+Plh%p!8G z5F+P@5dJHL@IPM&{|mqsQW( zmkVilr4YS02kOzHq~sEwF^VvqJhcg!H>zh<)2a_;!ise^rRy zcL=fbogiTD7b54^gvj}IA##30h@1z6$a$X-dw)xa9ltF^zz2oc^B{Pd{vQ(3|HDG~ zKO%(xqeA!}1~=gQ1F-))%0q@f#cxhX|L0lEo0nKDnxC;)GOHF{^TGr0Vc($vVbQ!$ zSS*>>2$>+)V!=FXv1ks*L@%44v{*66EV{LY_Vc$;3@&%vsf^{B&6SG zg-q~QESAmpLELXfQ{YXD=1L(6;ChRmSrWofvsg6eEtbq(7F~0{ka_ul#YOWeA$p&% zST;`!OC__O7P{sSEqdlLA%1@~fskjR_!cYXZ5CbgHH)5k#^M=s)gXMt*9{h3b6iNA zowArWn-)tZ5HcTkSzI*lwwN~$S}d7AvRF2cTXfA67CrMliyJ08M7|*L{e*C(U|wah zXpRdp_@u>(xkE_Yy-SE4pR~AWz6dJMJZ^Et{JpSX%=ayN=0`%tJvS_L%~cjR%=JPR z_@YJ6tP7d{pA~MD%nn#0j_$GOnFobT%%_BfiusZdMSpG4HO~lH;PYAX&0@)1D zBqYI}5@Of3Mc3>JvFmNZ%A&d7V%|Jtv0(noV$tN;q{@}Dxm`#Cy5FK_J_uIu*C&L; z`Im&qe^N-Id|HT~a-%}eJWog-Sq5D;pt8lh@huk3=$PoPd4WaG+#_Uz`KFM7`?f{b zjF0nUpGDP+7oA#w2*ixu-ui>~>Ckd5eD7G1M3srLzY^TQpKIX%^~)SMYMi0ig5NQN10?J0P^_`FWrhYw*vV{sy|T1mhGIvF!sKvl z{lt=eDVpSHZ0$tR%p{KHiCE8MX9xYpS-*9`uXWoUGnWkMuF1{}?Q00kl%tk{$xdfN 
zdbQc}BRQ|7*AOR0MtPLA!Z@fmrR( z5jPuZ4;unAEqgWP*_lv%(5-e+FLNPZ)^F*7nM1#u_ZDT@^q_yyul2gs%_f%R7>CQ6 z%+98(y^D=zv)b7+htd$srP78p92)j7?o!;fx(#&H&0*VEaTg0}%+lELgP^h5Xg0cg z=Abl{D+>SY^iaR?f|@4cG|7NewN|5x{3~wP+aTFyr?9KmU>uEhD=?#C_DnM8oLpLR zmqU}hQmFMHCUyPRC8asW?RphbN_H$oQ^((_Hce)LO*lI`z~fYseV7#J8fPlK(A=pO z8m-<%Gm&6hmYVEXvbxi5oejEXHl{5)JDBua=X*YL>WszL+N~32DnUb$?KL}-?zL*| zo!uTTpHGEU(q49ExKFo(Fq3iGpiiemS&i0NGo6H1v4Jp?ZnSEcst~|!AzAE0m+0y- zE-CnDG8I`Am`UeoslY}BtWT9(by>`*L?nHpv&MUx$m`>o!Nf}>RSiW+Thu-9tu zb{jj5J7LShMgWXDy_P0l`p8n*sbsZPQ$W};x?X8RV>~q6Xc1I47?>zU0}-b-Oq|O; zr!g?&809J)Clb|GQ0?AnCh;N^&37s!5rp%Y=B-qyX|o3r(e*o9ey6?T^I+ZXmB^yj zjc7jGgsQ=aikw^q2Zm%6VmzfF#`jvp62qCrvtFg-%7t?ogm`bOaJF&p?AqOIR zg_B8$rl$$pqlnN6H8O`0$>lL55&z>>hI`UIrubl2cShqFVl#ol1K*|lv0=S0BGk#u zC2F9MkrO5yyn+K}la!eoIGN=&v=HpIYUeubR=XD{%W4&qm;n|fow5U*6A272uWUG; ztk!RD<9rR=)9RXlQr%Vd#bzkdor*|y0=x<}UpAL&5FScm%7)B|lJV4_C3YJZ{N|p? z#;ImqlN?YOR7zGO*vOqd*iCjHev};SDWNcq&VTR7}fdE|CLLD0Ma2X)?_M*Zy~`rp6=7?oXRPMH7(_n#zAAR zyr@k~jBu_w4S7UXME^Czpvx zh3Y1W4=VPLA_bg)>5SYxhQYggHVTFYIH6J)A75Q!yNu=8=*S@XPPM1~Nm(*tZPBemT7RYccPew})$8UDmOJ-t#M$OmBWgFZ#uX%Eh$7zV9stmVT4SbV}F&!e0pmaznb?Q-)PWhwKJl#OruT{0VQOrn-*JzT1`)E#4~ z@~-Tdr7{C?>jz3dlMW|j4Q?V`Xl!=EU3?^_<8pR1(Y#>G_F>*R3uNPk!YPh( zF&&=Sk(=%Ic5j!Y%{x})k|9C$N+Dmlsi>l4cBI)pt6lKijus2M4&9YP>E38<6D@SN z_1kDNBq*gQdPbaPI|%kjg9XmgY>|S&0oHqRBv$0OGU*iaE8-ka@38u}8miZa2B>nA zKP@xDUpRj$f^_3J_!iSmnS_Os99ES&-%54OBR^EUl_d+Gzvy zFLON-FYbW=dm0JJ*`dXc3?>t^G;D!Ipb zUt;D;6gec_(y6uOHJ!wCAAr`<@g*FlO9A%8Rxdn$%qF-UaFsu1hgP!4g%Qsup~D$q4)Bmn}m%)NVq0VxAGBMJMeX(3;5QYi1?@96}$ z64p;MgRJ?@zK=vF_o8kl!=b5vgqTf+WaxA7_vxIpQZ!H%E6(Oek#x3G-PvI;wW17xw*KyWegb7> zd1^9=iBz0*au_6=^rnYIyOi}t;@q~V?sCfO?i%-0u`=PLS9guwYb2)2muxQ430L2I zic6T4yshe{vl@SEt8um$)^TvGk z3U=?xWrDpOwtp&>8as_!B*>_pgC29%(YhOrb62OFkys*Oth-_=%Rq*@%iV~!YeoWz zj??KrEil(4-Ex_A=bGGj*6opqD9Jc3>KY_F!xTSQ_I$~yq_cu^yp;R$6p6qf83^=e(LXn7ES`@avh7Llase9%UF`zW&&Xh|9 zx*yEeNm5)lv|Gd>2jyJCBc7~<>8?*W*&8)_RQj1WOBt(B>F3?KBsZ-qHaE@=)f+*r 
z&COu60%)no2{RcrbbMRu;%Q4p{oLSXy0S{xpi#_WSkxoT{!@Hlc?gyw3>wYFP z3a}jR595g{Z9Gmqv5+QG^=h}e)okBkNjfHurclZ5ou=$j#NE;EY+K=DFDu#a>Qwd42Itp?NcIM5T=H1XB>!LnC;M1-8QNKC9 zO|p{0cr%`;G8@75;Seejm?Yfp?zw_c8|Y zTtM%=eCv5N-$C^LHS~X)=RAL(f&XsCA^IqCh5E&j@km79!2Z?97qjpW_vetY-Gc4E z!}kdF$?$$B_Wd!w(EC06#`X4S>{mwk=h*#C=4Eo9Fa9o)qrT7J*9Z3b`f$|tTE_JX z?EE@5j7PGr;{D=2U3W+QZorO@!*@+&kL31$Z$kgG=>N@VT({GwfGzXT1|q+GZ=XHt z_q+Jy31mGVU+Vqc(Y)&UF?_j$%&UKF{?+ymQHhrhJ+|Pn*QQP(K-ALSj9Dn%C$$#?xWMsGc+#C7zQD}$o&8>{@1MvCuZ(z?a z%D?oz9v_w>JvT%?Jb+xu_%OcsIJs05QJ;~>-=gXFSJ?dfFn%7rhrwKA z??K{E?MGw}<9#`H9OHQ#T}Pv_y`DKs&}S`@nS!2y_XpVY;z-Uv@6%U_V&JFg^KJTm zVxNzH7}+kJk1#h!cz+6cZ>0az_*!)Ru0X!bMAW{`cwfL=J&yd3Kz~&< zZ~w%*{{P1BMRu$I8yUxg8XNN8h_4&8eT=`#KA*jcw$I`BwJ2s@g3X`BU*%|Qe}gU8 zB4-WSSC|(clNBC2WcZfcE?l2$sMdNp{?Yr3a0Qzp`FOCfHEHX!5jd=8~-$(Jo zlg!2Yu;quydI#^XkLL3IJg;KA_})Ojry}1x#kk%H{m-H~d^)mUeAh-iS4TF!2cBdk z?=f`zAd)9u{r-`*GuZcO! zCjL>|&5=!d{?$ru0YkWq@{&cGwudHB8z?Uu;*qTP;- VAByJwAU5kik$nz7`}je>{|1;FG}! -static std::vector asI32(const std::vector &v) { - std::vector result(v.size()); - for (auto iter : llvm::enumerate(v)) - result[iter.index()] = static_cast(iter.value()); - return result; -} - template DenseElementsAttr createArrayAttr(const std::vector &values, Type eleTy) { auto newValues = ArrayRef(values.data(), values.size()); @@ -402,6 +394,17 @@ DenseElementsAttr createArrayAttr(const std::vector &values, Type eleTy) { return DenseElementsAttr::get(tensorTy, newValues); } +DenseElementsAttr createArrayAttr(const std::vector &values, Type eleTy) { + std::vector converted; + for (auto b : values) { + converted.push_back(std::byte(b)); + } + auto newValues = ArrayRef(reinterpret_cast(converted.data()), + converted.size()); + auto tensorTy = RankedTensorType::get(converted.size(), eleTy); + return DenseElementsAttr::get(tensorTy, newValues); +} + template cc::GlobalOp buildVectorOfConstantElements(Location loc, ModuleOp module, StringRef name, @@ -465,25 +468,22 @@ IRBuilder::genVectorOfConstants(Location 
loc, ModuleOp module, StringRef name, cc::GlobalOp IRBuilder::genVectorOfConstants(Location loc, ModuleOp module, StringRef name, const std::vector &values) { - auto converted = asI32(values); return buildVectorOfConstantElements(loc, module, name, values, *this, - getI32Type()); + getI16Type()); } cc::GlobalOp IRBuilder::genVectorOfConstants(Location loc, ModuleOp module, StringRef name, const std::vector &values) { - auto converted = asI32(values); return buildVectorOfConstantElements(loc, module, name, values, *this, - getI32Type()); + getI8Type()); } cc::GlobalOp IRBuilder::genVectorOfConstants(Location loc, ModuleOp module, StringRef name, const std::vector &values) { - auto converted = asI32(values); - return buildVectorOfConstantElements(loc, module, name, converted, *this, - getI32Type()); + return buildVectorOfConstantElements(loc, module, name, values, *this, + getI1Type()); } Value IRBuilder::getByteSizeOfType(Location loc, Type ty) { diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 58a5f4a3f9..28622fa598 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -251,9 +251,9 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, generateNewValue = true; } if (generateNewValue) { - auto [memArr, data] = getArrayInMemory(); + auto [memArr, _] = getArrayInMemory(); OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointAfter(data.getDefiningOp()); + builder.setInsertionPointAfter(memArr.getDefiningOp()); Value size = builder.create(argLoc, vec.size(), 64); Value newVec = builder.create(argLoc, strTy, memArr, size); diff --git a/python/tests/backends/test_IQM.py b/python/tests/backends/test_IQM.py index bf3746bce4..38e2b55363 100644 --- a/python/tests/backends/test_IQM.py +++ b/python/tests/backends/test_IQM.py @@ -161,27 +161,31 @@ def kernel(): def test_IQM_state_preparation(): + shots = 10000 
@cudaq.kernel def kernel(vec: List[complex]): qubits = cudaq.qvector(vec) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] - counts = cudaq.sample(kernel, state) - counts.dump() - assert '00' in counts - assert '10' in counts + counts = cudaq.sample(kernel, state, shots_count=shots) + assert assert_close(counts["00"], shots / 2, 2) + assert assert_close(counts["10"], shots / 2, 2) + assert assert_close(counts["01"], 0., 2) + assert assert_close(counts["11"], 0., 2) def test_IQM_state_preparation_builder(): + shots = 10000 kernel, state = cudaq.make_kernel(List[complex]) qubits = kernel.qalloc(state) state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] - counts = cudaq.sample(kernel, state) - counts.dump() - assert '00' in counts - assert '10' in counts + counts = cudaq.sample(kernel, state, shots_count=shots) + assert assert_close(counts["00"], shots / 2, 2) + assert assert_close(counts["10"], shots / 2, 2) + assert assert_close(counts["01"], 0., 2) + assert assert_close(counts["11"], 0., 2) # leave for gdb debugging diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index f9b7d0d736..9b2f51ffdb 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -21,6 +21,7 @@ #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/CodeGen/Pipelines.h" #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -159,6 +160,9 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { if (funcOp && (funcOp->hasAttr(cudaq::kernelAttrName) || funcOp.getName().startswith("__nvqpp__mlirgen__"))) moduleOp.push_back(funcOp.clone()); + // Add globals defined in the module. 
+ if (auto globalOp = dyn_cast(op)) + moduleOp.push_back(globalOp.clone()); } if (args) { diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index 994390cde3..35d2b68619 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -55,7 +55,6 @@ void printCounts(cudaq::sample_result& result) { } int main() { - { auto counts = cudaq::sample(test_complex_constant_array); printCounts(counts); @@ -157,4 +156,4 @@ int main() { // CHECK: 10 // CHECK: 01 -// CHECK: 11 \ No newline at end of file +// CHECK: 11 From ebaf6c31d7adf7b4f2d5b407f069abd4b34285de Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 11 Jul 2024 16:30:38 -0700 Subject: [PATCH 19/50] Add remote sim tests --- lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp | 6 +- runtime/common/BaseRestRemoteClient.h | 21 +- targettests/Remote-Sim/state_init.cpp | 95 +++++++++ targettests/Remote-Sim/state_init_vector.cpp | 201 +++++++++++++++++++ 4 files changed, 314 insertions(+), 9 deletions(-) create mode 100644 targettests/Remote-Sim/state_init.cpp create mode 100644 targettests/Remote-Sim/state_init_vector.cpp diff --git a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp index 8d4be784db..153f8d5b56 100644 --- a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp +++ b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp @@ -43,7 +43,9 @@ struct VerifyNVQIRCallOpsPass cudaq::opt::NVQIRInvokeRotationWithControlBits, cudaq::opt::NVQIRInvokeWithControlRegisterOrBits, cudaq::opt::NVQIRPackSingleQubitInArray, - cudaq::opt::NVQIRReleasePackedQubitArray}; + cudaq::opt::NVQIRReleasePackedQubitArray, + cudaq::getNumQubitsFromCudaqState, + }; // It must be either NVQIR extension functions or in the allowed list. 
return std::find(NVQIR_FUNCS.begin(), NVQIR_FUNCS.end(), functionName) != NVQIR_FUNCS.end() || @@ -71,7 +73,7 @@ struct VerifyNVQIRCallOpsPass passFailed = true; return WalkResult::interrupt(); } else if (!isa(op)) { // No pointers allowed except for the above operations. for (auto oper : op->getOperands()) { diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index 9b2f51ffdb..bde3a73011 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -155,19 +155,24 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { auto moduleOp = builder.create(); moduleOp->setAttrs((*module)->getAttrDictionary()); for (auto &op : *module) { - auto funcOp = dyn_cast(op); - // Add quantum kernels defined in the module. - if (funcOp && (funcOp->hasAttr(cudaq::kernelAttrName) || - funcOp.getName().startswith("__nvqpp__mlirgen__"))) - moduleOp.push_back(funcOp.clone()); - // Add globals defined in the module. - if (auto globalOp = dyn_cast(op)) + if (auto funcOp = dyn_cast(op)) { + // Add quantum kernels defined in the module. + if (funcOp->hasAttr(cudaq::kernelAttrName) || + funcOp.getName().startswith("__nvqpp__mlirgen__") || + funcOp.getBody().empty()) + moduleOp.push_back(funcOp.clone()); + } + if (auto globalOp = dyn_cast(op)) { + // Add globals defined in the module. moduleOp.push_back(globalOp.clone()); + } } if (args) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&mlirContext); + moduleOp.getContext()->disableMultithreading(); + pm.enableIRPrinting(); pm.addPass(cudaq::opt::createQuakeSynthesizer(name, args)); pm.addPass(mlir::createCanonicalizerPass()); if (failed(pm.run(moduleOp))) @@ -180,6 +185,8 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { // Run client-side passes. `clientPasses` is empty right now, but the code // below accommodates putting passes into it. 
mlir::PassManager pm(&mlirContext); + moduleOp.getContext()->disableMultithreading(); + pm.enableIRPrinting(); std::string errMsg; llvm::raw_string_ostream os(errMsg); const std::string pipeline = diff --git a/targettests/Remote-Sim/state_init.cpp b/targettests/Remote-Sim/state_init.cpp new file mode 100644 index 0000000000..6677b4746c --- /dev/null +++ b/targettests/Remote-Sim/state_init.cpp @@ -0,0 +1,95 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// REQUIRES: remote-sim + +// clang-format off +// RUN: nvq++ %cpp_std --enable-mlir --target remote-mqpu %s -o %t && %t +// RUN: nvq++ %cpp_std --target remote-mqpu %s -o %t && %t // TODO: this fails to compile, do we need it? 
+// clang-format on + +#include +#include + + +__qpu__ void test_complex_array_param(cudaq::state* inState) { + cudaq::qvector q1(inState); +} + +void printCounts(cudaq::sample_result& result) { + std::vector values{}; + for (auto &&[bits, counts] : result) { + values.push_back(bits); + } + + std::sort(values.begin(), values.end()); + for (auto &&bits : values) { + std::cout << bits << '\n'; + } +} + +int main() { + { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + auto state = cudaq::state::from_data(vec); + auto state1 = cudaq::state::from_data(vec1); + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_complex_array_param, &state); + printCounts(counts); + + counts = cudaq::sample(test_complex_array_param, &state1); + printCounts(counts); + } + + // { + // // Passing state data as argument (builder mode) + // auto [kernel, state] = cudaq::make_kernel(); + // auto qubits = kernel.qalloc(state); + + // auto counts = cudaq::sample(kernel, &state); + // printCounts(counts); + + // counts = cudaq::sample(kernel, &state1); + // printCounts(counts); + // } + } +} + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 0001 +// CHECK: 0011 +// CHECK: 1001 +// CHECK: 1011 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 01 +// CHECK: 11 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 01 +// CHECK: 11 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 01 +// CHECK: 11 diff --git a/targettests/Remote-Sim/state_init_vector.cpp b/targettests/Remote-Sim/state_init_vector.cpp new file mode 100644 index 0000000000..7e93b63dae --- /dev/null +++ b/targettests/Remote-Sim/state_init_vector.cpp @@ -0,0 +1,201 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. 
* + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// REQUIRES: remote-sim + +// clang-format off +// RUN: nvq++ %cpp_std --enable-mlir --target remote-mqpu %s -o %t && %t +// RUN: nvq++ %cpp_std --target remote-mqpu %s -o %t && %t // TODO: this fails to compile, do we need it? +// clang-format on + +#include +#include + + + +__qpu__ void test_complex_constant_array() { + cudaq::qvector v(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); +} + +__qpu__ void test_complex_constant_array2() { + cudaq::qvector v1(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); + cudaq::qvector v2(std::vector({ 0., 0., M_SQRT1_2, M_SQRT1_2})); +} + +__qpu__ void test_complex_constant_array3() { + cudaq::qvector v({ + cudaq::complex(M_SQRT1_2), + cudaq::complex(M_SQRT1_2), + cudaq::complex(0.0), + cudaq::complex(0.0) + }); +} + +__qpu__ void test_complex_array_param(std::vector inState) { + cudaq::qvector q1 = inState; +} + +__qpu__ void test_real_constant_array() { + cudaq::qvector v({ M_SQRT1_2, M_SQRT1_2, 0., 0.}); +} + +__qpu__ void test_real_array_param(std::vector inState) { + cudaq::qvector q1 = inState; +} + +__qpu__ void test_double_array_param(std::vector inState) { + cudaq::qvector q = inState; +} + +__qpu__ void test_float_array_param(std::vector inState) { + cudaq::qvector q = inState; +} + +void printCounts(cudaq::sample_result& result) { + std::vector values{}; + for (auto &&[bits, counts] : result) { + values.push_back(bits); + } + + std::sort(values.begin(), values.end()); + for (auto &&bits : values) { + std::cout << bits << '\n'; + } +} + +int main() { + // { + // auto counts = cudaq::sample(test_complex_constant_array); + // printCounts(counts); + // } + + // { + // auto counts = cudaq::sample(test_complex_constant_array2); + // printCounts(counts); + // } + + // { + // auto 
counts = cudaq::sample(test_complex_constant_array3); + // printCounts(counts); + // } + + // { + // auto counts = cudaq::sample(test_real_constant_array); + // printCounts(counts); + // } + + // { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_complex_array_param, vec); + printCounts(counts); + + counts = cudaq::sample(test_complex_array_param, vec1); + printCounts(counts); + } + + // { + // // Passing state data as argument (builder mode) + // auto [kernel, v] = cudaq::make_kernel>(); + // auto qubits = kernel.qalloc(v); + + // auto counts = cudaq::sample(kernel, vec); + // printCounts(counts); + + // counts = cudaq::sample(kernel, vec1); + // printCounts(counts); + // } + // } + + // { + // std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + // std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + // { + // // Passing state data as argument (kernel mode) + // auto counts = cudaq::sample(test_real_array_param, vec); + // printCounts(counts); + + // counts = cudaq::sample(test_real_array_param, vec1); + // printCounts(counts); + // } + + // { + // // Passing state data as argument (builder mode) + // auto [kernel, v] = cudaq::make_kernel>(); + // auto qubits = kernel.qalloc(v); + + // auto counts = cudaq::sample(kernel, vec); + // printCounts(counts); + + // counts = cudaq::sample(kernel, vec1); + // printCounts(counts); + // } + // } + + // Error message: "Invalid user-provided state data. Simulator is FP64 but state data is FP32." 
+ // { + // std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + // std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + // { + // // Passing state data as argument (kernel mode) + // auto counts = cudaq::sample(test_double_array_param, vec); + // printCounts(counts); + + // counts = cudaq::sample(test_double_array_param, vec1); + // printCounts(counts); + // } + // } + + // UCX ERROR Failed to allocate memory pool (name=mm_recv_desc) chunk: Out of memory + // { + // std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + // std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + // { + // // Passing state data as argument (kernel mode) + // auto counts = cudaq::sample(test_float_array_param, vec); + // printCounts(counts); + + // counts = cudaq::sample(test_float_array_param, vec1); + // printCounts(counts); + // } + // } +} + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 0001 +// CHECK: 0011 +// CHECK: 1001 +// CHECK: 1011 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 01 +// CHECK: 11 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 01 +// CHECK: 11 + +// CHECK: 00 +// CHECK: 10 + +// CHECK: 01 +// CHECK: 11 From 794f564d1ab5e9c98b2728008dffc51f90a7da1f Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 11 Jul 2024 16:32:25 -0700 Subject: [PATCH 20/50] Remove files added by mistake --- dictionary.dic | Bin 9936 -> 0 bytes program.py | 24 ------------------------ 2 files changed, 24 deletions(-) delete mode 100644 dictionary.dic delete mode 100644 program.py diff --git a/dictionary.dic b/dictionary.dic deleted file mode 100644 index d5b1930b6ba86d2e1409b65c5d6b65c39e30fa80..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9936 zcmai)4UlA4Rmbm6_v^2j>G|yW+S$!wAc4h@K={x|h0Mp!&ZIx4x8K{{*;K^zbidi{ zOi%Z0zwXUWL?B=hh)N_tETw{=f&no^1PZ926kjT)FhLY4wTz*PhzdzzSph8zfB*Yl z?@mz5uA2S7cka38oO|xM=bn4tR)bx?**sGBx2nBn_lSM-J4ZV0JDNvc`jVGkXa1jG zsrtzO;CL9yk*h-T=LU?qEoICThmE=VfH8l;pMHWEe@Xu0{EhI}{~hsLX7^mX<#!Dx 
z^r;~pjb4wggffoijX{MuS_owwEgF;7d&!uLux!k@uwu-#&^2a8=ovEyE+Plh%p!8G z5F+P@5dJHL@IPM&{|mqsQW( zmkVilr4YS02kOzHq~sEwF^VvqJhcg!H>zh<)2a_;!ise^rRy zcL=fbogiTD7b54^gvj}IA##30h@1z6$a$X-dw)xa9ltF^zz2oc^B{Pd{vQ(3|HDG~ zKO%(xqeA!}1~=gQ1F-))%0q@f#cxhX|L0lEo0nKDnxC;)GOHF{^TGr0Vc($vVbQ!$ zSS*>>2$>+)V!=FXv1ks*L@%44v{*66EV{LY_Vc$;3@&%vsf^{B&6SG zg-q~QESAmpLELXfQ{YXD=1L(6;ChRmSrWofvsg6eEtbq(7F~0{ka_ul#YOWeA$p&% zST;`!OC__O7P{sSEqdlLA%1@~fskjR_!cYXZ5CbgHH)5k#^M=s)gXMt*9{h3b6iNA zowArWn-)tZ5HcTkSzI*lwwN~$S}d7AvRF2cTXfA67CrMliyJ08M7|*L{e*C(U|wah zXpRdp_@u>(xkE_Yy-SE4pR~AWz6dJMJZ^Et{JpSX%=ayN=0`%tJvS_L%~cjR%=JPR z_@YJ6tP7d{pA~MD%nn#0j_$GOnFobT%%_BfiusZdMSpG4HO~lH;PYAX&0@)1D zBqYI}5@Of3Mc3>JvFmNZ%A&d7V%|Jtv0(noV$tN;q{@}Dxm`#Cy5FK_J_uIu*C&L; z`Im&qe^N-Id|HT~a-%}eJWog-Sq5D;pt8lh@huk3=$PoPd4WaG+#_Uz`KFM7`?f{b zjF0nUpGDP+7oA#w2*ixu-ui>~>Ckd5eD7G1M3srLzY^TQpKIX%^~)SMYMi0ig5NQN10?J0P^_`FWrhYw*vV{sy|T1mhGIvF!sKvl z{lt=eDVpSHZ0$tR%p{KHiCE8MX9xYpS-*9`uXWoUGnWkMuF1{}?Q00kl%tk{$xdfN zdbQc}BRQ|7*AOR0MtPLA!Z@fmrR( z5jPuZ4;unAEqgWP*_lv%(5-e+FLNPZ)^F*7nM1#u_ZDT@^q_yyul2gs%_f%R7>CQ6 z%+98(y^D=zv)b7+htd$srP78p92)j7?o!;fx(#&H&0*VEaTg0}%+lELgP^h5Xg0cg z=Abl{D+>SY^iaR?f|@4cG|7NewN|5x{3~wP+aTFyr?9KmU>uEhD=?#C_DnM8oLpLR zmqU}hQmFMHCUyPRC8asW?RphbN_H$oQ^((_Hce)LO*lI`z~fYseV7#J8fPlK(A=pO z8m-<%Gm&6hmYVEXvbxi5oejEXHl{5)JDBua=X*YL>WszL+N~32DnUb$?KL}-?zL*| zo!uTTpHGEU(q49ExKFo(Fq3iGpiiemS&i0NGo6H1v4Jp?ZnSEcst~|!AzAE0m+0y- zE-CnDG8I`Am`UeoslY}BtWT9(by>`*L?nHpv&MUx$m`>o!Nf}>RSiW+Thu-9tu zb{jj5J7LShMgWXDy_P0l`p8n*sbsZPQ$W};x?X8RV>~q6Xc1I47?>zU0}-b-Oq|O; zr!g?&809J)Clb|GQ0?AnCh;N^&37s!5rp%Y=B-qyX|o3r(e*o9ey6?T^I+ZXmB^yj zjc7jGgsQ=aikw^q2Zm%6VmzfF#`jvp62qCrvtFg-%7t?ogm`bOaJF&p?AqOIR zg_B8$rl$$pqlnN6H8O`0$>lL55&z>>hI`UIrubl2cShqFVl#ol1K*|lv0=S0BGk#u zC2F9MkrO5yyn+K}la!eoIGN=&v=HpIYUeubR=XD{%W4&qm;n|fow5U*6A272uWUG; ztk!RD<9rR=)9RXlQr%Vd#bzkdor*|y0=x<}UpAL&5FScm%7)B|lJV4_C3YJZ{N|p? 
z#;ImqlN?YOR7zGO*vOqd*iCjHev};SDWNcq&VTR7}fdE|CLLD0Ma2X)?_M*Zy~`rp6=7?oXRPMH7(_n#zAAR zyr@k~jBu_w4S7UXME^Czpvx zh3Y1W4=VPLA_bg)>5SYxhQYggHVTFYIH6J)A75Q!yNu=8=*S@XPPM1~Nm(*tZPBemT7RYccPew})$8UDmOJ-t#M$OmBWgFZ#uX%Eh$7zV9stmVT4SbV}F&!e0pmaznb?Q-)PWhwKJl#OruT{0VQOrn-*JzT1`)E#4~ z@~-Tdr7{C?>jz3dlMW|j4Q?V`Xl!=EU3?^_<8pR1(Y#>G_F>*R3uNPk!YPh( zF&&=Sk(=%Ic5j!Y%{x})k|9C$N+Dmlsi>l4cBI)pt6lKijus2M4&9YP>E38<6D@SN z_1kDNBq*gQdPbaPI|%kjg9XmgY>|S&0oHqRBv$0OGU*iaE8-ka@38u}8miZa2B>nA zKP@xDUpRj$f^_3J_!iSmnS_Os99ES&-%54OBR^EUl_d+Gzvy zFLON-FYbW=dm0JJ*`dXc3?>t^G;D!Ipb zUt;D;6gec_(y6uOHJ!wCAAr`<@g*FlO9A%8Rxdn$%qF-UaFsu1hgP!4g%Qsup~D$q4)Bmn}m%)NVq0VxAGBMJMeX(3;5QYi1?@96}$ z64p;MgRJ?@zK=vF_o8kl!=b5vgqTf+WaxA7_vxIpQZ!H%E6(Oek#x3G-PvI;wW17xw*KyWegb7> zd1^9=iBz0*au_6=^rnYIyOi}t;@q~V?sCfO?i%-0u`=PLS9guwYb2)2muxQ430L2I zic6T4yshe{vl@SEt8um$)^TvGk z3U=?xWrDpOwtp&>8as_!B*>_pgC29%(YhOrb62OFkys*Oth-_=%Rq*@%iV~!YeoWz zj??KrEil(4-Ex_A=bGGj*6opqD9Jc3>KY_F!xTSQ_I$~yq_cu^yp;R$6p6qf83^=e(LXn7ES`@avh7Llase9%UF`zW&&Xh|9 zx*yEeNm5)lv|Gd>2jyJCBc7~<>8?*W*&8)_RQj1WOBt(B>F3?KBsZ-qHaE@=)f+*r z&COu60%)no2{RcrbbMRu;%Q4p{oLSXy0S{xpi#_WSkxoT{!@Hlc?gyw3>wYFP z3a}jR595g{Z9Gmqv5+QG^=h}e)okBkNjfHurclZ5ou=$j#NE;EY+K=DFDu#a>Qwd42Itp?NcIM5T=H1XB>!LnC;M1-8QNKC9 zO|p{0cr%`;G8@75;Seejm?Yfp?zw_c8|Y zTtM%=eCv5N-$C^LHS~X)=RAL(f&XsCA^IqCh5E&j@km79!2Z?97qjpW_vetY-Gc4E z!}kdF$?$$B_Wd!w(EC06#`X4S>{mwk=h*#C=4Eo9Fa9o)qrT7J*9Z3b`f$|tTE_JX z?EE@5j7PGr;{D=2U3W+QZorO@!*@+&kL31$Z$kgG=>N@VT({GwfGzXT1|q+GZ=XHt z_q+Jy31mGVU+Vqc(Y)&UF?_j$%&UKF{?+ymQHhrhJ+|Pn*QQP(K-ALSj9Dn%C$$#?xWMsGc+#C7zQD}$o&8>{@1MvCuZ(z?a z%D?oz9v_w>JvT%?Jb+xu_%OcsIJs05QJ;~>-=gXFSJ?dfFn%7rhrwKA z??K{E?MGw}<9#`H9OHQ#T}Pv_y`DKs&}S`@nS!2y_XpVY;z-Uv@6%U_V&JFg^KJTm zVxNzH7}+kJk1#h!cz+6cZ>0az_*!)Ru0X!bMAW{`cwfL=J&yd3Kz~&< zZ~w%*{{P1BMRu$I8yUxg8XNN8h_4&8eT=`#KA*jcw$I`BwJ2s@g3X`BU*%|Qe}gU8 zB4-WSSC|(clNBC2WcZfcE?l2$sMdNp{?Yr3a0Qzp`FOCfHEHX!5jd=8~-$(Jo zlg!2Yu;quydI#^XkLL3IJg;KA_})Ojry}1x#kk%H{m-H~d^)mUeAh-iS4TF!2cBdk z?=f`zAd)9u{r-`*GuZcO! 
zCjL>|&5=!d{?$ru0YkWq@{&cGwudHB8z?Uu;*qTP;- VAByJwAU5kik$nz7`}je>{|1;FG}! Date: Fri, 12 Jul 2024 09:19:57 -0700 Subject: [PATCH 21/50] Update lib/Optimizer/Transforms/LiftArrayAlloc.cpp Co-authored-by: Ben Howe <141149032+bmhowe23@users.noreply.github.com> --- lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index d541edcacb..53150aadcb 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -96,9 +96,8 @@ class AllocaPattern : public OpRewritePattern { PatternRewriter &rewriter) const override { SmallVector stores; bool toGlobal = false; - if (!isGoodCandidate(alloc, stores, dom, toGlobal)) { + if (!isGoodCandidate(alloc, stores, dom, toGlobal)) return failure(); - } LLVM_DEBUG(llvm::dbgs() << "Candidate was found\n"); auto arrTy = cast(alloc.getType().getElementType()); From bdf119e95da6f06f65913645f54aec937bb2b1b0 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 12 Jul 2024 09:22:12 -0700 Subject: [PATCH 22/50] Update runtime/common/BaseRemoteRESTQPU.h Co-authored-by: Ben Howe <141149032+bmhowe23@users.noreply.github.com> --- runtime/common/BaseRemoteRESTQPU.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index c6c88c4d0c..381c94312e 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -382,11 +382,9 @@ class BaseRemoteRESTQPU : public cudaq::QPU { moduleOp.push_back(func.clone()); moduleOp->setAttrs(m_module->getAttrDictionary()); - for (auto &op : m_module.getOps()) { - if (auto globalOp = dyn_cast(op)) { + for (auto &op : m_module.getOps()) + if (auto globalOp = dyn_cast(op)) moduleOp.push_back(globalOp.clone()); - } - } // Lambda to apply a specific pipeline to the given ModuleOp auto runPassPipeline = [&](const 
std::string &pipeline, From 2c88a78544316e9a60a3c4f31155ea9a0d977476 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 12 Jul 2024 09:36:52 -0700 Subject: [PATCH 23/50] Update lib/Optimizer/Transforms/LiftArrayAlloc.cpp Co-authored-by: Ben Howe <141149032+bmhowe23@users.noreply.github.com> --- lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index 53150aadcb..20d9ae04dc 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -165,9 +165,8 @@ class AllocaPattern : public OpRewritePattern { ArrayRef{offset}); continue; } - if (isa(useuser)) { + if (isa(useuser)) toErase.push_back(useuser); - } isLive = true; } if (!isLive) From 2d9e957540aecf58e4552e62ccf4c78f87d49a52 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 12 Jul 2024 16:03:42 -0700 Subject: [PATCH 24/50] Addressed CR comments, added tests --- include/cudaq/Optimizer/Transforms/Passes.h | 2 - include/cudaq/Optimizer/Transforms/Passes.td | 12 +- lib/Optimizer/Transforms/CMakeLists.txt | 3 +- lib/Optimizer/Transforms/ConstPropComplex.cpp | 210 ++++++++++++++++++ lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 134 +---------- lib/Optimizer/Transforms/StatePreparation.cpp | 31 ++- .../cudaq/platform/py_alt_launch_kernel.cpp | 5 +- .../default/rest/helpers/ionq/ionq.config | 2 +- .../default/rest/helpers/iqm/iqm.config | 2 +- .../default/rest/helpers/oqc/oqc.config | 2 +- .../rest/helpers/quantinuum/quantinuum.config | 2 +- test/Quake/const_prop_complex.qke | 133 +++++++++++ test/Quake/lift_array.qke | 98 ++++---- 13 files changed, 433 insertions(+), 203 deletions(-) create mode 100644 lib/Optimizer/Transforms/ConstPropComplex.cpp create mode 100644 test/Quake/const_prop_complex.qke diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 
57b79cdec2..996b6e56a7 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -36,12 +36,10 @@ createApplyOpSpecializationPass(bool computeActionOpt); std::unique_ptr createDelayMeasurementsPass(); std::unique_ptr createExpandMeasurementsPass(); std::unique_ptr createLambdaLiftingPass(); -std::unique_ptr createLiftArrayAllocPass(); std::unique_ptr createLowerToCFGPass(); std::unique_ptr createObserveAnsatzPass(std::vector &); std::unique_ptr createQuakeAddMetadata(); std::unique_ptr createQuakeAddDeallocs(); -std::unique_ptr createStatePreparation(); std::unique_ptr createQuakeSynthesizer(); std::unique_ptr createQuakeSynthesizer(std::string_view, void *); std::unique_ptr createRaiseToAffinePass(); diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 1a2675d482..2f6a951551 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -118,6 +118,14 @@ def CombineQuantumAllocations : let dependentDialects = ["cudaq::cc::CCDialect", "quake::QuakeDialect"]; } +def ConstPropComplex : Pass<"const-prop-complex", "mlir::ModuleOp"> { + let summary = "Create and propagate complex constants."; + let description = [{ + Rewrite the complex.CreateOp to complex.ConstantOp when possible. + Replace array pointer casts with element pointer casts. 
+ }]; +} + def ConvertToDirectCalls : Pass<"indirect-to-direct-calls", "mlir::func::FuncOp"> { let summary = "Convert calls to direct calls to Quake routines."; @@ -532,7 +540,7 @@ def ObserveAnsatz : Pass<"observe-ansatz", "mlir::func::FuncOp"> { ]; } -def PrepareState : Pass<"state-prep", "mlir::ModuleOp"> { +def StatePreparation : Pass<"state-prep", "mlir::ModuleOp"> { let summary = "Convert state vector data into gates"; let description = [{ @@ -574,8 +582,6 @@ def PrepareState : Pass<"state-prep", "mlir::ModuleOp"> { } ``` }]; - - let constructor = "cudaq::opt::createStatePreparation()"; } def PromoteRefToVeqAlloc : Pass<"promote-qubit-allocation"> { diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index ac79e8dd03..78157ee94b 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -17,6 +17,7 @@ add_cudaq_library(OptTransforms ApplyOpSpecialization.cpp BasisConversion.cpp CombineQuantumAlloc.cpp + ConstPropComplex.cpp Decomposition.cpp DecompositionPatterns.cpp DelayMeasurements.cpp @@ -40,13 +41,13 @@ add_cudaq_library(OptTransforms MultiControlDecomposition.cpp ObserveAnsatz.cpp PruneCtrlRelations.cpp + PySynthCallableBlockArgs.cpp QuakeAddMetadata.cpp QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp StateDecomposer.cpp StatePreparation.cpp - PySynthCallableBlockArgs.cpp DEPENDS OptTransformsPassIncGen diff --git a/lib/Optimizer/Transforms/ConstPropComplex.cpp b/lib/Optimizer/Transforms/ConstPropComplex.cpp new file mode 100644 index 0000000000..aa06f044e5 --- /dev/null +++ b/lib/Optimizer/Transforms/ConstPropComplex.cpp @@ -0,0 +1,210 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. 
* + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Transforms/Passes.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/Dominance.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" + +namespace cudaq::opt { +#define GEN_PASS_DEF_CONSTPROPCOMPLEX +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + +#define DEBUG_TYPE "const-prop-complex" + +using namespace mlir; + +namespace { + +// Replace array ptr casts that throw away the size by a cast to element +// pointer. 
+// +//%1 = cc.cast %0 : (!cc.ptr x 4>>) -> +//!cc.ptr x ?>> +// -> +//%1 = cc.cast %0 : (!cc.ptr x 4>>) -> +//!cc.ptr> +class CastArrayPtrPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(cudaq::cc::CastOp cast, + PatternRewriter &rewriter) const override { + + auto fromTy = cast.getOperand().getType(); + auto toTy = cast.getType(); + + if (auto ptrFromTy = dyn_cast(fromTy)) { + if (auto arrayFromTy = + dyn_cast(ptrFromTy.getElementType())) { + if (auto ptrToTy = dyn_cast(toTy)) { + if (auto arrayToTy = + dyn_cast(ptrToTy.getElementType())) { + if (arrayFromTy.getElementType() == arrayToTy.getElementType()) { + auto eleTy = arrayFromTy.getElementType(); + auto elePtrType = cudaq::cc::PointerType::get(eleTy); + rewriter.replaceOpWithNewOp(cast, elePtrType, + cast.getOperand()); + return success(); + } + } + } + } + } + return failure(); + } +}; + +// Fold complex.create ops if the arguments are constants. +class ComplexCreatePattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(complex::CreateOp create, + PatternRewriter &rewriter) const override { + auto re = create.getReal(); + auto im = create.getImaginary(); + auto reCon = re.getDefiningOp(); + auto imCon = im.getDefiningOp(); + if (reCon && imCon) { + auto aa = ArrayAttr::get( + rewriter.getContext(), + ArrayRef{reCon.getValue(), imCon.getValue()}); + rewriter.replaceOpWithNewOp(create, create.getType(), + aa); + return success(); + } + return failure(); + } +}; + +// Fold arith.trunc ops if the argument is constant. 
+class FloatTruncatePattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(arith::TruncFOp truncate, + PatternRewriter &rewriter) const override { + auto val = truncate.getOperand(); + auto valCon = val.getDefiningOp(); + if (valCon) { + auto v = valCon.value().convertToDouble(); + auto fTy = dyn_cast(truncate.getType()); + rewriter.replaceOpWithNewOp( + truncate, APFloat{static_cast(v)}, fTy); + return success(); + } + return failure(); + } +}; + +// Fold arith.ext ops if the argument is constant. +class FloatExtendPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(arith::ExtFOp extend, + PatternRewriter &rewriter) const override { + auto val = extend.getOperand(); + auto valCon = val.getDefiningOp(); + if (valCon) { + auto v = valCon.value().convertToFloat(); + auto fTy = dyn_cast(extend.getType()); + rewriter.replaceOpWithNewOp( + extend, APFloat{static_cast(v)}, fTy); + return success(); + } + return failure(); + } +}; + +// Fold complex.re ops if the argument is constant. +class ComplexRePattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(complex::ReOp re, + PatternRewriter &rewriter) const override { + auto val = re.getOperand(); + auto valCon = val.getDefiningOp(); + if (valCon) { + auto attr = valCon.getValue(); + auto real = cast(attr[0]).getValue(); + auto fTy = dyn_cast(re.getType()); + auto v = fTy.isF64() ? real.convertToDouble() : real.convertToFloat(); + rewriter.replaceOpWithNewOp(re, APFloat{v}, fTy); + return success(); + } + return failure(); + } +}; + +// Fold complex.im ops if the argument is constant. 
+class ComplexImPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(complex::ImOp im, + PatternRewriter &rewriter) const override { + auto val = im.getOperand(); + auto valCon = val.getDefiningOp(); + if (valCon) { + auto attr = valCon.getValue(); + auto imag = cast(attr[1]).getValue(); + auto fTy = dyn_cast(im.getType()); + auto v = fTy.isF64() ? imag.convertToDouble() : imag.convertToFloat(); + rewriter.replaceOpWithNewOp(im, APFloat{v}, fTy); + return success(); + } + return failure(); + } +}; + +class ConstPropComplexPass + : public cudaq::opt::impl::ConstPropComplexBase { +public: + using ConstPropComplexBase::ConstPropComplexBase; + + void runOnOperation() override { + auto *ctx = &getContext(); + auto module = getOperation(); + for (Operation &op : *module.getBody()) { + auto func = dyn_cast(op); + if (!func) + continue; + DominanceInfo domInfo(func); + std::string funcName = func.getName().str(); + RewritePatternSet patterns(ctx); + patterns.insert(ctx); + patterns.insert(ctx); + patterns.insert(ctx); + patterns.insert(ctx); + patterns.insert(ctx); + patterns.insert(ctx); + + LLVM_DEBUG(llvm::dbgs() + << "Before lifting constant array: " << func << '\n'); + + if (failed(applyPatternsAndFoldGreedily(func.getOperation(), + std::move(patterns)))) + signalPassFailure(); + + LLVM_DEBUG(llvm::dbgs() + << "After lifting constant array: " << func << '\n'); + } + } +}; +} // namespace diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index 20d9ae04dc..1554acec06 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -100,7 +100,7 @@ class AllocaPattern : public OpRewritePattern { return failure(); LLVM_DEBUG(llvm::dbgs() << "Candidate was found\n"); - auto arrTy = cast(alloc.getType().getElementType()); + auto arrTy = cast(alloc.getElementType()); SmallVector values; // Every element of 
`stores` must be a cc::StoreOp with a ConstantOp as the @@ -217,8 +217,8 @@ class AllocaPattern : public OpRewritePattern { if (std::distance(alloc->getUses().begin(), alloc->getUses().end()) < size) return false; - // Keep a scoreboard for every element in the array. Every element *must* - // be stored to with a constant exactly one time. + // Keep a scoreboard for every element in the array. Every element *must* + // be stored to with a constant exactly one time. scoreboard.resize(size); for (int i = 0; i < size; i++) scoreboard[i] = nullptr; @@ -284,19 +284,11 @@ class AllocaPattern : public OpRewritePattern { scoreboard[0] = w; continue; } - // can be a cast only used for a quake.init_state or vector init - continue; - } else { - if (getWriteOp(cast, 0)) { - LLVM_DEBUG(llvm::dbgs() - << "another cast used in store: " << *op << '\n'); - return false; - } - // can be a cast only used for a quake.init_state or vector init - toGlobal = true; - continue; + return false; } LLVM_DEBUG(llvm::dbgs() << "unexpected cast: " << *op << '\n'); + toGlobalUses.push_back(op); + toGlobal = true; continue; } LLVM_DEBUG(llvm::dbgs() << "unexpected use: " << *op << '\n'); @@ -341,111 +333,6 @@ class AllocaPattern : public OpRewritePattern { mutable ModuleOp module; }; -// Fold complex.create ops if the arguments are constants. -class ComplexCreatePattern : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(complex::CreateOp create, - PatternRewriter &rewriter) const override { - auto re = create.getReal(); - auto im = create.getImaginary(); - auto reCon = re.getDefiningOp(); - auto imCon = im.getDefiningOp(); - if (reCon && imCon) { - auto aa = ArrayAttr::get( - rewriter.getContext(), - ArrayRef{reCon.getValue(), imCon.getValue()}); - rewriter.replaceOpWithNewOp(create, create.getType(), - aa); - return success(); - } - return failure(); - } -}; - -// Fold arith.trunc ops if the argument is constant. 
-class FloatTruncatePattern : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(arith::TruncFOp truncate, - PatternRewriter &rewriter) const override { - auto val = truncate.getOperand(); - auto valCon = val.getDefiningOp(); - if (valCon) { - auto v = valCon.value().convertToDouble(); - auto fTy = dyn_cast(truncate.getType()); - rewriter.replaceOpWithNewOp( - truncate, APFloat{static_cast(v)}, fTy); - return success(); - } - return failure(); - } -}; - -// Fold arith.ext ops if the argument is constant. -class FloatExtendPattern : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(arith::ExtFOp extend, - PatternRewriter &rewriter) const override { - auto val = extend.getOperand(); - auto valCon = val.getDefiningOp(); - if (valCon) { - auto v = valCon.value().convertToFloat(); - auto fTy = dyn_cast(extend.getType()); - rewriter.replaceOpWithNewOp( - extend, APFloat{static_cast(v)}, fTy); - return success(); - } - return failure(); - } -}; - -// Fold complex.re ops if the argument is constant. -class ComplexRePattern : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(complex::ReOp re, - PatternRewriter &rewriter) const override { - auto val = re.getOperand(); - auto valCon = val.getDefiningOp(); - if (valCon) { - auto attr = valCon.getValue(); - auto real = cast(attr[0]).getValue(); - auto fTy = dyn_cast(re.getType()); - auto v = fTy.isF64() ? real.convertToDouble() : real.convertToFloat(); - rewriter.replaceOpWithNewOp(re, APFloat{v}, fTy); - return success(); - } - return failure(); - } -}; - -// Fold complex.im ops if the argument is constant. 
-class ComplexImPattern : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(complex::ImOp im, - PatternRewriter &rewriter) const override { - auto val = im.getOperand(); - auto valCon = val.getDefiningOp(); - if (valCon) { - auto attr = valCon.getValue(); - auto real = cast(attr[0]).getValue(); - auto fTy = dyn_cast(im.getType()); - auto v = fTy.isF64() ? real.convertToDouble() : real.convertToFloat(); - rewriter.replaceOpWithNewOp(im, APFloat{v}, fTy); - return success(); - } - return failure(); - } -}; - class LiftArrayAllocPass : public cudaq::opt::impl::LiftArrayAllocBase { public: @@ -462,11 +349,6 @@ class LiftArrayAllocPass std::string funcName = func.getName().str(); RewritePatternSet patterns(ctx); patterns.insert(ctx, domInfo, funcName, module); - patterns.insert(ctx); - patterns.insert(ctx); - patterns.insert(ctx); - patterns.insert(ctx); - patterns.insert(ctx); LLVM_DEBUG(llvm::dbgs() << "Before lifting constant array: " << func << '\n'); @@ -481,7 +363,3 @@ class LiftArrayAllocPass } }; } // namespace - -std::unique_ptr cudaq::opt::createLiftArrayAllocPass() { - return std::make_unique(); -} diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index 8f84623a29..83e60cc734 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -23,9 +23,15 @@ #include "mlir/Pass/Pass.h" #include "mlir/Target/LLVMIR/TypeToLLVM.h" #include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/Passes.h" #include "mlir/Transforms/RegionUtils.h" #include +namespace cudaq::opt { +#define GEN_PASS_DEF_STATEPREPARATION +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + #define DEBUG_TYPE "state-preparation" using namespace mlir; @@ -34,7 +40,7 @@ using namespace mlir; /// For example: /// /// -/// Before PrepareState (state-prep): +/// Before StatePreparation 
(state-prep): /// /// module { /// func.func @foo() attributes { @@ -49,7 +55,7 @@ using namespace mlir; /// !cc.array x 4> /// } /// -/// After PrepareState (state-prep): +/// After StatePreparation (state-prep): /// /// module { /// func.func @foo() attributes { @@ -113,6 +119,8 @@ readConstantArray(mlir::OpBuilder &builder, cudaq::cc::GlobalOp &global) { LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); auto toErase = std::vector(); + auto succeeded = false; + funcOp->walk([&](Operation *op) { if (auto initOp = dyn_cast(op)) { toErase.push_back(initOp); @@ -145,18 +153,24 @@ LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { initOp.replaceAllUsesWith(qubits); toErase.push_back(addr); toErase.push_back(global); + succeeded = true; } } } } }); + if (!succeeded) { + funcOp.emitOpError("StatePreparation failed to replace quake.init_state"); + return failure(); + } + for (auto &op : toErase) { if (op->getUses().empty()) { op->erase(); } else { - module.emitOpError("StatePreparation failed to remove quake.init_state " - "or its dependencies."); + op->emitOpError("StatePreparation failed to remove quake.init_state " + "or its dependencies."); return failure(); } } @@ -164,10 +178,11 @@ LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { return success(); } -class StatePreparation : public cudaq::opt::PrepareStateBase { +class StatePreparationPass + : public cudaq::opt::impl::StatePreparationBase { protected: public: - StatePreparation() = default; + using StatePreparationBase::StatePreparationBase; mlir::ModuleOp getModule() { return getOperation(); } @@ -190,7 +205,3 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { }; } // namespace - -std::unique_ptr cudaq::opt::createStatePreparation() { - return std::make_unique(); -} diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp 
index 6d238509ec..6d2afefb6d 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -463,7 +463,7 @@ py::object pyAltLaunchKernelR(const std::string &name, MlirModule module, } /// @brief Helper function to get boolean environment variable -bool getEnvBool(const char *envName, bool defaultVal = false) { +static bool getEnvBool(const char *envName, bool defaultVal = false) { if (auto envVal = std::getenv(envName)) { std::string tmp(envVal); std::transform(tmp.begin(), tmp.end(), tmp.begin(), @@ -499,8 +499,9 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, // in their runtime. auto &platform = cudaq::get_platform(); if (!platform.is_simulator() || platform.is_emulated()) { + pm.addPass(cudaq::opt::createConstPropComplex()); pm.addPass(createCSEPass()); - pm.addPass(cudaq::opt::createLiftArrayAllocPass()); + pm.addPass(cudaq::opt::createLiftArrayAlloc()); pm.addPass(cudaq::opt::createStatePreparation()); } pm.addPass(createCanonicalizerPass()); diff --git a/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.config b/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.config index c78a2b3e1e..053134b680 100644 --- a/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.config +++ b/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.config @@ -16,7 +16,7 @@ GEN_TARGET_BACKEND=true LINKLIBS="${LINKLIBS} -lcudaq-rest-qpu" # Define the lowering pipeline -PLATFORM_LOWERING_CONFIG="canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),ionq-gate-set-mapping" 
+PLATFORM_LOWERING_CONFIG="const-prop-complex,canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),ionq-gate-set-mapping" # Tell the rest-qpu that we are generating QIR. CODEGEN_EMISSION=qir-base diff --git a/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.config b/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.config index 2db0f2b235..3d98fd209e 100644 --- a/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.config +++ b/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.config @@ -18,7 +18,7 @@ LINKLIBS="${LINKLIBS} -lcudaq-rest-qpu" # Define the lowering pipeline, here we lower to Base QIR # Note: the runtime will dynamically substitute %QPU_ARCH% based on # qpu-architecture -PLATFORM_LOWERING_CONFIG="canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),iqm-gate-set-mapping,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg,qubit-mapping{device=file(%QPU_ARCH%)},delay-measurements,regtomem),iqm-gate-set-mapping" +PLATFORM_LOWERING_CONFIG="const-prop-complex,canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),iqm-gate-set-mapping,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg,qubit-mapping{device=file(%QPU_ARCH%)},delay-measurements,regtomem),iqm-gate-set-mapping" # Tell the rest-qpu that we are generating IQM JSON. 
CODEGEN_EMISSION=iqm diff --git a/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.config b/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.config index 042fb8dd8d..d447f49cf8 100644 --- a/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.config +++ b/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.config @@ -18,7 +18,7 @@ LINKLIBS="${LINKLIBS} -lcudaq-rest-qpu" # Define the lowering pipeline. Lucy has an 8-qubit ring topology, so mapping # uses ring(8). # Toshiko uses a Kagome lattice with 2-3 connectivity per qubit -PLATFORM_LOWERING_CONFIG="canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),oqc-gate-set-mapping,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg,qubit-mapping{device=file(%QPU_ARCH%)},regtomem)" +PLATFORM_LOWERING_CONFIG="const-prop-complex,canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),oqc-gate-set-mapping,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg,qubit-mapping{device=file(%QPU_ARCH%)},regtomem)" # Tell the rest-qpu that we are generating QIR. 
diff --git a/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.config b/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.config index bed7159b28..64452c8759 100644 --- a/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.config +++ b/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.config @@ -16,7 +16,7 @@ GEN_TARGET_BACKEND=true LINKLIBS="${LINKLIBS} -lcudaq-rest-qpu" # Define the lowering pipeline, here we lower to Adaptive QIR -PLATFORM_LOWERING_CONFIG="canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),quantinuum-gate-set-mapping" +PLATFORM_LOWERING_CONFIG="const-prop-complex,canonicalize,cse,lift-array-value,state-prep,expand-measurements,unrolling-pipeline,decomposition{enable-patterns=U3ToRotations},func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),quantinuum-gate-set-mapping" # Tell the rest-qpu that we are generating QIR. CODEGEN_EMISSION=qir-adaptive diff --git a/test/Quake/const_prop_complex.qke b/test/Quake/const_prop_complex.qke new file mode 100644 index 0000000000..4f0b5215cf --- /dev/null +++ b/test/Quake/const_prop_complex.qke @@ -0,0 +1,133 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. 
// +// ========================================================================== // + +// RUN: cudaq-opt -const-prop-complex -cse -lift-array-value %s | FileCheck %s + +func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %cst = arith.constant 0.000000e+00 : f32 + %cst_0 = arith.constant 0.70710678118654757 : f64 + %0 = arith.truncf %cst_0 : f64 to f32 + %1 = complex.create %0, %cst : complex + %2 = complex.create %cst, %cst : complex + %3 = cc.alloca !cc.array x 4> + %4 = cc.cast %3 : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %1, %4 : !cc.ptr> + %5 = cc.compute_ptr %3[1] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %1, %5 : !cc.ptr> + %6 = cc.compute_ptr %3[2] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %2, %6 : !cc.ptr> + %7 = cc.compute_ptr %3[3] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %2, %7 : !cc.ptr> + %8 = quake.alloca !quake.veq<2> + %9 = quake.init_state %8, %4 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> + return + } + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = complex.constant [0.707106769 : f32, 0.000000e+00 : f32] : complex +// CHECK: %[[VAL_1:.*]] = complex.constant [0.000000e+00 : f32, 0.000000e+00 : f32] : complex +// CHECK: %[[VAL_2:.*]] = cc.alloca !cc.array x 4> +// CHECK: %[[VAL_3:.*]] = cc.cast %[[VAL_2]] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_0]], %[[VAL_3]] : !cc.ptr> +// CHECK: %[[VAL_4:.*]] = cc.compute_ptr %[[VAL_2]][1] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_0]], %[[VAL_4]] : !cc.ptr> +// CHECK: %[[VAL_5:.*]] = cc.compute_ptr %[[VAL_2]][2] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_1]], %[[VAL_5]] : !cc.ptr> +// CHECK: %[[VAL_6:.*]] = cc.compute_ptr %[[VAL_2]][3] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_1]], 
%[[VAL_6]] : !cc.ptr> +// CHECK: %[[VAL_7:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_8:.*]] = quake.init_state %[[VAL_7]], %[[VAL_3]] : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> +// CHECK: return +// CHECK: } + + +func.func private @__nvqpp_vectorCopyCtor(%0: !cc.ptr, %1: i64, %2: i64) -> !cc.ptr + +func.func @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v() -> !cc.stdvec> attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %cst = arith.constant -0.70710678118654757 : f64 + %c16_i64 = arith.constant 16 : i64 + %c4_i64 = arith.constant 4 : i64 + %cst_0 = arith.constant 0.70710678118654757 : f64 + %cst_1 = arith.constant 0.000000e+00 : f64 + %0 = complex.create %cst_0, %cst_1 : complex + %1 = complex.create %cst_0, %cst_1 : complex + %2 = complex.create %cst_0, %cst_1 : complex + %3 = complex.create %cst, %cst_1 : complex + %4 = cc.alloca !cc.array x 4> + %5 = cc.cast %4 : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %0, %5 : !cc.ptr> + %6 = cc.compute_ptr %4[1] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %1, %6 : !cc.ptr> + %7 = cc.compute_ptr %4[2] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %2, %7 : !cc.ptr> + %8 = cc.compute_ptr %4[3] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %3, %8 : !cc.ptr> + %9 = cc.cast %4 : (!cc.ptr x 4>>) -> !cc.ptr + %10 = call @__nvqpp_vectorCopyCtor(%9, %c4_i64, %c16_i64) : (!cc.ptr, i64, i64) -> !cc.ptr + %11 = cc.stdvec_init %10, %c4_i64 : (!cc.ptr, i64) -> !cc.stdvec> + return %11 : !cc.stdvec> +} + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v() -> !cc.stdvec> attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = complex.constant [0.70710678118654757, 0.000000e+00] : complex +// CHECK: %[[VAL_1:.*]] = complex.constant [-0.70710678118654757, 0.000000e+00] : complex +// CHECK: %[[VAL_2:.*]] = arith.constant 16 : i64 +// CHECK: %[[VAL_3:.*]] = arith.constant 4 : i64 +// CHECK: %[[VAL_4:.*]] = cc.alloca !cc.array x 4> 
+// CHECK: %[[VAL_5:.*]] = cc.cast %[[VAL_4]] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_0]], %[[VAL_5]] : !cc.ptr> +// CHECK: %[[VAL_6:.*]] = cc.compute_ptr %[[VAL_4]][1] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_0]], %[[VAL_6]] : !cc.ptr> +// CHECK: %[[VAL_7:.*]] = cc.compute_ptr %[[VAL_4]][2] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_0]], %[[VAL_7]] : !cc.ptr> +// CHECK: %[[VAL_8:.*]] = cc.compute_ptr %[[VAL_4]][3] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_1]], %[[VAL_8]] : !cc.ptr> +// CHECK: %[[VAL_9:.*]] = cc.cast %[[VAL_4]] : (!cc.ptr x 4>>) -> !cc.ptr +// CHECK: %[[VAL_10:.*]] = call @__nvqpp_vectorCopyCtor(%[[VAL_9]], %[[VAL_3]], %[[VAL_2]]) : (!cc.ptr, i64, i64) -> !cc.ptr +// CHECK: %[[VAL_11:.*]] = cc.stdvec_init %[[VAL_10]], %[[VAL_3]] : (!cc.ptr, i64) -> !cc.stdvec> +// CHECK: return %[[VAL_11]] : !cc.stdvec> +// CHECK: } + + +func.func @test2() -> !quake.veq<2> { + %0 = cc.alloca !cc.array + %1 = cc.compute_ptr %0[0] : (!cc.ptr>) -> !cc.ptr + %2 = arith.constant 1.0 : f64 + cc.store %2, %1 : !cc.ptr + %3 = cc.compute_ptr %0[1] : (!cc.ptr>) -> !cc.ptr + %4 = arith.constant 2.0 : f64 + cc.store %4, %3 : !cc.ptr + %5 = cc.compute_ptr %0[2] : (!cc.ptr>) -> !cc.ptr + %6 = arith.constant 6.0 : f64 + cc.store %6, %5 : !cc.ptr + %7 = cc.compute_ptr %0[3] : (!cc.ptr>) -> !cc.ptr + %8 = arith.constant 9.0 : f64 + cc.store %8, %7 : !cc.ptr + %9 = quake.alloca !quake.veq<2> + %10 = quake.init_state %9, %0 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> + return %10 : !quake.veq<2> +} + +// CHECK-LABEL: func.func @test2() -> !quake.veq<2> { +// CHECK: %[[VAL_0:.*]] = arith.constant 9.000000e+00 : f64 +// CHECK: %[[VAL_1:.*]] = arith.constant 6.000000e+00 : f64 +// CHECK: %[[VAL_2:.*]] = arith.constant 2.000000e+00 : f64 +// CHECK: %[[VAL_3:.*]] = arith.constant 1.000000e+00 : f64 +// CHECK: %[[VAL_4:.*]] = cc.alloca !cc.array +// CHECK: %[[VAL_5:.*]] = cc.compute_ptr %[[VAL_4]][0] : (!cc.ptr>) -> !cc.ptr +// 
CHECK: cc.store %[[VAL_3]], %[[VAL_5]] : !cc.ptr +// CHECK: %[[VAL_6:.*]] = cc.compute_ptr %[[VAL_4]][1] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_2]], %[[VAL_6]] : !cc.ptr +// CHECK: %[[VAL_7:.*]] = cc.compute_ptr %[[VAL_4]][2] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_1]], %[[VAL_7]] : !cc.ptr +// CHECK: %[[VAL_8:.*]] = cc.compute_ptr %[[VAL_4]][3] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_0]], %[[VAL_8]] : !cc.ptr +// CHECK: %[[VAL_9:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_10:.*]] = quake.init_state %[[VAL_9]], %[[VAL_4]] : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> +// CHECK: return %[[VAL_10]] : !quake.veq<2> +// CHECK: } \ No newline at end of file diff --git a/test/Quake/lift_array.qke b/test/Quake/lift_array.qke index a8b9b337b2..73a450d42c 100644 --- a/test/Quake/lift_array.qke +++ b/test/Quake/lift_array.qke @@ -9,24 +9,21 @@ // RUN: cudaq-opt -lift-array-value %s | FileCheck %s func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - %cst = arith.constant 0.000000e+00 : f32 - %cst_0 = arith.constant 0.70710678118654757 : f64 - %0 = arith.truncf %cst_0 : f64 to f32 - %1 = complex.create %0, %cst : complex - %2 = complex.create %cst, %cst : complex - %3 = cc.alloca !cc.array x 4> - %4 = cc.cast %3 : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %1, %4 : !cc.ptr> - %5 = cc.compute_ptr %3[1] : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %1, %5 : !cc.ptr> - %6 = cc.compute_ptr %3[2] : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %2, %6 : !cc.ptr> - %7 = cc.compute_ptr %3[3] : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %2, %7 : !cc.ptr> - %8 = quake.alloca !quake.veq<2> - %9 = quake.init_state %8, %4 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> - return - } + %cst = complex.constant [0.707106769 : f32, 0.000000e+00 : f32] : complex + %cst_0 = complex.constant [0.000000e+00 : f32, 0.000000e+00 : f32] : complex + %0 = cc.alloca !cc.array x 4> 
+ %1 = cc.cast %0 : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %cst, %1 : !cc.ptr> + %2 = cc.compute_ptr %0[1] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %cst, %2 : !cc.ptr> + %3 = cc.compute_ptr %0[2] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %cst_0, %3 : !cc.ptr> + %4 = cc.compute_ptr %0[3] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %cst_0, %4 : !cc.ptr> + %5 = quake.alloca !quake.veq<2> + %6 = quake.init_state %5, %1 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> + return +} // CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { // CHECK: %[[VAL_0:.*]] = cc.address_of @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv.rodata_{{[0-9]+}} : !cc.ptr x 4>> @@ -36,31 +33,26 @@ func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_compl // CHECK: } -func.func private @__nvqpp_vectorCopyCtor(%0: !cc.ptr, %1: i64, %2: i64) -> !cc.ptr +func.func private @__nvqpp_vectorCopyCtor(!cc.ptr, i64, i64) -> !cc.ptr func.func @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v() -> !cc.stdvec> attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - %cst = arith.constant -0.70710678118654757 : f64 + %cst = complex.constant [0.70710678118654757, 0.000000e+00] : complex + %cst_0 = complex.constant [-0.70710678118654757, 0.000000e+00] : complex %c16_i64 = arith.constant 16 : i64 %c4_i64 = arith.constant 4 : i64 - %cst_0 = arith.constant 0.70710678118654757 : f64 - %cst_1 = arith.constant 0.000000e+00 : f64 - %0 = complex.create %cst_0, %cst_1 : complex - %1 = complex.create %cst_0, %cst_1 : complex - %2 = complex.create %cst_0, %cst_1 : complex - %3 = complex.create %cst, %cst_1 : complex - %4 = cc.alloca !cc.array x 4> - %5 = cc.cast %4 : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %0, %5 : !cc.ptr> - %6 = cc.compute_ptr %4[1] : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %1, %6 : !cc.ptr> - %7 = 
cc.compute_ptr %4[2] : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %2, %7 : !cc.ptr> - %8 = cc.compute_ptr %4[3] : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %3, %8 : !cc.ptr> - %9 = cc.cast %4 : (!cc.ptr x 4>>) -> !cc.ptr - %10 = call @__nvqpp_vectorCopyCtor(%9, %c4_i64, %c16_i64) : (!cc.ptr, i64, i64) -> !cc.ptr - %11 = cc.stdvec_init %10, %c4_i64 : (!cc.ptr, i64) -> !cc.stdvec> - return %11 : !cc.stdvec> + %0 = cc.alloca !cc.array x 4> + %1 = cc.cast %0 : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %cst, %1 : !cc.ptr> + %2 = cc.compute_ptr %0[1] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %cst, %2 : !cc.ptr> + %3 = cc.compute_ptr %0[2] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %cst, %3 : !cc.ptr> + %4 = cc.compute_ptr %0[3] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %cst_0, %4 : !cc.ptr> + %5 = cc.cast %0 : (!cc.ptr x 4>>) -> !cc.ptr + %6 = call @__nvqpp_vectorCopyCtor(%5, %c4_i64, %c16_i64) : (!cc.ptr, i64, i64) -> !cc.ptr + %7 = cc.stdvec_init %6, %c4_i64 : (!cc.ptr, i64) -> !cc.stdvec> + return %7 : !cc.stdvec> } // CHECK-LABEL: func.func @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v() -> !cc.stdvec> attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { @@ -74,22 +66,22 @@ func.func @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generato // CHECK: } func.func @test2() -> !quake.veq<2> { + %cst = arith.constant 9.000000e+00 : f64 + %cst_0 = arith.constant 6.000000e+00 : f64 + %cst_1 = arith.constant 2.000000e+00 : f64 + %cst_2 = arith.constant 1.000000e+00 : f64 %0 = cc.alloca !cc.array %1 = cc.compute_ptr %0[0] : (!cc.ptr>) -> !cc.ptr - %2 = arith.constant 1.0 : f64 - cc.store %2, %1 : !cc.ptr - %3 = cc.compute_ptr %0[1] : (!cc.ptr>) -> !cc.ptr - %4 = arith.constant 2.0 : f64 - cc.store %4, %3 : !cc.ptr - %5 = cc.compute_ptr %0[2] : (!cc.ptr>) -> !cc.ptr - %6 = arith.constant 6.0 : f64 - cc.store %6, %5 : !cc.ptr - %7 = cc.compute_ptr %0[3] : (!cc.ptr>) -> !cc.ptr - %8 = arith.constant 9.0 : f64 - cc.store %8, %7 : !cc.ptr - %9 = 
quake.alloca !quake.veq<2> - %10 = quake.init_state %9, %0 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> - return %10 : !quake.veq<2> + cc.store %cst_2, %1 : !cc.ptr + %2 = cc.compute_ptr %0[1] : (!cc.ptr>) -> !cc.ptr + cc.store %cst_1, %2 : !cc.ptr + %3 = cc.compute_ptr %0[2] : (!cc.ptr>) -> !cc.ptr + cc.store %cst_0, %3 : !cc.ptr + %4 = cc.compute_ptr %0[3] : (!cc.ptr>) -> !cc.ptr + cc.store %cst, %4 : !cc.ptr + %5 = quake.alloca !quake.veq<2> + %6 = quake.init_state %5, %0 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> + return %6 : !quake.veq<2> } // CHECK-LABEL: func.func @test2() -> !quake.veq<2> { From 138145646ceae2bc67e3695bc2de94ef4c1c8cbf Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 12 Jul 2024 18:31:29 -0700 Subject: [PATCH 25/50] Fix failing tests --- lib/Optimizer/Transforms/StatePreparation.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index 83e60cc734..86fad793a5 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -119,11 +119,13 @@ readConstantArray(mlir::OpBuilder &builder, cudaq::cc::GlobalOp &global) { LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); auto toErase = std::vector(); - auto succeeded = false; + auto hasInitState = false; + auto replacedInitState = false; funcOp->walk([&](Operation *op) { if (auto initOp = dyn_cast(op)) { toErase.push_back(initOp); + hasInitState = true; auto loc = op->getLoc(); builder.setInsertionPointAfter(initOp); // Find the qvector alloc. 
@@ -153,14 +155,14 @@ LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { initOp.replaceAllUsesWith(qubits); toErase.push_back(addr); toErase.push_back(global); - succeeded = true; + replacedInitState = true; } } } } }); - if (!succeeded) { + if (hasInitState && !replacedInitState) { funcOp.emitOpError("StatePreparation failed to replace quake.init_state"); return failure(); } From 907b415e52c6db48c6269b5515c2de53b4f74349 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 12 Jul 2024 20:28:12 -0700 Subject: [PATCH 26/50] Fixed error message --- lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index 1554acec06..2336e6a97d 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -183,8 +183,8 @@ class AllocaPattern : public OpRewritePattern { if (op->getUses().empty()) { rewriter.eraseOp(op); } else { - module.emitOpError("LiftArrayAlloc failed to remove quake.init_state " - "or its dependencies."); + op->emitOpError("LiftArrayAlloc failed to remove cc::AllocOp " + "or its uses."); return failure(); } } From 19a164c8483fce031613a664dcc19bf3ba26bd4f Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 12 Jul 2024 21:39:48 -0700 Subject: [PATCH 27/50] Fix failing tests --- test/AST-Quake/custom_op_concrete_matrix.cpp | 2 +- test/Quake/const_prop_complex.qke | 2 +- tools/nvqpp/nvq++.in | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/AST-Quake/custom_op_concrete_matrix.cpp b/test/AST-Quake/custom_op_concrete_matrix.cpp index bb3b697005..3f350a6222 100644 --- a/test/AST-Quake/custom_op_concrete_matrix.cpp +++ b/test/AST-Quake/custom_op_concrete_matrix.cpp @@ -6,7 +6,7 @@ * the terms of the Apache License 2.0 which accompanies this distribution. 
* ******************************************************************************/ -// RUN: cudaq-quake %cpp_std %s | cudaq-opt -lift-array-value -get-concrete-matrix | FileCheck %s +// RUN: cudaq-quake %cpp_std %s | cudaq-opt -const-prop-complex -lift-array-value -get-concrete-matrix | FileCheck %s #include diff --git a/test/Quake/const_prop_complex.qke b/test/Quake/const_prop_complex.qke index 4f0b5215cf..7b75d72ac9 100644 --- a/test/Quake/const_prop_complex.qke +++ b/test/Quake/const_prop_complex.qke @@ -6,7 +6,7 @@ // the terms of the Apache License 2.0 which accompanies this distribution. // // ========================================================================== // -// RUN: cudaq-opt -const-prop-complex -cse -lift-array-value %s | FileCheck %s +// RUN: cudaq-opt -const-prop-complex %s | FileCheck %s func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %cst = arith.constant 0.000000e+00 : f32 diff --git a/tools/nvqpp/nvq++.in b/tools/nvqpp/nvq++.in index b1f55b57b1..2197e91ecb 100644 --- a/tools/nvqpp/nvq++.in +++ b/tools/nvqpp/nvq++.in @@ -708,7 +708,7 @@ fi if ${ENABLE_DEVICE_CODE_LOADERS}; then RUN_OPT=true OPT_PASSES=$(add_pass_to_pipeline "${OPT_PASSES}" "func.func(quake-add-metadata)") - OPT_PASSES=$(add_pass_to_pipeline "${OPT_PASSES}" "lift-array-value,func.func(get-concrete-matrix),device-code-loader{use-quake=1}") + OPT_PASSES=$(add_pass_to_pipeline "${OPT_PASSES}" "const-prop-complex,lift-array-value,func.func(get-concrete-matrix),device-code-loader{use-quake=1}") fi if ${ENABLE_LOWER_TO_CFG}; then RUN_OPT=true From 9d9b9e9fd44c779e5958b5c205335aa6e26b55ea Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Mon, 15 Jul 2024 11:05:14 -0700 Subject: [PATCH 28/50] Temp --- include/cudaq/Optimizer/Transforms/Passes.h | 3 +- .../Optimizer/Transforms/SimulationData.h | 50 +++++++++++++++++++ lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 7 +-- 
runtime/common/BaseRemoteRESTQPU.h | 24 ++++++++- 4 files changed, 79 insertions(+), 5 deletions(-) create mode 100644 include/cudaq/Optimizer/Transforms/SimulationData.h diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 996b6e56a7..40ee1465be 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -15,6 +15,7 @@ #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassRegistry.h" +#include "SimulationData.h" namespace cudaq::opt { @@ -41,7 +42,7 @@ std::unique_ptr createObserveAnsatzPass(std::vector &); std::unique_ptr createQuakeAddMetadata(); std::unique_ptr createQuakeAddDeallocs(); std::unique_ptr createQuakeSynthesizer(); -std::unique_ptr createQuakeSynthesizer(std::string_view, void *); +std::unique_ptr createQuakeSynthesizer(std::string_view, SimulationData::getSimulationDataFunc*, void *); std::unique_ptr createRaiseToAffinePass(); std::unique_ptr createUnwindLoweringPass(); diff --git a/include/cudaq/Optimizer/Transforms/SimulationData.h b/include/cudaq/Optimizer/Transforms/SimulationData.h new file mode 100644 index 0000000000..77f5fe37db --- /dev/null +++ b/include/cudaq/Optimizer/Transforms/SimulationData.h @@ -0,0 +1,50 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ +#include +#include + +// cudaq::state is defined in the runtime. The compiler will never need to know +// about its implementation and there should not be a circular build/library +// dependence because of it. Simply forward declare it, as it is notional. 
+namespace cudaq { +class state; +} + + +/// Owns the data +class SimulationData { + public: + typedef SimulationData (getSimulationDataFunc)(cudaq::state*); + + SimulationData(void *data, std::size_t size, std::size_t elementSize): + data(data), size(size), elementSize(elementSize) {} + + template + std::vector toVector() { + assert(sizeof(T) == elementSize && "incorrect element size in simulation data"); + std::vector result; + + for (auto i = 0; i < size; i++) { + auto elePtr = reinterpret_cast(data + i*elementSize); + result[i] = *elePtr; + } + + return result; + } + + ~SimulationData() { + delete data; + } + +private: + void* data; + std::size_t size; + std::size_t elementSize; +}; + + diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index c657f53ae7..cd7a0cfc9a 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -15,6 +15,7 @@ #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" #include "cudaq/Optimizer/Transforms/Passes.h" +#include "cudaq/Optimizer/Transforms/SimulationData.h" #include "llvm/Support/Debug.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/Arith/IR/Arith.h" @@ -359,7 +360,7 @@ class QuakeSynthesizer public: QuakeSynthesizer() = default; - QuakeSynthesizer(std::string_view kernel, void *a) + QuakeSynthesizer(std::string_view kernel, SimulationData::getSimulationDataFunc* getData, void *a) : kernelName(kernel), args(a) {} mlir::ModuleOp getModule() { return getOperation(); } @@ -713,6 +714,6 @@ std::unique_ptr cudaq::opt::createQuakeSynthesizer() { } std::unique_ptr -cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, void *a) { - return std::make_unique(kernelName, a); +cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, SimulationData::getSimulationDataFunc* getData, void *a) { + return std::make_unique(kernelName, 
getData, a); } diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 30445dae45..42005600d2 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -22,6 +22,7 @@ #include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" #include "cudaq/Optimizer/Transforms/Passes.h" +#include "cudaq/Optimizer/Transforms/SimulationData.h" #include "cudaq/Support/Plugin.h" #include "cudaq/platform/qpu.h" #include "cudaq/platform/quantum_platform.h" @@ -355,6 +356,27 @@ class BaseRemoteRESTQPU : public cudaq::QPU { return output_names; } + SimulationData readSimulationData(cudaq::state* s) { + void *dataPtr = nullptr; + void *dataPtr = nullptr; + auto stateVector = s->get_tensor(); + auto precision = s->get_precision(); + auto numElements = stateVector.get_num_elements(); + auto elementSize = 0; + if (precision == SimulationState::precision::fp32) { + elementSize = sizeof(std::complex[numElements]; + s->to_host(hostData, numElements); + dataPtr = reinterpret_cast(hostData); + } else { + elementSize = sizeof(std::complex[numElements]; + s->to_host(hostData, numElements); + dataPtr = reinterpret_cast(hostData); + } + return SimulationData(dataPtr, numElements, elementSize); +} + /// @brief Extract the Quake representation for the given kernel name and /// lower it to the code format required for the specific backend. 
The /// lowering process is controllable via the configuration file in the @@ -413,7 +435,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&context); - pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, readSimulationData, updatedArgs)); pm.addPass(mlir::createCanonicalizerPass()); if (disableMLIRthreading || enablePrintMLIREachPass) moduleOp.getContext()->disableMultithreading(); From bd002f201cdfa4343f22e65dec16ee74a06ba8e1 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 16 Jul 2024 09:22:17 -0700 Subject: [PATCH 29/50] Synthesize state pointers for remote sim --- include/cudaq/Optimizer/Transforms/Passes.h | 2 +- .../Optimizer/Transforms/SimulationData.h | 24 +-- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 139 ++++++++++++++++-- .../cudaq/platform/py_alt_launch_kernel.cpp | 2 +- runtime/common/BaseRemoteRESTQPU.h | 23 +-- runtime/common/BaseRestRemoteClient.h | 42 +++++- unittests/Optimizer/QuakeSynthTester.cpp | 2 +- 7 files changed, 183 insertions(+), 51 deletions(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 40ee1465be..0f63d7b3b2 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -42,7 +42,7 @@ std::unique_ptr createObserveAnsatzPass(std::vector &); std::unique_ptr createQuakeAddMetadata(); std::unique_ptr createQuakeAddDeallocs(); std::unique_ptr createQuakeSynthesizer(); -std::unique_ptr createQuakeSynthesizer(std::string_view, SimulationData::getSimulationDataFunc*, void *); +std::unique_ptr createQuakeSynthesizer(std::string_view, SimulationStateData::getDataFunc*, void *); std::unique_ptr createRaiseToAffinePass(); std::unique_ptr createUnwindLoweringPass(); diff --git a/include/cudaq/Optimizer/Transforms/SimulationData.h 
b/include/cudaq/Optimizer/Transforms/SimulationData.h index 77f5fe37db..62fcb12c2e 100644 --- a/include/cudaq/Optimizer/Transforms/SimulationData.h +++ b/include/cudaq/Optimizer/Transforms/SimulationData.h @@ -5,9 +5,14 @@ * This source code and the accompanying materials are made available under * * the terms of the Apache License 2.0 which accompanies this distribution. * ******************************************************************************/ + +#pragma once + #include #include +#include + // cudaq::state is defined in the runtime. The compiler will never need to know // about its implementation and there should not be a circular build/library // dependence because of it. Simply forward declare it, as it is notional. @@ -17,11 +22,11 @@ class state; /// Owns the data -class SimulationData { +class SimulationStateData { public: - typedef SimulationData (getSimulationDataFunc)(cudaq::state*); + typedef SimulationStateData (getDataFunc)(cudaq::state*); - SimulationData(void *data, std::size_t size, std::size_t elementSize): + SimulationStateData(void *data, std::size_t size, std::size_t elementSize): data(data), size(size), elementSize(elementSize) {} template @@ -29,19 +34,20 @@ class SimulationData { assert(sizeof(T) == elementSize && "incorrect element size in simulation data"); std::vector result; - for (auto i = 0; i < size; i++) { - auto elePtr = reinterpret_cast(data + i*elementSize); - result[i] = *elePtr; + std::cout << "SimulationStateData:" << std::endl; + for (std::size_t i = 0; i < size; i++) { + auto elePtr = reinterpret_cast(data) + i; + result.push_back(*elePtr); + std::cout << *elePtr << std::endl; } return result; } - ~SimulationData() { - delete data; + ~SimulationStateData() { + delete reinterpret_cast(data); } -private: void* data; std::size_t size; std::size_t elementSize; diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index cd7a0cfc9a..85fac7009c 100644 --- 
a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -26,6 +26,8 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" +#include + #define DEBUG_TYPE "quake-synthesizer" using namespace mlir; @@ -125,6 +127,74 @@ createArrayInMemory(OpBuilder &builder, ModuleOp module, unsigned &counter, return {buffer, data}; } +template +LogicalResult +synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, + BlockArgument argument, ELETY eleTy, std::vector &vec, + MAKER makeElementValue) { + auto *ctx = builder.getContext(); + auto argLoc = argument.getLoc(); + + auto strTy = cudaq::cc::StdvecType::get(eleTy); + auto arrTy = cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()); + + builder.setInsertionPointToStart(argument.getOwner()); + + // Iterate over the users of this state argument. + for (auto *argUser : argument.getUsers()) { + // Replace a calls to runtime function that reads the number of qubits + // with the log of the length, which is a synthesized constant. 
+ if (auto numOfQubitsOp = dyn_cast(argUser)) { + if (auto calleeAttr = numOfQubitsOp.getCalleeAttr()) { + auto funcName = calleeAttr.getValue().str(); + std::cout << "Call on state: " << funcName << std::endl; + if (funcName == cudaq::getNumQubitsFromCudaqState) { + Value numOfQubits = builder.create( + argLoc, log2(vec.size()), builder.getI64Type()); + numOfQubitsOp.replaceAllUsesWith(ValueRange{numOfQubits}); + numOfQubitsOp.erase(); + std::cout << "Removed getNumQubitsFromCudaqState" << std::endl; + } else { + argUser->emitError("Unexpected call on state argument"); + return failure(); + } + } + } + } + + std::cout << "Synthesizing vec" << std::endl; + OpBuilder::InsertionGuard guard(builder); + auto [buffer, _] = + createArrayInMemory(builder, module, counter, argument, vec, arrTy); + auto ptrArrEleTy = + cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(eleTy)); + Value memArr = builder.create(argLoc, ptrArrEleTy, buffer); + + builder.setInsertionPointAfter(memArr.getDefiningOp()); + Value size = builder.create(argLoc, vec.size(), 64); + Value newVec = + builder.create(argLoc, strTy, memArr, size); + argument.replaceAllUsesWith(newVec); + + std::cout << "Done Synthesizing vec" << std::endl; + + return success(); +} + +static LogicalResult +synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, + BlockArgument argument, std::vector> &vec) { + return synthesizeStateArgument(builder, module, counter, argument, + ComplexType::get(builder.getF32Type()), vec, makeComplexElement); +} + +static LogicalResult +synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, + BlockArgument argument, std::vector> &vec) { + return synthesizeStateArgument(builder, module, counter, argument, + ComplexType::get(builder.getF64Type()), vec, makeComplexElement); +} + template LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, @@ -132,9 +202,11 @@ synthesizeVectorArgument(OpBuilder 
&builder, ModuleOp module, unsigned &counter, MAKER makeElementValue) { auto *ctx = builder.getContext(); auto argTy = argument.getType(); + assert(isa(argTy)); auto strTy = cast(argTy); auto eleTy = cast(strTy.getElementType()); + builder.setInsertionPointToStart(argument.getOwner()); auto argLoc = argument.getLoc(); @@ -357,11 +429,12 @@ class QuakeSynthesizer // The raw pointer to the runtime arguments. void *args; + SimulationStateData::getDataFunc* getStateData; public: QuakeSynthesizer() = default; - QuakeSynthesizer(std::string_view kernel, SimulationData::getSimulationDataFunc* getData, void *a) - : kernelName(kernel), args(a) {} + QuakeSynthesizer(std::string_view kernel, SimulationStateData::getDataFunc* getData, void *a) + : kernelName(kernel), args(a), getStateData(getData) {} mlir::ModuleOp getModule() { return getOperation(); } @@ -500,19 +573,53 @@ class QuakeSynthesizer if (auto ptrTy = dyn_cast(type)) { if (isa(ptrTy.getElementType())) { - // Special case of a `cudaq::state*` which must be in the same address - // space. This references a container to a set of simulation - // amplitudes. 
- synthesizeRuntimeArgument( - builder, argument, args, offset, sizeof(void *), - [=](OpBuilder &builder, cudaq::state **concrete) { - Value rawPtr = builder.create( - loc, reinterpret_cast(*concrete), - sizeof(void *) * 8); - auto stateTy = cudaq::cc::StateType::get(builder.getContext()); - return builder.create( - loc, cudaq::cc::PointerType::get(stateTy), rawPtr); - }); + if (getStateData != nullptr) { + std::cout << "Reading state data:" << std::endl; + cudaq::state* concrete; + std::memcpy(&concrete, ((char *)args) + offset, sizeof(cudaq::state*)); + std::cout << "Getting state data:" << std::endl; + auto stateData = getStateData(concrete); + if (stateData.elementSize == sizeof(std::complex)) { + auto v = stateData.toVector>(); + std::cout << "Read vector of double:" << std::endl; + for (auto e: v) { + std::cout << e << "," <>(); + std::cout << "Read vector of float:" << std::endl; + for (auto e: v) { + std::cout << e << "," <( + // builder, argument, args, offset, sizeof(void *), + // [=](OpBuilder &builder, cudaq::state **concrete) { + // Value rawPtr = builder.create( + // loc, reinterpret_cast(*concrete), + // sizeof(void *) * 8); + // auto stateTy = cudaq::cc::StateType::get(builder.getContext()); + // return builder.create( + // loc, cudaq::cc::PointerType::get(stateTy), rawPtr); + // }); + else { + funcOp.emitOpError("synthesis: unsupported state argument type"); + signalPassFailure(); + } continue; } // N.B. 
Other pointers will not be materialized and may be in a @@ -714,6 +821,6 @@ std::unique_ptr cudaq::opt::createQuakeSynthesizer() { } std::unique_ptr -cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, SimulationData::getSimulationDataFunc* getData, void *a) { +cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, SimulationStateData::getDataFunc* getData, void *a) { return std::make_unique(kernelName, getData, a); } diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 6d2afefb6d..16d5575228 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -491,7 +491,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, getEnvBool("CUDAQ_MLIR_PRINT_EACH_PASS", false); PassManager pm(context); - pm.addPass(cudaq::opt::createQuakeSynthesizer(name, rawArgs)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(name, nullptr, rawArgs)); pm.addPass(createCanonicalizerPass()); // Run state preparation for quantum devices only. 
diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 42005600d2..471d171673 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -356,27 +356,6 @@ class BaseRemoteRESTQPU : public cudaq::QPU { return output_names; } - SimulationData readSimulationData(cudaq::state* s) { - void *dataPtr = nullptr; - void *dataPtr = nullptr; - auto stateVector = s->get_tensor(); - auto precision = s->get_precision(); - auto numElements = stateVector.get_num_elements(); - auto elementSize = 0; - if (precision == SimulationState::precision::fp32) { - elementSize = sizeof(std::complex[numElements]; - s->to_host(hostData, numElements); - dataPtr = reinterpret_cast(hostData); - } else { - elementSize = sizeof(std::complex[numElements]; - s->to_host(hostData, numElements); - dataPtr = reinterpret_cast(hostData); - } - return SimulationData(dataPtr, numElements, elementSize); -} - /// @brief Extract the Quake representation for the given kernel name and /// lower it to the code format required for the specific backend. 
The /// lowering process is controllable via the configuration file in the @@ -435,7 +414,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&context); - pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, readSimulationData, updatedArgs)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, nullptr, updatedArgs)); pm.addPass(mlir::createCanonicalizerPass()); if (disableMLIRthreading || enablePrintMLIREachPass) moduleOp.getContext()->disableMultithreading(); diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index d82457669d..65ff68c4ad 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -92,6 +92,39 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { }); } + static SimulationStateData readSimulationStateData(cudaq::state* s) { + std::cout << "Reading sim state data" << std::endl; + void *dataPtr = nullptr; + auto stateVector = s->get_tensor(); + auto precision = s->get_precision(); + auto numElements = stateVector.get_num_elements(); + auto elementSize = 0; + if (precision == SimulationState::precision::fp32) { + std::cout << "32 bit precision" << std::endl; + elementSize = sizeof(std::complex); + auto *hostData = new std::complex[numElements]; + std::cout << "Reading host data" << std::endl; + s->to_host(hostData, numElements); + std::cout << "Host data:" << std::endl; + for (size_t i = 0; i< numElements; i++) { + std::cout << hostData[i] << std::endl; + } + dataPtr = reinterpret_cast(hostData); + } else { + std::cout << "64 bit precision" << std::endl; + elementSize = sizeof(std::complex); + auto *hostData = new std::complex[numElements]; + std::cout << "Reading host data" << std::endl; + s->to_host(hostData, numElements); + std::cout << "Host data:" << std::endl; + for (size_t i = 0; i< numElements; i++) { + std::cout << hostData[i] << std::endl; + } + dataPtr = 
reinterpret_cast(hostData); + } + return SimulationStateData(dataPtr, numElements, elementSize); + } + public: virtual void setConfig( const std::unordered_map &configs) override { @@ -172,7 +205,14 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { mlir::PassManager pm(&mlirContext); moduleOp.getContext()->disableMultithreading(); pm.enableIRPrinting(); - pm.addPass(cudaq::opt::createQuakeSynthesizer(name, args)); + auto &platform = cudaq::get_platform(); + if (platform.is_simulator()) { + // For efficiency, we don't run state prep to convert states to gates on + // simulators, instead we synthesize them as vectors. + pm.addPass(cudaq::opt::createQuakeSynthesizer(name, readSimulationStateData, args)); + } else { + pm.addPass(cudaq::opt::createQuakeSynthesizer(name, nullptr, args)); + } pm.addPass(mlir::createCanonicalizerPass()); if (failed(pm.run(moduleOp))) throw std::runtime_error("Could not successfully apply quake-synth."); diff --git a/unittests/Optimizer/QuakeSynthTester.cpp b/unittests/Optimizer/QuakeSynthTester.cpp index c7e6bce0c5..745043ebd1 100644 --- a/unittests/Optimizer/QuakeSynthTester.cpp +++ b/unittests/Optimizer/QuakeSynthTester.cpp @@ -54,7 +54,7 @@ LogicalResult runQuakeSynth(std::string_view kernelName, void *rawArgs, PassManager pm(module->getContext()); module->getContext()->disableMultithreading(); pm.enableIRPrinting(); - pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, rawArgs)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, nullptr, rawArgs)); pm.addPass(createCanonicalizerPass()); pm.addPass(cudaq::opt::createExpandMeasurementsPass()); pm.addNestedPass(cudaq::opt::createClassicalMemToReg()); From 87aff5fa94fb8bfb9574cf81aa441ded7fe066c7 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 16 Jul 2024 09:26:20 -0700 Subject: [PATCH 30/50] Addressed CR comments --- targettests/execution/state_preparation_vector.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git 
a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index 35d2b68619..1a1c7421aa 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -6,8 +6,13 @@ * the terms of the Apache License 2.0 which accompanies this distribution. * ******************************************************************************/ -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std --enable-mlir --target quantinuum --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target ionq --emulate %s -o %t && %t | FileCheck %s +// 2 different IQM machines for 2 different topologies +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Adonis --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Apollo --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target oqc --emulate %s -o %t && %t | FileCheck %s #include #include From 0931e6fa870b22bce0d25780f9b837e66659196a Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 16 Jul 2024 11:25:56 -0700 Subject: [PATCH 31/50] Cleanup --- include/cudaq/Optimizer/Transforms/Passes.h | 3 +- .../Optimizer/Transforms/SimulationData.h | 28 ++-- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 148 +++++++++--------- .../cudaq/platform/py_alt_launch_kernel.cpp | 2 +- runtime/common/BaseRemoteRESTQPU.h | 2 +- runtime/common/BaseRestRemoteClient.h | 4 +- targettests/Remote-Sim/state_init.cpp | 26 --- unittests/Optimizer/QuakeSynthTester.cpp | 2 +- 8 files changed, 98 insertions(+), 117 deletions(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 0f63d7b3b2..7745734c49 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ 
b/include/cudaq/Optimizer/Transforms/Passes.h @@ -42,7 +42,8 @@ std::unique_ptr createObserveAnsatzPass(std::vector &); std::unique_ptr createQuakeAddMetadata(); std::unique_ptr createQuakeAddDeallocs(); std::unique_ptr createQuakeSynthesizer(); -std::unique_ptr createQuakeSynthesizer(std::string_view, SimulationStateData::getDataFunc*, void *); +std::unique_ptr createQuakeSynthesizer(std::string_view, void *, SimulationStateData::getDataFunc*); +std::unique_ptr createQuakeSynthesizer(std::string_view, void *, bool); std::unique_ptr createRaiseToAffinePass(); std::unique_ptr createUnwindLoweringPass(); diff --git a/include/cudaq/Optimizer/Transforms/SimulationData.h b/include/cudaq/Optimizer/Transforms/SimulationData.h index 62fcb12c2e..9504dc7c08 100644 --- a/include/cudaq/Optimizer/Transforms/SimulationData.h +++ b/include/cudaq/Optimizer/Transforms/SimulationData.h @@ -29,20 +29,20 @@ class SimulationStateData { SimulationStateData(void *data, std::size_t size, std::size_t elementSize): data(data), size(size), elementSize(elementSize) {} - template - std::vector toVector() { - assert(sizeof(T) == elementSize && "incorrect element size in simulation data"); - std::vector result; - - std::cout << "SimulationStateData:" << std::endl; - for (std::size_t i = 0; i < size; i++) { - auto elePtr = reinterpret_cast(data) + i; - result.push_back(*elePtr); - std::cout << *elePtr << std::endl; - } - - return result; - } + // template + // std::vector toVector() { + // assert(sizeof(T) == elementSize && "incorrect element size in simulation data"); + // std::vector result; + + // std::cout << "SimulationStateData:" << std::endl; + // for (std::size_t i = 0; i < size; i++) { + // auto elePtr = reinterpret_cast(data) + i; + // result.push_back(*elePtr); + // std::cout << *elePtr << std::endl; + // } + + // return result; + // } ~SimulationStateData() { delete reinterpret_cast(data); diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp 
b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 85fac7009c..bde179512d 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -127,6 +127,19 @@ createArrayInMemory(OpBuilder &builder, ModuleOp module, unsigned &counter, return {buffer, data}; } +template +std::vector stateDataToVector(SimulationStateData& stateData) { + assert(sizeof(T) == stateData.elementSize && "incorrect element size in simulation data"); + std::vector result; + + for (std::size_t i = 0; i < stateData.size; i++) { + auto elePtr = reinterpret_cast(stateData.data) + i; + result.push_back(*elePtr); + } + + return result; +} + template LogicalResult synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, @@ -135,10 +148,11 @@ synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, auto *ctx = builder.getContext(); auto argLoc = argument.getLoc(); - auto strTy = cudaq::cc::StdvecType::get(eleTy); + //auto strTy = cudaq::cc::StdvecType::get(eleTy); auto arrTy = cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()); builder.setInsertionPointToStart(argument.getOwner()); + auto toErase = std::vector(); // Iterate over the users of this state argument. 
for (auto *argUser : argument.getUsers()) { @@ -147,13 +161,11 @@ synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, if (auto numOfQubitsOp = dyn_cast(argUser)) { if (auto calleeAttr = numOfQubitsOp.getCalleeAttr()) { auto funcName = calleeAttr.getValue().str(); - std::cout << "Call on state: " << funcName << std::endl; if (funcName == cudaq::getNumQubitsFromCudaqState) { Value numOfQubits = builder.create( argLoc, log2(vec.size()), builder.getI64Type()); numOfQubitsOp.replaceAllUsesWith(ValueRange{numOfQubits}); - numOfQubitsOp.erase(); - std::cout << "Removed getNumQubitsFromCudaqState" << std::endl; + toErase.push_back(numOfQubitsOp); } else { argUser->emitError("Unexpected call on state argument"); return failure(); @@ -162,7 +174,6 @@ synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, } } - std::cout << "Synthesizing vec" << std::endl; OpBuilder::InsertionGuard guard(builder); auto [buffer, _] = createArrayInMemory(builder, module, counter, argument, vec, arrTy); @@ -170,29 +181,34 @@ synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(eleTy)); Value memArr = builder.create(argLoc, ptrArrEleTy, buffer); - builder.setInsertionPointAfter(memArr.getDefiningOp()); - Value size = builder.create(argLoc, vec.size(), 64); - Value newVec = - builder.create(argLoc, strTy, memArr, size); - argument.replaceAllUsesWith(newVec); - - std::cout << "Done Synthesizing vec" << std::endl; + // builder.setInsertionPointAfter(memArr.getDefiningOp()); + // Value size = builder.create(argLoc, vec.size(), 64); + // Value newVec = + // builder.create(argLoc, strTy, memArr, size); + argument.replaceAllUsesWith(memArr); - return success(); -} + for (auto &op : toErase) { + op->erase(); + } -static LogicalResult -synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, - BlockArgument argument, std::vector> &vec) { - return 
synthesizeStateArgument(builder, module, counter, argument, - ComplexType::get(builder.getF32Type()), vec, makeComplexElement); + return success(); } static LogicalResult synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, - BlockArgument argument, std::vector> &vec) { - return synthesizeStateArgument(builder, module, counter, argument, + BlockArgument argument, SimulationStateData& stateData) { + + if (stateData.elementSize == sizeof(std::complex)) { + auto vec = stateDataToVector>(stateData); + return synthesizeStateArgument(builder, module, counter, argument, ComplexType::get(builder.getF64Type()), vec, makeComplexElement); + } else if (stateData.elementSize == sizeof(std::complex)) { + auto vec = stateDataToVector>(stateData); + return synthesizeStateArgument(builder, module, counter, argument, + ComplexType::get(builder.getF32Type()), vec, makeComplexElement); + } + module.emitError("unexpected element size in simulation state data"); + return failure(); } template @@ -429,12 +445,17 @@ class QuakeSynthesizer // The raw pointer to the runtime arguments. void *args; - SimulationStateData::getDataFunc* getStateData; + + // Function to read the state data, if any. + SimulationStateData::getDataFunc* getStateData; + + // Is the simulation running in the same address space as synthesis? 
+ bool sameAddressSpace; public: QuakeSynthesizer() = default; - QuakeSynthesizer(std::string_view kernel, SimulationStateData::getDataFunc* getData, void *a) - : kernelName(kernel), args(a), getStateData(getData) {} + QuakeSynthesizer(std::string_view kernel, void *a, SimulationStateData::getDataFunc* getData, bool sameSpace = false) + : kernelName(kernel), args(a), getStateData(getData), sameAddressSpace(sameSpace) {} mlir::ModuleOp getModule() { return getOperation(); } @@ -573,51 +594,31 @@ class QuakeSynthesizer if (auto ptrTy = dyn_cast(type)) { if (isa(ptrTy.getElementType())) { - if (getStateData != nullptr) { - std::cout << "Reading state data:" << std::endl; - cudaq::state* concrete; - std::memcpy(&concrete, ((char *)args) + offset, sizeof(cudaq::state*)); - std::cout << "Getting state data:" << std::endl; - auto stateData = getStateData(concrete); - if (stateData.elementSize == sizeof(std::complex)) { - auto v = stateData.toVector>(); - std::cout << "Read vector of double:" << std::endl; - for (auto e: v) { - std::cout << e << "," <>(); - std::cout << "Read vector of float:" << std::endl; - for (auto e: v) { - std::cout << e << "," <( - // builder, argument, args, offset, sizeof(void *), - // [=](OpBuilder &builder, cudaq::state **concrete) { - // Value rawPtr = builder.create( - // loc, reinterpret_cast(*concrete), - // sizeof(void *) * 8); - // auto stateTy = cudaq::cc::StateType::get(builder.getContext()); - // return builder.create( - // loc, cudaq::cc::PointerType::get(stateTy), rawPtr); - // }); - else { - funcOp.emitOpError("synthesis: unsupported state argument type"); + if (sameAddressSpace) { + // Special case of a `cudaq::state*` which must be in the same address + // space. This references a container to a set of simulation + // amplitudes. 
+ synthesizeRuntimeArgument( + builder, argument, args, offset, sizeof(void *), + [=](OpBuilder &builder, cudaq::state **concrete) { + Value rawPtr = builder.create( + loc, reinterpret_cast(*concrete), + sizeof(void *) * 8); + auto stateTy = cudaq::cc::StateType::get(builder.getContext()); + return builder.create( + loc, cudaq::cc::PointerType::get(stateTy), rawPtr); + }); + } else if (getStateData != nullptr) { + // Special case of running on a simulator in a different address space, + // when we know how to convert state to data. + cudaq::state* concrete; + std::memcpy(&concrete, ((char *)args) + offset, sizeof(cudaq::state*)); + auto stateData = getStateData(concrete); + if (failed(synthesizeStateArgument(builder, module, counter, argument, stateData))) + module.emitError("Failed to synthesize state*"); + } else { + // All other cases are not yet supported (i.e. quantum hardware). + funcOp.emitOpError("synthesis: unsupported argument type: state*"); signalPassFailure(); } continue; @@ -821,6 +822,11 @@ std::unique_ptr cudaq::opt::createQuakeSynthesizer() { } std::unique_ptr -cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, SimulationStateData::getDataFunc* getData, void *a) { - return std::make_unique(kernelName, getData, a); +cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, void *a, SimulationStateData::getDataFunc* getData) { + return std::make_unique(kernelName, a, getData, false); +} + +std::unique_ptr +cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, void *a, bool sameAddressSpace) { + return std::make_unique(kernelName, a, nullptr, sameAddressSpace); } diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 16d5575228..a0032cf82e 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -491,7 +491,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule 
module, getEnvBool("CUDAQ_MLIR_PRINT_EACH_PASS", false); PassManager pm(context); - pm.addPass(cudaq::opt::createQuakeSynthesizer(name, nullptr, rawArgs)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(name, rawArgs, true)); pm.addPass(createCanonicalizerPass()); // Run state preparation for quantum devices only. diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 471d171673..3a35a019d2 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -414,7 +414,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&context); - pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, nullptr, updatedArgs)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs, nullptr)); pm.addPass(mlir::createCanonicalizerPass()); if (disableMLIRthreading || enablePrintMLIREachPass) moduleOp.getContext()->disableMultithreading(); diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index 65ff68c4ad..1ea51ff344 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -209,9 +209,9 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { if (platform.is_simulator()) { // For efficiency, we don't run state prep to convert states to gates on // simulators, instead we synthesize them as vectors. 
- pm.addPass(cudaq::opt::createQuakeSynthesizer(name, readSimulationStateData, args)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(name, args, readSimulationStateData)); } else { - pm.addPass(cudaq::opt::createQuakeSynthesizer(name, nullptr, args)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(name, args, nullptr)); } pm.addPass(mlir::createCanonicalizerPass()); if (failed(pm.run(moduleOp))) diff --git a/targettests/Remote-Sim/state_init.cpp b/targettests/Remote-Sim/state_init.cpp index 6677b4746c..4615e2ec08 100644 --- a/targettests/Remote-Sim/state_init.cpp +++ b/targettests/Remote-Sim/state_init.cpp @@ -65,31 +65,5 @@ int main() { // CHECK: 00 // CHECK: 10 -// CHECK: 0001 -// CHECK: 0011 -// CHECK: 1001 -// CHECK: 1011 - -// CHECK: 00 -// CHECK: 10 - -// CHECK: 00 -// CHECK: 10 - -// CHECK: 00 -// CHECK: 10 - -// CHECK: 01 -// CHECK: 11 - -// CHECK: 00 -// CHECK: 10 - -// CHECK: 01 -// CHECK: 11 - -// CHECK: 00 -// CHECK: 10 - // CHECK: 01 // CHECK: 11 diff --git a/unittests/Optimizer/QuakeSynthTester.cpp b/unittests/Optimizer/QuakeSynthTester.cpp index 745043ebd1..ede99e8e82 100644 --- a/unittests/Optimizer/QuakeSynthTester.cpp +++ b/unittests/Optimizer/QuakeSynthTester.cpp @@ -54,7 +54,7 @@ LogicalResult runQuakeSynth(std::string_view kernelName, void *rawArgs, PassManager pm(module->getContext()); module->getContext()->disableMultithreading(); pm.enableIRPrinting(); - pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, nullptr, rawArgs)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, rawArgs, nullptr)); pm.addPass(createCanonicalizerPass()); pm.addPass(cudaq::opt::createExpandMeasurementsPass()); pm.addNestedPass(cudaq::opt::createClassicalMemToReg()); From cd528c75aa67258c0c75554b650706ce356997da Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 17 Jul 2024 15:23:37 -0700 Subject: [PATCH 32/50] Address CR comments --- dictionary.dic | Bin 0 -> 9936 bytes lib/Optimizer/Builder/Intrinsics.cpp | 8 +- 
lib/Optimizer/Transforms/CMakeLists.txt | 1 - lib/Optimizer/Transforms/ConstPropComplex.cpp | 70 ++-- lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 27 +- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 127 +++++--- lib/Optimizer/Transforms/StateDecomposer.cpp | 128 -------- lib/Optimizer/Transforms/StateDecomposer.h | 175 ---------- lib/Optimizer/Transforms/StatePreparation.cpp | 301 ++++++++++++++++-- python/cudaq/kernel/ast_bridge.py | 14 +- .../cudaq/platform/py_alt_launch_kernel.cpp | 14 +- runtime/common/BaseRemoteRESTQPU.h | 13 +- runtime/common/CMakeLists.txt | 3 +- runtime/common/Environment.cpp | 26 ++ runtime/common/Environment.h | 16 + .../execution/state_preparation_vector.cpp | 240 +++++++++----- test/Quake/const_prop_complex.qke | 37 +++ test/Quake/state_prep.qke | 97 +++--- 18 files changed, 702 insertions(+), 595 deletions(-) create mode 100644 dictionary.dic delete mode 100644 lib/Optimizer/Transforms/StateDecomposer.cpp delete mode 100644 lib/Optimizer/Transforms/StateDecomposer.h create mode 100644 runtime/common/Environment.cpp create mode 100644 runtime/common/Environment.h diff --git a/dictionary.dic b/dictionary.dic new file mode 100644 index 0000000000000000000000000000000000000000..d5b1930b6ba86d2e1409b65c5d6b65c39e30fa80 GIT binary patch literal 9936 zcmai)4UlA4Rmbm6_v^2j>G|yW+S$!wAc4h@K={x|h0Mp!&ZIx4x8K{{*;K^zbidi{ zOi%Z0zwXUWL?B=hh)N_tETw{=f&no^1PZ926kjT)FhLY4wTz*PhzdzzSph8zfB*Yl z?@mz5uA2S7cka38oO|xM=bn4tR)bx?**sGBx2nBn_lSM-J4ZV0JDNvc`jVGkXa1jG zsrtzO;CL9yk*h-T=LU?qEoICThmE=VfH8l;pMHWEe@Xu0{EhI}{~hsLX7^mX<#!Dx z^r;~pjb4wggffoijX{MuS_owwEgF;7d&!uLux!k@uwu-#&^2a8=ovEyE+Plh%p!8G z5F+P@5dJHL@IPM&{|mqsQW( zmkVilr4YS02kOzHq~sEwF^VvqJhcg!H>zh<)2a_;!ise^rRy zcL=fbogiTD7b54^gvj}IA##30h@1z6$a$X-dw)xa9ltF^zz2oc^B{Pd{vQ(3|HDG~ zKO%(xqeA!}1~=gQ1F-))%0q@f#cxhX|L0lEo0nKDnxC;)GOHF{^TGr0Vc($vVbQ!$ zSS*>>2$>+)V!=FXv1ks*L@%44v{*66EV{LY_Vc$;3@&%vsf^{B&6SG zg-q~QESAmpLELXfQ{YXD=1L(6;ChRmSrWofvsg6eEtbq(7F~0{ka_ul#YOWeA$p&% 
zST;`!OC__O7P{sSEqdlLA%1@~fskjR_!cYXZ5CbgHH)5k#^M=s)gXMt*9{h3b6iNA zowArWn-)tZ5HcTkSzI*lwwN~$S}d7AvRF2cTXfA67CrMliyJ08M7|*L{e*C(U|wah zXpRdp_@u>(xkE_Yy-SE4pR~AWz6dJMJZ^Et{JpSX%=ayN=0`%tJvS_L%~cjR%=JPR z_@YJ6tP7d{pA~MD%nn#0j_$GOnFobT%%_BfiusZdMSpG4HO~lH;PYAX&0@)1D zBqYI}5@Of3Mc3>JvFmNZ%A&d7V%|Jtv0(noV$tN;q{@}Dxm`#Cy5FK_J_uIu*C&L; z`Im&qe^N-Id|HT~a-%}eJWog-Sq5D;pt8lh@huk3=$PoPd4WaG+#_Uz`KFM7`?f{b zjF0nUpGDP+7oA#w2*ixu-ui>~>Ckd5eD7G1M3srLzY^TQpKIX%^~)SMYMi0ig5NQN10?J0P^_`FWrhYw*vV{sy|T1mhGIvF!sKvl z{lt=eDVpSHZ0$tR%p{KHiCE8MX9xYpS-*9`uXWoUGnWkMuF1{}?Q00kl%tk{$xdfN zdbQc}BRQ|7*AOR0MtPLA!Z@fmrR( z5jPuZ4;unAEqgWP*_lv%(5-e+FLNPZ)^F*7nM1#u_ZDT@^q_yyul2gs%_f%R7>CQ6 z%+98(y^D=zv)b7+htd$srP78p92)j7?o!;fx(#&H&0*VEaTg0}%+lELgP^h5Xg0cg z=Abl{D+>SY^iaR?f|@4cG|7NewN|5x{3~wP+aTFyr?9KmU>uEhD=?#C_DnM8oLpLR zmqU}hQmFMHCUyPRC8asW?RphbN_H$oQ^((_Hce)LO*lI`z~fYseV7#J8fPlK(A=pO z8m-<%Gm&6hmYVEXvbxi5oejEXHl{5)JDBua=X*YL>WszL+N~32DnUb$?KL}-?zL*| zo!uTTpHGEU(q49ExKFo(Fq3iGpiiemS&i0NGo6H1v4Jp?ZnSEcst~|!AzAE0m+0y- zE-CnDG8I`Am`UeoslY}BtWT9(by>`*L?nHpv&MUx$m`>o!Nf}>RSiW+Thu-9tu zb{jj5J7LShMgWXDy_P0l`p8n*sbsZPQ$W};x?X8RV>~q6Xc1I47?>zU0}-b-Oq|O; zr!g?&809J)Clb|GQ0?AnCh;N^&37s!5rp%Y=B-qyX|o3r(e*o9ey6?T^I+ZXmB^yj zjc7jGgsQ=aikw^q2Zm%6VmzfF#`jvp62qCrvtFg-%7t?ogm`bOaJF&p?AqOIR zg_B8$rl$$pqlnN6H8O`0$>lL55&z>>hI`UIrubl2cShqFVl#ol1K*|lv0=S0BGk#u zC2F9MkrO5yyn+K}la!eoIGN=&v=HpIYUeubR=XD{%W4&qm;n|fow5U*6A272uWUG; ztk!RD<9rR=)9RXlQr%Vd#bzkdor*|y0=x<}UpAL&5FScm%7)B|lJV4_C3YJZ{N|p? 
z#;ImqlN?YOR7zGO*vOqd*iCjHev};SDWNcq&VTR7}fdE|CLLD0Ma2X)?_M*Zy~`rp6=7?oXRPMH7(_n#zAAR zyr@k~jBu_w4S7UXME^Czpvx zh3Y1W4=VPLA_bg)>5SYxhQYggHVTFYIH6J)A75Q!yNu=8=*S@XPPM1~Nm(*tZPBemT7RYccPew})$8UDmOJ-t#M$OmBWgFZ#uX%Eh$7zV9stmVT4SbV}F&!e0pmaznb?Q-)PWhwKJl#OruT{0VQOrn-*JzT1`)E#4~ z@~-Tdr7{C?>jz3dlMW|j4Q?V`Xl!=EU3?^_<8pR1(Y#>G_F>*R3uNPk!YPh( zF&&=Sk(=%Ic5j!Y%{x})k|9C$N+Dmlsi>l4cBI)pt6lKijus2M4&9YP>E38<6D@SN z_1kDNBq*gQdPbaPI|%kjg9XmgY>|S&0oHqRBv$0OGU*iaE8-ka@38u}8miZa2B>nA zKP@xDUpRj$f^_3J_!iSmnS_Os99ES&-%54OBR^EUl_d+Gzvy zFLON-FYbW=dm0JJ*`dXc3?>t^G;D!Ipb zUt;D;6gec_(y6uOHJ!wCAAr`<@g*FlO9A%8Rxdn$%qF-UaFsu1hgP!4g%Qsup~D$q4)Bmn}m%)NVq0VxAGBMJMeX(3;5QYi1?@96}$ z64p;MgRJ?@zK=vF_o8kl!=b5vgqTf+WaxA7_vxIpQZ!H%E6(Oek#x3G-PvI;wW17xw*KyWegb7> zd1^9=iBz0*au_6=^rnYIyOi}t;@q~V?sCfO?i%-0u`=PLS9guwYb2)2muxQ430L2I zic6T4yshe{vl@SEt8um$)^TvGk z3U=?xWrDpOwtp&>8as_!B*>_pgC29%(YhOrb62OFkys*Oth-_=%Rq*@%iV~!YeoWz zj??KrEil(4-Ex_A=bGGj*6opqD9Jc3>KY_F!xTSQ_I$~yq_cu^yp;R$6p6qf83^=e(LXn7ES`@avh7Llase9%UF`zW&&Xh|9 zx*yEeNm5)lv|Gd>2jyJCBc7~<>8?*W*&8)_RQj1WOBt(B>F3?KBsZ-qHaE@=)f+*r z&COu60%)no2{RcrbbMRu;%Q4p{oLSXy0S{xpi#_WSkxoT{!@Hlc?gyw3>wYFP z3a}jR595g{Z9Gmqv5+QG^=h}e)okBkNjfHurclZ5ou=$j#NE;EY+K=DFDu#a>Qwd42Itp?NcIM5T=H1XB>!LnC;M1-8QNKC9 zO|p{0cr%`;G8@75;Seejm?Yfp?zw_c8|Y zTtM%=eCv5N-$C^LHS~X)=RAL(f&XsCA^IqCh5E&j@km79!2Z?97qjpW_vetY-Gc4E z!}kdF$?$$B_Wd!w(EC06#`X4S>{mwk=h*#C=4Eo9Fa9o)qrT7J*9Z3b`f$|tTE_JX z?EE@5j7PGr;{D=2U3W+QZorO@!*@+&kL31$Z$kgG=>N@VT({GwfGzXT1|q+GZ=XHt z_q+Jy31mGVU+Vqc(Y)&UF?_j$%&UKF{?+ymQHhrhJ+|Pn*QQP(K-ALSj9Dn%C$$#?xWMsGc+#C7zQD}$o&8>{@1MvCuZ(z?a z%D?oz9v_w>JvT%?Jb+xu_%OcsIJs05QJ;~>-=gXFSJ?dfFn%7rhrwKA z??K{E?MGw}<9#`H9OHQ#T}Pv_y`DKs&}S`@nS!2y_XpVY;z-Uv@6%U_V&JFg^KJTm zVxNzH7}+kJk1#h!cz+6cZ>0az_*!)Ru0X!bMAW{`cwfL=J&yd3Kz~&< zZ~w%*{{P1BMRu$I8yUxg8XNN8h_4&8eT=`#KA*jcw$I`BwJ2s@g3X`BU*%|Qe}gU8 zB4-WSSC|(clNBC2WcZfcE?l2$sMdNp{?Yr3a0Qzp`FOCfHEHX!5jd=8~-$(Jo zlg!2Yu;quydI#^XkLL3IJg;KA_})Ojry}1x#kk%H{m-H~d^)mUeAh-iS4TF!2cBdk z?=f`zAd)9u{r-`*GuZcO! 
zCjL>|&5=!d{?$ru0YkWq@{&cGwudHB8z?Uu;*qTP;- VAByJwAU5kik$nz7`}je>{|1;FG}! -DenseElementsAttr createArrayAttr(const std::vector &values, Type eleTy) { +DenseElementsAttr createDenseElementsAttr(const std::vector &values, + Type eleTy) { auto newValues = ArrayRef(values.data(), values.size()); auto tensorTy = RankedTensorType::get(values.size(), eleTy); return DenseElementsAttr::get(tensorTy, newValues); } -DenseElementsAttr createArrayAttr(const std::vector &values, Type eleTy) { +DenseElementsAttr createDenseElementsAttr(const std::vector &values, + Type eleTy) { std::vector converted; for (auto b : values) { converted.push_back(std::byte(b)); @@ -417,7 +419,7 @@ cc::GlobalOp buildVectorOfConstantElements(Location loc, ModuleOp module, builder.setInsertionPointToEnd(module.getBody()); auto globalTy = cc::ArrayType::get(ctx, eleTy, values.size()); - auto arrayAttr = createArrayAttr(values, eleTy); + auto arrayAttr = createDenseElementsAttr(values, eleTy); return builder.create(loc, globalTy, name, arrayAttr, /*constant=*/true, /*external=*/false); diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index 0b5aa6d23a..881625db21 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -47,7 +47,6 @@ add_cudaq_library(OptTransforms QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp - StateDecomposer.cpp StatePreparation.cpp DEPENDS diff --git a/lib/Optimizer/Transforms/ConstPropComplex.cpp b/lib/Optimizer/Transforms/ConstPropComplex.cpp index 7439b44a4b..d1ffc8d5af 100644 --- a/lib/Optimizer/Transforms/ConstPropComplex.cpp +++ b/lib/Optimizer/Transforms/ConstPropComplex.cpp @@ -29,45 +29,6 @@ using namespace mlir; namespace { -// Replace array ptr casts that throw away the size by a cast to element -// pointer. -// -//%1 = cc.cast %0 : (!cc.ptr x 4>>) -> -//! cc.ptr x ?>> -// -> -//%1 = cc.cast %0 : (!cc.ptr x 4>>) -> -//! 
cc.ptr> -class CastArrayPtrPattern : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(cudaq::cc::CastOp cast, - PatternRewriter &rewriter) const override { - - auto fromTy = cast.getOperand().getType(); - auto toTy = cast.getType(); - - if (auto ptrFromTy = dyn_cast(fromTy)) { - if (auto arrayFromTy = - dyn_cast(ptrFromTy.getElementType())) { - if (auto ptrToTy = dyn_cast(toTy)) { - if (auto arrayToTy = - dyn_cast(ptrToTy.getElementType())) { - if (arrayFromTy.getElementType() == arrayToTy.getElementType()) { - auto eleTy = arrayFromTy.getElementType(); - auto elePtrType = cudaq::cc::PointerType::get(eleTy); - rewriter.replaceOpWithNewOp(cast, elePtrType, - cast.getOperand()); - return success(); - } - } - } - } - } - return failure(); - } -}; - // Fold complex.create ops if the arguments are constants. class ComplexCreatePattern : public OpRewritePattern { public: @@ -91,6 +52,35 @@ class ComplexCreatePattern : public OpRewritePattern { } }; +// Fold floating point cast ops if the argument is constant. +class FloatCastPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(cudaq::cc::CastOp cast, + PatternRewriter &rewriter) const override { + auto val = cast.getOperand(); + auto valCon = val.getDefiningOp(); + if (valCon) { + auto fTy = dyn_cast(cast.getType()); + if (fTy == rewriter.getF64Type()) { + auto v = valCon.value().convertToFloat(); + auto fTy = dyn_cast(cast.getType()); + rewriter.replaceOpWithNewOp( + cast, APFloat{static_cast(v)}, fTy); + return success(); + } else if (fTy == rewriter.getF32Type()) { + auto v = valCon.value().convertToDouble(); + auto fTy = dyn_cast(cast.getType()); + rewriter.replaceOpWithNewOp( + cast, APFloat{static_cast(v)}, fTy); + return success(); + } + } + return failure(); + } +}; + // Fold arith.trunc ops if the argument is constant. 
class FloatTruncatePattern : public OpRewritePattern { public: @@ -189,11 +179,11 @@ class ConstPropComplexPass std::string funcName = func.getName().str(); RewritePatternSet patterns(ctx); patterns.insert(ctx); + patterns.insert(ctx); patterns.insert(ctx); patterns.insert(ctx); patterns.insert(ctx); patterns.insert(ctx); - patterns.insert(ctx); LLVM_DEBUG(llvm::dbgs() << "Before lifting constant array: " << func << '\n'); diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index 2336e6a97d..9de57fad81 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -180,13 +180,7 @@ class AllocaPattern : public OpRewritePattern { } for (auto *op : toErase) { - if (op->getUses().empty()) { - rewriter.eraseOp(op); - } else { - op->emitOpError("LiftArrayAlloc failed to remove cc::AllocOp " - "or its uses."); - return failure(); - } + rewriter.eraseOp(op); } return success(); } @@ -259,7 +253,9 @@ class AllocaPattern : public OpRewritePattern { return theStore; }; - auto ptrArrEleTy = cudaq::cc::PointerType::get(arrTy.getElementType()); + auto unsizedArrTy = cudaq::cc::ArrayType::get(arrEleTy); + auto ptrUnsizedArrTy = cudaq::cc::PointerType::get(unsizedArrTy); + auto ptrArrEleTy = cudaq::cc::PointerType::get(arrEleTy); for (auto &use : alloc->getUses()) { // All uses *must* be a degenerate cc.cast, cc.compute_ptr, or // cc.init_state. @@ -278,6 +274,7 @@ class AllocaPattern : public OpRewritePattern { return false; } if (auto cast = dyn_cast(op)) { + // Process casts that are used in store ops. if (cast.getType() == ptrArrEleTy) { if (auto w = getWriteOp(cast, 0)) if (!scoreboard[0]) { @@ -286,6 +283,20 @@ class AllocaPattern : public OpRewritePattern { } return false; } + // Process casts that are used in quake.init_state. 
+ if (cast.getType() == ptrUnsizedArrTy) { + if (getWriteOp(cast, 0)) + LLVM_DEBUG( + llvm::dbgs() + << "unexpected use of array size removing cast in a store" + << *op << '\n'); + continue; + } + if (isa(op)) { + toGlobalUses.push_back(op); + toGlobal = true; + continue; + } LLVM_DEBUG(llvm::dbgs() << "unexpected cast: " << *op << '\n'); toGlobalUses.push_back(op); toGlobal = true; diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index d8cc3e22d0..e309f86214 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -104,31 +104,22 @@ Value makeComplexElement(OpBuilder &builder, Location argLoc, return builder.create(argLoc, eleTy, complexVal); } -template -std::tuple -createArrayInMemory(OpBuilder &builder, ModuleOp module, unsigned &counter, - BlockArgument argument, std::vector &vec, - cudaq::cc::ArrayType arrTy) { - auto argLoc = argument.getLoc(); - - // Stick global at end of Module. - std::string symbol = "__nvqpp_rodata_init_state." + std::to_string(counter++); - - cudaq::IRBuilder irBuilder(builder); - irBuilder.genVectorOfConstants(argLoc, module, symbol, vec); - - builder.setInsertionPointToStart(argument.getOwner()); - auto buffer = builder.create( - argLoc, cudaq::cc::PointerType::get(arrTy), symbol); - auto data = builder.create(argLoc, arrTy, buffer); - return {buffer, data}; +/// returns true if and only if \p argument is used by a `quake.init_state` +/// operation. 
+static bool hasInitStateUse(BlockArgument argument) { + for (auto *argUser : argument.getUsers()) + if (auto stdvecDataOp = dyn_cast(argUser)) + for (auto *dataUser : stdvecDataOp->getUsers()) + if (isa(dataUser)) + return true; + return false; } -template +template LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector &vec, - MAKER makeElementValue) { + ATTR arrayAttr, MAKER makeElementValue) { auto *ctx = builder.getContext(); auto argTy = argument.getType(); assert(isa(argTy)); @@ -138,24 +129,43 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, auto argLoc = argument.getLoc(); auto arrTy = cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()); + auto conArray = builder.create( + argLoc, cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()), arrayAttr); + std::optional arrayInMemory; - std::optional conArray; auto ptrEleTy = cudaq::cc::PointerType::get(eleTy); bool generateNewValue = false; // Helper function that materializes the array in memory. - auto getArrayInMemory = [&]() -> std::tuple { + auto getArrayInMemory = [&]() -> Value { if (arrayInMemory) - return {*arrayInMemory, *conArray}; + return *arrayInMemory; OpBuilder::InsertionGuard guard(builder); - auto [buffer, data] = - createArrayInMemory(builder, module, counter, argument, vec, arrTy); + auto argLoc = argument.getLoc(); + + Value buffer; + if (hasInitStateUse(argument)) { + // Stick global at end of Module. + std::string symbol = + "__nvqpp_rodata_init_state." 
+ std::to_string(counter++); + + cudaq::IRBuilder irBuilder(builder); + irBuilder.genVectorOfConstants(argLoc, module, symbol, vec); + + builder.setInsertionPointToStart(argument.getOwner()); + buffer = builder.create( + argLoc, cudaq::cc::PointerType::get(arrTy), symbol); + } else { + builder.setInsertionPointAfter(conArray); + buffer = builder.create(argLoc, arrTy); + builder.create(argLoc, conArray, buffer); + } + auto ptrArrEleTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(eleTy)); Value res = builder.create(argLoc, ptrArrEleTy, buffer); arrayInMemory = res; - conArray = data; - return {res, data}; + return res; }; auto replaceLoads = [&](cudaq::cc::ComputePtrOp elePtrOp, @@ -208,11 +218,11 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, if (index == cudaq::cc::ComputePtrOp::kDynamicIndex) { OpBuilder::InsertionGuard guard(builder); builder.setInsertionPoint(elePtrOp); - auto [memArr, conArray] = getArrayInMemory(); Value getEle = builder.create( elePtrOp.getLoc(), eleTy, conArray, elePtrOp.getDynamicIndices()[0]); if (failed(replaceLoads(elePtrOp, getEle))) { + Value memArr = getArrayInMemory(); builder.setInsertionPoint(elePtrOp); Value newComputedPtr = builder.create( argLoc, ptrEleTy, memArr, @@ -225,7 +235,7 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, Value runtimeParam = makeElementValue(builder, argLoc, vec[index], eleTy); if (failed(replaceLoads(elePtrOp, runtimeParam))) { - auto [memArr, _] = getArrayInMemory(); + Value memArr = getArrayInMemory(); OpBuilder::InsertionGuard guard(builder); builder.setInsertionPoint(elePtrOp); Value newComputedPtr = builder.create( @@ -240,7 +250,7 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, // Check if there were other uses of `vec.data()` and simply forward the // constant array as materialized in memory. 
if (replaceOtherUses) { - auto [memArr, _] = getArrayInMemory(); + Value memArr = getArrayInMemory(); stdvecDataOp.replaceAllUsesWith(memArr); } continue; @@ -252,7 +262,7 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, generateNewValue = true; } if (generateNewValue) { - auto [memArr, _] = getArrayInMemory(); + Value memArr = getArrayInMemory(); OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointAfter(memArr.getDefiningOp()); Value size = builder.create(argLoc, vec.size(), 64); @@ -271,11 +281,16 @@ std::vector asI32(const std::vector &v) { return result; } +// TODO: consider using DenseArrayAttr here instead. NB: such a change may alter +// the output of the constant array op. static LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector &vec) { - return synthesizeVectorArgument( - builder, module, counter, argument, vec, makeIntegerElement); + + auto arrayAttr = builder.getI32ArrayAttr(asI32(vec)); + return synthesizeVectorArgument(builder, module, counter, + argument, vec, arrayAttr, + makeIntegerElement); } static LogicalResult synthesizeVectorArgument(OpBuilder &builder, @@ -283,8 +298,10 @@ static LogicalResult synthesizeVectorArgument(OpBuilder &builder, unsigned &counter, BlockArgument argument, std::vector &vec) { - return synthesizeVectorArgument( - builder, module, counter, argument, vec, makeIntegerElement); + auto arrayAttr = builder.getI32ArrayAttr(asI32(vec)); + return synthesizeVectorArgument(builder, module, counter, + argument, vec, arrayAttr, + makeIntegerElement); } static LogicalResult synthesizeVectorArgument(OpBuilder &builder, @@ -292,8 +309,9 @@ static LogicalResult synthesizeVectorArgument(OpBuilder &builder, unsigned &counter, BlockArgument argument, std::vector &vec) { + auto arrayAttr = builder.getI32ArrayAttr(asI32(vec)); return synthesizeVectorArgument( - builder, module, counter, argument, vec, + builder, 
module, counter, argument, vec, arrayAttr, makeIntegerElement); } @@ -302,8 +320,9 @@ static LogicalResult synthesizeVectorArgument(OpBuilder &builder, unsigned &counter, BlockArgument argument, std::vector &vec) { + auto arrayAttr = builder.getI32ArrayAttr(vec); return synthesizeVectorArgument( - builder, module, counter, argument, vec, + builder, module, counter, argument, vec, arrayAttr, makeIntegerElement); } @@ -312,39 +331,59 @@ static LogicalResult synthesizeVectorArgument(OpBuilder &builder, unsigned &counter, BlockArgument argument, std::vector &vec) { + auto arrayAttr = builder.getI64ArrayAttr(vec); return synthesizeVectorArgument( - builder, module, counter, argument, vec, + builder, module, counter, argument, vec, arrayAttr, makeIntegerElement); } static LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector &vec) { + auto arrayAttr = builder.getF32ArrayAttr(vec); return synthesizeVectorArgument(builder, module, counter, argument, - vec, makeFloatElement); + vec, arrayAttr, + makeFloatElement); } static LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector &vec) { + auto arrayAttr = builder.getF64ArrayAttr(vec); return synthesizeVectorArgument(builder, module, counter, argument, - vec, makeFloatElement); + vec, arrayAttr, + makeFloatElement); } static LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector> &vec) { - return synthesizeVectorArgument( - builder, module, counter, argument, vec, makeComplexElement); + + std::vector vec2; + for (auto c : vec) { + vec2.push_back(c.real()); + vec2.push_back(c.imag()); + } + auto arrayAttr = builder.getF32ArrayAttr(vec2); + return synthesizeVectorArgument(builder, module, counter, + argument, vec, arrayAttr, + makeComplexElement); } static LogicalResult synthesizeVectorArgument(OpBuilder 
&builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector> &vec) { - return synthesizeVectorArgument( - builder, module, counter, argument, vec, makeComplexElement); + std::vector vec2; + for (auto c : vec) { + vec2.push_back(c.real()); + vec2.push_back(c.imag()); + } + auto arrayAttr = builder.getF64ArrayAttr(vec2); + return synthesizeVectorArgument(builder, module, counter, + argument, vec, arrayAttr, + makeComplexElement); } namespace { diff --git a/lib/Optimizer/Transforms/StateDecomposer.cpp b/lib/Optimizer/Transforms/StateDecomposer.cpp deleted file mode 100644 index 62ca8a9d73..0000000000 --- a/lib/Optimizer/Transforms/StateDecomposer.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - -#include "StateDecomposer.h" - -namespace cudaq::details { - -std::vector grayCode(std::size_t numBits) { - std::vector result(1ULL << numBits); - for (std::size_t i = 0; i < (1ULL << numBits); ++i) - result[i] = ((i >> 1) ^ i); - return result; -} - -std::vector getControlIndices(std::size_t numBits) { - auto code = grayCode(numBits); - std::vector indices; - for (auto i = 0u; i < code.size(); ++i) { - // The position of the control in the lth CNOT gate is set to match - // the position where the lth and (l + 1)th bit strings g[l] and g[l+1] of - // the binary reflected Gray code differ. - auto position = std::log2(code[i] ^ code[(i + 1) % code.size()]); - // N.B: In CUDA Quantum we write the least significant bit (LSb) on the left - // - // lsb -v - // 001 - // ^- msb - // - // Meaning that the bitstring 001 represents the number four instead of one. 
- // The above position calculation uses the 'normal' convention of writing - // numbers with the LSb on the left. - // - // Now, what we need to find out is the position of the 1 in the bitstring. - // If we take LSb as being position 0, then for the normal convention its - // position will be 0. Using CUDA Quantum convention it will be 2. Hence, - // we need to convert the position we find using: - // - // numBits - position - 1 - // - // The extra -1 is to account for indices starting at 0. Using the above - // examples: - // - // bitstring: 001 - // numBits: 3 - // position: 0 - // - // We have the converted position: 2, which is what we need. - indices.emplace_back(numBits - position - 1); - } - return indices; -} - -std::vector convertAngles(const std::span alphas) { - // Implements Eq. (3) from https://arxiv.org/pdf/quant-ph/0407010.pdf - // - // N.B: The paper does fails to explicitly define what is the dot operator in - // the exponent of -1. Ref. 3 solves the mystery: its the bitwise inner - // product. - auto bitwiseInnerProduct = [](std::size_t a, std::size_t b) { - auto product = a & b; - auto sumOfProducts = 0; - while (product) { - sumOfProducts += product & 0b1 ? 1 : 0; - product = product >> 1; - } - return sumOfProducts; - }; - std::vector thetas(alphas.size(), 0); - for (std::size_t i = 0u; i < alphas.size(); ++i) { - for (std::size_t j = 0u; j < alphas.size(); ++j) - thetas[i] += - bitwiseInnerProduct(j, ((i >> 1) ^ i)) & 0b1 ? -alphas[j] : alphas[j]; - thetas[i] /= alphas.size(); - } - return thetas; -} - -std::vector getAlphaZ(const std::span data, - std::size_t numQubits, std::size_t k) { - // Implements Eq. 
(5) from https://arxiv.org/pdf/quant-ph/0407010.pdf - std::vector angles; - double divisor = static_cast(1ULL << (k - 1)); - for (std::size_t j = 1; j <= (1ULL << (numQubits - k)); ++j) { - double angle = 0.0; - for (std::size_t l = 1; l <= (1ULL << (k - 1)); ++l) - // N.B: There is an extra '-1' on these indices computations to account - // for the fact that our indices start at 0. - angle += data[(2 * j - 1) * (1 << (k - 1)) + l - 1] - - data[(2 * j - 2) * (1 << (k - 1)) + l - 1]; - angles.push_back(angle / divisor); - } - return angles; -} - -std::vector getAlphaY(const std::span data, - std::size_t numQubits, std::size_t k) { - // Implements Eq. (8) from https://arxiv.org/pdf/quant-ph/0407010.pdf - // N.B: There is an extra '-1' on these indices computations to account for - // the fact that our indices start at 0. - std::vector angles; - for (std::size_t j = 1; j <= (1ULL << (numQubits - k)); ++j) { - double numerator = 0; - for (std::size_t l = 1; l <= (1ULL << (k - 1)); ++l) { - numerator += - std::pow(std::abs(data[(2 * j - 1) * (1 << (k - 1)) + l - 1]), 2); - } - - double denominator = 0; - for (std::size_t l = 1; l <= (1ULL << k); ++l) { - denominator += std::pow(std::abs(data[(j - 1) * (1 << k) + l - 1]), 2); - } - - if (denominator == 0.0) { - assert(numerator == 0.0 && - "If the denominator is zero, the numerator must also be zero."); - angles.push_back(0.0); - continue; - } - angles.push_back(2.0 * std::asin(std::sqrt(numerator / denominator))); - } - return angles; -} -} // namespace cudaq::details diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h deleted file mode 100644 index a09b8a64e9..0000000000 --- a/lib/Optimizer/Transforms/StateDecomposer.h +++ /dev/null @@ -1,175 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. 
* - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - -#include "PassDetails.h" -#include "cudaq/Optimizer/Builder/Runtime.h" -#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" -#include "cudaq/Optimizer/Transforms/Passes.h" -#include "llvm/Support/Debug.h" -#include "mlir/Conversion/LLVMCommon/TypeConverter.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Complex/IR/Complex.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/Math/IR/Math.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Target/LLVMIR/TypeToLLVM.h" -#include "mlir/Transforms/DialectConversion.h" -#include "mlir/Transforms/RegionUtils.h" -#include - -namespace cudaq::details { - -/// @brief Converts angles of a uniformly controlled rotation to angles of -/// non-controlled rotations. -std::vector convertAngles(const std::span alphas); - -/// @brief Return the control indices dictated by the gray code implementation. -/// -/// Here, numBits is the number of controls. -std::vector getControlIndices(std::size_t numBits); - -/// @brief Return angles required to implement a uniformly controlled z-rotation -/// on the `kth` qubit. -std::vector getAlphaZ(const std::span data, - std::size_t numQubits, std::size_t k); - -/// @brief Return angles required to implement a uniformly controlled y-rotation -/// on the `kth` qubit. 
-std::vector getAlphaY(const std::span data, - std::size_t numQubits, std::size_t k); -} // namespace cudaq::details - -class StateGateBuilder { -public: - StateGateBuilder(mlir::OpBuilder &b, mlir::Location &l, mlir::Value &q) - : builder(b), loc(l), qubits(q) {} - - template - void applyRotationOp(double theta, std::size_t target) { - auto qubit = createQubitRef(target); - auto thetaValue = createAngleValue(theta); - builder.create(loc, thetaValue, mlir::ValueRange{}, qubit); - }; - - void applyX(std::size_t control, std::size_t target) { - auto qubitC = createQubitRef(control); - auto qubitT = createQubitRef(target); - builder.create(loc, qubitC, qubitT); - }; - -private: - mlir::Value createQubitRef(std::size_t index) { - if (qubitRefs.contains(index)) { - return qubitRefs[index]; - } - - auto indexValue = builder.create( - loc, index, builder.getIntegerType(64)); - auto ref = builder.create(loc, qubits, indexValue); - qubitRefs[index] = ref; - return ref; - } - - mlir::Value createAngleValue(double angle) { - return builder.create( - loc, llvm::APFloat{angle}, builder.getF64Type()); - } - - mlir::OpBuilder &builder; - mlir::Location &loc; - mlir::Value &qubits; - - std::unordered_map qubitRefs = - std::unordered_map(); -}; - -class StateDecomposer { -public: - StateDecomposer(StateGateBuilder &b, std::span> a) - : builder(b), amplitudes(a), numQubits(log2(a.size())) {} - - /// @brief Decompose the input state vector data to a set of controlled - /// operations and rotations. This function takes as input a `OpBuilder` - /// and appends the operations of the decomposition to its internal - /// representation. This implementation follows the algorithm defined in - /// `https://arxiv.org/pdf/quant-ph/0407010.pdf`. - void decompose() { - - // Decompose the state into phases and magnitudes. 
- bool needsPhaseEqualization = false; - std::vector phases; - std::vector magnitudes; - for (const auto &a : amplitudes) { - phases.push_back(std::arg(a)); - magnitudes.push_back(std::abs(a)); - // FIXME: remove magic number. - needsPhaseEqualization |= std::abs(phases.back()) > 1e-10; - } - - // N.B: The algorithm, as described in the paper, creates a circuit that - // begins with a target state and brings it to the all zero state. Hence, - // this implementation do the two steps described in Section III in reverse - // order. - - // Apply uniformly controlled y-rotations, the construction in Eq. (4). - for (std::size_t j = 1; j <= numQubits; ++j) { - auto k = numQubits - j + 1; - auto numControls = j - 1; - auto target = j - 1; - auto alphaYk = cudaq::details::getAlphaY(magnitudes, numQubits, k); - applyRotation(alphaYk, numControls, target); - } - - if (!needsPhaseEqualization) - return; - - // Apply uniformly controlled z-rotations, the construction in Eq. (4). - for (std::size_t j = 1; j <= numQubits; ++j) { - auto k = numQubits - j + 1; - auto numControls = j - 1; - auto target = j - 1; - auto alphaZk = cudaq::details::getAlphaZ(phases, numQubits, k); - if (alphaZk.empty()) - continue; - applyRotation(alphaZk, numControls, target); - } - } - -private: - /// @brief Apply a uniformly controlled rotation on the target qubit. - template - void applyRotation(const std::span alphas, std::size_t numControls, - std::size_t target) { - - // In our model the index 1 (i.e. |01>) in quantum state data - // corresponds to qubits[0] = 1 and qubits[1] = 0. - // Revert the order of qubits as the state preparation algorithm - // we use assumes the opposite. 
- auto qubitIndex = [&](std::size_t i) { return numQubits - i - 1; }; - - auto thetas = cudaq::details::convertAngles(alphas); - if (numControls == 0) { - builder.applyRotationOp(thetas[0], qubitIndex(target)); - return; - } - - auto controlIndices = cudaq::details::getControlIndices(numControls); - assert(thetas.size() == controlIndices.size()); - for (auto [i, c] : llvm::enumerate(controlIndices)) { - builder.applyRotationOp(thetas[i], qubitIndex(target)); - builder.applyX(qubitIndex(c), qubitIndex(target)); - } - } - - StateGateBuilder &builder; - std::span> amplitudes; - std::size_t numQubits; -}; diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index 86fad793a5..f7a104b2ae 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -7,7 +7,6 @@ ******************************************************************************/ #include "PassDetails.h" -#include "StateDecomposer.h" #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" @@ -36,12 +35,257 @@ namespace cudaq::opt { using namespace mlir; +namespace cudaq::details { + +std::vector grayCode(std::size_t numBits) { + std::vector result(1ULL << numBits); + for (std::size_t i = 0; i < (1ULL << numBits); ++i) + result[i] = ((i >> 1) ^ i); + return result; +} + +std::vector getControlIndices(std::size_t numBits) { + auto code = grayCode(numBits); + std::vector indices; + for (auto i = 0u; i < code.size(); ++i) { + // The position of the control in the lth CNOT gate is set to match + // the position where the lth and (l + 1)th bit strings g[l] and g[l+1] of + // the binary reflected Gray code differ. 
+ auto position = std::log2(code[i] ^ code[(i + 1) % code.size()]); + // N.B: In CUDA Quantum we write the least significant bit (LSb) on the left + // + // lsb -v + // 001 + // ^- msb + // + // Meaning that the bitstring 001 represents the number four instead of one. + // The above position calculation uses the 'normal' convention of writing + // numbers with the LSb on the left. + // + // Now, what we need to find out is the position of the 1 in the bitstring. + // If we take LSb as being position 0, then for the normal convention its + // position will be 0. Using CUDA Quantum convention it will be 2. Hence, + // we need to convert the position we find using: + // + // numBits - position - 1 + // + // The extra -1 is to account for indices starting at 0. Using the above + // examples: + // + // bitstring: 001 + // numBits: 3 + // position: 0 + // + // We have the converted position: 2, which is what we need. + indices.emplace_back(numBits - position - 1); + } + return indices; +} + +std::vector convertAngles(const std::span alphas) { + // Implements Eq. (3) from https://arxiv.org/pdf/quant-ph/0407010.pdf + // + // N.B: The paper does fails to explicitly define what is the dot operator in + // the exponent of -1. Ref. 3 solves the mystery: its the bitwise inner + // product. + auto bitwiseInnerProduct = [](std::size_t a, std::size_t b) { + auto product = a & b; + auto sumOfProducts = 0; + while (product) { + sumOfProducts += product & 0b1 ? 1 : 0; + product = product >> 1; + } + return sumOfProducts; + }; + std::vector thetas(alphas.size(), 0); + for (std::size_t i = 0u; i < alphas.size(); ++i) { + for (std::size_t j = 0u; j < alphas.size(); ++j) + thetas[i] += + bitwiseInnerProduct(j, ((i >> 1) ^ i)) & 0b1 ? -alphas[j] : alphas[j]; + thetas[i] /= alphas.size(); + } + return thetas; +} + +std::vector getAlphaZ(const std::span data, + std::size_t numQubits, std::size_t k) { + // Implements Eq. 
(5) from https://arxiv.org/pdf/quant-ph/0407010.pdf + std::vector angles; + double divisor = static_cast(1ULL << (k - 1)); + for (std::size_t j = 1; j <= (1ULL << (numQubits - k)); ++j) { + double angle = 0.0; + for (std::size_t l = 1; l <= (1ULL << (k - 1)); ++l) + // N.B: There is an extra '-1' on these indices computations to account + // for the fact that our indices start at 0. + angle += data[(2 * j - 1) * (1 << (k - 1)) + l - 1] - + data[(2 * j - 2) * (1 << (k - 1)) + l - 1]; + angles.push_back(angle / divisor); + } + return angles; +} + +std::vector getAlphaY(const std::span data, + std::size_t numQubits, std::size_t k) { + // Implements Eq. (8) from https://arxiv.org/pdf/quant-ph/0407010.pdf + // N.B: There is an extra '-1' on these indices computations to account for + // the fact that our indices start at 0. + std::vector angles; + for (std::size_t j = 1; j <= (1ULL << (numQubits - k)); ++j) { + double numerator = 0; + for (std::size_t l = 1; l <= (1ULL << (k - 1)); ++l) { + numerator += + std::pow(std::abs(data[(2 * j - 1) * (1 << (k - 1)) + l - 1]), 2); + } + + double denominator = 0; + for (std::size_t l = 1; l <= (1ULL << k); ++l) { + denominator += std::pow(std::abs(data[(j - 1) * (1 << k) + l - 1]), 2); + } + + if (denominator == 0.0) { + assert(numerator == 0.0 && + "If the denominator is zero, the numerator must also be zero."); + angles.push_back(0.0); + continue; + } + angles.push_back(2.0 * std::asin(std::sqrt(numerator / denominator))); + } + return angles; +} +} // namespace cudaq::details + +class StateGateBuilder { +public: + StateGateBuilder(mlir::OpBuilder &b, mlir::Location &l, mlir::Value &q) + : builder(b), loc(l), qubits(q) {} + + template + void applyRotationOp(double theta, std::size_t target) { + auto qubit = createQubitRef(target); + auto thetaValue = createAngleValue(theta); + builder.create(loc, thetaValue, mlir::ValueRange{}, qubit); + }; + + void applyX(std::size_t control, std::size_t target) { + auto qubitC = 
createQubitRef(control); + auto qubitT = createQubitRef(target); + builder.create(loc, qubitC, qubitT); + }; + +private: + mlir::Value createQubitRef(std::size_t index) { + if (qubitRefs.contains(index)) { + return qubitRefs[index]; + } + + auto indexValue = builder.create( + loc, index, builder.getIntegerType(64)); + auto ref = builder.create(loc, qubits, indexValue); + qubitRefs[index] = ref; + return ref; + } + + mlir::Value createAngleValue(double angle) { + return builder.create( + loc, llvm::APFloat{angle}, builder.getF64Type()); + } + + mlir::OpBuilder &builder; + mlir::Location &loc; + mlir::Value &qubits; + + std::unordered_map qubitRefs = + std::unordered_map(); +}; + +class StateDecomposer { +public: + StateDecomposer(StateGateBuilder &b, std::span> a) + : builder(b), amplitudes(a), numQubits(log2(a.size())) {} + + /// @brief Decompose the input state vector data to a set of controlled + /// operations and rotations. This function takes as input a `OpBuilder` + /// and appends the operations of the decomposition to its internal + /// representation. This implementation follows the algorithm defined in + /// `https://arxiv.org/pdf/quant-ph/0407010.pdf`. + void decompose() { + + // Decompose the state into phases and magnitudes. + bool needsPhaseEqualization = false; + std::vector phases; + std::vector magnitudes; + for (const auto &a : amplitudes) { + phases.push_back(std::arg(a)); + magnitudes.push_back(std::abs(a)); + // FIXME: remove magic number. + needsPhaseEqualization |= std::abs(phases.back()) > 1e-10; + } + + // N.B: The algorithm, as described in the paper, creates a circuit that + // begins with a target state and brings it to the all zero state. Hence, + // this implementation do the two steps described in Section III in reverse + // order. + + // Apply uniformly controlled y-rotations, the construction in Eq. (4). 
+ for (std::size_t j = 1; j <= numQubits; ++j) { + auto k = numQubits - j + 1; + auto numControls = j - 1; + auto target = j - 1; + auto alphaYk = cudaq::details::getAlphaY(magnitudes, numQubits, k); + applyRotation(alphaYk, numControls, target); + } + + if (!needsPhaseEqualization) + return; + + // Apply uniformly controlled z-rotations, the construction in Eq. (4). + for (std::size_t j = 1; j <= numQubits; ++j) { + auto k = numQubits - j + 1; + auto numControls = j - 1; + auto target = j - 1; + auto alphaZk = cudaq::details::getAlphaZ(phases, numQubits, k); + if (alphaZk.empty()) + continue; + applyRotation(alphaZk, numControls, target); + } + } + +private: + /// @brief Apply a uniformly controlled rotation on the target qubit. + template + void applyRotation(const std::span alphas, std::size_t numControls, + std::size_t target) { + + // In our model the index 1 (i.e. |01>) in quantum state data + // corresponds to qubits[0] = 1 and qubits[1] = 0. + // Revert the order of qubits as the state preparation algorithm + // we use assumes the opposite. + auto qubitIndex = [&](std::size_t i) { return numQubits - i - 1; }; + + auto thetas = cudaq::details::convertAngles(alphas); + if (numControls == 0) { + builder.applyRotationOp(thetas[0], qubitIndex(target)); + return; + } + + auto controlIndices = cudaq::details::getControlIndices(numControls); + assert(thetas.size() == controlIndices.size()); + for (auto [i, c] : llvm::enumerate(controlIndices)) { + builder.applyRotationOp(thetas[i], qubitIndex(target)); + builder.applyX(qubitIndex(c), qubitIndex(target)); + } + } + + StateGateBuilder &builder; + std::span> amplitudes; + std::size_t numQubits; +}; + /// Replace a qubit initialization from vectors with quantum gates. 
/// For example: /// -/// /// Before StatePreparation (state-prep): -/// +/// ``` /// module { /// func.func @foo() attributes { /// %0 = cc.address_of @foo.rodata_0 : !cc.ptr x 4>> @@ -54,9 +298,10 @@ using namespace mlir; /// (0.000000e+00,0.000000e+00)]> : tensor<4xcomplex>) : /// !cc.array x 4> /// } +/// ``` /// /// After StatePreparation (state-prep): -/// +/// ``` /// module { /// func.func @foo() attributes { /// %0 = quake.alloca !quake.veq<2> @@ -75,22 +320,16 @@ using namespace mlir; /// return /// } /// } +/// ``` namespace { std::vector> -readConstantArray(mlir::OpBuilder &builder, cudaq::cc::GlobalOp &global) { +readGlobalConstantArray(mlir::OpBuilder &builder, cudaq::cc::GlobalOp &global) { std::vector> result{}; auto attr = global.getValue(); - auto type = global.getType().getElementType(); - - auto arrayTy = dyn_cast(type); - assert(arrayTy); - assert(attr.has_value()); - - auto elementsAttr = dyn_cast(attr.value()); - assert(elementsAttr); + auto elementsAttr = cast(attr.value()); auto eleTy = elementsAttr.getElementType(); auto values = elementsAttr.getValues(); @@ -119,13 +358,10 @@ readConstantArray(mlir::OpBuilder &builder, cudaq::cc::GlobalOp &global) { LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); auto toErase = std::vector(); - auto hasInitState = false; - auto replacedInitState = false; + auto result = success(); funcOp->walk([&](Operation *op) { if (auto initOp = dyn_cast(op)) { - toErase.push_back(initOp); - hasInitState = true; auto loc = op->getLoc(); builder.setInsertionPointAfter(initOp); // Find the qvector alloc. @@ -134,10 +370,10 @@ LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { // Find vector data. 
auto data = initOp.getOperand(1); - if (auto cast = dyn_cast(data.getDefiningOp())) { + auto cast = dyn_cast(data.getDefiningOp()); + if (cast) data = cast.getOperand(); - toErase.push_back(cast); - } + if (auto addr = dyn_cast(data.getDefiningOp())) { @@ -145,7 +381,7 @@ LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { auto symbol = module.lookupSymbol(globalName); if (auto global = dyn_cast(symbol)) { // Read state initialization data from the global array. - auto vec = readConstantArray(builder, global); + auto vec = readGlobalConstantArray(builder, global); // Prepare state from vector data. auto gateBuilder = StateGateBuilder(builder, loc, qubits); @@ -153,31 +389,26 @@ LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { decomposer.decompose(); initOp.replaceAllUsesWith(qubits); + toErase.push_back(initOp); + if (cast) + toErase.push_back(cast); toErase.push_back(addr); toErase.push_back(global); - replacedInitState = true; + return; } } } + funcOp.emitOpError( + "StatePreparation failed to find to replace quake.state_init"); + result = failure(); } }); - if (hasInitState && !replacedInitState) { - funcOp.emitOpError("StatePreparation failed to replace quake.init_state"); - return failure(); - } - for (auto &op : toErase) { - if (op->getUses().empty()) { - op->erase(); - } else { - op->emitOpError("StatePreparation failed to remove quake.init_state " - "or its dependencies."); - return failure(); - } + op->erase(); } - return success(); + return result; } class StatePreparationPass diff --git a/python/cudaq/kernel/ast_bridge.py b/python/cudaq/kernel/ast_bridge.py index 25b80d9043..825ee78c2a 100644 --- a/python/cudaq/kernel/ast_bridge.py +++ b/python/cudaq/kernel/ast_bridge.py @@ -539,18 +539,18 @@ def __copyVectorAndCastElements(self, source, targetEleType): if (sourceEleType == targetEleType): return sourcePtr - sourceArrTy = cc.ArrayType.get(self.ctx, sourceEleType) + sourceArrType = cc.ArrayType.get(self.ctx, sourceEleType) 
sourceElePtrTy = cc.PointerType.get(self.ctx, sourceEleType) - sourceArrPtrTy = cc.PointerType.get(self.ctx, sourceArrTy) + sourceArrElePtrTy = cc.PointerType.get(self.ctx, sourceArrType) sourceValue = self.ifPointerThenLoad(sourcePtr) - sourceDataPtr = cc.StdvecDataOp(sourceArrPtrTy, sourceValue).result + sourceDataPtr = cc.StdvecDataOp(sourceArrElePtrTy, sourceValue).result sourceSize = cc.StdvecSizeOp(self.getIntegerType(), sourceValue).result - targetElePtrTy = cc.PointerType.get(self.ctx, targetEleType) + targetElePtrType = cc.PointerType.get(self.ctx, targetEleType) targetTy = cc.ArrayType.get(self.ctx, targetEleType) - targetArrPtrTy = cc.PointerType.get(self.ctx, targetTy) + targetArrElePtrTy = cc.PointerType.get(self.ctx, targetTy) targetVecTy = cc.StdvecType.get(self.ctx, targetEleType) - targetPtr = cc.AllocaOp(targetArrPtrTy, + targetPtr = cc.AllocaOp(targetArrElePtrTy, TypeAttr.get(targetEleType), seqSize=sourceSize).result @@ -561,7 +561,7 @@ def bodyBuilder(iterVar): rawIndex).result loadedEle = cc.LoadOp(eleAddr).result castedEle = self.promoteOperandType(targetEleType, loadedEle) - targetEleAddr = cc.ComputePtrOp(targetElePtrTy, targetPtr, + targetEleAddr = cc.ComputePtrOp(targetElePtrType, targetPtr, [iterVar], rawIndex).result cc.StoreOp(castedEle, targetEleAddr) diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index f5cc0bec07..8496199d15 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -8,6 +8,7 @@ #include "JITExecutionCache.h" #include "common/ArgumentWrapper.h" +#include "common/Environment.h" #include "cudaq/Optimizer/Builder/Factory.h" #include "cudaq/Optimizer/CAPI/Dialects.h" #include "cudaq/Optimizer/CodeGen/OpenQASMEmitter.h" @@ -493,18 +494,6 @@ py::object pyAltLaunchKernelR(const std::string &name, MlirModule module, return returnValue; } -/// @brief Helper function to get 
boolean environment variable -static bool getEnvBool(const char *envName, bool defaultVal = false) { - if (auto envVal = std::getenv(envName)) { - std::string tmp(envVal); - std::transform(tmp.begin(), tmp.end(), tmp.begin(), - [](unsigned char c) { return std::tolower(c); }); - if (tmp == "1" || tmp == "on" || tmp == "true" || tmp == "yes") - return true; - } - return defaultVal; -} - MlirModule synthesizeKernel(const std::string &name, MlirModule module, cudaq::OpaqueArguments &runtimeArgs) { ScopedTraceWithContext(cudaq::TIMING_JIT, "synthesizeKernel", name); @@ -531,7 +520,6 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, auto &platform = cudaq::get_platform(); if (!platform.is_simulator() || platform.is_emulated()) { pm.addPass(cudaq::opt::createConstPropComplex()); - pm.addPass(createCSEPass()); pm.addPass(cudaq::opt::createLiftArrayAlloc()); pm.addPass(cudaq::opt::createStatePreparation()); } diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 30445dae45..00e91ffc33 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -8,6 +8,7 @@ #pragma once +#include "common/Environment.h" #include "common/ExecutionContext.h" #include "common/Executor.h" #include "common/FmtCore.h" @@ -125,18 +126,6 @@ class BaseRemoteRESTQPU : public cudaq::QPU { delete jit; } - /// @brief Helper function to get boolean environment variable - bool getEnvBool(const char *envName, bool defaultVal = false) { - if (auto envVal = std::getenv(envName)) { - std::string tmp(envVal); - std::transform(tmp.begin(), tmp.end(), tmp.begin(), - [](unsigned char c) { return std::tolower(c); }); - if (tmp == "1" || tmp == "on" || tmp == "true" || tmp == "yes") - return true; - } - return defaultVal; - } - virtual std::tuple extractQuakeCodeAndContext(const std::string &kernelName, void *data) = 0; virtual void cleanupContext(mlir::MLIRContext *context) { return; } diff --git 
a/runtime/common/CMakeLists.txt b/runtime/common/CMakeLists.txt index 01b4b0a235..220c60efe1 100644 --- a/runtime/common/CMakeLists.txt +++ b/runtime/common/CMakeLists.txt @@ -17,6 +17,7 @@ set(COMMON_RUNTIME_SRC Resources.cpp Trace.cpp Future.cpp + Environment.cpp Executor.cpp ) @@ -83,7 +84,7 @@ endif() get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) -add_library(cudaq-mlir-runtime SHARED RuntimeMLIR.cpp JIT.cpp Logger.cpp) +add_library(cudaq-mlir-runtime SHARED RuntimeMLIR.cpp Environment.cpp JIT.cpp Logger.cpp) set_property(GLOBAL APPEND PROPERTY CUDAQ_RUNTIME_LIBS cudaq-mlir-runtime) set_source_files_properties(JIT.cpp PROPERTIES COMPILE_FLAGS -fno-rtti) diff --git a/runtime/common/Environment.cpp b/runtime/common/Environment.cpp new file mode 100644 index 0000000000..e22e4a066e --- /dev/null +++ b/runtime/common/Environment.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include "Environment.h" +#include +#include + +namespace cudaq { + +/// @brief Helper function to get boolean environment variable +bool getEnvBool(const char *envName, bool defaultVal = false) { + if (auto envVal = std::getenv(envName)) { + std::string tmp(envVal); + std::transform(tmp.begin(), tmp.end(), tmp.begin(), + [](unsigned char c) { return std::tolower(c); }); + return (tmp == "1" || tmp == "on" || tmp == "true" || tmp == "y" || tmp == "yes"); + } + return defaultVal; +} + +} // namespace cudaq diff --git a/runtime/common/Environment.h b/runtime/common/Environment.h new file mode 100644 index 0000000000..9bbea871f9 --- /dev/null +++ b/runtime/common/Environment.h @@ -0,0 +1,16 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#pragma once + +namespace cudaq { + +/// @brief Helper function to get boolean environment variable +bool getEnvBool(const char *envName, bool defaultVal); + +} // namespace cudaq diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index 1a1c7421aa..add36c5f31 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -6,7 +6,11 @@ * the terms of the Apache License 2.0 which accompanies this distribution. 
* ******************************************************************************/ -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// Simulators +// RUN: nvq++ %cpp_std --enable-mlir --target nvidia %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --enable-mlir --target nvidia-fp64 %s -o %t && %t | FileCheck %s + +// Quantum emulators // RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t | FileCheck %s // RUN: nvq++ %cpp_std --target ionq --emulate %s -o %t && %t | FileCheck %s // 2 different IQM machines for 2 different topologies @@ -18,16 +22,26 @@ #include __qpu__ void test_complex_constant_array() { - cudaq::qvector v(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); + cudaq::qvector v(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); +} + +#ifdef CUDAQ_SIMULATION_SCALAR_FP32 +__qpu__ void test_complex_constant_array_floating_point() { + cudaq::qvector v(std::vector>({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); } +#else +__qpu__ void test_complex_constant_array_floating_point() { + cudaq::qvector v(std::vector>({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); +} +#endif __qpu__ void test_complex_constant_array2() { - cudaq::qvector v1(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); - cudaq::qvector v2(std::vector({ 0., 0., M_SQRT1_2, M_SQRT1_2})); + cudaq::qvector v1(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); + cudaq::qvector v2(std::vector({ 0., 0., M_SQRT1_2, M_SQRT1_2})); } __qpu__ void test_complex_constant_array3() { - cudaq::qvector v({ + cudaq::qvector v({ cudaq::complex(M_SQRT1_2), cudaq::complex(M_SQRT1_2), cudaq::complex(0.0), @@ -39,14 +53,44 @@ __qpu__ void test_complex_array_param(std::vector inState) { cudaq::qvector q1 = inState; } +#ifdef CUDAQ_SIMULATION_SCALAR_FP32 +__qpu__ void test_complex_array_param_floating_point(std::vector> inState) { + cudaq::qvector q1 = inState; +} +#else +__qpu__ void test_complex_array_param_floating_point(std::vector> inState) { + cudaq::qvector q1 = inState; +} +#endif + __qpu__ void 
test_real_constant_array() { cudaq::qvector v({ M_SQRT1_2, M_SQRT1_2, 0., 0.}); } +#ifdef CUDAQ_SIMULATION_SCALAR_FP32 +__qpu__ void test_real_constant_array_floating_point() { + cudaq::qvector v(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); +} +#else +__qpu__ void test_real_constant_array_floating_point() { + cudaq::qvector v(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); +} +#endif + __qpu__ void test_real_array_param(std::vector inState) { cudaq::qvector q1 = inState; } +#ifdef CUDAQ_SIMULATION_SCALAR_FP32 +__qpu__ void test_real_array_param_floating_point(std::vector inState) { + cudaq::qvector q1 = inState; +} +#else +__qpu__ void test_real_array_param_floating_point(std::vector inState) { + cudaq::qvector q1 = inState; +} +#endif + void printCounts(cudaq::sample_result& result) { std::vector values{}; for (auto &&[bits, counts] : result) { @@ -60,105 +104,155 @@ void printCounts(cudaq::sample_result& result) { } int main() { - { - auto counts = cudaq::sample(test_complex_constant_array); - printCounts(counts); - } - - { - auto counts = cudaq::sample(test_complex_constant_array2); - printCounts(counts); - } - - { - auto counts = cudaq::sample(test_complex_constant_array3); - printCounts(counts); - } - - { - auto counts = cudaq::sample(test_real_constant_array); - printCounts(counts); - } + { + auto counts = cudaq::sample(test_complex_constant_array); + printCounts(counts); + } - { - std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; - { - // Passing state data as argument (kernel mode) - auto counts = cudaq::sample(test_complex_array_param, vec); - printCounts(counts); - - counts = cudaq::sample(test_complex_array_param, vec1); - printCounts(counts); - } - - { - // Passing state data as argument (builder mode) - auto [kernel, v] = cudaq::make_kernel>(); - auto qubits = kernel.qalloc(v); - - auto counts = cudaq::sample(kernel, vec); - printCounts(counts); - - counts = cudaq::sample(kernel, vec1); - 
printCounts(counts); - } - } +// CHECK: 00 +// CHECK: 10 - { - std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; - { - // Passing state data as argument (kernel mode) - auto counts = cudaq::sample(test_real_array_param, vec); - printCounts(counts); - - counts = cudaq::sample(test_real_array_param, vec1); - printCounts(counts); - } - - { - // Passing state data as argument (builder mode) - auto [kernel, v] = cudaq::make_kernel>(); - auto qubits = kernel.qalloc(v); - - auto counts = cudaq::sample(kernel, vec); - printCounts(counts); - - counts = cudaq::sample(kernel, vec1); - printCounts(counts); - } - } -} + { + auto counts = cudaq::sample(test_complex_constant_array_floating_point); + printCounts(counts); + } // CHECK: 00 // CHECK: 10 + { + auto counts = cudaq::sample(test_complex_constant_array2); + printCounts(counts); + } + // CHECK: 0001 // CHECK: 0011 // CHECK: 1001 // CHECK: 1011 + { + auto counts = cudaq::sample(test_complex_constant_array3); + printCounts(counts); + } + // CHECK: 00 // CHECK: 10 + { + auto counts = cudaq::sample(test_real_constant_array); + printCounts(counts); + } + // CHECK: 00 // CHECK: 10 + { + auto counts = cudaq::sample(test_real_constant_array_floating_point); + printCounts(counts); + } + // CHECK: 00 // CHECK: 10 + { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_complex_array_param, vec); + printCounts(counts); + +// CHECK: 00 +// CHECK: 10 + + counts = cudaq::sample(test_complex_array_param, vec1); + printCounts(counts); + +// CHECK: 01 +// CHECK: 11 + } + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_complex_array_param_floating_point, vec); + printCounts(counts); + +// CHECK: 00 +// CHECK: 10 + + counts = cudaq::sample(test_complex_array_param_floating_point, vec1); + printCounts(counts); + +// CHECK: 01 
+// CHECK: 11 + } + + { + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); + + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); + +// CHECK: 00 +// CHECK: 10 + + counts = cudaq::sample(kernel, vec1); + printCounts(counts); + // CHECK: 01 // CHECK: 11 + } + } + + { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_real_array_param, vec); + printCounts(counts); // CHECK: 00 // CHECK: 10 + counts = cudaq::sample(test_real_array_param, vec1); + printCounts(counts); + // CHECK: 01 // CHECK: 11 + } + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_real_array_param_floating_point, vec); + printCounts(counts); // CHECK: 00 // CHECK: 10 + counts = cudaq::sample(test_real_array_param_floating_point, vec1); + printCounts(counts); + // CHECK: 01 // CHECK: 11 + } + + { + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); + + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); + +// CHECK: 00 +// CHECK: 10 + + counts = cudaq::sample(kernel, vec1); + printCounts(counts); + +// CHECK: 01 +// CHECK: 11 + } + } +} diff --git a/test/Quake/const_prop_complex.qke b/test/Quake/const_prop_complex.qke index 7b75d72ac9..884a21486b 100644 --- a/test/Quake/const_prop_complex.qke +++ b/test/Quake/const_prop_complex.qke @@ -8,6 +8,43 @@ // RUN: cudaq-opt -const-prop-complex %s | FileCheck %s +func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %cst = arith.constant 0.000000e+00 : f32 + %cst_0 = arith.constant 0.70710678118654757 : f64 + %0 = cc.cast %cst_0 : (f64) -> f32 + %1 = complex.create %0, %cst : complex + %2 = complex.create %cst, %cst : complex + %3 = cc.alloca !cc.array x 4> + %4 = cc.cast %3 : (!cc.ptr x 
4>>) -> !cc.ptr> + cc.store %1, %4 : !cc.ptr> + %5 = cc.compute_ptr %3[1] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %1, %5 : !cc.ptr> + %6 = cc.compute_ptr %3[2] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %2, %6 : !cc.ptr> + %7 = cc.compute_ptr %3[3] : (!cc.ptr x 4>>) -> !cc.ptr> + cc.store %2, %7 : !cc.ptr> + %8 = quake.alloca !quake.veq<2> + %9 = quake.init_state %8, %4 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> + return +} + +// CHECK-LABEL: func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = complex.constant [0.707106769 : f32, 0.000000e+00 : f32] : complex +// CHECK: %[[VAL_1:.*]] = complex.constant [0.000000e+00 : f32, 0.000000e+00 : f32] : complex +// CHECK: %[[VAL_2:.*]] = cc.alloca !cc.array x 4> +// CHECK: %[[VAL_3:.*]] = cc.cast %[[VAL_2]] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_0]], %[[VAL_3]] : !cc.ptr> +// CHECK: %[[VAL_4:.*]] = cc.compute_ptr %[[VAL_2]][1] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_0]], %[[VAL_4]] : !cc.ptr> +// CHECK: %[[VAL_5:.*]] = cc.compute_ptr %[[VAL_2]][2] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_1]], %[[VAL_5]] : !cc.ptr> +// CHECK: %[[VAL_6:.*]] = cc.compute_ptr %[[VAL_2]][3] : (!cc.ptr x 4>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_1]], %[[VAL_6]] : !cc.ptr> +// CHECK: %[[VAL_7:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_8:.*]] = quake.init_state %[[VAL_7]], %[[VAL_3]] : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> +// CHECK: return +// CHECK: } + func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 0.70710678118654757 : f64 diff --git a/test/Quake/state_prep.qke b/test/Quake/state_prep.qke index 3ba6d077bb..4289571b33 100644 --- a/test/Quake/state_prep.qke +++ b/test/Quake/state_prep.qke @@ -6,7 +6,7 @@ // the terms of the Apache License 2.0 which 
accompanies this distribution. // // ========================================================================== // -// RUN: cudaq-opt -state-prep %s | FileCheck %s +// RUN: cudaq-opt -state-prep -canonicalize %s | FileCheck %s module { func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { @@ -18,23 +18,19 @@ module { cc.global constant @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv.rodata_0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<4xcomplex>) : !cc.array x 4> // CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { -// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> -// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 -// CHECK: %[[VAL_2:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_1]]] : (!quake.veq<2>, i64) -> !quake.ref -// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK: quake.ry (%[[VAL_3]]) %[[VAL_2]] : (f64, !quake.ref) -> () -// CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 -// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_4]]] : (!quake.veq<2>, i64) -> !quake.ref -// CHECK: %[[VAL_6:.*]] = arith.constant 0.78539816339744839 : f64 -// CHECK: quake.ry (%[[VAL_6]]) %[[VAL_5]] : (f64, !quake.ref) -> () -// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () -// CHECK: %[[VAL_7:.*]] = arith.constant 0.78539816339744839 : f64 -// CHECK: quake.ry (%[[VAL_7]]) %[[VAL_5]] : (f64, !quake.ref) -> () -// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: %[[VAL_0:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<2> +// CHECK: 
%[[VAL_3:.*]] = quake.extract_ref %[[VAL_2]][1] : (!quake.veq<2>) -> !quake.ref +// CHECK: quake.ry (%[[VAL_1]]) %[[VAL_3]] : (f64, !quake.ref) -> () +// CHECK: %[[VAL_4:.*]] = quake.extract_ref %[[VAL_2]][0] : (!quake.veq<2>) -> !quake.ref +// CHECK: quake.ry (%[[VAL_0]]) %[[VAL_4:.*]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_3]]] %[[VAL_4:.*]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_0]]) %[[VAL_4:.*]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_3]]] %[[VAL_4:.*]] : (!quake.ref, !quake.ref) -> () // CHECK: return // CHECK: } - func.func @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %0 = cc.address_of @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv.rodata_0 : !cc.ptr> %1 = quake.alloca !quake.veq<2> @@ -44,19 +40,16 @@ module { cc.global constant @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv.rodata_0 (dense<[0.70710678118654757, 0.70710678118654757, 0.000000e+00, 0.000000e+00]> : tensor<4xf64>) : !cc.array // CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { -// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> -// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 -// CHECK: %[[VAL_2:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_1]]] : (!quake.veq<2>, i64) -> !quake.ref -// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK: quake.ry (%[[VAL_3]]) %[[VAL_2]] : (f64, !quake.ref) -> () -// CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 -// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_4]]] : (!quake.veq<2>, i64) -> !quake.ref -// CHECK: %[[VAL_6:.*]] = arith.constant 0.78539816339744839 : f64 -// CHECK: quake.ry (%[[VAL_6]]) %[[VAL_5]] : (f64, !quake.ref) -> () -// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, 
!quake.ref) -> () -// CHECK: %[[VAL_7:.*]] = arith.constant 0.78539816339744839 : f64 -// CHECK: quake.ry (%[[VAL_7]]) %[[VAL_5]] : (f64, !quake.ref) -> () -// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: %[[VAL_0:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: %[[VAL_3:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_4:.*]] = quake.extract_ref %[[VAL_3]][1] : (!quake.veq<2>) -> !quake.ref +// CHECK: quake.ry (%[[VAL_1]]) %[[VAL_4]] : (f64, !quake.ref) -> () +// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_3]][0] : (!quake.veq<2>) -> !quake.ref +// CHECK: quake.ry (%[[VAL_0]]) %[[VAL_5]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_4]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_0]]) %[[VAL_5]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_4]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () // CHECK: return // CHECK: } @@ -70,19 +63,16 @@ module { cc.global constant @__nvqpp_rodata_init_state.0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<4xcomplex>) : !cc.array x 4> // CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_complex_array_param._Z24test_complex_array_paramSt6vectorISt7complexIfESaIS1_EE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { -// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> -// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 -// CHECK: %[[VAL_2:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_1]]] : (!quake.veq<2>, i64) -> !quake.ref -// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK: quake.ry (%[[VAL_3]]) %[[VAL_2]] : (f64, !quake.ref) -> () -// CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 -// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_4]]] : (!quake.veq<2>, i64) -> !quake.ref -// CHECK: %[[VAL_6:.*]] = arith.constant 0.78539816339744839 : f64 -// CHECK: 
quake.ry (%[[VAL_6]]) %[[VAL_5]] : (f64, !quake.ref) -> () -// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () -// CHECK: %[[VAL_7:.*]] = arith.constant 0.78539816339744839 : f64 -// CHECK: quake.ry (%[[VAL_7]]) %[[VAL_5]] : (f64, !quake.ref) -> () -// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK: %[[VAL_0:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_3:.*]] = quake.extract_ref %[[VAL_2]][1] : (!quake.veq<2>) -> !quake.ref +// CHECK: quake.ry (%[[VAL_1]]) %[[VAL_3]] : (f64, !quake.ref) -> () +// CHECK: %[[VAL_4:.*]] = quake.extract_ref %[[VAL_2]][0] : (!quake.veq<2>) -> !quake.ref +// CHECK: quake.ry (%[[VAL_0]]) %[[VAL_4]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_3]]] %[[VAL_4]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_0]]) %[[VAL_4]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_3]]] %[[VAL_4]] : (!quake.ref, !quake.ref) -> () // CHECK: return // CHECK: } @@ -95,20 +85,17 @@ module { } cc.global constant @__nvqpp_rodata_init_state.1 (dense<[0.707106769, 0.707106769, 0.000000e+00, 0.000000e+00]> : tensor<4xf32>) : !cc.array -// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_real_array_param._Z21test_real_array_paramSt6vectorIfSaIfEE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { -// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> -// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 -// CHECK: %[[VAL_2:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_1]]] : (!quake.veq<2>, i64) -> !quake.ref -// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK: quake.ry (%[[VAL_3]]) %[[VAL_2]] : (f64, !quake.ref) -> () -// CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 -// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_0]][%[[VAL_4]]] : (!quake.veq<2>, i64) -> !quake.ref -// CHECK: %[[VAL_6:.*]] = arith.constant 
0.78539816339744839 : f64 -// CHECK: quake.ry (%[[VAL_6]]) %[[VAL_5]] : (f64, !quake.ref) -> () -// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () -// CHECK: %[[VAL_7:.*]] = arith.constant 0.78539816339744839 : f64 -// CHECK: quake.ry (%[[VAL_7]]) %[[VAL_5]] : (f64, !quake.ref) -> () -// CHECK: quake.x [%[[VAL_2]]] %[[VAL_5]] : (!quake.ref, !quake.ref) -> () +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_real_array_param._Z21test_real_array_paramSt6vectorIfSaIfEE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = arith.constant 0.78539816339744839 : f64 +// CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_3:.*]] = quake.extract_ref %[[VAL_2:.*]][1] : (!quake.veq<2>) -> !quake.ref +// CHECK: quake.ry (%[[VAL_1]]) %[[VAL_3]] : (f64, !quake.ref) -> () +// CHECK: %[[VAL_4:.*]] = quake.extract_ref %0[0] : (!quake.veq<2>) -> !quake.ref +// CHECK: quake.ry (%[[VAL_0]]) %[[VAL_4]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_3]]] %[[VAL_4]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_0]]) %[[VAL_4]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_3]]] %[[VAL_4]] : (!quake.ref, !quake.ref) -> () // CHECK: return // CHECK: } } From 516e50e583f076b4e4ec97065e95a2d7caa031b3 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 17 Jul 2024 15:43:57 -0700 Subject: [PATCH 33/50] Format --- runtime/common/Environment.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/runtime/common/Environment.cpp b/runtime/common/Environment.cpp index e22e4a066e..130349e543 100644 --- a/runtime/common/Environment.cpp +++ b/runtime/common/Environment.cpp @@ -17,8 +17,9 @@ bool getEnvBool(const char *envName, bool defaultVal = false) { if (auto envVal = std::getenv(envName)) { std::string tmp(envVal); std::transform(tmp.begin(), tmp.end(), tmp.begin(), - [](unsigned char c) { return 
std::tolower(c); }); - return (tmp == "1" || tmp == "on" || tmp == "true" || tmp == "y" || tmp == "yes"); + [](unsigned char c) { return std::tolower(c); }); + return (tmp == "1" || tmp == "on" || tmp == "true" || tmp == "y" || + tmp == "yes"); } return defaultVal; } From 6ccfc63ac556bf4acfa08d761bfefcccac7f14b4 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 17 Jul 2024 17:27:10 -0700 Subject: [PATCH 34/50] Make lift alloc more tolerant, fixed failing test --- lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 68 +++++++++++++-------- runtime/common/RuntimeMLIRCommonImpl.h | 1 - 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index 9de57fad81..47c19b402d 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -46,42 +46,48 @@ std::vector readConstantValues(SmallVectorImpl &vec, Type eleTy) { } else if constexpr (std::is_same_v) { auto v = cast(a); result.emplace_back(v.getValue().convertToFloat()); - } else { - assert(false && "unexpected type in constant array"); } } return result; } -void genVectorOfConstantsFromAttributes(cudaq::IRBuilder irBuilder, - Location loc, ModuleOp module, - StringRef name, - SmallVector &values, - Type eleTy) { +LogicalResult genVectorOfConstantsFromAttributes(cudaq::IRBuilder irBuilder, + Location loc, ModuleOp module, + StringRef name, + SmallVector &values, + Type eleTy) { if (auto cTy = dyn_cast(eleTy)) { auto floatTy = cTy.getElementType(); if (floatTy == irBuilder.getF64Type()) { auto vals = readConstantValues>(values, cTy); - irBuilder.genVectorOfConstants(loc, module, name, vals); - return; + if (vals.size() == values.size()) { + irBuilder.genVectorOfConstants(loc, module, name, vals); + return success(); + } } else if (floatTy == irBuilder.getF32Type()) { auto vals = readConstantValues>(values, cTy); - irBuilder.genVectorOfConstants(loc, module, name, vals); - 
return; + if (vals.size() == values.size()) { + irBuilder.genVectorOfConstants(loc, module, name, vals); + return success(); + } } } else if (auto floatTy = dyn_cast(eleTy)) { if (floatTy == irBuilder.getF64Type()) { auto vals = readConstantValues(values, floatTy); - irBuilder.genVectorOfConstants(loc, module, name, vals); - return; + if (vals.size() == values.size()) { + irBuilder.genVectorOfConstants(loc, module, name, vals); + return success(); + } } else if (floatTy == irBuilder.getF32Type()) { auto vals = readConstantValues(values, floatTy); - irBuilder.genVectorOfConstants(loc, module, name, vals); - return; + if (vals.size() == values.size()) { + irBuilder.genVectorOfConstants(loc, module, name, vals); + return success(); + } } } - assert(false && "unexpected element type in constant array"); + return failure(); } } // namespace @@ -128,10 +134,14 @@ class AllocaPattern : public OpRewritePattern { // Build a new name based on the kernel name. std::string name = funcName + ".rodata_" + std::to_string(counter++); cudaq::IRBuilder irBuilder(rewriter.getContext()); - genVectorOfConstantsFromAttributes(irBuilder, loc, module, name, values, - eleTy); - conGlobal = rewriter.create(loc, ptrTy, name); - conArr = rewriter.create(loc, arrTy, conGlobal); + if (succeeded(genVectorOfConstantsFromAttributes(irBuilder, loc, module, + name, values, eleTy))) { + conGlobal = rewriter.create(loc, ptrTy, name); + conArr = rewriter.create(loc, arrTy, conGlobal); + } else { + conArr = + rewriter.create(loc, arrTy, valuesAttr); + } } else { conArr = rewriter.create(loc, arrTy, valuesAttr); @@ -169,6 +179,14 @@ class AllocaPattern : public OpRewritePattern { toErase.push_back(useuser); isLive = true; } + if (auto ist = dyn_cast(user)) { + rewriter.setInsertionPointAfter(user); + LLVM_DEBUG(llvm::dbgs() << "replaced init_state\n"); + assert(conGlobal && "global must be defined"); + rewriter.replaceOpWithNewOp( + ist, ist.getType(), ist.getTargets(), conGlobal); + continue; + } if 
(!isLive) toErase.push_back(user); } @@ -292,16 +310,16 @@ class AllocaPattern : public OpRewritePattern { << *op << '\n'); continue; } - if (isa(op)) { - toGlobalUses.push_back(op); - toGlobal = true; - continue; - } LLVM_DEBUG(llvm::dbgs() << "unexpected cast: " << *op << '\n'); toGlobalUses.push_back(op); toGlobal = true; continue; } + if (isa(op)) { + toGlobalUses.push_back(op); + toGlobal = true; + continue; + } LLVM_DEBUG(llvm::dbgs() << "unexpected use: " << *op << '\n'); toGlobalUses.push_back(op); toGlobal = true; diff --git a/runtime/common/RuntimeMLIRCommonImpl.h b/runtime/common/RuntimeMLIRCommonImpl.h index 586bcba422..c396136ce8 100644 --- a/runtime/common/RuntimeMLIRCommonImpl.h +++ b/runtime/common/RuntimeMLIRCommonImpl.h @@ -369,7 +369,6 @@ qirProfileTranslationFunction(const char *qirProfile, mlir::Operation *op, mlir::PassManager pm(context); if (printIntermediateMLIR) pm.enableIRPrinting(); - std::string errMsg; llvm::raw_string_ostream errOs(errMsg); cudaq::opt::addPipelineConvertToQIR(pm, qirProfile); From 2226653e06791c24dc48a8d25405f6d2279d7e0c Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 17 Jul 2024 17:42:33 -0700 Subject: [PATCH 35/50] Removed unneeded changes --- lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 4 ++-- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 10 ++-------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index 47c19b402d..1867170141 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -229,8 +229,8 @@ class AllocaPattern : public OpRewritePattern { if (std::distance(alloc->getUses().begin(), alloc->getUses().end()) < size) return false; - // Keep a scoreboard for every element in the array. Every element *must* - // be stored to with a constant exactly one time. + // Keep a scoreboard for every element in the array.
Every element *must* be + // stored to with a constant exactly one time. scoreboard.resize(size); for (int i = 0; i < size; i++) scoreboard[i] = nullptr; diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index e309f86214..5eb99d24cd 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -127,11 +127,9 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, auto eleTy = cast(strTy.getElementType()); builder.setInsertionPointToStart(argument.getOwner()); auto argLoc = argument.getLoc(); - - auto arrTy = cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()); auto conArray = builder.create( argLoc, cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()), arrayAttr); - + auto arrTy = cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()); std::optional arrayInMemory; auto ptrEleTy = cudaq::cc::PointerType::get(eleTy); bool generateNewValue = false; @@ -141,8 +139,6 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, if (arrayInMemory) return *arrayInMemory; OpBuilder::InsertionGuard guard(builder); - auto argLoc = argument.getLoc(); - Value buffer; if (hasInitStateUse(argument)) { // Stick global at end of Module. @@ -250,7 +246,7 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, // Check if there were other uses of `vec.data()` and simply forward the // constant array as materialized in memory. 
if (replaceOtherUses) { - Value memArr = getArrayInMemory(); + auto memArr = getArrayInMemory(); stdvecDataOp.replaceAllUsesWith(memArr); } continue; @@ -286,7 +282,6 @@ std::vector asI32(const std::vector &v) { static LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector &vec) { - auto arrayAttr = builder.getI32ArrayAttr(asI32(vec)); return synthesizeVectorArgument(builder, module, counter, argument, vec, arrayAttr, @@ -359,7 +354,6 @@ static LogicalResult synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector> &vec) { - std::vector vec2; for (auto c : vec) { vec2.push_back(c.real()); From 96598f2eee88a176ccdfd8400a0cad7ee89045c0 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 17 Jul 2024 18:01:11 -0700 Subject: [PATCH 36/50] Addressed more CR comments --- lib/Optimizer/Transforms/ConstPropComplex.cpp | 10 ++++------ lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/Optimizer/Transforms/ConstPropComplex.cpp b/lib/Optimizer/Transforms/ConstPropComplex.cpp index d1ffc8d5af..9fe626c1e1 100644 --- a/lib/Optimizer/Transforms/ConstPropComplex.cpp +++ b/lib/Optimizer/Transforms/ConstPropComplex.cpp @@ -178,12 +178,10 @@ class ConstPropComplexPass DominanceInfo domInfo(func); std::string funcName = func.getName().str(); RewritePatternSet patterns(ctx); - patterns.insert(ctx); - patterns.insert(ctx); - patterns.insert(ctx); - patterns.insert(ctx); - patterns.insert(ctx); - patterns.insert(ctx); + patterns + .insert( + ctx); LLVM_DEBUG(llvm::dbgs() << "Before lifting constant array: " << func << '\n'); diff --git a/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp b/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp index 26dc40f9a9..1c3ec42a1c 100644 --- a/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp +++ b/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp 
@@ -106,8 +106,8 @@ class GenerateDeviceCodeLoader LLVM_DEBUG(llvm::dbgs() << "adding declaration: " << op); declarations.push_back(&op); } - } else if (auto globalOp = dyn_cast(op)) { - LLVM_DEBUG(llvm::dbgs() << "adding global: " << op); + } else if (auto ccGlobalOp = dyn_cast(op)) { + LLVM_DEBUG(llvm::dbgs() << "adding global constants: " << op); declarations.push_back(&op); } } From 63adba0fafd26f610787760ca6a964a776a7e770 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 18 Jul 2024 08:46:11 -0700 Subject: [PATCH 37/50] Fix failing test --- targettests/execution/state_preparation_vector.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index add36c5f31..c51a723460 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -7,8 +7,7 @@ ******************************************************************************/ // Simulators -// RUN: nvq++ %cpp_std --enable-mlir --target nvidia %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std --enable-mlir --target nvidia-fp64 %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s // Quantum emulators // RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t | FileCheck %s From d3d11373bb0cb9489911dc586bb37119991aa0e0 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 18 Jul 2024 09:22:25 -0700 Subject: [PATCH 38/50] Added a test for cast pattern in const_prop+complex --- lib/Optimizer/Transforms/ConstPropComplex.cpp | 6 ++- .../execution/state_preparation_vector.cpp | 17 ++++++ test/Quake/const_prop_complex.qke | 53 +++++++------------ 3 files changed, 40 insertions(+), 36 deletions(-) diff --git a/lib/Optimizer/Transforms/ConstPropComplex.cpp b/lib/Optimizer/Transforms/ConstPropComplex.cpp index 9fe626c1e1..939634bf83 100644 --- 
a/lib/Optimizer/Transforms/ConstPropComplex.cpp +++ b/lib/Optimizer/Transforms/ConstPropComplex.cpp @@ -63,13 +63,15 @@ class FloatCastPattern : public OpRewritePattern { auto valCon = val.getDefiningOp(); if (valCon) { auto fTy = dyn_cast(cast.getType()); - if (fTy == rewriter.getF64Type()) { + auto opTy = dyn_cast(cast.getOperand().getType()); + if (fTy == rewriter.getF64Type() && opTy == rewriter.getF32Type()) { auto v = valCon.value().convertToFloat(); auto fTy = dyn_cast(cast.getType()); rewriter.replaceOpWithNewOp( cast, APFloat{static_cast(v)}, fTy); return success(); - } else if (fTy == rewriter.getF32Type()) { + } else if (fTy == rewriter.getF32Type() && + opTy == rewriter.getF64Type()) { auto v = valCon.value().convertToDouble(); auto fTy = dyn_cast(cast.getType()); rewriter.replaceOpWithNewOp( diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index c51a723460..8bce594ee6 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -20,6 +20,15 @@ #include #include +__qpu__ float test_const_prop_cast() { + return M_SQRT1_2; +} + +__qpu__ void test_const_prop_cast_caller() { + auto c = test_const_prop_cast(); + cudaq::qvector v(std::vector({ c, c, 0., 0.})); +} + __qpu__ void test_complex_constant_array() { cudaq::qvector v(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); } @@ -103,6 +112,14 @@ void printCounts(cudaq::sample_result& result) { } int main() { + { + auto counts = cudaq::sample(test_const_prop_cast_caller); + printCounts(counts); + } + +// CHECK: 00 +// CHECK: 10 + { auto counts = cudaq::sample(test_complex_constant_array); printCounts(counts); diff --git a/test/Quake/const_prop_complex.qke b/test/Quake/const_prop_complex.qke index 884a21486b..2840d2cdaa 100644 --- a/test/Quake/const_prop_complex.qke +++ b/test/Quake/const_prop_complex.qke @@ -8,42 +8,27 @@ // RUN: cudaq-opt -const-prop-complex %s | FileCheck %s 
-func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - %cst = arith.constant 0.000000e+00 : f32 - %cst_0 = arith.constant 0.70710678118654757 : f64 - %0 = cc.cast %cst_0 : (f64) -> f32 - %1 = complex.create %0, %cst : complex - %2 = complex.create %cst, %cst : complex - %3 = cc.alloca !cc.array x 4> - %4 = cc.cast %3 : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %1, %4 : !cc.ptr> - %5 = cc.compute_ptr %3[1] : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %1, %5 : !cc.ptr> - %6 = cc.compute_ptr %3[2] : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %2, %6 : !cc.ptr> - %7 = cc.compute_ptr %3[3] : (!cc.ptr x 4>>) -> !cc.ptr> - cc.store %2, %7 : !cc.ptr> - %8 = quake.alloca !quake.veq<2> - %9 = quake.init_state %8, %4 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> - return +func.func @__nvqpp__mlirgen__function_test_const_prop_cast_double() -> f32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %cst = arith.constant 0.70710678118654757 : f64 + %0 = cc.cast %cst : (f64) -> f32 + return %0 : f32 } -// CHECK-LABEL: func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { -// CHECK: %[[VAL_0:.*]] = complex.constant [0.707106769 : f32, 0.000000e+00 : f32] : complex -// CHECK: %[[VAL_1:.*]] = complex.constant [0.000000e+00 : f32, 0.000000e+00 : f32] : complex -// CHECK: %[[VAL_2:.*]] = cc.alloca !cc.array x 4> -// CHECK: %[[VAL_3:.*]] = cc.cast %[[VAL_2]] : (!cc.ptr x 4>>) -> !cc.ptr> -// CHECK: cc.store %[[VAL_0]], %[[VAL_3]] : !cc.ptr> -// CHECK: %[[VAL_4:.*]] = cc.compute_ptr %[[VAL_2]][1] : (!cc.ptr x 4>>) -> !cc.ptr> -// CHECK: cc.store %[[VAL_0]], %[[VAL_4]] : !cc.ptr> -// CHECK: %[[VAL_5:.*]] = cc.compute_ptr %[[VAL_2]][2] : (!cc.ptr x 4>>) -> !cc.ptr> -// CHECK: cc.store %[[VAL_1]], %[[VAL_5]] : !cc.ptr> -// CHECK: %[[VAL_6:.*]] = cc.compute_ptr %[[VAL_2]][3] : (!cc.ptr x 4>>) -> !cc.ptr> -// CHECK: cc.store %[[VAL_1]], %[[VAL_6]] : !cc.ptr> -// CHECK: %[[VAL_7:.*]] = quake.alloca !quake.veq<2> -// CHECK: %[[VAL_8:.*]] = 
quake.init_state %[[VAL_7]], %[[VAL_3]] : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> -// CHECK: return -// CHECK: } +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_const_prop_cast_double() -> f32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = arith.constant 0.707106769 : f32 +// CHECK: return %[[VAL_0]] : f32 +// CHECK: } + +func.func @__nvqpp__mlirgen__function_test_const_prop_cast_float() -> f64 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %cst = arith.constant 0.7071067 : f32 + %0 = cc.cast %cst : (f32) -> f64 + return %0 : f64 +} + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_const_prop_cast_float() -> f64 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = arith.constant 0.70710670948028564 : f64 +// CHECK: return %[[VAL_0]] : f64 +// CHECK: } func.func @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %cst = arith.constant 0.000000e+00 : f32 From b3e8dcb1b7ccd75676817be0a224875799c2a611 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 18 Jul 2024 09:53:28 -0700 Subject: [PATCH 39/50] Fixed incorrect validation of InitStateOp --- lib/Optimizer/Dialect/Quake/QuakeOps.cpp | 12 +- .../state_preparation_vector_sizes.cpp | 250 ++++++++++++++++++ 2 files changed, 257 insertions(+), 5 deletions(-) create mode 100644 targettests/execution/state_preparation_vector_sizes.cpp diff --git a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp index 4aef581938..093e209bc3 100644 --- a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp +++ b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp @@ -509,14 +509,16 @@ LogicalResult quake::ExtractRefOp::verify() { //===----------------------------------------------------------------------===// LogicalResult quake::InitializeStateOp::verify() { - auto veqTy = cast(getTargets().getType()); - if 
(veqTy.hasSpecifiedSize()) - if (!std::has_single_bit(veqTy.getSize())) - return emitOpError("initialize state vector must be power of 2, but is " + - std::to_string(veqTy.getSize()) + " instead."); auto ptrTy = cast(getState().getType()); Type ty = ptrTy.getElementType(); if (auto arrTy = dyn_cast(ty)) { + if (!arrTy.isUnknownSize()) { + std::size_t size = arrTy.getSize(); + if (!std::has_single_bit(size)) + return emitOpError( + "initialize state vector must be power of 2, but is " + + std::to_string(size) + " instead."); + } if (!isa(arrTy.getElementType())) return emitOpError("invalid data pointer type"); } else if (!isa(ty)) { diff --git a/targettests/execution/state_preparation_vector_sizes.cpp b/targettests/execution/state_preparation_vector_sizes.cpp new file mode 100644 index 0000000000..3c4d2a2ea7 --- /dev/null +++ b/targettests/execution/state_preparation_vector_sizes.cpp @@ -0,0 +1,250 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// Simulators +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s + +// Quantum emulators +// RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target ionq --emulate %s -o %t && %t | FileCheck %s +// 2 different IQM machines for 2 different topologies +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Adonis --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Apollo --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target oqc --emulate %s -o %t && %t | FileCheck %s + +#include +#include + +#include +#include + +__qpu__ void test(std::vector inState) { + cudaq::qvector q1 = inState; +} + +void printCounts(cudaq::sample_result &result) { + std::vector values{}; + for (auto &&[bits, counts] : result) { + values.push_back(bits); + } + + std::sort(values.begin(), values.end()); + for (auto &&bits : values) { + // Reverse the bits so that the output is a binary number. 
+ std::reverse(bits.begin(), bits.end()); + std::cout << bits << '\n'; + } +} + +void printState(const std::vector &litudes) { + std::cout << "state: (size: " << amplitudes.size() << ") { "; + for (auto a : amplitudes) + std::cout << a << ' '; + std::cout << "}\n"; +} + +int main() { + constexpr auto kNUM_QUBITS = 5u; + for (auto n = 0u; n < kNUM_QUBITS; ++n) { + const auto dimension = (1ULL << (n + 1)); + for (auto i = 0u; i < dimension; ++i) { + std::vector amplitudes(dimension, 0.); + amplitudes[i] = 1.; + printState(amplitudes); + auto counts = cudaq::sample(test, amplitudes); + printCounts(counts); + std::cout << '\n'; + } + } +} + +// CHECK: state: (size: 2) { (1,0) (0,0) } +// CHECK: 0 + +// CHECK: state: (size: 2) { (0,0) (1,0) } +// CHECK: 1 + +// CHECK: state: (size: 4) { (1,0) (0,0) (0,0) (0,0) } +// CHECK: 00 + +// CHECK: state: (size: 4) { (0,0) (1,0) (0,0) (0,0) } +// CHECK: 01 + +// CHECK: state: (size: 4) { (0,0) (0,0) (1,0) (0,0) } +// CHECK: 10 + +// CHECK: state: (size: 4) { (0,0) (0,0) (0,0) (1,0) } +// CHECK: 11 + +// CHECK: state: (size: 8) { (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 000 + +// CHECK: state: (size: 8) { (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 001 + +// CHECK: state: (size: 8) { (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 010 + +// CHECK: state: (size: 8) { (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 011 + +// CHECK: state: (size: 8) { (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) } +// CHECK: 100 + +// CHECK: state: (size: 8) { (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) } +// CHECK: 101 + +// CHECK: state: (size: 8) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) } +// CHECK: 110 + +// CHECK: state: (size: 8) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) } +// CHECK: 111 + +// CHECK: state: (size: 16) { (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 0000 + +// CHECK: state: (size: 
16) { (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 0001 + +// CHECK: state: (size: 16) { (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 0010 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 0011 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 0100 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 0101 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 0110 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 0111 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 1000 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 1001 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 1010 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 1011 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) } +// CHECK: 1100 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) } +// CHECK: 1101 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) 
(0,0) (1,0) (0,0) } +// CHECK: 1110 + +// CHECK: state: (size: 16) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) } +// CHECK: 1111 + +// CHECK: state: (size: 32) { (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 00000 + +// CHECK: state: (size: 32) { (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 00001 + +// CHECK: state: (size: 32) { (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 00010 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 00011 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 00100 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 00101 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 00110 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 00111 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 01000 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 01001 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 01010 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 01011 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 01100 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 01101 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 01110 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 01111 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 10000 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 10001 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 10010 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 10011 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 10100 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 10101 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 10110 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) } +// CHECK: 10111 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 11000 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 11001 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 11010 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) (0,0) } +// CHECK: 11011 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) (0,0) } +// CHECK: 11100 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) (0,0) } +// CHECK: 11101 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) (0,0) } +// CHECK: 11110 + +// CHECK: state: (size: 32) { (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (0,0) (1,0) } +// CHECK: 11111 \ No 
newline at end of file From a927660fb6d417c542f68c1f503c0d539aaf475a Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 18 Jul 2024 10:00:15 -0700 Subject: [PATCH 40/50] Addressed more comments --- lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index 1867170141..a5896e201c 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -197,9 +197,9 @@ class AllocaPattern : public OpRewritePattern { toErase.push_back(alloc); } - for (auto *op : toErase) { + for (auto *op : toErase) rewriter.eraseOp(op); - } + return success(); } From 22f2e5c3ad32674782e39e2bb049200213f4eb70 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 18 Jul 2024 10:51:03 -0700 Subject: [PATCH 41/50] Update lib/Optimizer/Transforms/StatePreparation.cpp Co-authored-by: Bruno Schmitt <7152025+boschmitt@users.noreply.github.com> --- lib/Optimizer/Transforms/StatePreparation.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index f7a104b2ae..53c6a972a0 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -52,7 +52,7 @@ std::vector getControlIndices(std::size_t numBits) { // the position where the lth and (l + 1)th bit strings g[l] and g[l+1] of // the binary reflected Gray code differ. auto position = std::log2(code[i] ^ code[(i + 1) % code.size()]); - // N.B: In CUDA Quantum we write the least significant bit (LSb) on the left + // N.B: The algorithm expects the least significant bit (LSb) on the left // // lsb -v // 001 @@ -63,8 +63,8 @@ std::vector getControlIndices(std::size_t numBits) { // numbers with the LSb on the left. // // Now, what we need to find out is the position of the 1 in the bitstring. 
- // If we take LSb as being position 0, then for the normal convention its - // position will be 0. Using CUDA Quantum convention it will be 2. Hence, + // If we take LSB as being position 0, then for the normal convention its + // position will be 0. Using the algorithm's convention it will be 2. Hence, // we need to convert the position we find using: // // numBits - position - 1 From 69afc99b3b68ad7aa21aa515404893c5e11f4565 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 18 Jul 2024 11:33:45 -0700 Subject: [PATCH 42/50] Added a threshold option to StatePrep pass, added tests --- include/cudaq/Optimizer/Transforms/Passes.td | 5 +++++ lib/Optimizer/Transforms/StatePreparation.cpp | 20 +++++++++-------- python/tests/backends/test_IQM.py | 22 +++++++++++++++++++ python/tests/backends/test_IonQ.py | 22 +++++++++++++++++++ python/tests/backends/test_OQC.py | 22 +++++++++++++++++++ .../test_Quantinuum_LocalEmulation_builder.py | 11 ++++++++++ .../test_Quantinuum_LocalEmulation_kernel.py | 12 ++++++++++ .../tests/backends/test_Quantinuum_builder.py | 11 ++++++++++ .../tests/backends/test_Quantinuum_kernel.py | 11 ++++++++++ 9 files changed, 127 insertions(+), 9 deletions(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 80e31c66fc..f226b7044a 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -618,6 +618,11 @@ def StatePreparation : Pass<"state-prep", "mlir::ModuleOp"> { } ``` }]; + + let options = [ + Option<"phaseThreshold", "threshold", "double", + /*default=*/"1e-10", "Equalize the state if larger than the threshold">, + ]; } def PromoteRefToVeqAlloc : Pass<"promote-qubit-allocation"> { diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index 53c6a972a0..f0df842885 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ 
-200,8 +200,10 @@ class StateGateBuilder { class StateDecomposer { public: - StateDecomposer(StateGateBuilder &b, std::span> a) - : builder(b), amplitudes(a), numQubits(log2(a.size())) {} + StateDecomposer(StateGateBuilder &b, std::span> a, + double t) + : builder(b), amplitudes(a), numQubits(log2(a.size())), + phaseThreshold(t) {} /// @brief Decompose the input state vector data to a set of controlled /// operations and rotations. This function takes as input a `OpBuilder` @@ -217,8 +219,7 @@ class StateDecomposer { for (const auto &a : amplitudes) { phases.push_back(std::arg(a)); magnitudes.push_back(std::abs(a)); - // FIXME: remove magic number. - needsPhaseEqualization |= std::abs(phases.back()) > 1e-10; + needsPhaseEqualization |= std::abs(phases.back()) > phaseThreshold; } // N.B: The algorithm, as described in the paper, creates a circuit that @@ -279,6 +280,7 @@ class StateDecomposer { StateGateBuilder &builder; std::span> amplitudes; std::size_t numQubits; + double phaseThreshold; }; /// Replace a qubit initialization from vectors with quantum gates. @@ -355,7 +357,8 @@ readGlobalConstantArray(mlir::OpBuilder &builder, cudaq::cc::GlobalOp &global) { return result; } -LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { +LogicalResult transform(ModuleOp module, func::FuncOp funcOp, + double phaseThreshold) { auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); auto toErase = std::vector(); auto result = success(); @@ -385,7 +388,7 @@ LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { // Prepare state from vector data. 
auto gateBuilder = StateGateBuilder(builder, loc, qubits); - auto decomposer = StateDecomposer(gateBuilder, vec); + auto decomposer = StateDecomposer(gateBuilder, vec, phaseThreshold); decomposer.decompose(); initOp.replaceAllUsesWith(qubits); @@ -398,8 +401,7 @@ LogicalResult transform(ModuleOp module, func::FuncOp funcOp) { } } } - funcOp.emitOpError( - "StatePreparation failed to find to replace quake.state_init"); + funcOp.emitOpError("StatePreparation failed to replace quake.state_init"); result = failure(); } }); @@ -427,7 +429,7 @@ class StatePreparationPass continue; std::string kernelName = funcOp.getName().str(); - auto result = transform(module, funcOp); + auto result = transform(module, funcOp, phaseThreshold); if (result.failed()) { funcOp.emitOpError("Failed to prepare state for '" + kernelName); signalPassFailure(); diff --git a/python/tests/backends/test_IQM.py b/python/tests/backends/test_IQM.py index 38e2b55363..3408ef1602 100644 --- a/python/tests/backends/test_IQM.py +++ b/python/tests/backends/test_IQM.py @@ -174,6 +174,17 @@ def kernel(vec: List[complex]): assert assert_close(counts["01"], 0., 2) assert assert_close(counts["11"], 0., 2) + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0., 0., 0., 0., 0.] + counts = cudaq.sample(kernel, state) + assert assert_close(counts["000"], shots / 2, 2) + assert assert_close(counts["100"], shots / 2, 2) + assert assert_close(counts["001"], 0., 2) + assert assert_close(counts["010"], 0., 2) + assert assert_close(counts["011"], 0., 2) + assert assert_close(counts["101"], 0., 2) + assert assert_close(counts["110"], 0., 2) + assert assert_close(counts["111"], 0., 2) + def test_IQM_state_preparation_builder(): shots = 10000 @@ -187,6 +198,17 @@ def test_IQM_state_preparation_builder(): assert assert_close(counts["01"], 0., 2) assert assert_close(counts["11"], 0., 2) + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0., 0., 0., 0., 0.] 
+ counts = cudaq.sample(kernel, state) + assert assert_close(counts["000"], shots / 2, 2) + assert assert_close(counts["100"], shots / 2, 2) + assert assert_close(counts["001"], 0., 2) + assert assert_close(counts["010"], 0., 2) + assert assert_close(counts["011"], 0., 2) + assert assert_close(counts["101"], 0., 2) + assert assert_close(counts["110"], 0., 2) + assert assert_close(counts["111"], 0., 2) + # leave for gdb debugging if __name__ == "__main__": diff --git a/python/tests/backends/test_IonQ.py b/python/tests/backends/test_IonQ.py index f468a1d9c8..dfba4c9f55 100644 --- a/python/tests/backends/test_IonQ.py +++ b/python/tests/backends/test_IonQ.py @@ -171,6 +171,17 @@ def kernel(vec: List[complex]): assert not '01' in counts assert not '11' in counts + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0., 0., 0., 0., 0.] + counts = cudaq.sample(kernel, state) + assert '000' in counts + assert '100' in counts + assert not '001' in counts + assert not '010' in counts + assert not '011' in counts + assert not '101' in counts + assert not '110' in counts + assert not '111' in counts + def test_ionq_state_preparation_builder(): kernel, state = cudaq.make_kernel(List[complex]) @@ -183,6 +194,17 @@ def test_ionq_state_preparation_builder(): assert not '01' in counts assert not '11' in counts + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0., 0., 0., 0., 0.] 
+ counts = cudaq.sample(kernel, state) + assert '000' in counts + assert '100' in counts + assert not '001' in counts + assert not '010' in counts + assert not '011' in counts + assert not '101' in counts + assert not '110' in counts + assert not '111' in counts + # leave for gdb debugging if __name__ == "__main__": diff --git a/python/tests/backends/test_OQC.py b/python/tests/backends/test_OQC.py index 1ff86c535c..ee02efe4fc 100644 --- a/python/tests/backends/test_OQC.py +++ b/python/tests/backends/test_OQC.py @@ -172,6 +172,17 @@ def kernel(vec: List[complex]): assert not '01' in counts assert not '11' in counts + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0., 0., 0., 0., 0.] + counts = cudaq.sample(kernel, state) + assert '000' in counts + assert '100' in counts + assert not '001' in counts + assert not '010' in counts + assert not '011' in counts + assert not '101' in counts + assert not '110' in counts + assert not '111' in counts + def test_OQC_state_preparation_builder(): kernel, state = cudaq.make_kernel(List[complex]) @@ -184,6 +195,17 @@ def test_OQC_state_preparation_builder(): assert not '01' in counts assert not '11' in counts + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0., 0., 0., 0., 0.] + counts = cudaq.sample(kernel, state) + assert '000' in counts + assert '100' in counts + assert not '001' in counts + assert not '010' in counts + assert not '011' in counts + assert not '101' in counts + assert not '110' in counts + assert not '111' in counts + # leave for gdb debugging if __name__ == "__main__": diff --git a/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py b/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py index 58176b4e32..f03f1875bc 100644 --- a/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py +++ b/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py @@ -124,6 +124,17 @@ def test_quantinuum_state_preparation(): assert not '01' in counts assert not '11' in counts + state = [1. 
/ np.sqrt(2.), 1. / np.sqrt(2.), 0., 0., 0., 0., 0., 0.] + counts = cudaq.sample(kernel, state) + assert '000' in counts + assert '100' in counts + assert not '001' in counts + assert not '010' in counts + assert not '011' in counts + assert not '101' in counts + assert not '110' in counts + assert not '111' in counts + # leave for gdb debugging if __name__ == "__main__": diff --git a/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py b/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py index a32ad35f5f..0e21a5bf88 100644 --- a/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py +++ b/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py @@ -153,6 +153,18 @@ def kernel(vec: List[complex]): assert not '01' in counts assert not '11' in counts + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0., 0., 0., 0., 0.] + counts = cudaq.sample(kernel, state) + assert '000' in counts + assert '100' in counts + assert not '001' in counts + assert not '010' in counts + assert not '011' in counts + assert not '101' in counts + assert not '110' in counts + assert not '111' in counts + + def test_arbitrary_unitary_synthesis(): import numpy as np cudaq.register_operation("custom_h", diff --git a/python/tests/backends/test_Quantinuum_builder.py b/python/tests/backends/test_Quantinuum_builder.py index 48d50b7419..c0589552c9 100644 --- a/python/tests/backends/test_Quantinuum_builder.py +++ b/python/tests/backends/test_Quantinuum_builder.py @@ -158,6 +158,17 @@ def test_quantinuum_state_preparation(): assert not '01' in counts assert not '11' in counts + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0., 0., 0., 0., 0.] 
+ counts = cudaq.sample(kernel, state) + assert '000' in counts + assert '100' in counts + assert not '001' in counts + assert not '010' in counts + assert not '011' in counts + assert not '101' in counts + assert not '110' in counts + assert not '111' in counts + # leave for gdb debugging if __name__ == "__main__": diff --git a/python/tests/backends/test_Quantinuum_kernel.py b/python/tests/backends/test_Quantinuum_kernel.py index 646f9cc787..b27c339419 100644 --- a/python/tests/backends/test_Quantinuum_kernel.py +++ b/python/tests/backends/test_Quantinuum_kernel.py @@ -184,6 +184,17 @@ def kernel(vec: List[complex]): assert not '01' in counts assert not '11' in counts + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0., 0., 0., 0., 0.] + counts = cudaq.sample(kernel, state) + assert '000' in counts + assert '100' in counts + assert not '001' in counts + assert not '010' in counts + assert not '011' in counts + assert not '101' in counts + assert not '110' in counts + assert not '111' in counts + # leave for gdb debugging if __name__ == "__main__": From da3d3f97308d1a035b6f0c85ac291322bffa942a Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 18 Jul 2024 13:16:48 -0700 Subject: [PATCH 43/50] Update include/cudaq/Optimizer/Transforms/Passes.td Co-authored-by: Bruno Schmitt <7152025+boschmitt@users.noreply.github.com> --- include/cudaq/Optimizer/Transforms/Passes.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index f226b7044a..1293cd2c5b 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -621,7 +621,7 @@ def StatePreparation : Pass<"state-prep", "mlir::ModuleOp"> { let options = [ Option<"phaseThreshold", "threshold", "double", - /*default=*/"1e-10", "Equalize the state if larger than the threshold">, + /*default=*/"1e-10", "Threshold to trigger phase equalization">, ]; } From 
801512c0c7a0dad19106ad7fbe786d4d7757702d Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 18 Jul 2024 15:07:03 -0700 Subject: [PATCH 44/50] Cleanup --- include/cudaq/Optimizer/Transforms/Passes.h | 9 +- .../Optimizer/Transforms/SimulationData.h | 25 ++-- lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp | 6 +- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 137 +++++++++--------- runtime/common/BaseRemoteRESTQPU.h | 3 +- runtime/common/BaseRestRemoteClient.h | 31 +--- targettests/Remote-Sim/state_init.cpp | 47 ++---- 7 files changed, 113 insertions(+), 145 deletions(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 0b07b01cbb..ae9c6d2188 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -12,10 +12,10 @@ // These transforms can generally be thought of as "optimizations" or "rewrites" // on the IR. +#include "SimulationData.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassRegistry.h" -#include "SimulationData.h" namespace cudaq::opt { @@ -42,8 +42,11 @@ std::unique_ptr createObserveAnsatzPass(std::vector &); std::unique_ptr createQuakeAddMetadata(); std::unique_ptr createQuakeAddDeallocs(); std::unique_ptr createQuakeSynthesizer(); -std::unique_ptr createQuakeSynthesizer(std::string_view, const void *, SimulationStateData::getDataFunc*, std::size_t startingArgIdx = 0); -std::unique_ptr createQuakeSynthesizer(std::string_view, const void *, bool sameAddressSpace = false); +std::unique_ptr +createQuakeSynthesizer(std::string_view, const void *, + SimulationStateData::getDataFunc *, std::size_t); +std::unique_ptr createQuakeSynthesizer(std::string_view, + const void *, bool); std::unique_ptr createRaiseToAffinePass(); std::unique_ptr createUnwindLoweringPass(); diff --git a/include/cudaq/Optimizer/Transforms/SimulationData.h b/include/cudaq/Optimizer/Transforms/SimulationData.h index 9504dc7c08..d0c8b3b5b4 
100644 --- a/include/cudaq/Optimizer/Transforms/SimulationData.h +++ b/include/cudaq/Optimizer/Transforms/SimulationData.h @@ -20,19 +20,18 @@ namespace cudaq { class state; } - /// Owns the data class SimulationStateData { - public: - typedef SimulationStateData (getDataFunc)(cudaq::state*); +public: + typedef SimulationStateData(getDataFunc)(cudaq::state *); + + SimulationStateData(void *data, std::size_t size, std::size_t elementSize) + : data(data), size(size), elementSize(elementSize) {} - SimulationStateData(void *data, std::size_t size, std::size_t elementSize): - data(data), size(size), elementSize(elementSize) {} - - // template + // template // std::vector toVector() { - // assert(sizeof(T) == elementSize && "incorrect element size in simulation data"); - // std::vector result; + // assert(sizeof(T) == elementSize && "incorrect element size in simulation + // data"); std::vector result; // std::cout << "SimulationStateData:" << std::endl; // for (std::size_t i = 0; i < size; i++) { @@ -44,13 +43,9 @@ class SimulationStateData { // return result; // } - ~SimulationStateData() { - delete reinterpret_cast(data); - } + ~SimulationStateData() { delete reinterpret_cast(data); } - void* data; + void *data; std::size_t size; std::size_t elementSize; }; - - diff --git a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp index 153f8d5b56..ed78fe7bd4 100644 --- a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp +++ b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp @@ -45,7 +45,7 @@ struct VerifyNVQIRCallOpsPass cudaq::opt::NVQIRPackSingleQubitInArray, cudaq::opt::NVQIRReleasePackedQubitArray, cudaq::getNumQubitsFromCudaqState, - }; + }; // It must be either NVQIR extension functions or in the allowed list. 
return std::find(NVQIR_FUNCS.begin(), NVQIR_FUNCS.end(), functionName) != NVQIR_FUNCS.end() || @@ -73,8 +73,8 @@ struct VerifyNVQIRCallOpsPass passFailed = true; return WalkResult::interrupt(); } else if (!isa(op)) { + LLVM::ExtractValueOp, LLVM::GEPOp, LLVM::IntToPtrOp, + LLVM::LoadOp, LLVM::StoreOp>(op)) { // No pointers allowed except for the above operations. for (auto oper : op->getOperands()) { if (isa(oper.getType())) { diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 839716a534..e1a708412a 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -118,33 +118,49 @@ static bool hasInitStateUse(BlockArgument argument) { return false; } -template -std::vector stateDataToVector(SimulationStateData& stateData) { - assert(sizeof(T) == stateData.elementSize && "incorrect element size in simulation data"); +template +std::vector stateDataToVector(SimulationStateData &stateData) { + assert(sizeof(T) == stateData.elementSize && + "incorrect element size in simulation data"); std::vector result; for (std::size_t i = 0; i < stateData.size; i++) { - auto elePtr = reinterpret_cast(stateData.data) + i; + auto elePtr = reinterpret_cast(stateData.data) + i; result.push_back(*elePtr); } return result; } -template -LogicalResult -synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, - BlockArgument argument, ELETY eleTy, std::vector &vec, - MAKER makeElementValue) { - auto *ctx = builder.getContext(); +template +Value createGlobalArray(OpBuilder &builder, ModuleOp module, unsigned &counter, + BlockArgument argument, Type arrTy, + std::vector vec) { + OpBuilder::InsertionGuard guard(builder); auto argLoc = argument.getLoc(); - //auto strTy = cudaq::cc::StdvecType::get(eleTy); + // Stick global at end of Module. + std::string symbol = "__nvqpp_rodata_init_state." 
+ std::to_string(counter++); + + cudaq::IRBuilder irBuilder(builder); + irBuilder.genVectorOfConstants(argLoc, module, symbol, vec); + + builder.setInsertionPointToStart(argument.getOwner()); + return builder.create( + argLoc, cudaq::cc::PointerType::get(arrTy), symbol); +} + +template +LogicalResult synthesizeStateArgument(OpBuilder &builder, ModuleOp module, + unsigned &counter, BlockArgument argument, + Type eleTy, std::vector &vec) { + auto *ctx = builder.getContext(); + auto argLoc = argument.getLoc(); auto arrTy = cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()); builder.setInsertionPointToStart(argument.getOwner()); auto toErase = std::vector(); - + // Iterate over the users of this state argument. for (auto *argUser : argument.getUsers()) { // Replace a calls to runtime function that reads the number of qubits @@ -166,37 +182,33 @@ synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, } OpBuilder::InsertionGuard guard(builder); - auto [buffer, _] = - createArrayInMemory(builder, module, counter, argument, vec, arrTy); + auto buffer = + createGlobalArray(builder, module, counter, argument, arrTy, vec); auto ptrArrEleTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(eleTy)); Value memArr = builder.create(argLoc, ptrArrEleTy, buffer); - // builder.setInsertionPointAfter(memArr.getDefiningOp()); - // Value size = builder.create(argLoc, vec.size(), 64); - // Value newVec = - // builder.create(argLoc, strTy, memArr, size); argument.replaceAllUsesWith(memArr); - - for (auto &op : toErase) { + + for (auto &op : toErase) op->erase(); - } return success(); } -static LogicalResult -synthesizeStateArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, - BlockArgument argument, SimulationStateData& stateData) { - +static LogicalResult synthesizeStateArgument(OpBuilder &builder, + ModuleOp module, unsigned &counter, + BlockArgument argument, + SimulationStateData &stateData) { + if (stateData.elementSize == 
sizeof(std::complex)) { auto vec = stateDataToVector>(stateData); - return synthesizeStateArgument(builder, module, counter, argument, - ComplexType::get(builder.getF64Type()), vec, makeComplexElement); + return synthesizeStateArgument(builder, module, counter, argument, + ComplexType::get(builder.getF64Type()), vec); } else if (stateData.elementSize == sizeof(std::complex)) { auto vec = stateDataToVector>(stateData); - return synthesizeStateArgument(builder, module, counter, argument, - ComplexType::get(builder.getF32Type()), vec, makeComplexElement); + return synthesizeStateArgument(builder, module, counter, argument, + ComplexType::get(builder.getF32Type()), vec); } module.emitError("unexpected element size in simulation state data"); return failure(); @@ -209,7 +221,7 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, ATTR arrayAttr, MAKER makeElementValue) { auto *ctx = builder.getContext(); auto argTy = argument.getType(); - + assert(isa(argTy)); auto strTy = cast(argTy); auto eleTy = cast(strTy.getElementType()); @@ -230,16 +242,8 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, OpBuilder::InsertionGuard guard(builder); Value buffer; if (hasInitStateUse(argument)) { - // Stick global at end of Module. - std::string symbol = - "__nvqpp_rodata_init_state." + std::to_string(counter++); - - cudaq::IRBuilder irBuilder(builder); - irBuilder.genVectorOfConstants(argLoc, module, symbol, vec); - - builder.setInsertionPointToStart(argument.getOwner()); - buffer = builder.create( - argLoc, cudaq::cc::PointerType::get(arrTy), symbol); + buffer = + createGlobalArray(builder, module, counter, argument, arrTy, vec); } else { builder.setInsertionPointAfter(conArray); buffer = builder.create(argLoc, arrTy); @@ -478,19 +482,13 @@ class QuakeSynthesizer // The raw pointer to the runtime arguments. const void *args; - + // Function to read the state data, if any. 
- SimulationStateData::getDataFunc* getStateData = nullptr; - + SimulationStateData::getDataFunc *getStateData = nullptr; + // Is the simulation running in the same address space as synthesis? bool sameAddressSpace = false; -public: - QuakeSynthesizer() = default; - QuakeSynthesizer(std::string_view kernel, void *a, SimulationStateData::getDataFunc* getData, bool sameSpace) - : kernelName(kernel), args(a), getStateData(getData), sameAddressSpace(sameSpace) {} - const void *args; - // The starting argument index to synthesize. Typically 0 but may be >0 for // partial synthesis. If >0, it is assumed that the first argument(s) are NOT // in `args`. @@ -504,10 +502,10 @@ class QuakeSynthesizer : kernelName(kernel), args(a), sameAddressSpace(sameSpace) {} // Execution on a remote simulator - QuakeSynthesizer(std::string_view kernel, const void *a, SimulationStateData::getDataFunc* getData, std::size_t s) + QuakeSynthesizer(std::string_view kernel, const void *a, + SimulationStateData::getDataFunc *getData, std::size_t s) : kernelName(kernel), args(a), getStateData(getData), startingArgIdx(s) {} - mlir::ModuleOp getModule() { return getOperation(); } std::pair> @@ -647,8 +645,8 @@ class QuakeSynthesizer if (auto ptrTy = dyn_cast(type)) { if (isa(ptrTy.getElementType())) { if (sameAddressSpace) { - // Special case of a `cudaq::state*` which must be in the same address - // space. This references a container to a set of simulation + // Special case of a `cudaq::state*` which must be in the same + // address space. This references a container to a set of simulation // amplitudes. 
synthesizeRuntimeArgument( builder, argument, args, offset, sizeof(void *), @@ -656,21 +654,25 @@ class QuakeSynthesizer Value rawPtr = builder.create( loc, reinterpret_cast(*concrete), sizeof(void *) * 8); - auto stateTy = cudaq::cc::StateType::get(builder.getContext()); + auto stateTy = + cudaq::cc::StateType::get(builder.getContext()); return builder.create( loc, cudaq::cc::PointerType::get(stateTy), rawPtr); }); } else if (getStateData != nullptr) { - // Special case of running on a simulator in a different address space, - // when we know how to convert state to data. - cudaq::state* concrete; - std::memcpy(&concrete, ((char *)args) + offset, sizeof(cudaq::state*)); + // Special case of running on a simulator in a different address + // space, when we know how to convert state to data. + cudaq::state *concrete; + std::memcpy(&concrete, ((const char *)args) + offset, + sizeof(cudaq::state *)); auto stateData = getStateData(concrete); - if (failed(synthesizeStateArgument(builder, module, counter, argument, stateData))) - module.emitError("Failed to synthesize state*"); + if (failed(synthesizeStateArgument(builder, module, counter, + argument, stateData))) + module.emitError("Failed to synthesize state*"); } else { // All other cases are not yet supported (i.e. quantum hardware). 
- funcOp.emitOpError("synthesis: unsupported argument type: state*"); + funcOp.emitOpError("synthesis: unsupported argument type on " + "quantum devices: state*"); signalPassFailure(); } continue; @@ -879,16 +881,17 @@ std::unique_ptr cudaq::opt::createQuakeSynthesizer() { } /// Execution on remote simulator -cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, const void *a, SimulationStateData::getDataFunc* getData, +std::unique_ptr +cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, const void *a, + SimulationStateData::getDataFunc *getData, std::size_t startingArgIdx = 0) { - return std::make_unique(kernelName, a, getData, startingArgIdx); + return std::make_unique(kernelName, a, getData, + startingArgIdx); } /// Execution on the same address space in a simulator or a quantum device std::unique_ptr -cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, const void *a, bool sameAddressSpace = false) { +cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, const void *a, + bool sameAddressSpace = false) { return std::make_unique(kernelName, a, sameAddressSpace); } - - - diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index f185f24643..da595b2493 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -403,7 +403,8 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&context); - pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs, false)); + pm.addPass( + cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs, false)); pm.addPass(mlir::createCanonicalizerPass()); if (disableMLIRthreading || enablePrintMLIREachPass) moduleOp.getContext()->disableMultithreading(); diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index 66608327dc..639302fe04 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ 
b/runtime/common/BaseRestRemoteClient.h @@ -92,34 +92,22 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { }); } - static SimulationStateData readSimulationStateData(cudaq::state* s) { - std::cout << "Reading sim state data" << std::endl; + static SimulationStateData readSimulationStateData(cudaq::state *s) { + ; void *dataPtr = nullptr; auto stateVector = s->get_tensor(); auto precision = s->get_precision(); auto numElements = stateVector.get_num_elements(); auto elementSize = 0; if (precision == SimulationState::precision::fp32) { - std::cout << "32 bit precision" << std::endl; elementSize = sizeof(std::complex); auto *hostData = new std::complex[numElements]; - std::cout << "Reading host data" << std::endl; s->to_host(hostData, numElements); - std::cout << "Host data:" << std::endl; - for (size_t i = 0; i< numElements; i++) { - std::cout << hostData[i] << std::endl; - } dataPtr = reinterpret_cast(hostData); } else { - std::cout << "64 bit precision" << std::endl; elementSize = sizeof(std::complex); auto *hostData = new std::complex[numElements]; - std::cout << "Reading host data" << std::endl; s->to_host(hostData, numElements); - std::cout << "Host data:" << std::endl; - for (size_t i = 0; i< numElements; i++) { - std::cout << hostData[i] << std::endl; - } dataPtr = reinterpret_cast(hostData); } return SimulationStateData(dataPtr, numElements, elementSize); @@ -205,26 +193,19 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { if (args) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&mlirContext); - moduleOp.getContext()->disableMultithreading(); - pm.enableIRPrinting(); - auto &platform = cudaq::get_platform(); - // For efficiency, we don't run state prep to convert states to gates on - // simulators, instead we synthesize them as vectors. 
- pm.addPass(cudaq::opt::createQuakeSynthesizer(name, args, readSimulationStateData, startingArgIdx)); + // remote simulators, instead we synthesize states as vectors. + // Pass the data reader function to the synthesizer for this purpose. + pm.addPass(cudaq::opt::createQuakeSynthesizer( + name, args, readSimulationStateData, startingArgIdx)); pm.addPass(mlir::createCanonicalizerPass()); if (failed(pm.run(moduleOp))) throw std::runtime_error("Could not successfully apply quake-synth."); } - // Note: do not run state preparation pass here since we are always - // using simulators. - // Run client-side passes. `clientPasses` is empty right now, but the code // below accommodates putting passes into it. mlir::PassManager pm(&mlirContext); - moduleOp.getContext()->disableMultithreading(); - pm.enableIRPrinting(); std::string errMsg; llvm::raw_string_ostream os(errMsg); const std::string pipeline = diff --git a/targettests/Remote-Sim/state_init.cpp b/targettests/Remote-Sim/state_init.cpp index 4615e2ec08..735cb16f43 100644 --- a/targettests/Remote-Sim/state_init.cpp +++ b/targettests/Remote-Sim/state_init.cpp @@ -10,7 +10,6 @@ // clang-format off // RUN: nvq++ %cpp_std --enable-mlir --target remote-mqpu %s -o %t && %t -// RUN: nvq++ %cpp_std --target remote-mqpu %s -o %t && %t // TODO: this fails to compile, do we need it? 
// clang-format on #include @@ -34,36 +33,22 @@ void printCounts(cudaq::sample_result& result) { } int main() { - { - std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; - auto state = cudaq::state::from_data(vec); - auto state1 = cudaq::state::from_data(vec1); - { - // Passing state data as argument (kernel mode) - auto counts = cudaq::sample(test_complex_array_param, &state); - printCounts(counts); - - counts = cudaq::sample(test_complex_array_param, &state1); - printCounts(counts); - } - - // { - // // Passing state data as argument (builder mode) - // auto [kernel, state] = cudaq::make_kernel(); - // auto qubits = kernel.qalloc(state); - - // auto counts = cudaq::sample(kernel, &state); - // printCounts(counts); - - // counts = cudaq::sample(kernel, &state1); - // printCounts(counts); - // } - } + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0., 0., 0., 0., 0.}; + std::vector vec1{0., 0., 0., 0., 0., 0., M_SQRT1_2, M_SQRT1_2}; + auto state = cudaq::state::from_data(vec); + auto state1 = cudaq::state::from_data(vec1); + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_complex_array_param, &state); + printCounts(counts); + + counts = cudaq::sample(test_complex_array_param, &state1); + printCounts(counts); + } } -// CHECK: 00 -// CHECK: 10 +// CHECK: 000 +// CHECK: 100 -// CHECK: 01 -// CHECK: 11 +// CHECK: 011 +// CHECK: 111 From e73ac1c948531cf89cbad989c9b1f6841699c518 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 18 Jul 2024 15:20:30 -0700 Subject: [PATCH 45/50] Add tests for vector data serialization for remote sim --- targettests/Remote-Sim/state_init_vector.cpp | 202 ++++++++++--------- 1 file changed, 107 insertions(+), 95 deletions(-) diff --git a/targettests/Remote-Sim/state_init_vector.cpp b/targettests/Remote-Sim/state_init_vector.cpp index 7e93b63dae..b8d6bdb3bb 100644 --- a/targettests/Remote-Sim/state_init_vector.cpp +++ 
b/targettests/Remote-Sim/state_init_vector.cpp @@ -10,14 +10,11 @@ // clang-format off // RUN: nvq++ %cpp_std --enable-mlir --target remote-mqpu %s -o %t && %t -// RUN: nvq++ %cpp_std --target remote-mqpu %s -o %t && %t // TODO: this fails to compile, do we need it? // clang-format on #include #include - - __qpu__ void test_complex_constant_array() { cudaq::qvector v(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); } @@ -69,27 +66,41 @@ void printCounts(cudaq::sample_result& result) { } int main() { - // { - // auto counts = cudaq::sample(test_complex_constant_array); - // printCounts(counts); - // } - - // { - // auto counts = cudaq::sample(test_complex_constant_array2); - // printCounts(counts); - // } - - // { - // auto counts = cudaq::sample(test_complex_constant_array3); - // printCounts(counts); - // } - - // { - // auto counts = cudaq::sample(test_real_constant_array); - // printCounts(counts); - // } - - // { + { + auto counts = cudaq::sample(test_complex_constant_array); + printCounts(counts); + } + +// CHECK: 00 +// CHECK: 10 + + { + auto counts = cudaq::sample(test_complex_constant_array2); + printCounts(counts); + } + +// CHECK: 0001 +// CHECK: 0011 +// CHECK: 1001 +// CHECK: 1011 + + { + auto counts = cudaq::sample(test_complex_constant_array3); + printCounts(counts); + } + +// CHECK: 00 +// CHECK: 10 + + { + auto counts = cudaq::sample(test_real_constant_array); + printCounts(counts); + } + +// CHECK: 00 +// CHECK: 10 + + { std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; { @@ -101,92 +112,79 @@ int main() { printCounts(counts); } - // { - // // Passing state data as argument (builder mode) - // auto [kernel, v] = cudaq::make_kernel>(); - // auto qubits = kernel.qalloc(v); - - // auto counts = cudaq::sample(kernel, vec); - // printCounts(counts); - - // counts = cudaq::sample(kernel, vec1); - // printCounts(counts); - // } - // } - - // { - // std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - // std::vector 
vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; - // { - // // Passing state data as argument (kernel mode) - // auto counts = cudaq::sample(test_real_array_param, vec); - // printCounts(counts); - - // counts = cudaq::sample(test_real_array_param, vec1); - // printCounts(counts); - // } - - // { - // // Passing state data as argument (builder mode) - // auto [kernel, v] = cudaq::make_kernel>(); - // auto qubits = kernel.qalloc(v); - - // auto counts = cudaq::sample(kernel, vec); - // printCounts(counts); - - // counts = cudaq::sample(kernel, vec1); - // printCounts(counts); - // } - // } - - // Error message: "Invalid user-provided state data. Simulator is FP64 but state data is FP32." - // { - // std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - // std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; - // { - // // Passing state data as argument (kernel mode) - // auto counts = cudaq::sample(test_double_array_param, vec); - // printCounts(counts); - - // counts = cudaq::sample(test_double_array_param, vec1); - // printCounts(counts); - // } - // } - - // UCX ERROR Failed to allocate memory pool (name=mm_recv_desc) chunk: Out of memory - // { - // std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - // std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; - // { - // // Passing state data as argument (kernel mode) - // auto counts = cudaq::sample(test_float_array_param, vec); - // printCounts(counts); - - // counts = cudaq::sample(test_float_array_param, vec1); - // printCounts(counts); - // } - // } -} - // CHECK: 00 // CHECK: 10 -// CHECK: 0001 -// CHECK: 0011 -// CHECK: 1001 -// CHECK: 1011 +// CHECK: 01 +// CHECK: 11 + + { + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); + + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); + + counts = cudaq::sample(kernel, vec1); + printCounts(counts); + } + } // CHECK: 00 // CHECK: 10 +// CHECK: 01 +// CHECK: 11 + + { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 
0., 0.}; + std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_real_array_param, vec); + printCounts(counts); + + counts = cudaq::sample(test_real_array_param, vec1); + printCounts(counts); + } + // CHECK: 00 // CHECK: 10 +// CHECK: 01 +// CHECK: 11 + + { + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); + + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); + + counts = cudaq::sample(kernel, vec1); + printCounts(counts); + } + // CHECK: 00 // CHECK: 10 // CHECK: 01 // CHECK: 11 + } + + { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_double_array_param, vec); + printCounts(counts); + + counts = cudaq::sample(test_double_array_param, vec1); + printCounts(counts); + } // CHECK: 00 // CHECK: 10 @@ -194,8 +192,22 @@ int main() { // CHECK: 01 // CHECK: 11 + { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; + + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_float_array_param, vec); + printCounts(counts); + + counts = cudaq::sample(test_float_array_param, vec1); + printCounts(counts); + } + // CHECK: 00 // CHECK: 10 // CHECK: 01 // CHECK: 11 +} + From f4cc697ea42ff3d649f71704709bfbb061ce7059 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 19 Jul 2024 10:42:44 -0700 Subject: [PATCH 46/50] Merge with main --- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 2 -- .../runtime/cudaq/platform/py_alt_launch_kernel.cpp | 11 ----------- runtime/common/BaseRestRemoteClient.h | 1 - 3 files changed, 14 deletions(-) diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 3ddb72b2fe..edbb25daa3 100644 --- 
a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -26,8 +26,6 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" -#include - #define DEBUG_TYPE "quake-synthesizer" using namespace mlir; diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 1b2b8e55c5..dfd2384b38 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -514,17 +514,6 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, pm.addPass(cudaq::opt::createQuakeSynthesizer(name, rawArgs, true)); pm.addPass(createCanonicalizerPass()); - // Run state preparation for quantum devices only. - // Simulators have direct implementation of state initialization - // in their runtime. - auto &platform = cudaq::get_platform(); - if (!platform.is_simulator() || platform.is_emulated()) { - pm.addPass(cudaq::opt::createConstPropComplex()); - pm.addPass(cudaq::opt::createLiftArrayAlloc()); - pm.addPass(cudaq::opt::createStatePreparation()); - } - pm.addPass(createCanonicalizerPass()); - // Run state preparation for quantum devices only. // Simulators have direct implementation of state initialization // in their runtime. 
diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index b86a4d0ebf..a752d2d35f 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -93,7 +93,6 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { } static SimulationStateData readSimulationStateData(cudaq::state *s) { - ; void *dataPtr = nullptr; auto stateVector = s->get_tensor(); auto precision = s->get_precision(); From 585973fc3759c85bbaf2992214a92a92a9476231 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 19 Jul 2024 11:35:41 -0700 Subject: [PATCH 47/50] Add more synth tests --- unittests/Optimizer/QuakeSynthTester.cpp | 59 ++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/unittests/Optimizer/QuakeSynthTester.cpp b/unittests/Optimizer/QuakeSynthTester.cpp index aa25940255..414d1d14c2 100644 --- a/unittests/Optimizer/QuakeSynthTester.cpp +++ b/unittests/Optimizer/QuakeSynthTester.cpp @@ -328,6 +328,65 @@ TEST(QuakeSynthTests, checkVectorOfInt) { EXPECT_EQ(countz.size(), 1); } +TEST(QuakeSynthTests, checkStatePointerLocalSim) { + auto [kernel, thetas] = cudaq::make_kernel(); + auto theta = thetas[0]; + auto phi = thetas[1]; + auto q = kernel.qalloc(3); + kernel.x(q[0]); + kernel.ry(theta, q[1]); + kernel.ry(phi, q[2]); + kernel.x(q[2], q[0]); + kernel.x(q[0], q[1]); + kernel.ry(-theta, q[1]); + kernel.x(q[0], q[1]); + kernel.x(q[1], q[0]); + + std::cout << kernel.to_quake() << '\n'; + + // Set the proper name for the kernel + auto properName = cudaq::runtime::cudaqGenPrefixName + kernel.name(); + + using namespace cudaq::spin; + cudaq::spin_op h = 5.907 - 2.1433 * x(0) * x(1) - 2.1433 * y(0) * y(1) + + .21829 * z(0) - 6.125 * z(1); + cudaq::spin_op h3 = h + 9.625 - 9.625 * z(2) - 3.913119 * x(1) * x(2) - + 3.913119 * y(1) * y(2); + + cudaq::state state = cudaq::state::from_data(std::vector>({.3591, .2569})); + double energy = cudaq::observe(kernel, h3, &state); + 
EXPECT_NEAR(energy, -2.045375, 1e-3); + + // Map the kernel_builder to_quake output to MLIR + auto context = cudaq::initializeMLIR(); + auto module = parseSourceString(kernel.to_quake(), context.get()); + + // Create a struct defining the runtime args for the kernel + auto [args, offset] = + cudaq::mapToRawArgs(kernel.name(), std::vector{.3591, .2569}); + + // Run quake-synth + EXPECT_TRUE(succeeded(runQuakeSynth(kernel.name(), args, module))); + + // Get the function, make sure that it has no arguments + auto func = module->lookupSymbol(properName); + EXPECT_TRUE(func); + EXPECT_TRUE(func.getArguments().empty()); + + func.dump(); + + // Lower to LLVM and create the JIT execution engine + EXPECT_TRUE(succeeded(lowerToLLVMDialect(*module))); + auto jitOrError = ExecutionEngine::create(*module); + EXPECT_TRUE(!!jitOrError); + std::unique_ptr jit = std::move(jitOrError.get()); + + // // Sample this new kernel processed with quake synth + energy = observeJitCode(jit.get(), h3, kernel.name()); + // Should see the same thing as before. 
+ EXPECT_NEAR(energy, -2.045375, 1e-3); +} + TEST(QuakeSynthTests, checkCallable) { auto [ansatz, thetas] = cudaq::make_kernel>(); auto q = ansatz.qalloc(2); From c88c51e53423c44ed9394c3264a036641e6c4e51 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 19 Jul 2024 14:07:03 -0700 Subject: [PATCH 48/50] Cleanup --- include/cudaq/Optimizer/Transforms/Passes.h | 6 +-- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 43 +++++++------------ runtime/common/BaseRemoteRESTQPU.h | 3 +- unittests/Optimizer/QuakeSynthTester.cpp | 8 ++-- 4 files changed, 24 insertions(+), 36 deletions(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index ae9c6d2188..03af81ab35 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -44,9 +44,9 @@ std::unique_ptr createQuakeAddDeallocs(); std::unique_ptr createQuakeSynthesizer(); std::unique_ptr createQuakeSynthesizer(std::string_view, const void *, - SimulationStateData::getDataFunc *, std::size_t); -std::unique_ptr createQuakeSynthesizer(std::string_view, - const void *, bool); + std::size_t startingArgIdx = 0, + SimulationStateData::getDataFunc *getData = nullptr, + bool sameAddressSpace = false); std::unique_ptr createRaiseToAffinePass(); std::unique_ptr createUnwindLoweringPass(); diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index edbb25daa3..8f36b03db8 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -481,28 +481,23 @@ class QuakeSynthesizer // The raw pointer to the runtime arguments. const void *args; + // The starting argument index to synthesize. Typically 0 but may be >0 for + // partial synthesis. If >0, it is assumed that the first argument(s) are NOT + // in `args`. + std::size_t startingArgIdx = 0; + // Function to read the state data, if any. 
SimulationStateData::getDataFunc *getStateData = nullptr; // Is the simulation running in the same address space as synthesis? bool sameAddressSpace = false; - // The starting argument index to synthesize. Typically 0 but may be >0 for - // partial synthesis. If >0, it is assumed that the first argument(s) are NOT - // in `args`. - std::size_t startingArgIdx = 0; - public: QuakeSynthesizer() = default; - - // Execution in a same address space on a simulator, or a quantum device - QuakeSynthesizer(std::string_view kernel, const void *a, bool sameSpace) - : kernelName(kernel), args(a), sameAddressSpace(sameSpace) {} - - // Execution on a remote simulator - QuakeSynthesizer(std::string_view kernel, const void *a, - SimulationStateData::getDataFunc *getData, std::size_t s) - : kernelName(kernel), args(a), getStateData(getData), startingArgIdx(s) {} + QuakeSynthesizer(std::string_view kernel, const void *a, std::size_t s, + SimulationStateData::getDataFunc *getData, bool sameSpace) + : kernelName(kernel), args(a), startingArgIdx(s), getStateData(getData), + sameAddressSpace(sameSpace) {} mlir::ModuleOp getModule() { return getOperation(); } @@ -881,18 +876,10 @@ std::unique_ptr cudaq::opt::createQuakeSynthesizer() { return std::make_unique(); } -/// Execution on remote simulator -std::unique_ptr -cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, const void *a, - SimulationStateData::getDataFunc *getData, - std::size_t startingArgIdx = 0) { - return std::make_unique(kernelName, a, getData, - startingArgIdx); -} - -/// Execution on the same address space in a simulator or a quantum device -std::unique_ptr -cudaq::opt::createQuakeSynthesizer(std::string_view kernelName, const void *a, - bool sameAddressSpace = false) { - return std::make_unique(kernelName, a, sameAddressSpace); +std::unique_ptr cudaq::opt::createQuakeSynthesizer( + std::string_view kernelName, const void *a, std::size_t startingArgIdx = 0, + SimulationStateData::getDataFunc *getData = 
nullptr, + bool sameAddressSpace = false) { + return std::make_unique(kernelName, a, startingArgIdx, + getData, sameAddressSpace); } diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index f6d914d250..2ea79e3f91 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -406,8 +406,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&context); - pm.addPass( - cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs, false)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); pm.addPass(mlir::createCanonicalizerPass()); if (disableMLIRthreading || enablePrintMLIREachPass) moduleOp.getContext()->disableMultithreading(); diff --git a/unittests/Optimizer/QuakeSynthTester.cpp b/unittests/Optimizer/QuakeSynthTester.cpp index 414d1d14c2..a74a1c82bb 100644 --- a/unittests/Optimizer/QuakeSynthTester.cpp +++ b/unittests/Optimizer/QuakeSynthTester.cpp @@ -54,7 +54,8 @@ LogicalResult runQuakeSynth(std::string_view kernelName, void *rawArgs, PassManager pm(module->getContext()); module->getContext()->disableMultithreading(); pm.enableIRPrinting(); - pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, rawArgs, true)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, rawArgs, 0, nullptr, + true)); pm.addPass(createCanonicalizerPass()); pm.addPass(cudaq::opt::createExpandMeasurementsPass()); pm.addNestedPass(cudaq::opt::createClassicalMemToReg()); @@ -329,7 +330,7 @@ TEST(QuakeSynthTests, checkVectorOfInt) { } TEST(QuakeSynthTests, checkStatePointerLocalSim) { - auto [kernel, thetas] = cudaq::make_kernel(); + auto [kernel, thetas] = cudaq::make_kernel(); auto theta = thetas[0]; auto phi = thetas[1]; auto q = kernel.qalloc(3); @@ -353,7 +354,8 @@ TEST(QuakeSynthTests, checkStatePointerLocalSim) { cudaq::spin_op h3 = h + 9.625 - 9.625 * z(2) - 3.913119 * x(1) * x(2) - 3.913119 * y(1) * y(2); - 
cudaq::state state = cudaq::state::from_data(std::vector>({.3591, .2569})); + cudaq::state state = cudaq::state::from_data( + std::vector>({.3591, .2569})); double energy = cudaq::observe(kernel, h3, &state); EXPECT_NEAR(energy, -2.045375, 1e-3); From 10becb801b12d094955f12f7890478bf77c7f26d Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 19 Jul 2024 15:47:45 -0700 Subject: [PATCH 49/50] Cleanup --- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 6 +++--- python/runtime/cudaq/platform/py_alt_launch_kernel.cpp | 2 +- runtime/common/BaseRestRemoteClient.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 8f36b03db8..4a62afdf77 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -877,9 +877,9 @@ std::unique_ptr cudaq::opt::createQuakeSynthesizer() { } std::unique_ptr cudaq::opt::createQuakeSynthesizer( - std::string_view kernelName, const void *a, std::size_t startingArgIdx = 0, - SimulationStateData::getDataFunc *getData = nullptr, - bool sameAddressSpace = false) { + std::string_view kernelName, const void *a, std::size_t startingArgIdx, + SimulationStateData::getDataFunc *getData, + bool sameAddressSpace) { return std::make_unique(kernelName, a, startingArgIdx, getData, sameAddressSpace); } diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index dfd2384b38..93477941d3 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -511,7 +511,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, getEnvBool("CUDAQ_MLIR_PRINT_EACH_PASS", false); PassManager pm(context); - pm.addPass(cudaq::opt::createQuakeSynthesizer(name, rawArgs, true)); + pm.addPass(cudaq::opt::createQuakeSynthesizer(name, rawArgs, 0, nullptr, 
true)); pm.addPass(createCanonicalizerPass()); // Run state preparation for quantum devices only. diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index a752d2d35f..79ef3a5043 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -196,7 +196,7 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { // remote simulators, instead we synthesize states as vectors. // Pass the data reader function to the synthesizer for this purpose. pm.addPass(cudaq::opt::createQuakeSynthesizer( - name, args, readSimulationStateData, startingArgIdx)); + name, args, startingArgIdx, readSimulationStateData)); pm.addPass(mlir::createCanonicalizerPass()); if (failed(pm.run(moduleOp))) throw std::runtime_error("Could not successfully apply quake-synth."); From a4d16e7f02b5a1f2cbb58edf9a3a3376f135497c Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 19 Jul 2024 15:50:47 -0700 Subject: [PATCH 50/50] Format --- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 3 +-- python/runtime/cudaq/platform/py_alt_launch_kernel.cpp | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 4a62afdf77..a32eb6d737 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -878,8 +878,7 @@ std::unique_ptr cudaq::opt::createQuakeSynthesizer() { std::unique_ptr cudaq::opt::createQuakeSynthesizer( std::string_view kernelName, const void *a, std::size_t startingArgIdx, - SimulationStateData::getDataFunc *getData, - bool sameAddressSpace) { + SimulationStateData::getDataFunc *getData, bool sameAddressSpace) { return std::make_unique(kernelName, a, startingArgIdx, getData, sameAddressSpace); } diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 
93477941d3..353e36bcd4 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -511,7 +511,8 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, getEnvBool("CUDAQ_MLIR_PRINT_EACH_PASS", false); PassManager pm(context); - pm.addPass(cudaq::opt::createQuakeSynthesizer(name, rawArgs, 0, nullptr, true)); + pm.addPass( + cudaq::opt::createQuakeSynthesizer(name, rawArgs, 0, nullptr, true)); pm.addPass(createCanonicalizerPass()); // Run state preparation for quantum devices only.