From a6d5f4e7ec30a7c6e3391c8647f41e966ef7f1ab Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Mon, 17 Jun 2024 10:08:03 -0700 Subject: [PATCH 1/9] Add a pass for state preparation from vectors --- include/cudaq/Optimizer/Transforms/Passes.h | 2 + include/cudaq/Optimizer/Transforms/Passes.td | 11 ++ lib/Optimizer/Transforms/CMakeLists.txt | 1 + .../Transforms/GenKernelExecution.cpp | 34 +++- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 34 ++++ lib/Optimizer/Transforms/StatePreparation.cpp | 119 +++++++++++++ program.py | 35 ++++ .../cudaq/platform/py_alt_launch_kernel.cpp | 1 + runtime/common/BaseRemoteRESTQPU.h | 1 + runtime/common/BaseRestRemoteClient.h | 1 + targettests/execution/from_state.cpp | 30 ++++ targettests/execution/from_state_complex.cpp | 27 +++ targettests/execution/program.cpp | 167 ++++++++++++++++++ 13 files changed, 457 insertions(+), 6 deletions(-) create mode 100644 lib/Optimizer/Transforms/StatePreparation.cpp create mode 100644 program.py create mode 100644 targettests/execution/from_state.cpp create mode 100644 targettests/execution/from_state_complex.cpp create mode 100644 targettests/execution/program.cpp diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 996b6e56a7..422032326c 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -40,6 +40,8 @@ std::unique_ptr createLowerToCFGPass(); std::unique_ptr createObserveAnsatzPass(std::vector &); std::unique_ptr createQuakeAddMetadata(); std::unique_ptr createQuakeAddDeallocs(); +std::unique_ptr createStatePreparation(); +std::unique_ptr createStatePreparation(std::string_view, void *); std::unique_ptr createQuakeSynthesizer(); std::unique_ptr createQuakeSynthesizer(std::string_view, void *); std::unique_ptr createRaiseToAffinePass(); diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 8d2f0c1821..e5e15a8776 100644 --- 
a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -512,6 +512,17 @@ def PruneCtrlRelations : Pass<"pruned-ctrl-form", "mlir::func::FuncOp"> { }]; } +def PrepareState : Pass<"state-prep", "mlir::ModuleOp"> { + let summary = + "Convert state vector data into gates"; + let description = [{ + Convert quake representation that includes qubit initialization + from data into qubit initialization using gates. + }]; + + let constructor = "cudaq::opt::createStatePreparation()"; +} + def QuakeSynthesize : Pass<"quake-synth", "mlir::ModuleOp"> { let summary = "Synthesize concrete quantum program from Quake code plus runtime values."; diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index 7600efe276..6a51057bd3 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -39,6 +39,7 @@ add_cudaq_library(OptTransforms ObserveAnsatz.cpp PruneCtrlRelations.cpp QuakeAddMetadata.cpp + StatePreparation.cpp QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp diff --git a/lib/Optimizer/Transforms/GenKernelExecution.cpp b/lib/Optimizer/Transforms/GenKernelExecution.cpp index c16a4af7dd..68ef5b21b7 100644 --- a/lib/Optimizer/Transforms/GenKernelExecution.cpp +++ b/lib/Optimizer/Transforms/GenKernelExecution.cpp @@ -434,8 +434,18 @@ class GenerateKernelExecution hasTrailingData = true; continue; } - if (isa(currEleTy) && - !isStatePointerType(currEleTy)) { + //if (isa(currEleTy) && + // !isStatePointerType(currEleTy)) { + if (auto ptrTy = dyn_cast(currEleTy)) { + if (isa(ptrTy.getElementType())) { + // Special case: if the argument is a `cudaq::state*`, then just pass + // the pointer. We can do that in this case because the synthesis step + // (which will receive the argument data) is assumed to run in the + // same memory space. 
+ argPtr = builder.create(loc, currEleTy, argPtr); + stVal = builder.create(loc, stVal.getType(), + stVal, argPtr, idx); + } continue; } @@ -941,8 +951,8 @@ class GenerateKernelExecution cudaq::cc::numberOfHiddenArgs(hasThisPointer, hiddenSRet); if (count > 0 && args.size() >= count && std::all_of(args.begin(), args.begin() + count, [](auto i) { - return isa(i.getType()) && - !isStatePointerType(i.getType()); + return isa(i.getType());// && + // !isStatePointerType(i.getType()); })) return args.drop_front(count); return args; @@ -1208,9 +1218,21 @@ class GenerateKernelExecution hasTrailingData = true; continue; } - if (isa(inTy) && !isStatePointerType(inTy)) + //if (isa(inTy) && !isStatePointerType(inTy)) + // continue; + if (auto ptrTy = dyn_cast(inTy)) { + if (isa(ptrTy.getElementType())) { + // Special case: if the argument is a `cudaq::state*`, then just pass + // the pointer. We can do that in this case because the synthesis step + // (which will receive the argument data) is assumed to run in the + // same memory space. + Value argPtr = builder.create(loc, inTy, arg); + stVal = builder.create(loc, stVal.getType(), + stVal, argPtr, idx); + } continue; - + } + stVal = builder.create(loc, stVal.getType(), stVal, arg, idx); } diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index f371a8b9cd..dbb2b00cc8 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -23,10 +23,19 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" +#include + #define DEBUG_TYPE "quake-synthesizer" using namespace mlir; +// cudaq::state is defined in the runtime. The compiler will never need to know +// about its implementation and there should not be a circular build/library +// dependence because of it. Simply forward declare it, as it is notional. 
+namespace cudaq { +class state; +} + /// Replace a BlockArgument of a specific type with a concrete instantiation of /// that type, and add the generation of that constant as an MLIR Op to the /// beginning of the function. For example @@ -366,7 +375,9 @@ class QuakeSynthesizer } void runOnOperation() override final { + std::cout << "Module before synthesis " << std::endl; auto module = getModule(); + module.dump(); if (args == nullptr || kernelName.empty()) { module.emitOpError("Synthesis requires a kernel and the values of the " "arguments passed when it is called."); @@ -472,6 +483,27 @@ class QuakeSynthesizer continue; } + if (auto ptrTy = dyn_cast(type)) { + if (isa(ptrTy.getElementType())) { + // Special case of a `cudaq::state*` which must be in the same address + // space. This references a container to a set of simulation + // amplitudes. + synthesizeRuntimeArgument( + builder, argument, args, offset, sizeof(void *), + [=](OpBuilder &builder, cudaq::state **concrete) { + Value rawPtr = builder.create( + loc, reinterpret_cast(*concrete), + sizeof(void *) * 8); + auto stateTy = cudaq::cc::StateType::get(builder.getContext()); + return builder.create( + loc, cudaq::cc::PointerType::get(stateTy), rawPtr); + }); + continue; + } + // N.B. Other pointers will not be materialized and may be in a + // different address space. + } + // If std::vector type, add it to the list of vector info. // These will be processed when we reach the buffer's appendix. 
if (auto vecTy = dyn_cast(type)) { @@ -601,6 +633,8 @@ class QuakeSynthesizer } } funcOp.eraseArguments(argsToErase); + std::cout << "Module after synthesis " << std::endl; + module.dump(); } }; diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp new file mode 100644 index 0000000000..d7868b46ef --- /dev/null +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -0,0 +1,119 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Runtime.h" +#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" +#include "cudaq/Optimizer/Transforms/Passes.h" +#include "llvm/Support/Debug.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Target/LLVMIR/TypeToLLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/RegionUtils.h" + +#include + +#define DEBUG_TYPE "state-preparation" + +using namespace mlir; + +/// Replace a qubit initialization from vectors with quantum gates. 
+/// For example: +/// +/// func.func @foo(%arg0 : !cc.stdvec>) { +/// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 +/// %1 = math.cttz %0 : i64 +/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> +/// %3 = quake.alloca !quake.veq[%1 : i64] +/// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq +/// return +/// } +/// +/// on call that passes std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2} as arg0 +/// will be updated to: +/// +/// func.func @foo(%arg0 : !cc.stdvec>) { +/// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 +/// %c4_i64 = arith.constant 4 : i64 +/// %3 = math.cttz %c4_i64 : i64 +/// %5 = quake.alloca !quake.veq[%3 : i64] +/// %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref +/// quake.h %6 : (!quake.ref) -> () +/// %7 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref +/// %8 = quake.extract_ref %5[1] : (!quake.veq) -> !quake.ref +/// quake.x [%7] %8 : (!quake.ref, !quake.ref) -> () +/// } +/// +/// Note: we rely on the later synthesis and const prop stages to replace +/// the argument by a constant and propagate the values and vector size +/// through those and other instructions. + +namespace { +class StatePreparation + : public cudaq::opt::StatePreparationBase { +protected: + // The name of the kernel to be synthesized + std::string kernelName; + + // The raw pointer to the runtime arguments. 
+ void *args; + +public: + StatePreparation() = default; + StatePreparation(std::string_view kernel, void *a) + : kernelName(kernel), args(a) {} + + mlir::ModuleOp getModule() { return getOperation(); } + + + void runOnOperation() override final { + std::cout << "Module before state prep " << std::endl; + auto module = getModule(); + module.dump(); + if (args == nullptr || kernelName.empty()) { + module.emitOpError("Synthesis requires a kernel and the values of the " + "arguments passed when it is called."); + signalPassFailure(); + return; + } + + auto kernelNameInQuake = cudaq::runtime::cudaqGenPrefixName + kernelName; + // Get the function we care about (the one with kernelName) + auto funcOp = module.lookupSymbol(kernelNameInQuake); + if (!funcOp) { + module.emitOpError("The kernel '" + kernelName + + "' was not found in the module."); + signalPassFailure(); + return; + } + + // Create the builder. + auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); + + std::cout << "Module after synthesis " << std::endl; + module.dump(); + } +}; + +} // namespace + +std::unique_ptr cudaq::opt::createStatePreparation() { + return std::make_unique(); +} + +std::unique_ptr +cudaq::opt::createStatePreparation(std::string_view kernelName, void *a) { + return std::make_unique(kernelName, a); +} diff --git a/program.py b/program.py new file mode 100644 index 0000000000..e282d8cd5d --- /dev/null +++ b/program.py @@ -0,0 +1,35 @@ +# ============================================================================ # +# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # +# All rights reserved. # +# # +# This source code and the accompanying materials are made available under # +# the terms of the Apache License 2.0 which accompanies this distribution. 
# +# ============================================================================ # + +import numpy as np +import cudaq + +import cudaq +import numpy as np + +cudaq.reset_target() + +cudaq.set_target('nvidia') +#cudaq.set_target('nvidia-mqpu') +# cudaq.set_target('density-matrix-cpu') + + +c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], + dtype=np.complex128) +state = cudaq.State.from_data(c) + +@cudaq.kernel(verbose=True) +def kernel(vec: cudaq.State): + q = cudaq.qvector(vec) + +print(kernel) +print(cudaq.to_qir(kernel)) + +#print(cudaq.get_target()) +#counts = cudaq.sample(kernel, state) +#print(counts) \ No newline at end of file diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 51f86ce15f..ff0c0ce477 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -474,6 +474,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, PassManager pm(context); pm.addPass(createCanonicalizerPass()); + pm.addPass(cudaq::opt::createStatePreparation(name, rawArgs)); pm.addPass(cudaq::opt::createQuakeSynthesizer(name, rawArgs)); pm.addPass(cudaq::opt::createExpandMeasurementsPass()); pm.addNestedPass(cudaq::opt::createClassicalMemToReg()); diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index aa36a0c62d..08f41e60ec 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -401,6 +401,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&context); + pm.addPass(cudaq::opt::createStatePreparation(kernelName, updatedArgs)); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); if (disableMLIRthreading || enablePrintMLIREachPass) moduleOp.getContext()->disableMultithreading(); diff --git a/runtime/common/BaseRestRemoteClient.h 
b/runtime/common/BaseRestRemoteClient.h index 17c235a76b..9325d0345d 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -153,6 +153,7 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { if (args) { cudaq::info("Run Quake Synth.\n"); mlir::PassManager pm(&mlirContext); + pm.addPass(cudaq::opt::createStatePreparation(name, args)); pm.addPass(cudaq::opt::createQuakeSynthesizer(name, args)); if (failed(pm.run(moduleOp))) throw std::runtime_error("Could not successfully apply quake-synth."); diff --git a/targettests/execution/from_state.cpp b/targettests/execution/from_state.cpp new file mode 100644 index 0000000000..55438848cb --- /dev/null +++ b/targettests/execution/from_state.cpp @@ -0,0 +1,30 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s + +#include +#include "cudaq/builder/kernels.h" +#include + +__qpu__ void test(cudaq::state *inState) { + cudaq::qvector q(inState); +} + +// CHECK: size 2 + +int main() { + std::vector> vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; + auto state = cudaq::state::from_data(vec); + auto counts = cudaq::sample(test, &state); + counts.dump(); + + printf("size %zu\n", counts.size()); + return !(counts.size() == 2); +} diff --git a/targettests/execution/from_state_complex.cpp b/targettests/execution/from_state_complex.cpp new file mode 100644 index 0000000000..5ca8813393 --- /dev/null +++ b/targettests/execution/from_state_complex.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s + +#include + +__qpu__ void test(std::vector inState) { + cudaq::qvector q = inState; +} + +// CHECK: size 2 + +int main() { + std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; + auto counts = cudaq::sample(test, vec); + counts.dump(); + + printf("size %zu\n", counts.size()); + return !(counts.size() == 2); +} diff --git a/targettests/execution/program.cpp b/targettests/execution/program.cpp new file mode 100644 index 0000000000..b6a12ebb57 --- /dev/null +++ b/targettests/execution/program.cpp @@ -0,0 +1,167 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s + +#include +#include "cudaq/builder/kernels.h" +#include + +__qpu__ void test1(std::vector inState) { + cudaq::qvector q1 = inState; + h(q1[0]); + cx(q1[0], q1[1]); + +} + +// __qpu__ void test2(cudaq::state *inState) { +// cudaq::qvector q2(inState); +// cudaq::x(q2); +// } + +// __qpu__ void test3() { +// auto q3 = cudaq::qvector({M_SQRT1_2, 0., 0., M_SQRT1_2}); +// } + +// error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:1938: not yet implemented: unknown function, get_state, in cudaq namespace +// __qpu__ void test4() { +// cudaq::qvector q(cudaq::get_state(test3)); +// } + +// error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:392: not yet implemented: argument type conversion +// __qpu__ void test5(cudaq::state *inState) { +// test2(inState); +// } + + + +int main() { + std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; + + { + // Passing state data as argument (vector) + + // Before synthesis: + + // func.func @__nvqpp__mlirgen__function_test1._Z5test1St6vectorISt7complexIfESaIS1_EE(%arg0: !cc.stdvec>) attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + // %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 + // %1 = math.cttz %0 : i64 + // %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> + // %3 = quake.alloca !quake.veq[%1 : i64] + // %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq + // return + // } + + // After synthesis + + // func.func @__nvqpp__mlirgen__function_test1._Z5test1St6vectorISt7complexIfESaIS1_EE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + // %0 = cc.const_array [0.707106769 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.707106769 : f32, 0.000000e+00 : f32] : !cc.array x 4> + // %1 = cc.alloca 
!cc.array x 4> + // cc.store %0, %1 : !cc.ptr x 4>> + // %2 = cc.cast %1 : (!cc.ptr x 4>>) -> !cc.ptr> + // %c4_i64 = arith.constant 4 : i64 + // %3 = math.cttz %c4_i64 : i64 // (TODO: replace by a const) + // %4 = quake.alloca !quake.veq[%3 : i64] + // %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq // TODO: replace by gates + // return + // } + + // TODO: in StatePreparation pass + // input - vector, qubits + // output - MLIR replacing alloca+state_init instructions with gates on qubits + + // %3 = math.cttz %c4_i64 : i64 + // %4 = quake.alloca !quake.veq[%3 : i64] + // %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq + + // => (something like) + + // create a function that does the following and call it on qubits + // %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref + // quake.ry (%cst) %6 : (f64, !quake.ref) -> () + // ... + + // TODO: Run state preparation pass before synthesis + + std::cout << "test1(vec): " << "\n"; + auto counts = cudaq::sample(test1, vec); + counts.dump(); + } + + // { + // // Passing state ptr as argument - no support for from_data + + // // "func.func"() ({ + // // ^bb0(%arg0: !cc.ptr): + // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 + // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq + // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq + // // "func.return"() : () -> () + // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () + + // std::cout << "test2(state): " << "\n"; + // auto state = cudaq::state::from_data(vec); + + // // 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function + // //auto counts = cudaq::sample(test2, &state); + // //counts.dump(); + // } + + // { + // // Passing a state from another kernel as argument + + // // "func.func"() ({ + // // 
^bb0(%arg0: !cc.ptr): + // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 + // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq + // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq + // // "func.return"() : () -> () + // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () + + // std::cout << "test2(test3): " << "\n"; + // auto state = cudaq::get_state(test3); + + // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function + // auto counts = cudaq::sample(test2, &state); + // counts.dump(); + // } + + // { + // // Passing a state to another kernel as argument + // std::cout << "test4(state): " << "\n"; + // //auto state = cudaq::state::from_data(vec); + // //auto counts = cudaq::sample(test4, &state); + // } + + // { + // // Creating a kernel from state and passing its state to another kernel + + // // "func.func"() ({ + // // ^bb0(%arg0: !cc.ptr): + // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 + // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq + // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq + // // "func.return"() : () -> () + // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () + + // std::cout << "test2(kernel): " << "\n"; + // std::vector> vec{.70710678, 0., 0., 0.70710678}; + // auto kernel = cudaq::make_kernel(); + // auto qubits = kernel.qalloc(2); + + // cudaq::from_state(kernel, qubits, vec); + + // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function + // //auto state = cudaq::get_state(kernel); + // //auto counts = cudaq::sample(test2, &state); + + // //counts.dump(); + // } + +} \ No 
newline at end of file From 93dd8d7f4ba31cc3869fd7fbaa399631c1cdaa97 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 25 Jun 2024 11:23:37 -0700 Subject: [PATCH 2/9] Implement state preparation --- lib/Optimizer/Transforms/CMakeLists.txt | 1 + lib/Optimizer/Transforms/StateDecomposer.cpp | 128 ++++++++++++++ lib/Optimizer/Transforms/StateDecomposer.h | 163 ++++++++++++++++++ lib/Optimizer/Transforms/StatePreparation.cpp | 151 ++++++++++------ targettests/execution/program.cpp | 118 +++---------- 5 files changed, 415 insertions(+), 146 deletions(-) create mode 100644 lib/Optimizer/Transforms/StateDecomposer.cpp create mode 100644 lib/Optimizer/Transforms/StateDecomposer.h diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index 6a51057bd3..b0a13571ec 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -39,6 +39,7 @@ add_cudaq_library(OptTransforms ObserveAnsatz.cpp PruneCtrlRelations.cpp QuakeAddMetadata.cpp + StateDecomposer.cpp StatePreparation.cpp QuakeSynthesizer.cpp RefToVeqAlloc.cpp diff --git a/lib/Optimizer/Transforms/StateDecomposer.cpp b/lib/Optimizer/Transforms/StateDecomposer.cpp new file mode 100644 index 0000000000..3105fad707 --- /dev/null +++ b/lib/Optimizer/Transforms/StateDecomposer.cpp @@ -0,0 +1,128 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include "StateDecomposer.h" + +namespace cudaq::details { + +std::vector grayCode(std::size_t numBits) { + std::vector result(1ULL << numBits); + for (std::size_t i = 0; i < (1ULL << numBits); ++i) + result[i] = ((i >> 1) ^ i); + return result; +} + +std::vector getControlIndices(std::size_t numBits) { + auto code = grayCode(numBits); + std::vector indices; + for (auto i = 0u; i < code.size(); ++i) { + // The position of the control in the lth CNOT gate is set to match + // the position where the lth and (l + 1)th bit strings g[l] and g[l+1] of + // the binary reflected Gray code differ. + auto position = std::log2(code[i] ^ code[(i + 1) % code.size()]); + // N.B: In CUDA Quantum we write the least significant bit (LSb) on the left + // + // lsb -v + // 001 + // ^- msb + // + // Meaning that the bitstring 001 represents the number four instead of one. + // The above position calculation uses the 'normal' convention of writing + // numbers with the LSb on the right. + // + // Now, what we need to find out is the position of the 1 in the bitstring. + // If we take LSb as being position 0, then for the normal convention its + // position will be 0. Using CUDA Quantum convention it will be 2. Hence, + // we need to convert the position we find using: + // + // numBits - position - 1 + // + // The extra -1 is to account for indices starting at 0. Using the above + // examples: + // + // bitstring: 001 + // numBits: 3 + // position: 0 + // + // We have the converted position: 2, which is what we need. + indices.emplace_back(numBits - position - 1); + } + return indices; +} + +std::vector convertAngles(const std::span alphas) { + // Implements Eq. (3) from https://arxiv.org/pdf/quant-ph/0407010.pdf + // + // N.B: The paper fails to explicitly define what the dot operator is in + // the exponent of -1. Ref. 3 solves the mystery: it's the bitwise inner + // product. 
+ auto bitwiseInnerProduct = [](std::size_t a, std::size_t b) { + auto product = a & b; + auto sumOfProducts = 0; + while (product) { + sumOfProducts += product & 0b1 ? 1 : 0; + product = product >> 1; + } + return sumOfProducts; + }; + std::vector thetas(alphas.size(), 0); + for (std::size_t i = 0u; i < alphas.size(); ++i) { + for (std::size_t j = 0u; j < alphas.size(); ++j) + thetas[i] += + bitwiseInnerProduct(j, ((i >> 1) ^ i)) & 0b1 ? -alphas[j] : alphas[j]; + thetas[i] /= alphas.size(); + } + return thetas; +} + +std::vector getAlphaZ(const std::span data, + std::size_t numQubits, std::size_t k) { + // Implements Eq. (5) from https://arxiv.org/pdf/quant-ph/0407010.pdf + std::vector angles; + double divisor = static_cast(1ULL << (k - 1)); + for (std::size_t j = 1; j <= (1ULL << (numQubits - k)); ++j) { + double angle = 0.0; + for (std::size_t l = 1; l <= (1ULL << (k - 1)); ++l) + // N.B: There is an extra '-1' on these indices computations to account + // for the fact that our indices start at 0. + angle += data[(2 * j - 1) * (1 << (k - 1)) + l - 1] - + data[(2 * j - 2) * (1 << (k - 1)) + l - 1]; + angles.push_back(angle / divisor); + } + return angles; +} + +std::vector getAlphaY(const std::span data, + std::size_t numQubits, std::size_t k) { + // Implements Eq. (8) from https://arxiv.org/pdf/quant-ph/0407010.pdf + // N.B: There is an extra '-1' on these indices computations to account for + // the fact that our indices start at 0. 
+ std::vector angles; + for (std::size_t j = 1; j <= (1ULL << (numQubits - k)); ++j) { + double numerator = 0; + for (std::size_t l = 1; l <= (1ULL << (k - 1)); ++l) { + numerator += + std::pow(std::abs(data[(2 * j - 1) * (1 << (k - 1)) + l - 1]), 2); + } + + double denominator = 0; + for (std::size_t l = 1; l <= (1ULL << k); ++l) { + denominator += std::pow(std::abs(data[(j - 1) * (1 << k) + l - 1]), 2); + } + + if (denominator == 0.0) { + assert(numerator == 0.0 && + "If the denominator is zero, the numerator must also be zero."); + angles.push_back(0.0); + continue; + } + angles.push_back(2.0 * std::asin(std::sqrt(numerator / denominator))); + } + return angles; +} +} // namespace cudaq::details \ No newline at end of file diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h new file mode 100644 index 0000000000..bac6909708 --- /dev/null +++ b/lib/Optimizer/Transforms/StateDecomposer.h @@ -0,0 +1,163 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Runtime.h" +#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" +#include "cudaq/Optimizer/Transforms/Passes.h" +#include "llvm/Support/Debug.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Target/LLVMIR/TypeToLLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/RegionUtils.h" +#include + +#include + +namespace cudaq::details { + + /// @brief Converts angles of a uniformly controlled rotation to angles of + /// non-controlled rotations. + std::vector convertAngles(const std::span alphas); + + /// @brief Return the control indices dictated by the gray code implementation. + /// + /// Here, numBits is the number of controls. + std::vector getControlIndices(std::size_t numBits); + + /// @brief Return angles required to implement a uniformly controlled z-rotation + /// on the `kth` qubit. + std::vector getAlphaZ(const std::span data, + std::size_t numQubits, std::size_t k); + + /// @brief Return angles required to implement a uniformly controlled y-rotation + /// on the `kth` qubit. 
+ std::vector getAlphaY(const std::span data, + std::size_t numQubits, std::size_t k); +} // namespace cudaq::details + +class StateGateBuilder { +public: + StateGateBuilder(mlir::OpBuilder& b, mlir::Location& l, mlir::Value& q): builder(b), loc(l), qubits(q) {} + + template + void applyRotationOp(double theta, std::size_t target) { + auto qubit = createQubitRef(target); + auto thetaValue = createAngleValue(theta); + builder.create(loc, thetaValue, mlir::ValueRange{}, qubit); + }; + + void applyX(std::size_t control, std::size_t target) { + auto qubitC = createQubitRef(control); + auto qubitT = createQubitRef(target); + builder.create(loc, qubitC, qubitT); + }; + +private: + mlir::Value createQubitRef(std::size_t index) { + if (qubitRefs.contains(index)) { + return qubitRefs[index]; + } + + auto indexValue = builder.create(loc, index, builder.getIntegerType(64)); + auto ref = builder.create(loc, qubits, indexValue); + qubitRefs[index] = ref; + return ref; + } + + mlir::Value createAngleValue(double angle) { + return builder.create(loc, llvm::APFloat{angle}, builder.getF64Type()); + } + + mlir::OpBuilder& builder; + mlir::Location& loc; + mlir::Value& qubits; + + std::unordered_map qubitRefs = std::unordered_map(); +}; + +class StateDecomposer { +public: + StateDecomposer(StateGateBuilder& b, std::vector>& a): builder(b), amplitudes(a), numQubits(log2(a.size())) {} + + /// @brief Decompose the input state vector data to a set of controlled + /// operations and rotations. This function takes as input a `OpBuilder` + /// and appends the operations of the decomposition to its internal + /// representation. This implementation follows the algorithm defined in + /// `https://arxiv.org/pdf/quant-ph/0407010.pdf`. + void decompose() { + + // Decompose the state into phases and magnitudes. 
+ bool needsPhaseEqualization = false; + std::vector phases; + std::vector magnitudes; + for (const auto &a : amplitudes) { + phases.push_back(std::arg(a)); + magnitudes.push_back(std::abs(a)); + // FIXME: remove magic number. + needsPhaseEqualization |= std::abs(phases.back()) > 1e-10; + } + + // N.B: The algorithm, as described in the paper, creates a circuit that + // begins with a target state and brings it to the all zero state. Hence, this + // implementation do the two steps described in Section III in reverse order. + + // Apply uniformly controlled y-rotations, the construction in Eq. (4). + for (std::size_t j = 1; j <= numQubits; ++j) { + auto k = numQubits - j + 1; + auto numControls = j - 1; + auto target = j - 1; + auto alphaYk = cudaq::details::getAlphaY(magnitudes, numQubits, k); + applyRotation(alphaYk, numControls, target); + } + + if (!needsPhaseEqualization) + return; + + // Apply uniformly controlled z-rotations, the construction in Eq. (4). + for (std::size_t j = 1; j <= numQubits; ++j) { + auto k = numQubits - j + 1; + auto numControls = j - 1; + auto target = j - 1; + auto alphaZk = cudaq::details::getAlphaZ(phases, numQubits, k); + if (alphaZk.empty()) + continue; + applyRotation(alphaZk, numControls, target); + } + } + +private: + /// @brief Apply a uniformly controlled rotation on the target qubit. 
+ template + void applyRotation(const std::span alphas, std::size_t numControls, std::size_t target) { + auto thetas = cudaq::details::convertAngles(alphas); + if (numControls == 0) { + builder.applyRotationOp(thetas[0], target); + return; + } + + auto controlIndices = cudaq::details::getControlIndices(numControls); + assert(thetas.size() == controlIndices.size()); + for (auto [i, c] : llvm::enumerate(controlIndices)) { + builder.applyRotationOp(thetas[i], target); + builder.applyX(c, target); + } + } + + StateGateBuilder& builder; + std::span> amplitudes; + std::size_t numQubits; +}; diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index ce46efecc0..86bb911a3a 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -18,10 +18,13 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Pass/Pass.h" #include "mlir/Target/LLVMIR/TypeToLLVM.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" +#include +#include "StateDecomposer.h" #include @@ -35,33 +38,44 @@ using namespace mlir; /// func.func @foo(%arg0 : !cc.stdvec>) { /// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 /// %1 = math.cttz %0 : i64 -/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> -/// !cc.ptr> %3 = quake.alloca !quake.veq[%1 : i64] %4 = -/// quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> -/// !quake.veq return +/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> +/// %3 = quake.alloca !quake.veq[%1 : i64] +/// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq +/// return /// } /// -/// on call that passes std::vector vec{M_SQRT1_2, 0., 0., -/// M_SQRT1_2} as arg0 will be updated to: +/// On a call that passes std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2} as arg0: /// /// func.func 
@foo(%arg0 : !cc.stdvec>) { -/// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 -/// %c4_i64 = arith.constant 4 : i64 -/// %3 = math.cttz %c4_i64 : i64 -/// %5 = quake.alloca !quake.veq[%3 : i64] -/// %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref -/// quake.h %6 : (!quake.ref) -> () -/// %7 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref -/// %8 = quake.extract_ref %5[1] : (!quake.veq) -> !quake.ref -/// quake.x [%7] %8 : (!quake.ref, !quake.ref) -> () +/// %0 = quake.alloca !quake.veq<2> +/// %c0_i64 = arith.constant 0 : i64 +/// %1 = quake.extract_ref %0[%c0_i64] : (!quake.veq<2>, i64) -> !quake.ref +/// %cst = arith.constant 1.5707963267948968 : f64 +/// quake.ry (%cst) %1 : (f64, !quake.ref) -> () +/// %c1_i64 = arith.constant 1 : i64 +/// %2 = quake.extract_ref %0[%c1_i64] : (!quake.veq<2>, i64) -> !quake.ref +/// %cst_0 = arith.constant 1.5707963267948966 : f64 +/// quake.ry (%cst_0) %2 : (f64, !quake.ref) -> () +/// quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () +/// %cst_1 = arith.constant -1.5707963267948966 : f64 +/// quake.ry (%cst_1) %2 : (f64, !quake.ref) -> () +/// quake.x [%1] %2 : (!quake.ref, !quake.ref) -> () +/// return /// } /// -/// Note: we rely on the later synthesis and const prop stages to replace +/// Note: the following synthesis and const prop passes will replace /// the argument by a constant and propagate the values and vector size -/// through those and other instructions. +/// through other instructions. namespace { +template +concept IntegralType = std::is_same::value + || std::is_same::value + || std::is_same::value + || std::is_same::value + || std::is_same::value; + template concept FloatingType = std::is_same::value; @@ -69,12 +83,11 @@ template concept DoubleType = std::is_same::value; template -concept ComplexDataType = FloatingType || DoubleType; +concept ComplexDataType = FloatingType || DoubleType || IntegralType; /// Input was complex/complex but we prefer /// complex/complex. 
Make a copy, extending or truncating the /// values. -/// TODO: dont convert if not needed template std::vector> convertToComplex(std::complex *data, std::uint64_t size) { auto convertData = std::vector>(size); @@ -86,7 +99,7 @@ std::vector> convertToComplex(std::complex *data, std template std::vector> convertToComplex(std::complex *data, std::uint64_t size) { - return std::vector>(data, size); + return std::vector>(data, data+size); } /// Input was float/double but we prefer complex/complex. @@ -104,7 +117,7 @@ LogicalResult prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector> &vec) { - // auto *ctx = builder.getContext(); + auto *ctx = builder.getContext(); // builder.setInsertionPointToStart(argument.getOwner()); auto argLoc = argument.getLoc(); @@ -132,30 +145,67 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, /// %8 = quake.extract_ref %5[1] : (!quake.veq) -> !quake.ref /// quake.x [%7] %8 : (!quake.ref, !quake.ref) -> () + auto toErase = std::vector(); + for (auto *argUser : argument.getUsers()) { + // Handle the `StdvecSize` and `quake.alloca` use case: + // - Replace a `vec.size()` with the vector length. + // - Replace the number of qubits calculation with the vector length logarithm. + // - Replace `quake.alloca` with a constant size qvector allocation. 
+ if (auto stdvecSizeOp = dyn_cast(argUser)) { + builder.setInsertionPointAfter(stdvecSizeOp); + Value length = builder.create( + argLoc, vec.size(), stdvecSizeOp.getType()); + + Value numQubits = builder.create( + argLoc, log2(vec.size()), stdvecSizeOp.getType()); + + for (auto *sizeUser: argUser->getUsers()) { + if (auto countZeroesOp = dyn_cast(sizeUser)) { + for (auto *numQubitsUser: sizeUser->getUsers()) { + if (auto quakeAllocaOp = dyn_cast(numQubitsUser)) { + builder.setInsertionPointAfter(quakeAllocaOp); + auto veqTy = quake::VeqType::get(ctx, log2(vec.size())); + Value newAlloc = builder.create(argLoc, veqTy); + quakeAllocaOp.replaceAllUsesWith(newAlloc); + toErase.push_back(quakeAllocaOp); + } + } + countZeroesOp.replaceAllUsesWith(numQubits); + toErase.push_back(countZeroesOp); + } + } + + stdvecSizeOp.replaceAllUsesWith(length); + toErase.push_back(stdvecSizeOp); + continue; + } + + // Handle the `StdvecDataOp` and `quake.init_state` use case: + // - Replace a `quake.init_state` with gates preparing the state. if (auto stdvecDataOp = dyn_cast(argUser)) { for (auto *dataUser : stdvecDataOp->getUsers()) { if (auto initOp = dyn_cast(dataUser)) { builder.setInsertionPointAfter(initOp); // Find the qvector alloc instruction - auto qvector = initOp.getOperand(0); - - // Replace! - auto zero = builder.create( - argLoc, 0, builder.getIntegerType(64)); - auto one = builder.create( - argLoc, 1, builder.getIntegerType(64)); - Value q0 = builder.create(argLoc, qvector, zero); - Value q1 = builder.create(argLoc, qvector, one); - /*auto hval =*/ builder.create(argLoc, q0); - /*auto xval =*/ builder.create(argLoc, q0, q1); - - initOp.replaceAllUsesWith(qvector); + auto qubits = initOp.getOperand(0); + + // Prepare state from vector data. 
+ auto gateBuilder = StateGateBuilder(builder, argLoc, qubits); + auto decomposer = StateDecomposer(gateBuilder, vec); + decomposer.decompose(); + + initOp.replaceAllUsesWith(qubits); + toErase.push_back(initOp); } } } } + for (auto& op: toErase) { + op->erase(); + } + return success(); } @@ -294,20 +344,20 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { }; if (auto ty = dyn_cast(eleTy)) { switch (ty.getIntOrFloatBitWidth()) { - // case 1: - // doVector(false); - // break; - // case 8: - // doVector(std::int8_t{}); - // break; - // case 16: - // doVector(std::int16_t{}); - // break; - // case 32: - // doVector(std::int32_t{}); - // break; - // case 64: - // doVector(std::int64_t{}); + case 1: + doVector(false); + break; + case 8: + doVector(std::int8_t{}); + break; + case 16: + doVector(std::int16_t{}); + break; + case 32: + doVector(std::int32_t{}); + break; + case 64: + doVector(std::int64_t{}); break; default: bufferAppendix += vecLength * cudaq::opt::convertBitsToBytes( @@ -334,10 +384,9 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { doVector(std::complex{}); continue; } - - std::cout << "Module after state preparation " << std::endl; - module.dump(); } + std::cout << "Module after state preparation " << std::endl; + module.dump(); } }; diff --git a/targettests/execution/program.cpp b/targettests/execution/program.cpp index 055084455c..be4855e3de 100644 --- a/targettests/execution/program.cpp +++ b/targettests/execution/program.cpp @@ -15,9 +15,6 @@ __qpu__ void test1(std::vector inState) { cudaq::qvector q1 = inState; - // Should synthesize to - // h(q1[0]); - // cx(q1[0], q1[1]); } // __qpu__ void test2(cudaq::state *inState) { @@ -29,10 +26,6 @@ __qpu__ void test1(std::vector inState) { // auto q3 = cudaq::qvector({M_SQRT1_2, 0., 0., M_SQRT1_2}); // } -// error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:1938: not yet implemented: unknown function, get_state, in cudaq namespace -// __qpu__ void test4() { 
-// cudaq::qvector q(cudaq::get_state(test3)); -// } // error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:392: not yet implemented: argument type conversion // __qpu__ void test5(cudaq::state *inState) { @@ -42,53 +35,9 @@ __qpu__ void test1(std::vector inState) { int main() { - std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; - + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; { // Passing state data as argument (vector) - - // Before synthesis: - - // func.func @__nvqpp__mlirgen__function_test1._Z5test1St6vectorISt7complexIfESaIS1_EE(%arg0: !cc.stdvec>) attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - // %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 - // %1 = math.cttz %0 : i64 - // %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> - // %3 = quake.alloca !quake.veq[%1 : i64] - // %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq - // return - // } - - // After synthesis - - // func.func @__nvqpp__mlirgen__function_test1._Z5test1St6vectorISt7complexIfESaIS1_EE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - // %0 = cc.const_array [0.707106769 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32, 0.707106769 : f32, 0.000000e+00 : f32] : !cc.array x 4> - // %1 = cc.alloca !cc.array x 4> - // cc.store %0, %1 : !cc.ptr x 4>> - // %2 = cc.cast %1 : (!cc.ptr x 4>>) -> !cc.ptr> - // %c4_i64 = arith.constant 4 : i64 - // %3 = math.cttz %c4_i64 : i64 // (TODO: replace by a const) - // %4 = quake.alloca !quake.veq[%3 : i64] - // %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq // TODO: replace by gates - // return - // } - - // TODO: in StatePreparation pass - // input - vector, qubits - // output - MLIR replacing alloca+state_init instructions with gates on qubits - - // %3 = math.cttz %c4_i64 : i64 - // %4 = quake.alloca !quake.veq[%3 : i64] - // %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq - - // => (something 
like) - - // create a function that does the following and call it on qubits - // %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref - // quake.ry (%cst) %6 : (f64, !quake.ref) -> () - // ... - - // TODO: Run state preparation pass before synthesis - std::cout << "test1(vec): " << "\n"; auto counts = cudaq::sample(test1, vec); counts.dump(); @@ -96,37 +45,21 @@ int main() { // { // // Passing state ptr as argument - no support for from_data - - // // "func.func"() ({ - // // ^bb0(%arg0: !cc.ptr): - // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 - // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq - // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq - // // "func.return"() : () -> () - // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () - + // // std::cout << "test2(state): " << "\n"; // auto state = cudaq::state::from_data(vec); - + // // // 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // //auto counts = cudaq::sample(test2, &state); - // //counts.dump(); + // auto counts = cudaq::sample(test2, &state); + // counts.dump(); // } // { // // Passing a state from another kernel as argument - - // // "func.func"() ({ - // // ^bb0(%arg0: !cc.ptr): - // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 - // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq - // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq - // // "func.return"() : () -> () - // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () - + // // std::cout << "test2(test3): " << "\n"; // auto state = cudaq::get_state(test3); - + // // // error: 'func.call' op 
'__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function // auto counts = cudaq::sample(test2, &state); // counts.dump(); @@ -134,34 +67,29 @@ int main() { // { // // Passing a state to another kernel as argument + // // std::cout << "test4(state): " << "\n"; - // //auto state = cudaq::state::from_data(vec); - // //auto counts = cudaq::sample(test4, &state); + // + // auto state = cudaq::state::from_data(vec); + // auto counts = cudaq::sample(test4, &state); // } // { - // // Creating a kernel from state and passing its state to another kernel - - // // "func.func"() ({ - // // ^bb0(%arg0: !cc.ptr): - // // %0 = "func.call"(%arg0) {callee = @__nvqpp_cudaq_state_numberOfQubits} : (!cc.ptr) -> i64 - // // %1 = "quake.alloca"(%0) : (i64) -> !quake.veq - // // %2 = "quake.init_state"(%1, %arg0) : (!quake.veq, !cc.ptr) -> !quake.veq - // // "func.return"() : () -> () - // // }) {"cudaq-entrypoint", "cudaq-kernel", function_type = (!cc.ptr) -> (), no_this, sym_name = "__nvqpp__mlirgen__function_test2._Z5test2PN5cudaq5stateE"} : () -> () - - // std::cout << "test2(kernel): " << "\n"; - // std::vector> vec{.70710678, 0., 0., 0.70710678}; - // auto kernel = cudaq::make_kernel(); - // auto qubits = kernel.qalloc(2); - - // cudaq::from_state(kernel, qubits, vec); - + // // Creating a kernel from state and passing its state to another kernel - is it deprecated? 
+ // + std::cout << "test2(kernel): " << "\n"; + std::vector> vec{.70710678, 0., 0., 0.70710678}; + auto kernel = cudaq::make_kernel(); + auto qubits = kernel.qalloc(2); + + cudaq::from_state(kernel, qubits, vec); + auto counts = cudaq::sample(kernel); + // // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function // //auto state = cudaq::get_state(kernel); // //auto counts = cudaq::sample(test2, &state); - - // //counts.dump(); + // + counts.dump(); // } } \ No newline at end of file From 1cd5cbe8ee8a196aa7bc364b77b03d1060ee2b58 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 25 Jun 2024 15:29:40 -0700 Subject: [PATCH 3/9] Cleanup --- lib/Optimizer/Transforms/CMakeLists.txt | 4 +- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 5 - lib/Optimizer/Transforms/StateDecomposer.h | 69 ++++---- lib/Optimizer/Transforms/StatePreparation.cpp | 108 ++++-------- program.py | 35 ---- .../tests/kernel/test_kernel_qvector_init.py | 162 ++---------------- targettests/execution/from_state.cpp | 30 ---- targettests/execution/program.cpp | 95 ---------- .../execution/state_preparation_vector.cpp | 57 ++++++ 9 files changed, 148 insertions(+), 417 deletions(-) delete mode 100644 program.py delete mode 100644 targettests/execution/from_state.cpp delete mode 100644 targettests/execution/program.cpp create mode 100644 targettests/execution/state_preparation_vector.cpp diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index b0a13571ec..173cec4538 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -39,11 +39,11 @@ add_cudaq_library(OptTransforms ObserveAnsatz.cpp PruneCtrlRelations.cpp QuakeAddMetadata.cpp - StateDecomposer.cpp - StatePreparation.cpp QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp + StateDecomposer.cpp + StatePreparation.cpp PySynthCallableBlockArgs.cpp DEPENDS diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp 
b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 0fa859f175..cc9279c79c 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -23,8 +23,6 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" -#include - #define DEBUG_TYPE "quake-synthesizer" using namespace mlir; @@ -419,9 +417,7 @@ class QuakeSynthesizer } void runOnOperation() override final { - std::cout << "Module before synthesis " << std::endl; auto module = getModule(); - // module.dump(); unsigned counter = 0; if (args == nullptr || kernelName.empty()) { @@ -680,7 +676,6 @@ class QuakeSynthesizer } } funcOp.eraseArguments(argsToErase); - // std::cout << "Module after synthesis " << std::endl; module.dump(); } }; diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h index bac6909708..2d17edb768 100644 --- a/lib/Optimizer/Transforms/StateDecomposer.h +++ b/lib/Optimizer/Transforms/StateDecomposer.h @@ -29,31 +29,32 @@ namespace cudaq::details { - /// @brief Converts angles of a uniformly controlled rotation to angles of - /// non-controlled rotations. - std::vector convertAngles(const std::span alphas); - - /// @brief Return the control indices dictated by the gray code implementation. - /// - /// Here, numBits is the number of controls. - std::vector getControlIndices(std::size_t numBits); - - /// @brief Return angles required to implement a uniformly controlled z-rotation - /// on the `kth` qubit. - std::vector getAlphaZ(const std::span data, - std::size_t numQubits, std::size_t k); - - /// @brief Return angles required to implement a uniformly controlled y-rotation - /// on the `kth` qubit. - std::vector getAlphaY(const std::span data, - std::size_t numQubits, std::size_t k); +/// @brief Converts angles of a uniformly controlled rotation to angles of +/// non-controlled rotations. 
+std::vector convertAngles(const std::span alphas); + +/// @brief Return the control indices dictated by the gray code implementation. +/// +/// Here, numBits is the number of controls. +std::vector getControlIndices(std::size_t numBits); + +/// @brief Return angles required to implement a uniformly controlled z-rotation +/// on the `kth` qubit. +std::vector getAlphaZ(const std::span data, + std::size_t numQubits, std::size_t k); + +/// @brief Return angles required to implement a uniformly controlled y-rotation +/// on the `kth` qubit. +std::vector getAlphaY(const std::span data, + std::size_t numQubits, std::size_t k); } // namespace cudaq::details class StateGateBuilder { public: - StateGateBuilder(mlir::OpBuilder& b, mlir::Location& l, mlir::Value& q): builder(b), loc(l), qubits(q) {} + StateGateBuilder(mlir::OpBuilder &b, mlir::Location &l, mlir::Value &q) + : builder(b), loc(l), qubits(q) {} - template + template void applyRotationOp(double theta, std::size_t target) { auto qubit = createQubitRef(target); auto thetaValue = createAngleValue(theta); @@ -72,26 +73,30 @@ class StateGateBuilder { return qubitRefs[index]; } - auto indexValue = builder.create(loc, index, builder.getIntegerType(64)); + auto indexValue = builder.create( + loc, index, builder.getIntegerType(64)); auto ref = builder.create(loc, qubits, indexValue); qubitRefs[index] = ref; return ref; } mlir::Value createAngleValue(double angle) { - return builder.create(loc, llvm::APFloat{angle}, builder.getF64Type()); + return builder.create( + loc, llvm::APFloat{angle}, builder.getF64Type()); } - mlir::OpBuilder& builder; - mlir::Location& loc; - mlir::Value& qubits; + mlir::OpBuilder &builder; + mlir::Location &loc; + mlir::Value &qubits; - std::unordered_map qubitRefs = std::unordered_map(); + std::unordered_map qubitRefs = + std::unordered_map(); }; class StateDecomposer { public: - StateDecomposer(StateGateBuilder& b, std::vector>& a): builder(b), amplitudes(a), numQubits(log2(a.size())) {} + 
StateDecomposer(StateGateBuilder &b, std::vector> &a) + : builder(b), amplitudes(a), numQubits(log2(a.size())) {} /// @brief Decompose the input state vector data to a set of controlled /// operations and rotations. This function takes as input a `OpBuilder` @@ -112,8 +117,9 @@ class StateDecomposer { } // N.B: The algorithm, as described in the paper, creates a circuit that - // begins with a target state and brings it to the all zero state. Hence, this - // implementation do the two steps described in Section III in reverse order. + // begins with a target state and brings it to the all zero state. Hence, + // this implementation do the two steps described in Section III in reverse + // order. // Apply uniformly controlled y-rotations, the construction in Eq. (4). for (std::size_t j = 1; j <= numQubits; ++j) { @@ -142,7 +148,8 @@ class StateDecomposer { private: /// @brief Apply a uniformly controlled rotation on the target qubit. template - void applyRotation(const std::span alphas, std::size_t numControls, std::size_t target) { + void applyRotation(const std::span alphas, std::size_t numControls, + std::size_t target) { auto thetas = cudaq::details::convertAngles(alphas); if (numControls == 0) { builder.applyRotationOp(thetas[0], target); @@ -157,7 +164,7 @@ class StateDecomposer { } } - StateGateBuilder& builder; + StateGateBuilder &builder; std::span> amplitudes; std::size_t numQubits; }; diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index 86bb911a3a..785e70b3f8 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -7,6 +7,7 @@ ******************************************************************************/ #include "PassDetails.h" +#include "StateDecomposer.h" #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" @@ -24,9 +25,6 @@ #include 
"mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" #include -#include "StateDecomposer.h" - -#include #define DEBUG_TYPE "state-preparation" @@ -38,13 +36,14 @@ using namespace mlir; /// func.func @foo(%arg0 : !cc.stdvec>) { /// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 /// %1 = math.cttz %0 : i64 -/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> -/// %3 = quake.alloca !quake.veq[%1 : i64] -/// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq -/// return +/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> +/// !cc.ptr> %3 = quake.alloca !quake.veq[%1 : i64] %4 = +/// quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> +/// !quake.veq return /// } /// -/// On a call that passes std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2} as arg0: +/// On a call that passes std::vector vec{M_SQRT1_2, 0., 0., +/// M_SQRT1_2} as arg0: /// /// func.func @foo(%arg0 : !cc.stdvec>) { /// %0 = quake.alloca !quake.veq<2> @@ -70,11 +69,11 @@ using namespace mlir; namespace { template -concept IntegralType = std::is_same::value - || std::is_same::value - || std::is_same::value - || std::is_same::value - || std::is_same::value; +concept IntegralType = + std::is_same::value || std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; template concept FloatingType = std::is_same::value; @@ -85,31 +84,33 @@ concept DoubleType = std::is_same::value; template concept ComplexDataType = FloatingType || DoubleType || IntegralType; -/// Input was complex/complex but we prefer -/// complex/complex. Make a copy, extending or truncating the -/// values. +/// Input was complex but we prefer +/// complex. Make a copy, extending the values. 
template -std::vector> convertToComplex(std::complex *data, std::uint64_t size) { +std::vector> convertToComplex(std::complex *data, + std::uint64_t size) { auto convertData = std::vector>(size); for (std::size_t i = 0; i < size; ++i) convertData[i] = std::complex{static_cast(data[i].real()), - static_cast(data[i].imag())}; + static_cast(data[i].imag())}; return convertData; } template -std::vector> convertToComplex(std::complex *data, std::uint64_t size) { - return std::vector>(data, data+size); +std::vector> convertToComplex(std::complex *data, + std::uint64_t size) { + return std::vector>(data, data + size); } -/// Input was float/double but we prefer complex/complex. +/// Input was float/double but we prefer complex. /// Make a copy, extending or truncating the values. template -std::vector> convertToComplex(From *data, std::uint64_t size) { +std::vector> convertToComplex(From *data, + std::uint64_t size) { auto convertData = std::vector>(size); for (std::size_t i = 0; i < size; ++i) - convertData[i] = - std::complex{static_cast(data[i]), static_cast(0.0)}; + convertData[i] = std::complex{static_cast(data[i]), + static_cast(0.0)}; return convertData; } @@ -118,39 +119,15 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector> &vec) { auto *ctx = builder.getContext(); - // builder.setInsertionPointToStart(argument.getOwner()); auto argLoc = argument.getLoc(); - // TODO: look at quake.init_state instructions from vector data and track them - // to the argument vector, then replace the instruction by gates preparing the - // state (or a call to a kernel with gates) - - /// func.func @foo(%arg0 : !cc.stdvec>) { - /// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 - /// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> - /// !cc.ptr> - /// - /// %3 = quake.alloca !quake.veq[%1 : i64] - /// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) - /// -> !quake.veq return - /// } - - /// => - - /// ... 
- /// %5 = quake.alloca !quake.veq[%3 : i64] - /// %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref - /// quake.h %6 : (!quake.ref) -> () - /// %7 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref - /// %8 = quake.extract_ref %5[1] : (!quake.veq) -> !quake.ref - /// quake.x [%7] %8 : (!quake.ref, !quake.ref) -> () - - auto toErase = std::vector(); + auto toErase = std::vector(); for (auto *argUser : argument.getUsers()) { // Handle the `StdvecSize` and `quake.alloca` use case: // - Replace a `vec.size()` with the vector length. - // - Replace the number of qubits calculation with the vector length logarithm. + // - Replace the number of qubits calculation with the vector length + // logarithm. // - Replace `quake.alloca` with a constant size qvector allocation. if (auto stdvecSizeOp = dyn_cast(argUser)) { builder.setInsertionPointAfter(stdvecSizeOp); @@ -160,9 +137,10 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, Value numQubits = builder.create( argLoc, log2(vec.size()), stdvecSizeOp.getType()); - for (auto *sizeUser: argUser->getUsers()) { - if (auto countZeroesOp = dyn_cast(sizeUser)) { - for (auto *numQubitsUser: sizeUser->getUsers()) { + for (auto *sizeUser : argUser->getUsers()) { + if (auto countZeroesOp = + dyn_cast(sizeUser)) { + for (auto *numQubitsUser : sizeUser->getUsers()) { if (auto quakeAllocaOp = dyn_cast(numQubitsUser)) { builder.setInsertionPointAfter(quakeAllocaOp); auto veqTy = quake::VeqType::get(ctx, log2(vec.size())); @@ -175,7 +153,7 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, toErase.push_back(countZeroesOp); } } - + stdvecSizeOp.replaceAllUsesWith(length); toErase.push_back(stdvecSizeOp); continue; @@ -202,7 +180,7 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, } } - for (auto& op: toErase) { + for (auto &op : toErase) { op->erase(); } @@ -249,9 +227,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { } void runOnOperation() override 
final { - std::cout << "Module before state prep " << std::endl; auto module = getModule(); - module.dump(); unsigned counter = 0; if (args == nullptr || kernelName.empty()) { @@ -286,13 +262,12 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { // Get the argument type auto type = argument.getType(); - // auto loc = argument.getLoc(); if (auto ptrTy = dyn_cast(type)) { if (isa(ptrTy.getElementType())) { - std::cout << "State pointer found, TODO: call a kernel that created " - "the state" - << std::endl; + funcOp.emitOpError( + "State preparation from cudaq::state is not supported."); + return; } } @@ -301,9 +276,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { if (auto vecTy = dyn_cast(type)) { auto eleTy = vecTy.getElementType(); if (!isa(eleTy)) { - funcOp.emitOpError("synthesis: unsupported argument type"); - signalPassFailure(); - return; + continue; } char *ptrToSizeInBuffer = static_cast(args) + offset; auto sizeFromBuffer = @@ -328,10 +301,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { char *bufferAppendix = static_cast(args) + structSize; for (auto [idx, eleTy, vecLength] : stdVecInfo) { if (!eleTy) { - // FIXME: Skip struct values. bufferAppendix += vecLength; - funcOp.emitOpError( - "argument to kernel may be a struct and was not synthesized"); continue; } auto doVector = [&](T) { @@ -385,8 +355,6 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { continue; } } - std::cout << "Module after state preparation " << std::endl; - module.dump(); } }; diff --git a/program.py b/program.py deleted file mode 100644 index e282d8cd5d..0000000000 --- a/program.py +++ /dev/null @@ -1,35 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # -# All rights reserved. 
# -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # - -import numpy as np -import cudaq - -import cudaq -import numpy as np - -cudaq.reset_target() - -cudaq.set_target('nvidia') -#cudaq.set_target('nvidia-mqpu') -# cudaq.set_target('density-matrix-cpu') - - -c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], - dtype=np.complex128) -state = cudaq.State.from_data(c) - -@cudaq.kernel(verbose=True) -def kernel(vec: cudaq.State): - q = cudaq.qvector(vec) - -print(kernel) -print(cudaq.to_qir(kernel)) - -#print(cudaq.get_target()) -#counts = cudaq.sample(kernel, state) -#print(counts) \ No newline at end of file diff --git a/python/tests/kernel/test_kernel_qvector_init.py b/python/tests/kernel/test_kernel_qvector_init.py index ddaeb6cc4d..f998a82dd1 100644 --- a/python/tests/kernel/test_kernel_qvector_init.py +++ b/python/tests/kernel/test_kernel_qvector_init.py @@ -20,28 +20,8 @@ # float -@skipIfNvidiaFP64NotInstalled -def test_kernel_float_params_f64(): - +def test_kernel_float_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - f = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[float]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, f) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_float_params_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') f = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] @@ -156,10 +136,8 @@ def kernel(): # complex -@skipIfNvidiaFP64NotInstalled -def test_kernel_complex_params_rotate_f64(): +def test_kernel_complex_params_rotate(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [0. + 0j, 0., 0., 1.] 
@@ -179,50 +157,8 @@ def kernel(vec: list[complex]): assert '10' in counts -@skipIfNvidiaNotInstalled -def test_kernel_complex_params_rotate_f32(): +def test_kernel_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia') - - c = [0. + 0j, 0., 0., 1.] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - x(q.front()) - y(q.back()) - h(q) - mz(q) - - counts = cudaq.sample(kernel, c) - print(f'rotate: {counts}') - assert '11' in counts - assert '00' in counts - assert '01' in counts - assert '10' in counts - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_complex_params_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_complex_params_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -337,10 +273,8 @@ def kernel(): # np arrays -@skipIfNvidiaFP64NotInstalled -def test_kernel_dtype_complex_params_f64(): +def test_kernel_dtype_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -354,10 +288,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_dtype_complex128_params_f64(): +def test_kernel_dtype_complex128_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -425,10 +357,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_amplitudes_complex_params_f64(): +def test_kernel_amplitudes_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = cudaq.amplitudes([1. / np.sqrt(2.), 0., 0., 1. 
/ np.sqrt(2.)]) @@ -442,27 +372,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaNotInstalled -def test_kernel_amplitudes_complex_params_f32(): +def test_kernel_amplitudes_complex_from_capture(): cudaq.reset_target() - cudaq.set_target('nvidia') - - c = cudaq.amplitudes([1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)]) - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_amplitudes_complex_from_capture_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] @@ -476,23 +387,6 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaNotInstalled -def test_kernel_amplitudes_complex_from_capture_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') - - c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(cudaq.amplitudes(vec)) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - @skipIfNvidiaFP64NotInstalled def test_kernel_simulation_dtype_np_array_from_capture_f64(): cudaq.reset_target() @@ -568,40 +462,8 @@ def kernel(): # test errors -@skipIfNvidiaFP64NotInstalled -def test_kernel_error_invalid_array_size_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - @cudaq.kernel - def kernel(): - qubits = cudaq.qvector(np.array([1., 0., 0.], dtype=complex)) - - with pytest.raises(RuntimeError) as e: - counts = cudaq.sample(kernel) - assert 'Invalid input state size for qvector init (not a power of 2)' in repr( - e) - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_error_invalid_list_size_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - @cudaq.kernel - def kernel(): - qubits = cudaq.qvector([1., 0., 0.]) - - with pytest.raises(RuntimeError) as e: - 
counts = cudaq.sample(kernel) - assert 'Invalid input state size for qvector init (not a power of 2)' in repr( - e) - - -@skipIfNvidiaNotInstalled -def test_kernel_error_invalid_array_size_f32(): +def test_kernel_error_invalid_array_size_(): cudaq.reset_target() - cudaq.set_target('nvidia') @cudaq.kernel def kernel(): @@ -613,10 +475,8 @@ def kernel(): e) -@skipIfNvidiaNotInstalled -def test_kernel_error_invalid_list_size_f32(): +def test_kernel_error_invalid_list_size(): cudaq.reset_target() - cudaq.set_target('nvidia') @cudaq.kernel def kernel(): @@ -629,6 +489,7 @@ def kernel(): def test_kernel_qvector_init_from_param_int(): + cudaq.reset_target() @cudaq.kernel def kernel(n: int): @@ -643,6 +504,8 @@ def kernel(n: int): def test_kernel_qvector_init_from_capture_int(): + cudaq.reset_target() + n = 2 @cudaq.kernel @@ -658,6 +521,7 @@ def kernel(): def test_kernel_qvector_init_from_int(): + cudaq.reset_target() @cudaq.kernel def kernel(): diff --git a/targettests/execution/from_state.cpp b/targettests/execution/from_state.cpp deleted file mode 100644 index 55438848cb..0000000000 --- a/targettests/execution/from_state.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. 
* - ******************************************************************************/ - -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s - -#include -#include "cudaq/builder/kernels.h" -#include - -__qpu__ void test(cudaq::state *inState) { - cudaq::qvector q(inState); -} - -// CHECK: size 2 - -int main() { - std::vector> vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; - auto state = cudaq::state::from_data(vec); - auto counts = cudaq::sample(test, &state); - counts.dump(); - - printf("size %zu\n", counts.size()); - return !(counts.size() == 2); -} diff --git a/targettests/execution/program.cpp b/targettests/execution/program.cpp deleted file mode 100644 index be4855e3de..0000000000 --- a/targettests/execution/program.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. 
* - ******************************************************************************/ - -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s - -#include -#include "cudaq/builder/kernels.h" -#include - -__qpu__ void test1(std::vector inState) { - cudaq::qvector q1 = inState; -} - -// __qpu__ void test2(cudaq::state *inState) { -// cudaq::qvector q2(inState); -// cudaq::x(q2); -// } - -// __qpu__ void test3() { -// auto q3 = cudaq::qvector({M_SQRT1_2, 0., 0., M_SQRT1_2}); -// } - - -// error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:392: not yet implemented: argument type conversion -// __qpu__ void test5(cudaq::state *inState) { -// test2(inState); -// } - - - -int main() { - std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - { - // Passing state data as argument (vector) - std::cout << "test1(vec): " << "\n"; - auto counts = cudaq::sample(test1, vec); - counts.dump(); - } - - // { - // // Passing state ptr as argument - no support for from_data - // - // std::cout << "test2(state): " << "\n"; - // auto state = cudaq::state::from_data(vec); - // - // // 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // auto counts = cudaq::sample(test2, &state); - // counts.dump(); - // } - - // { - // // Passing a state from another kernel as argument - // - // std::cout << "test2(test3): " << "\n"; - // auto state = cudaq::get_state(test3); - // - // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // auto counts = cudaq::sample(test2, &state); - // counts.dump(); - // } - - // { - // // Passing a state to another kernel as argument - // - // std::cout << "test4(state): " << "\n"; - // - // auto state = cudaq::state::from_data(vec); - // auto counts = cudaq::sample(test4, &state); - // } - - // { - // // Creating a kernel from state and passing its state to another kernel - is it deprecated? 
- // - std::cout << "test2(kernel): " << "\n"; - std::vector> vec{.70710678, 0., 0., 0.70710678}; - auto kernel = cudaq::make_kernel(); - auto qubits = kernel.qalloc(2); - - cudaq::from_state(kernel, qubits, vec); - auto counts = cudaq::sample(kernel); - // - // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // //auto state = cudaq::get_state(kernel); - // //auto counts = cudaq::sample(test2, &state); - // - counts.dump(); - // } - -} \ No newline at end of file diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp new file mode 100644 index 0000000000..dbe9b15d86 --- /dev/null +++ b/targettests/execution/state_preparation_vector.cpp @@ -0,0 +1,57 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s + +#include +#include "cudaq/builder/kernels.h" +#include + +__qpu__ void test(std::vector inState) { + cudaq::qvector q1 = inState; +} + +__qpu__ void test2() { + cudaq::qvector q1({M_SQRT1_2, M_SQRT1_2, 0., 0.}); +} + +void printCounts(cudaq::sample_result& result) { + for (auto &&[bits, counts] : result) { + std::cout << bits << '\n'; + } +} + +int main() { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test, vec); + printCounts(counts); + } + + { + // Using state data inside kernel (kernel mode) - not implemented yet. 
+ // auto counts = cudaq::sample(test2); + // printCounts(counts); + } + + { + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); + + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); + } +} + +// CHECK: 01 +// CHECK: 00 + +// CHECK: 01 +// CHECK: 00 \ No newline at end of file From 0a04d33ce4c7b734348784df2d14d3958827a592 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 25 Jun 2024 15:29:40 -0700 Subject: [PATCH 4/9] Cleanup --- lib/Optimizer/Transforms/CMakeLists.txt | 4 +- lib/Optimizer/Transforms/QuakeSynthesizer.cpp | 6 - lib/Optimizer/Transforms/StateDecomposer.h | 69 ++++---- lib/Optimizer/Transforms/StatePreparation.cpp | 108 ++++-------- program.py | 35 ---- .../tests/kernel/test_kernel_qvector_init.py | 162 ++---------------- targettests/execution/from_state.cpp | 30 ---- targettests/execution/program.cpp | 95 ---------- .../execution/state_preparation_vector.cpp | 57 ++++++ 9 files changed, 148 insertions(+), 418 deletions(-) delete mode 100644 program.py delete mode 100644 targettests/execution/from_state.cpp delete mode 100644 targettests/execution/program.cpp create mode 100644 targettests/execution/state_preparation_vector.cpp diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index b0a13571ec..173cec4538 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -39,11 +39,11 @@ add_cudaq_library(OptTransforms ObserveAnsatz.cpp PruneCtrlRelations.cpp QuakeAddMetadata.cpp - StateDecomposer.cpp - StatePreparation.cpp QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp + StateDecomposer.cpp + StatePreparation.cpp PySynthCallableBlockArgs.cpp DEPENDS diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 0fa859f175..7d83c152dd 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ 
b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -23,8 +23,6 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" -#include - #define DEBUG_TYPE "quake-synthesizer" using namespace mlir; @@ -419,9 +417,7 @@ class QuakeSynthesizer } void runOnOperation() override final { - std::cout << "Module before synthesis " << std::endl; auto module = getModule(); - // module.dump(); unsigned counter = 0; if (args == nullptr || kernelName.empty()) { @@ -680,8 +676,6 @@ class QuakeSynthesizer } } funcOp.eraseArguments(argsToErase); - // std::cout << "Module after synthesis " << std::endl; - module.dump(); } }; diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h index bac6909708..2d17edb768 100644 --- a/lib/Optimizer/Transforms/StateDecomposer.h +++ b/lib/Optimizer/Transforms/StateDecomposer.h @@ -29,31 +29,32 @@ namespace cudaq::details { - /// @brief Converts angles of a uniformly controlled rotation to angles of - /// non-controlled rotations. - std::vector convertAngles(const std::span alphas); - - /// @brief Return the control indices dictated by the gray code implementation. - /// - /// Here, numBits is the number of controls. - std::vector getControlIndices(std::size_t numBits); - - /// @brief Return angles required to implement a uniformly controlled z-rotation - /// on the `kth` qubit. - std::vector getAlphaZ(const std::span data, - std::size_t numQubits, std::size_t k); - - /// @brief Return angles required to implement a uniformly controlled y-rotation - /// on the `kth` qubit. - std::vector getAlphaY(const std::span data, - std::size_t numQubits, std::size_t k); +/// @brief Converts angles of a uniformly controlled rotation to angles of +/// non-controlled rotations. +std::vector convertAngles(const std::span alphas); + +/// @brief Return the control indices dictated by the gray code implementation. +/// +/// Here, numBits is the number of controls. 
+std::vector getControlIndices(std::size_t numBits); + +/// @brief Return angles required to implement a uniformly controlled z-rotation +/// on the `kth` qubit. +std::vector getAlphaZ(const std::span data, + std::size_t numQubits, std::size_t k); + +/// @brief Return angles required to implement a uniformly controlled y-rotation +/// on the `kth` qubit. +std::vector getAlphaY(const std::span data, + std::size_t numQubits, std::size_t k); } // namespace cudaq::details class StateGateBuilder { public: - StateGateBuilder(mlir::OpBuilder& b, mlir::Location& l, mlir::Value& q): builder(b), loc(l), qubits(q) {} + StateGateBuilder(mlir::OpBuilder &b, mlir::Location &l, mlir::Value &q) + : builder(b), loc(l), qubits(q) {} - template + template void applyRotationOp(double theta, std::size_t target) { auto qubit = createQubitRef(target); auto thetaValue = createAngleValue(theta); @@ -72,26 +73,30 @@ class StateGateBuilder { return qubitRefs[index]; } - auto indexValue = builder.create(loc, index, builder.getIntegerType(64)); + auto indexValue = builder.create( + loc, index, builder.getIntegerType(64)); auto ref = builder.create(loc, qubits, indexValue); qubitRefs[index] = ref; return ref; } mlir::Value createAngleValue(double angle) { - return builder.create(loc, llvm::APFloat{angle}, builder.getF64Type()); + return builder.create( + loc, llvm::APFloat{angle}, builder.getF64Type()); } - mlir::OpBuilder& builder; - mlir::Location& loc; - mlir::Value& qubits; + mlir::OpBuilder &builder; + mlir::Location &loc; + mlir::Value &qubits; - std::unordered_map qubitRefs = std::unordered_map(); + std::unordered_map qubitRefs = + std::unordered_map(); }; class StateDecomposer { public: - StateDecomposer(StateGateBuilder& b, std::vector>& a): builder(b), amplitudes(a), numQubits(log2(a.size())) {} + StateDecomposer(StateGateBuilder &b, std::vector> &a) + : builder(b), amplitudes(a), numQubits(log2(a.size())) {} /// @brief Decompose the input state vector data to a set of controlled /// 
operations and rotations. This function takes as input a `OpBuilder` @@ -112,8 +117,9 @@ class StateDecomposer { } // N.B: The algorithm, as described in the paper, creates a circuit that - // begins with a target state and brings it to the all zero state. Hence, this - // implementation do the two steps described in Section III in reverse order. + // begins with a target state and brings it to the all zero state. Hence, + // this implementation do the two steps described in Section III in reverse + // order. // Apply uniformly controlled y-rotations, the construction in Eq. (4). for (std::size_t j = 1; j <= numQubits; ++j) { @@ -142,7 +148,8 @@ class StateDecomposer { private: /// @brief Apply a uniformly controlled rotation on the target qubit. template - void applyRotation(const std::span alphas, std::size_t numControls, std::size_t target) { + void applyRotation(const std::span alphas, std::size_t numControls, + std::size_t target) { auto thetas = cudaq::details::convertAngles(alphas); if (numControls == 0) { builder.applyRotationOp(thetas[0], target); @@ -157,7 +164,7 @@ class StateDecomposer { } } - StateGateBuilder& builder; + StateGateBuilder &builder; std::span> amplitudes; std::size_t numQubits; }; diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index 86bb911a3a..785e70b3f8 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -7,6 +7,7 @@ ******************************************************************************/ #include "PassDetails.h" +#include "StateDecomposer.h" #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" @@ -24,9 +25,6 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" #include -#include "StateDecomposer.h" - -#include #define DEBUG_TYPE "state-preparation" @@ -38,13 +36,14 @@ using 
namespace mlir; /// func.func @foo(%arg0 : !cc.stdvec>) { /// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 /// %1 = math.cttz %0 : i64 -/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> !cc.ptr> -/// %3 = quake.alloca !quake.veq[%1 : i64] -/// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> !quake.veq -/// return +/// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> +/// !cc.ptr> %3 = quake.alloca !quake.veq[%1 : i64] %4 = +/// quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) -> +/// !quake.veq return /// } /// -/// On a call that passes std::vector vec{M_SQRT1_2, 0., 0., M_SQRT1_2} as arg0: +/// On a call that passes std::vector vec{M_SQRT1_2, 0., 0., +/// M_SQRT1_2} as arg0: /// /// func.func @foo(%arg0 : !cc.stdvec>) { /// %0 = quake.alloca !quake.veq<2> @@ -70,11 +69,11 @@ using namespace mlir; namespace { template -concept IntegralType = std::is_same::value - || std::is_same::value - || std::is_same::value - || std::is_same::value - || std::is_same::value; +concept IntegralType = + std::is_same::value || std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; template concept FloatingType = std::is_same::value; @@ -85,31 +84,33 @@ concept DoubleType = std::is_same::value; template concept ComplexDataType = FloatingType || DoubleType || IntegralType; -/// Input was complex/complex but we prefer -/// complex/complex. Make a copy, extending or truncating the -/// values. +/// Input was complex but we prefer +/// complex. Make a copy, extending the values. 
template -std::vector> convertToComplex(std::complex *data, std::uint64_t size) { +std::vector> convertToComplex(std::complex *data, + std::uint64_t size) { auto convertData = std::vector>(size); for (std::size_t i = 0; i < size; ++i) convertData[i] = std::complex{static_cast(data[i].real()), - static_cast(data[i].imag())}; + static_cast(data[i].imag())}; return convertData; } template -std::vector> convertToComplex(std::complex *data, std::uint64_t size) { - return std::vector>(data, data+size); +std::vector> convertToComplex(std::complex *data, + std::uint64_t size) { + return std::vector>(data, data + size); } -/// Input was float/double but we prefer complex/complex. +/// Input was float/double but we prefer complex. /// Make a copy, extending or truncating the values. template -std::vector> convertToComplex(From *data, std::uint64_t size) { +std::vector> convertToComplex(From *data, + std::uint64_t size) { auto convertData = std::vector>(size); for (std::size_t i = 0; i < size; ++i) - convertData[i] = - std::complex{static_cast(data[i]), static_cast(0.0)}; + convertData[i] = std::complex{static_cast(data[i]), + static_cast(0.0)}; return convertData; } @@ -118,39 +119,15 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, BlockArgument argument, std::vector> &vec) { auto *ctx = builder.getContext(); - // builder.setInsertionPointToStart(argument.getOwner()); auto argLoc = argument.getLoc(); - // TODO: look at quake.init_state instructions from vector data and track them - // to the argument vector, then replace the instruction by gates preparing the - // state (or a call to a kernel with gates) - - /// func.func @foo(%arg0 : !cc.stdvec>) { - /// %0 = cc.stdvec_size %arg0 : (!cc.stdvec>) -> i64 - /// %2 = cc.stdvec_data %arg0 : (!cc.stdvec>) -> - /// !cc.ptr> - /// - /// %3 = quake.alloca !quake.veq[%1 : i64] - /// %4 = quake.init_state %3, %2 : (!quake.veq, !cc.ptr>) - /// -> !quake.veq return - /// } - - /// => - - /// ... 
- /// %5 = quake.alloca !quake.veq[%3 : i64] - /// %6 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref - /// quake.h %6 : (!quake.ref) -> () - /// %7 = quake.extract_ref %5[0] : (!quake.veq) -> !quake.ref - /// %8 = quake.extract_ref %5[1] : (!quake.veq) -> !quake.ref - /// quake.x [%7] %8 : (!quake.ref, !quake.ref) -> () - - auto toErase = std::vector(); + auto toErase = std::vector(); for (auto *argUser : argument.getUsers()) { // Handle the `StdvecSize` and `quake.alloca` use case: // - Replace a `vec.size()` with the vector length. - // - Replace the number of qubits calculation with the vector length logarithm. + // - Replace the number of qubits calculation with the vector length + // logarithm. // - Replace `quake.alloca` with a constant size qvector allocation. if (auto stdvecSizeOp = dyn_cast(argUser)) { builder.setInsertionPointAfter(stdvecSizeOp); @@ -160,9 +137,10 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, Value numQubits = builder.create( argLoc, log2(vec.size()), stdvecSizeOp.getType()); - for (auto *sizeUser: argUser->getUsers()) { - if (auto countZeroesOp = dyn_cast(sizeUser)) { - for (auto *numQubitsUser: sizeUser->getUsers()) { + for (auto *sizeUser : argUser->getUsers()) { + if (auto countZeroesOp = + dyn_cast(sizeUser)) { + for (auto *numQubitsUser : sizeUser->getUsers()) { if (auto quakeAllocaOp = dyn_cast(numQubitsUser)) { builder.setInsertionPointAfter(quakeAllocaOp); auto veqTy = quake::VeqType::get(ctx, log2(vec.size())); @@ -175,7 +153,7 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, toErase.push_back(countZeroesOp); } } - + stdvecSizeOp.replaceAllUsesWith(length); toErase.push_back(stdvecSizeOp); continue; @@ -202,7 +180,7 @@ prepareStateFromVectorArgument(OpBuilder &builder, ModuleOp module, } } - for (auto& op: toErase) { + for (auto &op : toErase) { op->erase(); } @@ -249,9 +227,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { } void runOnOperation() override 
final { - std::cout << "Module before state prep " << std::endl; auto module = getModule(); - module.dump(); unsigned counter = 0; if (args == nullptr || kernelName.empty()) { @@ -286,13 +262,12 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { // Get the argument type auto type = argument.getType(); - // auto loc = argument.getLoc(); if (auto ptrTy = dyn_cast(type)) { if (isa(ptrTy.getElementType())) { - std::cout << "State pointer found, TODO: call a kernel that created " - "the state" - << std::endl; + funcOp.emitOpError( + "State preparation from cudaq::state is not supported."); + return; } } @@ -301,9 +276,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { if (auto vecTy = dyn_cast(type)) { auto eleTy = vecTy.getElementType(); if (!isa(eleTy)) { - funcOp.emitOpError("synthesis: unsupported argument type"); - signalPassFailure(); - return; + continue; } char *ptrToSizeInBuffer = static_cast(args) + offset; auto sizeFromBuffer = @@ -328,10 +301,7 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { char *bufferAppendix = static_cast(args) + structSize; for (auto [idx, eleTy, vecLength] : stdVecInfo) { if (!eleTy) { - // FIXME: Skip struct values. bufferAppendix += vecLength; - funcOp.emitOpError( - "argument to kernel may be a struct and was not synthesized"); continue; } auto doVector = [&](T) { @@ -385,8 +355,6 @@ class StatePreparation : public cudaq::opt::PrepareStateBase { continue; } } - std::cout << "Module after state preparation " << std::endl; - module.dump(); } }; diff --git a/program.py b/program.py deleted file mode 100644 index e282d8cd5d..0000000000 --- a/program.py +++ /dev/null @@ -1,35 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # -# All rights reserved. 
# -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # - -import numpy as np -import cudaq - -import cudaq -import numpy as np - -cudaq.reset_target() - -cudaq.set_target('nvidia') -#cudaq.set_target('nvidia-mqpu') -# cudaq.set_target('density-matrix-cpu') - - -c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], - dtype=np.complex128) -state = cudaq.State.from_data(c) - -@cudaq.kernel(verbose=True) -def kernel(vec: cudaq.State): - q = cudaq.qvector(vec) - -print(kernel) -print(cudaq.to_qir(kernel)) - -#print(cudaq.get_target()) -#counts = cudaq.sample(kernel, state) -#print(counts) \ No newline at end of file diff --git a/python/tests/kernel/test_kernel_qvector_init.py b/python/tests/kernel/test_kernel_qvector_init.py index ddaeb6cc4d..f998a82dd1 100644 --- a/python/tests/kernel/test_kernel_qvector_init.py +++ b/python/tests/kernel/test_kernel_qvector_init.py @@ -20,28 +20,8 @@ # float -@skipIfNvidiaFP64NotInstalled -def test_kernel_float_params_f64(): - +def test_kernel_float_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - f = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[float]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, f) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_float_params_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') f = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] @@ -156,10 +136,8 @@ def kernel(): # complex -@skipIfNvidiaFP64NotInstalled -def test_kernel_complex_params_rotate_f64(): +def test_kernel_complex_params_rotate(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [0. + 0j, 0., 0., 1.] 
@@ -179,50 +157,8 @@ def kernel(vec: list[complex]): assert '10' in counts -@skipIfNvidiaNotInstalled -def test_kernel_complex_params_rotate_f32(): +def test_kernel_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia') - - c = [0. + 0j, 0., 0., 1.] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - x(q.front()) - y(q.back()) - h(q) - mz(q) - - counts = cudaq.sample(kernel, c) - print(f'rotate: {counts}') - assert '11' in counts - assert '00' in counts - assert '01' in counts - assert '10' in counts - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_complex_params_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaNotInstalled -def test_kernel_complex_params_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -337,10 +273,8 @@ def kernel(): # np arrays -@skipIfNvidiaFP64NotInstalled -def test_kernel_dtype_complex_params_f64(): +def test_kernel_dtype_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -354,10 +288,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_dtype_complex128_params_f64(): +def test_kernel_dtype_complex128_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] @@ -425,10 +357,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaFP64NotInstalled -def test_kernel_amplitudes_complex_params_f64(): +def test_kernel_amplitudes_complex_params(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = cudaq.amplitudes([1. / np.sqrt(2.), 0., 0., 1. 
/ np.sqrt(2.)]) @@ -442,27 +372,8 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaNotInstalled -def test_kernel_amplitudes_complex_params_f32(): +def test_kernel_amplitudes_complex_from_capture(): cudaq.reset_target() - cudaq.set_target('nvidia') - - c = cudaq.amplitudes([1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)]) - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(vec) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_amplitudes_complex_from_capture_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') c = [1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)] @@ -476,23 +387,6 @@ def kernel(vec: list[complex]): assert '00' in counts -@skipIfNvidiaNotInstalled -def test_kernel_amplitudes_complex_from_capture_f32(): - cudaq.reset_target() - cudaq.set_target('nvidia') - - c = [1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)] - - @cudaq.kernel - def kernel(vec: list[complex]): - q = cudaq.qvector(cudaq.amplitudes(vec)) - - counts = cudaq.sample(kernel, c) - print(counts) - assert '11' in counts - assert '00' in counts - - @skipIfNvidiaFP64NotInstalled def test_kernel_simulation_dtype_np_array_from_capture_f64(): cudaq.reset_target() @@ -568,40 +462,8 @@ def kernel(): # test errors -@skipIfNvidiaFP64NotInstalled -def test_kernel_error_invalid_array_size_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - @cudaq.kernel - def kernel(): - qubits = cudaq.qvector(np.array([1., 0., 0.], dtype=complex)) - - with pytest.raises(RuntimeError) as e: - counts = cudaq.sample(kernel) - assert 'Invalid input state size for qvector init (not a power of 2)' in repr( - e) - - -@skipIfNvidiaFP64NotInstalled -def test_kernel_error_invalid_list_size_f64(): - cudaq.reset_target() - cudaq.set_target('nvidia-fp64') - - @cudaq.kernel - def kernel(): - qubits = cudaq.qvector([1., 0., 0.]) - - with pytest.raises(RuntimeError) as e: - 
counts = cudaq.sample(kernel) - assert 'Invalid input state size for qvector init (not a power of 2)' in repr( - e) - - -@skipIfNvidiaNotInstalled -def test_kernel_error_invalid_array_size_f32(): +def test_kernel_error_invalid_array_size_(): cudaq.reset_target() - cudaq.set_target('nvidia') @cudaq.kernel def kernel(): @@ -613,10 +475,8 @@ def kernel(): e) -@skipIfNvidiaNotInstalled -def test_kernel_error_invalid_list_size_f32(): +def test_kernel_error_invalid_list_size(): cudaq.reset_target() - cudaq.set_target('nvidia') @cudaq.kernel def kernel(): @@ -629,6 +489,7 @@ def kernel(): def test_kernel_qvector_init_from_param_int(): + cudaq.reset_target() @cudaq.kernel def kernel(n: int): @@ -643,6 +504,8 @@ def kernel(n: int): def test_kernel_qvector_init_from_capture_int(): + cudaq.reset_target() + n = 2 @cudaq.kernel @@ -658,6 +521,7 @@ def kernel(): def test_kernel_qvector_init_from_int(): + cudaq.reset_target() @cudaq.kernel def kernel(): diff --git a/targettests/execution/from_state.cpp b/targettests/execution/from_state.cpp deleted file mode 100644 index 55438848cb..0000000000 --- a/targettests/execution/from_state.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. 
* - ******************************************************************************/ - -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s - -#include -#include "cudaq/builder/kernels.h" -#include - -__qpu__ void test(cudaq::state *inState) { - cudaq::qvector q(inState); -} - -// CHECK: size 2 - -int main() { - std::vector> vec{M_SQRT1_2, 0., 0., M_SQRT1_2}; - auto state = cudaq::state::from_data(vec); - auto counts = cudaq::sample(test, &state); - counts.dump(); - - printf("size %zu\n", counts.size()); - return !(counts.size() == 2); -} diff --git a/targettests/execution/program.cpp b/targettests/execution/program.cpp deleted file mode 100644 index be4855e3de..0000000000 --- a/targettests/execution/program.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. 
* - ******************************************************************************/ - -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s - -#include -#include "cudaq/builder/kernels.h" -#include - -__qpu__ void test1(std::vector inState) { - cudaq::qvector q1 = inState; -} - -// __qpu__ void test2(cudaq::state *inState) { -// cudaq::qvector q2(inState); -// cudaq::x(q2); -// } - -// __qpu__ void test3() { -// auto q3 = cudaq::qvector({M_SQRT1_2, 0., 0., M_SQRT1_2}); -// } - - -// error: /workspaces/cuda-quantum/lib/Frontend/nvqpp/ConvertExpr.cpp:392: not yet implemented: argument type conversion -// __qpu__ void test5(cudaq::state *inState) { -// test2(inState); -// } - - - -int main() { - std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; - { - // Passing state data as argument (vector) - std::cout << "test1(vec): " << "\n"; - auto counts = cudaq::sample(test1, vec); - counts.dump(); - } - - // { - // // Passing state ptr as argument - no support for from_data - // - // std::cout << "test2(state): " << "\n"; - // auto state = cudaq::state::from_data(vec); - // - // // 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // auto counts = cudaq::sample(test2, &state); - // counts.dump(); - // } - - // { - // // Passing a state from another kernel as argument - // - // std::cout << "test2(test3): " << "\n"; - // auto state = cudaq::get_state(test3); - // - // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // auto counts = cudaq::sample(test2, &state); - // counts.dump(); - // } - - // { - // // Passing a state to another kernel as argument - // - // std::cout << "test4(state): " << "\n"; - // - // auto state = cudaq::state::from_data(vec); - // auto counts = cudaq::sample(test4, &state); - // } - - // { - // // Creating a kernel from state and passing its state to another kernel - is it deprecated? 
- // - std::cout << "test2(kernel): " << "\n"; - std::vector> vec{.70710678, 0., 0., 0.70710678}; - auto kernel = cudaq::make_kernel(); - auto qubits = kernel.qalloc(2); - - cudaq::from_state(kernel, qubits, vec); - auto counts = cudaq::sample(kernel); - // - // // error: 'func.call' op '__nvqpp_cudaq_state_numberOfQubits' does not reference a valid function - // //auto state = cudaq::get_state(kernel); - // //auto counts = cudaq::sample(test2, &state); - // - counts.dump(); - // } - -} \ No newline at end of file diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp new file mode 100644 index 0000000000..dbe9b15d86 --- /dev/null +++ b/targettests/execution/state_preparation_vector.cpp @@ -0,0 +1,57 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s + +#include +#include "cudaq/builder/kernels.h" +#include + +__qpu__ void test(std::vector inState) { + cudaq::qvector q1 = inState; +} + +__qpu__ void test2() { + cudaq::qvector q1({M_SQRT1_2, M_SQRT1_2, 0., 0.}); +} + +void printCounts(cudaq::sample_result& result) { + for (auto &&[bits, counts] : result) { + std::cout << bits << '\n'; + } +} + +int main() { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + { + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test, vec); + printCounts(counts); + } + + { + // Using state data inside kernel (kernel mode) - not implemented yet. 
+ // auto counts = cudaq::sample(test2); + // printCounts(counts); + } + + { + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); + + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); + } +} + +// CHECK: 01 +// CHECK: 00 + +// CHECK: 01 +// CHECK: 00 \ No newline at end of file From 3660e278407719c7aa7ba82f93f08261dc936635 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 26 Jun 2024 09:51:42 -0700 Subject: [PATCH 5/9] Updated test --- .../execution/state_preparation_vector.cpp | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index dbe9b15d86..d415072ce7 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -16,10 +16,6 @@ __qpu__ void test(std::vector inState) { cudaq::qvector q1 = inState; } -__qpu__ void test2() { - cudaq::qvector q1({M_SQRT1_2, M_SQRT1_2, 0., 0.}); -} - void printCounts(cudaq::sample_result& result) { for (auto &&[bits, counts] : result) { std::cout << bits << '\n'; @@ -28,20 +24,18 @@ void printCounts(cudaq::sample_result& result) { int main() { std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; + std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; { // Passing state data as argument (kernel mode) auto counts = cudaq::sample(test, vec); printCounts(counts); - } - - { - // Using state data inside kernel (kernel mode) - not implemented yet. 
- // auto counts = cudaq::sample(test2); - // printCounts(counts); + + counts = cudaq::sample(test, vec1); + printCounts(counts); } { - // Passing state data as argument (builder mode) + // Passing state data as argument (builder mode) auto [kernel, v] = cudaq::make_kernel>(); auto qubits = kernel.qalloc(v); @@ -53,5 +47,8 @@ int main() { // CHECK: 01 // CHECK: 00 +// CHECK: 10 +// CHECK: 10 + // CHECK: 01 // CHECK: 00 \ No newline at end of file From 8cbc1f6905babbfe1e123840d9d1b6e1a00747fa Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 26 Jun 2024 12:37:52 -0700 Subject: [PATCH 6/9] Fix test failures --- .../tests/kernel/test_kernel_qvector_init.py | 21 +++++++++++++++++-- .../execution/state_preparation_vector.cpp | 16 ++++++++------ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/python/tests/kernel/test_kernel_qvector_init.py b/python/tests/kernel/test_kernel_qvector_init.py index f998a82dd1..6f2fd07152 100644 --- a/python/tests/kernel/test_kernel_qvector_init.py +++ b/python/tests/kernel/test_kernel_qvector_init.py @@ -5,11 +5,18 @@ # This source code and the accompanying materials are made available under # # the terms of the Apache License 2.0 which accompanies this distribution. 
# # ============================================================================ # + +import os, sys import pytest import cudaq import numpy as np +## [PYTHON_VERSION_FIX] +skipIfPythonLessThan39 = pytest.mark.skipif( + sys.version_info < (3, 9), + reason="built-in collection types such as `list` not supported") + skipIfNvidiaFP64NotInstalled = pytest.mark.skipif( not (cudaq.num_available_gpus() > 0 and cudaq.has_target('nvidia-fp64')), reason='Could not find nvidia-fp64 in installation') @@ -18,8 +25,10 @@ not (cudaq.num_available_gpus() > 0 and cudaq.has_target('nvidia')), reason='Could not find nvidia in installation') - # float + + +@skipIfPythonLessThan39 def test_kernel_float_params(): cudaq.reset_target() @@ -136,6 +145,7 @@ def kernel(): # complex +@skipIfPythonLessThan39 def test_kernel_complex_params_rotate(): cudaq.reset_target() @@ -157,6 +167,7 @@ def kernel(vec: list[complex]): assert '10' in counts +@skipIfPythonLessThan39 def test_kernel_complex_params(): cudaq.reset_target() @@ -273,6 +284,7 @@ def kernel(): # np arrays +@skipIfPythonLessThan39 def test_kernel_dtype_complex_params(): cudaq.reset_target() @@ -288,6 +300,7 @@ def kernel(vec: list[complex]): assert '00' in counts +@skipIfPythonLessThan39 def test_kernel_dtype_complex128_params(): cudaq.reset_target() @@ -357,6 +370,7 @@ def kernel(vec: list[complex]): assert '00' in counts +@skipIfPythonLessThan39 def test_kernel_amplitudes_complex_params(): cudaq.reset_target() @@ -372,6 +386,7 @@ def kernel(vec: list[complex]): assert '00' in counts +@skipIfPythonLessThan39 def test_kernel_amplitudes_complex_from_capture(): cudaq.reset_target() @@ -462,7 +477,8 @@ def kernel(): # test errors -def test_kernel_error_invalid_array_size_(): +@skipIfPythonLessThan39 +def test_kernel_error_invalid_array_size(): cudaq.reset_target() @cudaq.kernel @@ -475,6 +491,7 @@ def kernel(): e) +@skipIfPythonLessThan39 def test_kernel_error_invalid_list_size(): cudaq.reset_target() diff --git 
a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index d415072ce7..ef4ea69b92 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -17,7 +17,13 @@ __qpu__ void test(std::vector inState) { } void printCounts(cudaq::sample_result& result) { + std::vector values{}; for (auto &&[bits, counts] : result) { + values.push_back(bits); + } + + std::sort(values.begin(), values.end()); + for (auto &&bits : values) { std::cout << bits << '\n'; } } @@ -44,11 +50,9 @@ int main() { } } -// CHECK: 01 // CHECK: 00 - -// CHECK: 10 -// CHECK: 10 - // CHECK: 01 -// CHECK: 00 \ No newline at end of file +// CHECK: 10 +// CHECK: 11 +// CHECK: 00 +// CHECK: 01 \ No newline at end of file From 6d4433d5cf40835dfb42c67f180062b8aac7d601 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 27 Jun 2024 09:46:13 -0700 Subject: [PATCH 7/9] Revert the order of qubits in state prep --- lib/Optimizer/Transforms/StateDecomposer.h | 13 ++++++++--- .../tests/backends/test_Quantinuum_kernel.py | 15 +++++++++++++ .../tests/kernel/test_kernel_qvector_init.py | 22 +++++++++++++++++++ .../execution/state_preparation_vector.cpp | 4 ++-- 4 files changed, 49 insertions(+), 5 deletions(-) diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h index 2d17edb768..b433089258 100644 --- a/lib/Optimizer/Transforms/StateDecomposer.h +++ b/lib/Optimizer/Transforms/StateDecomposer.h @@ -150,17 +150,24 @@ class StateDecomposer { template void applyRotation(const std::span alphas, std::size_t numControls, std::size_t target) { + + // In our model the index 1 (i.e. |01>) in quantum state data + // corresponds to qubits[0]=1 and qubits[1] = 0. + // Revert the order of qubits as the state preparation algorithm + // we use assumes the opposite. 
+ auto qubitIndex = [&](std::size_t i) { return numQubits - i - 1; }; + auto thetas = cudaq::details::convertAngles(alphas); if (numControls == 0) { - builder.applyRotationOp(thetas[0], target); + builder.applyRotationOp(thetas[0], qubitIndex(target)); return; } auto controlIndices = cudaq::details::getControlIndices(numControls); assert(thetas.size() == controlIndices.size()); for (auto [i, c] : llvm::enumerate(controlIndices)) { - builder.applyRotationOp(thetas[i], target); - builder.applyX(c, target); + builder.applyRotationOp(thetas[i], qubitIndex(target)); + builder.applyX(qubitIndex(c), qubitIndex(target)); } } diff --git a/python/tests/backends/test_Quantinuum_kernel.py b/python/tests/backends/test_Quantinuum_kernel.py index de072335bf..b0ca043060 100644 --- a/python/tests/backends/test_Quantinuum_kernel.py +++ b/python/tests/backends/test_Quantinuum_kernel.py @@ -7,6 +7,7 @@ # ============================================================================ # import cudaq, pytest, os, time +import numpy as np from cudaq import spin from multiprocessing import Process try: @@ -169,6 +170,20 @@ def kernel(): result = cudaq.sample(kernel) +def test_quantinuum_state_preparation(): + + @cudaq.kernel + def kernel(vec: list[complex]): + qubits = cudaq.qvector(vec) + + state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] 
+ counts = cudaq.sample(kernel, state) + assert '11' in counts + assert '10' in counts + assert not '01' in counts + assert not '11' in counts + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/kernel/test_kernel_qvector_init.py b/python/tests/kernel/test_kernel_qvector_init.py index 6f2fd07152..28260dcb4d 100644 --- a/python/tests/kernel/test_kernel_qvector_init.py +++ b/python/tests/kernel/test_kernel_qvector_init.py @@ -25,6 +25,28 @@ not (cudaq.num_available_gpus() > 0 and cudaq.has_target('nvidia')), reason='Could not find nvidia in installation') +# state preparation and synthesis + + +@skipIfPythonLessThan39 +def test_kernel_state_preparation(): + cudaq.reset_target() + + c = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] + + @cudaq.kernel + def kernel(vec: list[complex]): + q = cudaq.qvector(vec) + + synthesized = cudaq.synthesize(kernel, c) + assert 'quake.init_state' in kernel.__str__() + assert not 'quake.init_state' in synthesized.__str__() + + counts = cudaq.sample(synthesized) + assert '00' in counts + assert '10' in counts + + # float diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index ef4ea69b92..fccf6d872c 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -51,8 +51,8 @@ int main() { } // CHECK: 00 -// CHECK: 01 // CHECK: 10 +// CHECK: 01 // CHECK: 11 // CHECK: 00 -// CHECK: 01 \ No newline at end of file +// CHECK: 10 From 46f247728cf2ca22cda3bbf417007c63db1a1bed Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 27 Jun 2024 10:20:09 -0700 Subject: [PATCH 8/9] Fixed failing tests --- python/tests/backends/test_Quantinuum_kernel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/backends/test_Quantinuum_kernel.py b/python/tests/backends/test_Quantinuum_kernel.py index b0ca043060..fc11224f5e 100644 --- 
a/python/tests/backends/test_Quantinuum_kernel.py +++ b/python/tests/backends/test_Quantinuum_kernel.py @@ -178,7 +178,7 @@ def kernel(vec: list[complex]): state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] counts = cudaq.sample(kernel, state) - assert '11' in counts + assert '00' in counts assert '10' in counts assert not '01' in counts assert not '11' in counts From fb0994f8cb2c8459d715a27c208cef19c58542cb Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 27 Jun 2024 11:27:27 -0700 Subject: [PATCH 9/9] Fix test faiure --- lib/Optimizer/Transforms/StateDecomposer.h | 2 +- targettests/execution/state_preparation_vector.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Optimizer/Transforms/StateDecomposer.h b/lib/Optimizer/Transforms/StateDecomposer.h index b433089258..a698ac83c2 100644 --- a/lib/Optimizer/Transforms/StateDecomposer.h +++ b/lib/Optimizer/Transforms/StateDecomposer.h @@ -95,7 +95,7 @@ class StateGateBuilder { class StateDecomposer { public: - StateDecomposer(StateGateBuilder &b, std::vector> &a) + StateDecomposer(StateGateBuilder &b, std::span> a) : builder(b), amplitudes(a), numQubits(log2(a.size())) {} /// @brief Decompose the input state vector data to a set of controlled diff --git a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index fccf6d872c..35a628c06a 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -9,7 +9,6 @@ // RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s #include -#include "cudaq/builder/kernels.h" #include __qpu__ void test(std::vector inState) {