Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
sacpis committed Jul 19, 2024
2 parents 0cb40c3 + d8a69a1 commit 6251f98
Show file tree
Hide file tree
Showing 20 changed files with 325 additions and 190 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/integration_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ jobs:
create_function_result=$(ngc-cli/ngc cloud-function function create \
--container-image nvcr.io/${{ env.NGC_QUANTUM_ORG }}/${{ env.NGC_QUANTUM_TEAM }}/cuda-quantum:nightly \
--container-environment-variable NUM_GPUS:1 \
--container-environment-variable NVQC_REST_PAYLOAD_VERSION:1 \
--container-environment-variable NVQC_REST_PAYLOAD_VERSION:1.1 \
--api-body-format CUSTOM \
--inference-port 3030 \
--health-uri / \
Expand Down Expand Up @@ -541,8 +541,7 @@ jobs:
# unsupport_args and compile_errors are compile error tests
# pauli_word: https://github.com/NVIDIA/cuda-quantum/issues/1957
# nested_vectors: related to vector of pauli_words (https://github.com/NVIDIA/cuda-quantum/issues/1957)
# vqe_h2: "Unknown gep/load configuration for synthesis", only on NVQC.
if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"vqe_h2"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]]; then
if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]]; then
echo "$filename"
nvqc_config=""
# Look for a --remote-mqpu-auto-launch to determine the number of QPUs
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/nvqc_regression_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,7 @@ jobs:
# unsupport_args and compile_errors are compile error tests
# pauli_word: https://github.com/NVIDIA/cuda-quantum/issues/1957
# nested_vectors: related to vector of pauli_words (https://github.com/NVIDIA/cuda-quantum/issues/1957)
# vqe_h2: "Unknown gep/load configuration for synthesis", only on NVQC.
if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"state_overlap"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"vqe_h2"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]]; then
if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"state_overlap"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]]; then
echo "$filename"
nvqc_config=""
# Look for a --remote-mqpu-auto-launch to determine the number of QPUs
Expand Down
3 changes: 3 additions & 0 deletions docs/sphinx/api/languages/cpp_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@ Platform
.. doxygenclass:: cudaq::quantum_platform
:members:

.. doxygenstruct:: cudaq::RemoteCapabilities
:members:

.. doxygenclass:: cudaq::SerializedCodeExecutionContext

.. doxygentypedef:: cudaq::QuantumTask
Expand Down
7 changes: 5 additions & 2 deletions lib/Optimizer/Transforms/QuakeSynthesizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,8 +573,11 @@ class QuakeSynthesizer
auto sizeFromBuffer =
*reinterpret_cast<const std::uint64_t *>(ptrToSizeInBuffer);
auto bytesInType = [&eleTy]() -> unsigned {
if (isa<cudaq::cc::CharspanType>(eleTy))
return 16 /*bytes: sizeof(ptr) + sizeof(i64)*/;
if (isa<cudaq::cc::CharspanType>(eleTy)) {
/* A charspan is a struct{ ptr, i64 }, which is just an i64 in
* pointer-free encoding. */
return sizeof(std::int64_t);
}
if (auto complexTy = dyn_cast<ComplexType>(eleTy))
return 2 * cudaq::opt::convertBitsToBytes(
complexTy.getElementType().getIntOrFloatBitWidth());
Expand Down
6 changes: 5 additions & 1 deletion python/cudaq/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@
# the terms of the Apache License 2.0 which accompanies this distribution. #
# ============================================================================ #

import sys, os, numpy, platform
import sys, os, numpy, platform, multiprocessing
from ._packages import *

# Set the multiprocessing start method to 'forkserver' if not already set
if multiprocessing.get_start_method(allow_none=True) is None:
multiprocessing.set_start_method('forkserver')

# CUDAQ_DYNLIBS must be set before any other imports that would initialize
# LinkedLibraryHolder.
if not "CUDAQ_DYNLIBS" in os.environ:
Expand Down
2 changes: 1 addition & 1 deletion python/runtime/cudaq/algorithms/py_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ py::class_<OptimizerT> addPyOptimizer(py::module &mod, std::string &&name) {
"optimize",
[](OptimizerT &opt, const int dim, py::function &func) {
auto &platform = cudaq::get_platform();
if (platform.supports_remote_serialized_code() &&
if (platform.get_remote_capabilities().serializedCodeExec &&
platform.num_qpus() == 1) {
std::string optimizer_var_name =
cudaq::get_var_name_for_handle(py::cast(&opt));
Expand Down
16 changes: 8 additions & 8 deletions python/runtime/cudaq/algorithms/py_vqe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,14 +276,14 @@ optimization_result pyVQE(py::object &kernel, spin_op &hamiltonian,
cudaq::optimizer &optimizer, const int n_params,
const int shots = -1) {
auto &platform = cudaq::get_platform();
if (platform.supports_remote_vqe()) {
if (platform.get_remote_capabilities().vqe) {
if (firstArgIsCompatibleWithRemoteVQE(kernel))
return pyVQE_remote_cpp(platform, kernel, hamiltonian, optimizer,
/*gradient=*/nullptr, /*argumentMapper=*/nullptr,
n_params, shots);
throwPerformanceError();
}
if (platform.supports_remote_serialized_code())
if (platform.get_remote_capabilities().serializedCodeExec)
return pyVQE_remote(platform, kernel, hamiltonian, optimizer,
/*gradient=*/nullptr, /*argumentMapper=*/nullptr,
n_params, shots);
Expand All @@ -302,14 +302,14 @@ optimization_result pyVQE(py::object &kernel, spin_op &hamiltonian,
cudaq::optimizer &optimizer, const int n_params,
py::function &argumentMapper, const int shots = -1) {
auto &platform = cudaq::get_platform();
if (platform.supports_remote_vqe()) {
if (platform.get_remote_capabilities().vqe) {
if (firstArgIsCompatibleWithRemoteVQE(kernel))
return pyVQE_remote_cpp(platform, kernel, hamiltonian, optimizer,
/*gradient=*/nullptr, &argumentMapper, n_params,
shots);
throwPerformanceError();
}
if (platform.supports_remote_serialized_code())
if (platform.get_remote_capabilities().serializedCodeExec)
return pyVQE_remote(platform, kernel, hamiltonian, optimizer,
/*gradient=*/nullptr, &argumentMapper, n_params, shots);
return optimizer.optimize(n_params, [&](const std::vector<double> &x,
Expand All @@ -335,14 +335,14 @@ optimization_result pyVQE(py::object &kernel, cudaq::gradient &gradient,
// to allow for the calculation of the gradient vector with the
// provided gradient strategy.
auto &platform = cudaq::get_platform();
if (platform.supports_remote_vqe()) {
if (platform.get_remote_capabilities().vqe) {
if (firstArgIsCompatibleWithRemoteVQE(kernel))
return pyVQE_remote_cpp(platform, kernel, hamiltonian, optimizer,
&gradient,
/*argumentMapper=*/nullptr, n_params, shots);
throwPerformanceError();
}
if (platform.supports_remote_serialized_code())
if (platform.get_remote_capabilities().serializedCodeExec)
return pyVQE_remote(platform, kernel, hamiltonian, optimizer, &gradient,
/*argumentMapper=*/nullptr, n_params, shots);
std::function<double(std::vector<double>)> get_expected_value =
Expand Down Expand Up @@ -374,13 +374,13 @@ optimization_result pyVQE(py::object &kernel, cudaq::gradient &gradient,
// to allow for the calculation of the gradient vector with the
// provided gradient strategy.
auto &platform = cudaq::get_platform();
if (platform.supports_remote_vqe()) {
if (platform.get_remote_capabilities().vqe) {
if (firstArgIsCompatibleWithRemoteVQE(kernel))
return pyVQE_remote_cpp(platform, kernel, hamiltonian, optimizer,
&gradient, &argumentMapper, n_params, shots);
throwPerformanceError();
}
if (platform.supports_remote_serialized_code())
if (platform.get_remote_capabilities().serializedCodeExec)
return pyVQE_remote(platform, kernel, hamiltonian, optimizer, &gradient,
&argumentMapper, n_params, shots);
std::function<double(std::vector<double>)> get_expected_value =
Expand Down
151 changes: 82 additions & 69 deletions python/runtime/utils/PyRemoteSimulatorQPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,66 @@ using namespace mlir;

namespace {

// This is a helper function to help reduce duplicated code across
// PyRemoteSimulatorQPU and PyNvcfSimulatorQPU.
// Forwards a VQE optimization request to the remote runtime client: it
// unpacks the ArgWrapper to recover the kernel's MLIR module/context, then
// asks the client to drive the optimizer loop server-side.
static void _launchVQE(cudaq::ExecutionContext *executionContextPtr,
                       std::unique_ptr<cudaq::RemoteRuntimeClient> &m_client,
                       const std::string &m_simName, const std::string &name,
                       const void *kernelArgs, cudaq::gradient *gradient,
                       cudaq::spin_op &H, cudaq::optimizer &optimizer,
                       const int n_params, const std::size_t shots) {
  // kernelArgs is an ArgWrapper produced by the launcher; it carries the
  // kernel's MLIR module plus the raw serialized arguments.
  auto *wrapper = reinterpret_cast<const cudaq::ArgWrapper *>(kernelArgs);
  auto m_module = wrapper->mod;
  auto *mlirContext = m_module->getContext();

  // Nothing to execute when we are only tracing resources.
  if (executionContextPtr && executionContextPtr->name == "tracer")
    return;

  // NOTE(review): `ctx` is configured below but never handed to
  // sendRequest — the request uses *executionContextPtr instead. Confirm
  // whether this local "observe" context is vestigial or should be
  // forwarded.
  auto ctx = std::make_unique<cudaq::ExecutionContext>("observe", shots);
  ctx->kernelName = name;
  ctx->spin = &H;
  if (shots > 0)
    ctx->shots = shots;

  std::string errorMsg;
  // NOTE(review): *executionContextPtr is dereferenced unconditionally
  // here, while the tracer check above tolerates null — a null context on
  // a VQE launch would be UB. Verify callers always set a context first.
  const bool requestOkay = m_client->sendRequest(
      *mlirContext, *executionContextPtr, /*serializedCodeContext=*/nullptr,
      gradient, &optimizer, n_params, m_simName, name, /*kernelFunc=*/nullptr,
      wrapper->rawArgs, /*argSize=*/0, &errorMsg);
  if (!requestOkay)
    throw std::runtime_error("Failed to launch VQE. Error: " + errorMsg);
}

// This is a helper function to help reduce duplicated code across
// PyRemoteSimulatorQPU and PyNvcfSimulatorQPU.
// Sends a single kernel-launch request to the remote runtime client,
// synthesizing a one-shot "sample" context when the caller set none.
static void _launchKernel(cudaq::ExecutionContext *executionContextPtr,
                          std::unique_ptr<cudaq::RemoteRuntimeClient> &m_client,
                          const std::string &m_simName, const std::string &name,
                          void (*kernelFunc)(void *), void *args,
                          std::uint64_t voidStarSize,
                          std::uint64_t resultOffset) {
  // args is an ArgWrapper carrying the kernel's MLIR module and the raw
  // serialized arguments. (The wrapper's callableNames field is not used
  // by a remote launch, so it is intentionally not read here.)
  auto *wrapper = reinterpret_cast<cudaq::ArgWrapper *>(args);
  auto m_module = wrapper->mod;
  auto *mlirContext = m_module->getContext();

  // Default context for a 'fire-and-ignore' kernel launch; i.e., no context
  // was set before launching the kernel. Use a static variable per thread to
  // set up a single-shot execution context for this case.
  static thread_local cudaq::ExecutionContext defaultContext("sample",
                                                             /*shots=*/1);
  cudaq::ExecutionContext &executionContext =
      executionContextPtr ? *executionContextPtr : defaultContext;

  std::string errorMsg;
  const bool requestOkay = m_client->sendRequest(
      *mlirContext, executionContext, /*serializedCodeContext=*/nullptr,
      /*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0,
      m_simName, name, kernelFunc, wrapper->rawArgs, voidStarSize, &errorMsg);
  if (!requestOkay)
    throw std::runtime_error("Failed to launch kernel. Error: " + errorMsg);
}

// Remote QPU: delegating the execution to a remotely-hosted server, which can
// reinstate the execution context and JIT-invoke the kernel.
class PyRemoteSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU {
Expand All @@ -26,29 +86,12 @@ class PyRemoteSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU {
cudaq::gradient *gradient, cudaq::spin_op H,
cudaq::optimizer &optimizer, const int n_params,
const std::size_t shots) override {
cudaq::ExecutionContext *executionContextPtr =
getExecutionContextForMyThread();

auto *wrapper = reinterpret_cast<const cudaq::ArgWrapper *>(kernelArgs);
auto m_module = wrapper->mod;
auto *mlirContext = m_module->getContext();

if (executionContextPtr && executionContextPtr->name == "tracer")
return;

auto ctx = std::make_unique<cudaq::ExecutionContext>("observe", shots);
ctx->kernelName = name;
ctx->spin = &H;
if (shots > 0)
ctx->shots = shots;

std::string errorMsg;
const bool requestOkay = m_client->sendRequest(
*mlirContext, *executionContextPtr, /*serializedCodeContext=*/nullptr,
gradient, &optimizer, n_params, m_simName, name, /*kernelFunc=*/nullptr,
wrapper->rawArgs, /*argSize=*/0, &errorMsg);
if (!requestOkay)
throw std::runtime_error("Failed to launch VQE. Error: " + errorMsg);
cudaq::info(
"PyRemoteSimulatorQPU: Launch VQE kernel named '{}' remote QPU {} "
"(simulator = {})",
name, qpu_id, m_simName);
::_launchVQE(getExecutionContextForMyThread(), m_client, m_simName, name,
kernelArgs, gradient, H, optimizer, n_params, shots);
}

void launchKernel(const std::string &name, void (*kernelFunc)(void *),
Expand All @@ -57,29 +100,8 @@ class PyRemoteSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU {
cudaq::info("PyRemoteSimulatorQPU: Launch kernel named '{}' remote QPU {} "
"(simulator = {})",
name, qpu_id, m_simName);
auto *wrapper = reinterpret_cast<cudaq::ArgWrapper *>(args);
auto m_module = wrapper->mod;
auto callableNames = wrapper->callableNames;

auto *mlirContext = m_module->getContext();

cudaq::ExecutionContext *executionContextPtr =
getExecutionContextForMyThread();

// Default context for a 'fire-and-ignore' kernel launch; i.e., no context
// was set before launching the kernel. Use a static variable per thread to
// set up a single-shot execution context for this case.
static thread_local cudaq::ExecutionContext defaultContext("sample",
/*shots=*/1);
cudaq::ExecutionContext &executionContext =
executionContextPtr ? *executionContextPtr : defaultContext;
std::string errorMsg;
const bool requestOkay = m_client->sendRequest(
*mlirContext, executionContext, /*serializedCodeContext=*/nullptr,
/*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0,
m_simName, name, kernelFunc, wrapper->rawArgs, voidStarSize, &errorMsg);
if (!requestOkay)
throw std::runtime_error("Failed to launch kernel. Error: " + errorMsg);
::_launchKernel(getExecutionContextForMyThread(), m_client, m_simName, name,
kernelFunc, args, voidStarSize, resultOffset);
}

PyRemoteSimulatorQPU(PyRemoteSimulatorQPU &&) = delete;
Expand All @@ -96,35 +118,26 @@ class PyNvcfSimulatorQPU : public cudaq::BaseNvcfSimulatorQPU {

virtual bool isEmulated() override { return true; }

  // Launch a VQE optimization on the NVCF-hosted remote QPU: log the
  // target, then delegate the request to the file-local _launchVQE helper.
  void launchVQE(const std::string &name, const void *kernelArgs,
                 cudaq::gradient *gradient, cudaq::spin_op H,
                 cudaq::optimizer &optimizer, const int n_params,
                 const std::size_t shots) override {
    cudaq::info(
        "PyNvcfSimulatorQPU: Launch VQE kernel named '{}' remote QPU {} "
        "(simulator = {})",
        name, qpu_id, m_simName);
    // _launchVQE takes the spin_op by reference; H is a by-value parameter
    // here, so it remains alive for the duration of the call.
    ::_launchVQE(getExecutionContextForMyThread(), m_client, m_simName, name,
                 kernelArgs, gradient, H, optimizer, n_params, shots);
  }

void launchKernel(const std::string &name, void (*kernelFunc)(void *),
void *args, std::uint64_t voidStarSize,
std::uint64_t resultOffset) override {
cudaq::info("PyNvcfSimulatorQPU: Launch kernel named '{}' remote QPU {} "
"(simulator = {})",
name, qpu_id, m_simName);
auto *wrapper = reinterpret_cast<cudaq::ArgWrapper *>(args);
auto m_module = wrapper->mod;
auto callableNames = wrapper->callableNames;

auto *mlirContext = m_module->getContext();

cudaq::ExecutionContext *executionContextPtr =
getExecutionContextForMyThread();

// Default context for a 'fire-and-ignore' kernel launch; i.e., no context
// was set before launching the kernel. Use a static variable per thread to
// set up a single-shot execution context for this case.
static thread_local cudaq::ExecutionContext defaultContext("sample",
/*shots=*/1);
cudaq::ExecutionContext &executionContext =
executionContextPtr ? *executionContextPtr : defaultContext;
std::string errorMsg;
const bool requestOkay = m_client->sendRequest(
*mlirContext, executionContext, /*serializedCodeContext=*/nullptr,
/*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0,
m_simName, name, kernelFunc, wrapper->rawArgs, voidStarSize, &errorMsg);
if (!requestOkay)
throw std::runtime_error("Failed to launch kernel. Error: " + errorMsg);
::_launchKernel(getExecutionContextForMyThread(), m_client, m_simName, name,
kernelFunc, args, voidStarSize, resultOffset);
}

PyNvcfSimulatorQPU(PyNvcfSimulatorQPU &&) = delete;
Expand Down
Loading

0 comments on commit 6251f98

Please sign in to comment.