Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
sacpis committed Jul 19, 2024
2 parents 0cb40c3 + d8a69a1 commit 6251f98
Show file tree
Hide file tree
Showing 20 changed files with 325 additions and 190 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/integration_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ jobs:
create_function_result=$(ngc-cli/ngc cloud-function function create \
--container-image nvcr.io/${{ env.NGC_QUANTUM_ORG }}/${{ env.NGC_QUANTUM_TEAM }}/cuda-quantum:nightly \
--container-environment-variable NUM_GPUS:1 \
--container-environment-variable NVQC_REST_PAYLOAD_VERSION:1 \
--container-environment-variable NVQC_REST_PAYLOAD_VERSION:1.1 \
--api-body-format CUSTOM \
--inference-port 3030 \
--health-uri / \
Expand Down Expand Up @@ -541,8 +541,7 @@ jobs:
# unsupport_args and compile_errors are compile error tests
# pauli_word: https://github.com/NVIDIA/cuda-quantum/issues/1957
# nested_vectors: related to vector of pauli_words (https://github.com/NVIDIA/cuda-quantum/issues/1957)
# vqe_h2: "Unknown gep/load configuration for synthesis", only on NVQC.
if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"vqe_h2"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]]; then
if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]]; then
echo "$filename"
nvqc_config=""
# Look for a --remote-mqpu-auto-launch to determine the number of QPUs
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/nvqc_regression_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,7 @@ jobs:
# unsupport_args and compile_errors are compile error tests
# pauli_word: https://github.com/NVIDIA/cuda-quantum/issues/1957
# nested_vectors: related to vector of pauli_words (https://github.com/NVIDIA/cuda-quantum/issues/1957)
# vqe_h2: "Unknown gep/load configuration for synthesis", only on NVQC.
if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"state_overlap"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"vqe_h2"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]]; then
if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"state_overlap"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]]; then
echo "$filename"
nvqc_config=""
# Look for a --remote-mqpu-auto-launch to determine the number of QPUs
Expand Down
3 changes: 3 additions & 0 deletions docs/sphinx/api/languages/cpp_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@ Platform
.. doxygenclass:: cudaq::quantum_platform
:members:

.. doxygenstruct:: cudaq::RemoteCapabilities
:members:

.. doxygenclass:: cudaq::SerializedCodeExecutionContext

.. doxygentypedef:: cudaq::QuantumTask
Expand Down
7 changes: 5 additions & 2 deletions lib/Optimizer/Transforms/QuakeSynthesizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,8 +573,11 @@ class QuakeSynthesizer
auto sizeFromBuffer =
*reinterpret_cast<const std::uint64_t *>(ptrToSizeInBuffer);
auto bytesInType = [&eleTy]() -> unsigned {
if (isa<cudaq::cc::CharspanType>(eleTy))
return 16 /*bytes: sizeof(ptr) + sizeof(i64)*/;
if (isa<cudaq::cc::CharspanType>(eleTy)) {
/* A charspan is a struct{ ptr, i64 }, which is just an i64 in
* pointer-free encoding. */
return sizeof(std::int64_t);
}
if (auto complexTy = dyn_cast<ComplexType>(eleTy))
return 2 * cudaq::opt::convertBitsToBytes(
complexTy.getElementType().getIntOrFloatBitWidth());
Expand Down
6 changes: 5 additions & 1 deletion python/cudaq/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@
# the terms of the Apache License 2.0 which accompanies this distribution. #
# ============================================================================ #

import sys, os, numpy, platform
import sys, os, numpy, platform, multiprocessing
from ._packages import *

# Set the multiprocessing start method to 'forkserver' if not already set
if multiprocessing.get_start_method(allow_none=True) is None:
multiprocessing.set_start_method('forkserver')

# CUDAQ_DYNLIBS must be set before any other imports that would initialize
# LinkedLibraryHolder.
if not "CUDAQ_DYNLIBS" in os.environ:
Expand Down
2 changes: 1 addition & 1 deletion python/runtime/cudaq/algorithms/py_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ py::class_<OptimizerT> addPyOptimizer(py::module &mod, std::string &&name) {
"optimize",
[](OptimizerT &opt, const int dim, py::function &func) {
auto &platform = cudaq::get_platform();
if (platform.supports_remote_serialized_code() &&
if (platform.get_remote_capabilities().serializedCodeExec &&
platform.num_qpus() == 1) {
std::string optimizer_var_name =
cudaq::get_var_name_for_handle(py::cast(&opt));
Expand Down
16 changes: 8 additions & 8 deletions python/runtime/cudaq/algorithms/py_vqe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,14 +276,14 @@ optimization_result pyVQE(py::object &kernel, spin_op &hamiltonian,
cudaq::optimizer &optimizer, const int n_params,
const int shots = -1) {
auto &platform = cudaq::get_platform();
if (platform.supports_remote_vqe()) {
if (platform.get_remote_capabilities().vqe) {
if (firstArgIsCompatibleWithRemoteVQE(kernel))
return pyVQE_remote_cpp(platform, kernel, hamiltonian, optimizer,
/*gradient=*/nullptr, /*argumentMapper=*/nullptr,
n_params, shots);
throwPerformanceError();
}
if (platform.supports_remote_serialized_code())
if (platform.get_remote_capabilities().serializedCodeExec)
return pyVQE_remote(platform, kernel, hamiltonian, optimizer,
/*gradient=*/nullptr, /*argumentMapper=*/nullptr,
n_params, shots);
Expand All @@ -302,14 +302,14 @@ optimization_result pyVQE(py::object &kernel, spin_op &hamiltonian,
cudaq::optimizer &optimizer, const int n_params,
py::function &argumentMapper, const int shots = -1) {
auto &platform = cudaq::get_platform();
if (platform.supports_remote_vqe()) {
if (platform.get_remote_capabilities().vqe) {
if (firstArgIsCompatibleWithRemoteVQE(kernel))
return pyVQE_remote_cpp(platform, kernel, hamiltonian, optimizer,
/*gradient=*/nullptr, &argumentMapper, n_params,
shots);
throwPerformanceError();
}
if (platform.supports_remote_serialized_code())
if (platform.get_remote_capabilities().serializedCodeExec)
return pyVQE_remote(platform, kernel, hamiltonian, optimizer,
/*gradient=*/nullptr, &argumentMapper, n_params, shots);
return optimizer.optimize(n_params, [&](const std::vector<double> &x,
Expand All @@ -335,14 +335,14 @@ optimization_result pyVQE(py::object &kernel, cudaq::gradient &gradient,
// to allow for the calculation of the gradient vector with the
// provided gradient strategy.
auto &platform = cudaq::get_platform();
if (platform.supports_remote_vqe()) {
if (platform.get_remote_capabilities().vqe) {
if (firstArgIsCompatibleWithRemoteVQE(kernel))
return pyVQE_remote_cpp(platform, kernel, hamiltonian, optimizer,
&gradient,
/*argumentMapper=*/nullptr, n_params, shots);
throwPerformanceError();
}
if (platform.supports_remote_serialized_code())
if (platform.get_remote_capabilities().serializedCodeExec)
return pyVQE_remote(platform, kernel, hamiltonian, optimizer, &gradient,
/*argumentMapper=*/nullptr, n_params, shots);
std::function<double(std::vector<double>)> get_expected_value =
Expand Down Expand Up @@ -374,13 +374,13 @@ optimization_result pyVQE(py::object &kernel, cudaq::gradient &gradient,
// to allow for the calculation of the gradient vector with the
// provided gradient strategy.
auto &platform = cudaq::get_platform();
if (platform.supports_remote_vqe()) {
if (platform.get_remote_capabilities().vqe) {
if (firstArgIsCompatibleWithRemoteVQE(kernel))
return pyVQE_remote_cpp(platform, kernel, hamiltonian, optimizer,
&gradient, &argumentMapper, n_params, shots);
throwPerformanceError();
}
if (platform.supports_remote_serialized_code())
if (platform.get_remote_capabilities().serializedCodeExec)
return pyVQE_remote(platform, kernel, hamiltonian, optimizer, &gradient,
&argumentMapper, n_params, shots);
std::function<double(std::vector<double>)> get_expected_value =
Expand Down
151 changes: 82 additions & 69 deletions python/runtime/utils/PyRemoteSimulatorQPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,66 @@ using namespace mlir;

namespace {

// This is a helper function to help reduce duplicated code across
// PyRemoteSimulatorQPU and PyNvcfSimulatorQPU.
// Forwards a VQE optimization request to the remote runtime client: it
// unpacks the ArgWrapper to recover the kernel's MLIR module/context, then
// asks the client to drive the optimizer loop server-side.
static void _launchVQE(cudaq::ExecutionContext *executionContextPtr,
                       std::unique_ptr<cudaq::RemoteRuntimeClient> &m_client,
                       const std::string &m_simName, const std::string &name,
                       const void *kernelArgs, cudaq::gradient *gradient,
                       cudaq::spin_op &H, cudaq::optimizer &optimizer,
                       const int n_params, const std::size_t shots) {
  // kernelArgs is an ArgWrapper produced by the launcher; it carries the
  // kernel's MLIR module plus the raw serialized arguments.
  auto *wrapper = reinterpret_cast<const cudaq::ArgWrapper *>(kernelArgs);
  auto m_module = wrapper->mod;
  auto *mlirContext = m_module->getContext();

  // Nothing to execute when we are only tracing resources.
  if (executionContextPtr && executionContextPtr->name == "tracer")
    return;

  // NOTE(review): `ctx` is configured below but never handed to
  // sendRequest — the request uses *executionContextPtr instead. Confirm
  // whether this local "observe" context is vestigial or should be
  // forwarded.
  auto ctx = std::make_unique<cudaq::ExecutionContext>("observe", shots);
  ctx->kernelName = name;
  ctx->spin = &H;
  if (shots > 0)
    ctx->shots = shots;

  std::string errorMsg;
  // NOTE(review): *executionContextPtr is dereferenced unconditionally
  // here, while the tracer check above tolerates null — a null context on
  // a VQE launch would be UB. Verify callers always set a context first.
  const bool requestOkay = m_client->sendRequest(
      *mlirContext, *executionContextPtr, /*serializedCodeContext=*/nullptr,
      gradient, &optimizer, n_params, m_simName, name, /*kernelFunc=*/nullptr,
      wrapper->rawArgs, /*argSize=*/0, &errorMsg);
  if (!requestOkay)
    throw std::runtime_error("Failed to launch VQE. Error: " + errorMsg);
}

// This is a helper function to help reduce duplicated code across
// PyRemoteSimulatorQPU and PyNvcfSimulatorQPU.
// Sends a single kernel-launch request to the remote runtime client,
// synthesizing a one-shot "sample" context when the caller set none.
static void _launchKernel(cudaq::ExecutionContext *executionContextPtr,
                          std::unique_ptr<cudaq::RemoteRuntimeClient> &m_client,
                          const std::string &m_simName, const std::string &name,
                          void (*kernelFunc)(void *), void *args,
                          std::uint64_t voidStarSize,
                          std::uint64_t resultOffset) {
  // args is an ArgWrapper carrying the kernel's MLIR module and the raw
  // serialized arguments. (The wrapper's callableNames field is not used
  // by a remote launch, so it is intentionally not read here.)
  auto *wrapper = reinterpret_cast<cudaq::ArgWrapper *>(args);
  auto m_module = wrapper->mod;
  auto *mlirContext = m_module->getContext();

  // Default context for a 'fire-and-ignore' kernel launch; i.e., no context
  // was set before launching the kernel. Use a static variable per thread to
  // set up a single-shot execution context for this case.
  static thread_local cudaq::ExecutionContext defaultContext("sample",
                                                             /*shots=*/1);
  cudaq::ExecutionContext &executionContext =
      executionContextPtr ? *executionContextPtr : defaultContext;

  std::string errorMsg;
  const bool requestOkay = m_client->sendRequest(
      *mlirContext, executionContext, /*serializedCodeContext=*/nullptr,
      /*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0,
      m_simName, name, kernelFunc, wrapper->rawArgs, voidStarSize, &errorMsg);
  if (!requestOkay)
    throw std::runtime_error("Failed to launch kernel. Error: " + errorMsg);
}

// Remote QPU: delegating the execution to a remotely-hosted server, which can
// reinstate the execution context and JIT-invoke the kernel.
class PyRemoteSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU {
Expand All @@ -26,29 +86,12 @@ class PyRemoteSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU {
cudaq::gradient *gradient, cudaq::spin_op H,
cudaq::optimizer &optimizer, const int n_params,
const std::size_t shots) override {
cudaq::ExecutionContext *executionContextPtr =
getExecutionContextForMyThread();

auto *wrapper = reinterpret_cast<const cudaq::ArgWrapper *>(kernelArgs);
auto m_module = wrapper->mod;
auto *mlirContext = m_module->getContext();

if (executionContextPtr && executionContextPtr->name == "tracer")
return;

auto ctx = std::make_unique<cudaq::ExecutionContext>("observe", shots);
ctx->kernelName = name;
ctx->spin = &H;
if (shots > 0)
ctx->shots = shots;

std::string errorMsg;
const bool requestOkay = m_client->sendRequest(
*mlirContext, *executionContextPtr, /*serializedCodeContext=*/nullptr,
gradient, &optimizer, n_params, m_simName, name, /*kernelFunc=*/nullptr,
wrapper->rawArgs, /*argSize=*/0, &errorMsg);
if (!requestOkay)
throw std::runtime_error("Failed to launch VQE. Error: " + errorMsg);
cudaq::info(
"PyRemoteSimulatorQPU: Launch VQE kernel named '{}' remote QPU {} "
"(simulator = {})",
name, qpu_id, m_simName);
::_launchVQE(getExecutionContextForMyThread(), m_client, m_simName, name,
kernelArgs, gradient, H, optimizer, n_params, shots);
}

void launchKernel(const std::string &name, void (*kernelFunc)(void *),
Expand All @@ -57,29 +100,8 @@ class PyRemoteSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU {
cudaq::info("PyRemoteSimulatorQPU: Launch kernel named '{}' remote QPU {} "
"(simulator = {})",
name, qpu_id, m_simName);
auto *wrapper = reinterpret_cast<cudaq::ArgWrapper *>(args);
auto m_module = wrapper->mod;
auto callableNames = wrapper->callableNames;

auto *mlirContext = m_module->getContext();

cudaq::ExecutionContext *executionContextPtr =
getExecutionContextForMyThread();

// Default context for a 'fire-and-ignore' kernel launch; i.e., no context
// was set before launching the kernel. Use a static variable per thread to
// set up a single-shot execution context for this case.
static thread_local cudaq::ExecutionContext defaultContext("sample",
/*shots=*/1);
cudaq::ExecutionContext &executionContext =
executionContextPtr ? *executionContextPtr : defaultContext;
std::string errorMsg;
const bool requestOkay = m_client->sendRequest(
*mlirContext, executionContext, /*serializedCodeContext=*/nullptr,
/*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0,
m_simName, name, kernelFunc, wrapper->rawArgs, voidStarSize, &errorMsg);
if (!requestOkay)
throw std::runtime_error("Failed to launch kernel. Error: " + errorMsg);
::_launchKernel(getExecutionContextForMyThread(), m_client, m_simName, name,
kernelFunc, args, voidStarSize, resultOffset);
}

PyRemoteSimulatorQPU(PyRemoteSimulatorQPU &&) = delete;
Expand All @@ -96,35 +118,26 @@ class PyNvcfSimulatorQPU : public cudaq::BaseNvcfSimulatorQPU {

virtual bool isEmulated() override { return true; }

  // Launch a VQE optimization on the NVCF-hosted remote QPU: log the
  // target, then delegate the request to the file-local _launchVQE helper.
  void launchVQE(const std::string &name, const void *kernelArgs,
                 cudaq::gradient *gradient, cudaq::spin_op H,
                 cudaq::optimizer &optimizer, const int n_params,
                 const std::size_t shots) override {
    cudaq::info(
        "PyNvcfSimulatorQPU: Launch VQE kernel named '{}' remote QPU {} "
        "(simulator = {})",
        name, qpu_id, m_simName);
    // _launchVQE takes the spin_op by reference; H is a by-value parameter
    // here, so it remains alive for the duration of the call.
    ::_launchVQE(getExecutionContextForMyThread(), m_client, m_simName, name,
                 kernelArgs, gradient, H, optimizer, n_params, shots);
  }

void launchKernel(const std::string &name, void (*kernelFunc)(void *),
void *args, std::uint64_t voidStarSize,
std::uint64_t resultOffset) override {
cudaq::info("PyNvcfSimulatorQPU: Launch kernel named '{}' remote QPU {} "
"(simulator = {})",
name, qpu_id, m_simName);
auto *wrapper = reinterpret_cast<cudaq::ArgWrapper *>(args);
auto m_module = wrapper->mod;
auto callableNames = wrapper->callableNames;

auto *mlirContext = m_module->getContext();

cudaq::ExecutionContext *executionContextPtr =
getExecutionContextForMyThread();

// Default context for a 'fire-and-ignore' kernel launch; i.e., no context
// was set before launching the kernel. Use a static variable per thread to
// set up a single-shot execution context for this case.
static thread_local cudaq::ExecutionContext defaultContext("sample",
/*shots=*/1);
cudaq::ExecutionContext &executionContext =
executionContextPtr ? *executionContextPtr : defaultContext;
std::string errorMsg;
const bool requestOkay = m_client->sendRequest(
*mlirContext, executionContext, /*serializedCodeContext=*/nullptr,
/*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0,
m_simName, name, kernelFunc, wrapper->rawArgs, voidStarSize, &errorMsg);
if (!requestOkay)
throw std::runtime_error("Failed to launch kernel. Error: " + errorMsg);
::_launchKernel(getExecutionContextForMyThread(), m_client, m_simName, name,
kernelFunc, args, voidStarSize, resultOffset);
}

PyNvcfSimulatorQPU(PyNvcfSimulatorQPU &&) = delete;
Expand Down
Loading

0 comments on commit 6251f98

Please sign in to comment.