NVIDIA · 1tnguyen · Jul 25, 2024 · Jul 19, 2024 · Jul 19, 2024 · Jul 19, 2024
@@ -126,7 +126,8 @@ jobs:
  # unsupport_args and compile_errors are compile error tests
  # pauli_word: https://github.com/NVIDIA/cuda-quantum/issues/1957
  # nested_vectors: related to vector of pauli_words (https://github.com/NVIDIA/cuda-quantum/issues/1957)
- if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"state_overlap"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]]; then
+ # return_values: only supported in 0.8 NVQC service.
+ if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"state_overlap"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]] && [[ "$filename" != *"return_values"* ]]; then
  echo "$filename"
  nvqc_config=""
  # Look for a --remote-mqpu-auto-launch to determine the number of QPUs

diff --git a/runtime/common/BaseRemoteSimulatorQPU.h b/runtime/common/BaseRemoteSimulatorQPU.h
@@ -126,15 +126,38 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU {
  // set up a single-shot execution context for this case.
  static thread_local cudaq::ExecutionContext defaultContext("sample",
  /*shots=*/1);
+ // This is a kernel invocation outside the CUDA-Q APIs (sample/observe).
+ const bool isDirectInvocation = !executionContextPtr;
  cudaq::ExecutionContext &executionContext =
  executionContextPtr ? *executionContextPtr : defaultContext;
+
+ // Populate the conditional feedback metadata if this is a direct
+ // invocation (not otherwise populated by cudaq::sample)
+ if (isDirectInvocation)
+ executionContext.hasConditionalsOnMeasureResults =
+ cudaq::kernelHasConditionalFeedback(name);
+
  std::string errorMsg;
  const bool requestOkay = m_client->sendRequest(
  *m_mlirContext, executionContext, /*serializedCodeContext=*/nullptr,
  /*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0,
  m_simName, name, kernelFunc, args, voidStarSize, &errorMsg);
  if (!requestOkay)
  throw std::runtime_error("Failed to launch kernel. Error: " + errorMsg);
+ if (isDirectInvocation &&
+ !executionContext.invocationResultBuffer.empty()) {
+ if (executionContext.invocationResultBuffer.size() + resultOffset >
+ voidStarSize)
+ throw std::runtime_error(
+ "Unexpected result: return type size of " +
+ std::to_string(executionContext.invocationResultBuffer.size()) +
+ " bytes overflows the argument buffer.");
+ char *resultBuf = reinterpret_cast<char *>(args) + resultOffset;
+ // Copy the result data to the args buffer.
+ std::memcpy(resultBuf, executionContext.invocationResultBuffer.data(),
+ executionContext.invocationResultBuffer.size());
+ executionContext.invocationResultBuffer.clear();
+ }
  }
 
  void

diff --git a/runtime/common/ExecutionContext.h b/runtime/common/ExecutionContext.h
@@ -100,6 +100,11 @@ class ExecutionContext {
  /// register after execution. Empty means no reordering.
  std::vector<std::size_t> reorderIdx;
 
+ /// @brief A buffer containing the return value of a kernel invocation.
+ /// Note: this is only needed for invocations that cannot return a
+ /// `sample_result`.
+ std::vector<char> invocationResultBuffer;
+
  /// @brief The Constructor, takes the name of the context
  /// @param n The name of the context
  ExecutionContext(const std::string n) : name(n) {}

diff --git a/runtime/common/JsonConvert.h b/runtime/common/JsonConvert.h
@@ -153,6 +153,9 @@ inline void to_json(json &j, const ExecutionContext &context) {
 
  if (context.amplitudeMaps.has_value())
  j["amplitudeMaps"] = context.amplitudeMaps.value();
+
+ if (!context.invocationResultBuffer.empty())
+ j["invocationResultBuffer"] = context.invocationResultBuffer;
 }
 
 inline void from_json(const json &j, ExecutionContext &context) {
@@ -214,6 +217,9 @@ inline void from_json(const json &j, ExecutionContext &context) {
 
  if (j.contains("amplitudeMaps"))
  context.amplitudeMaps = j["amplitudeMaps"];
+
+ if (j.contains("invocationResultBuffer"))
+ context.invocationResultBuffer = j["invocationResultBuffer"];
 }
 
 // Enum data to denote the payload format.

diff --git a/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteServer.cpp b/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteServer.cpp
@@ -420,7 +420,7 @@ class RemoteRestRuntimeServer : public cudaq::RemoteRuntimeServer {
  io_context.hasConditionalsOnMeasureResults) {
  // Need to run simulation shot-by-shot
  cudaq::sample_result counts;
- invokeMlirKernel(m_mlirContext, ir, requestInfo.passes,
+ invokeMlirKernel(io_context, m_mlirContext, ir, requestInfo.passes,
  std::string(kernelName), io_context.shots,
  [&](std::size_t i) {
  // Reset the context and get the single
@@ -436,7 +436,7 @@ class RemoteRestRuntimeServer : public cudaq::RemoteRuntimeServer {
  io_context.result = counts;
  platform.set_exec_ctx(&io_context);
  } else {
- invokeMlirKernel(m_mlirContext, ir, requestInfo.passes,
+ invokeMlirKernel(io_context, m_mlirContext, ir, requestInfo.passes,
  std::string(kernelName));
  }
  }
@@ -537,7 +537,8 @@ class RemoteRestRuntimeServer : public cudaq::RemoteRuntimeServer {
  }
 
  void
- invokeMlirKernel(std::unique_ptr<MLIRContext> &contextPtr,
+ invokeMlirKernel(cudaq::ExecutionContext &io_context,
+ std::unique_ptr<MLIRContext> &contextPtr,
  std::string_view irString,
  const std::vector<std::string> &passes,
  const std::string &entryPointFn, std::size_t numTimes = 1,
@@ -549,21 +550,54 @@ class RemoteRestRuntimeServer : public cudaq::RemoteRuntimeServer {
  if (!module)
  throw std::runtime_error("Failed to parse the input MLIR code");
  auto engine = jitMlirCode(*module, passes);
+ llvm::SmallVector<void *> returnArg;
  const std::string entryPointFunc =
  std::string(cudaq::runtime::cudaqGenPrefixName) + entryPointFn;
- auto fnPtr =
- getValueOrThrow(engine->lookup(entryPointFunc),
- "Failed to look up entry-point function symbol");
- if (!fnPtr)
- throw std::runtime_error("Failed to get entry function");
+ if (auto funcOp = module->lookupSymbol<LLVM::LLVMFuncOp>(entryPointFunc)) {
+ auto funcTy = funcOp.getFunctionType();
+ auto returnTy = funcTy.getReturnType();
+ // These are the returned types that we support.
+ if (returnTy.isF32()) {
+ io_context.invocationResultBuffer.resize(sizeof(float));
+ returnArg.push_back(io_context.invocationResultBuffer.data());
+ } else if (returnTy.isF64()) {
+ io_context.invocationResultBuffer.resize(sizeof(double));
+ returnArg.push_back(io_context.invocationResultBuffer.data());
+ } else if (returnTy.isInteger(1)) {
+ io_context.invocationResultBuffer.resize(sizeof(bool));
+ returnArg.push_back(io_context.invocationResultBuffer.data());
+ } else if (returnTy.isIntOrIndex()) {
+ io_context.invocationResultBuffer.resize(
+ (returnTy.getIntOrFloatBitWidth() + 7) / 8);
+ returnArg.push_back(io_context.invocationResultBuffer.data());
+ }
+ }
 
- auto fn = reinterpret_cast<void (*)()>(fnPtr);
- simulationStart = std::chrono::high_resolution_clock::now();
- for (std::size_t i = 0; i < numTimes; ++i) {
- // Invoke the kernel
- fn();
- if (postExecCallback) {
- postExecCallback(i);
+ // Note: currently, we only return data from a kernel on single-shot
+ // execution. Once we enable arbitrary sample return types, we can run this
+ // in a loop and return a vector of the return type.
+ if (numTimes == 1 && !returnArg.empty()) {
+ simulationStart = std::chrono::high_resolution_clock::now();
+ llvm::Error error = engine->invokePacked(entryPointFunc, returnArg);
+ if (error) 
+ throw std::runtime_error("JIT invocation failed");
+ if (postExecCallback)
+ postExecCallback(0);
+ } else {
+ auto fnPtr =
+ getValueOrThrow(engine->lookup(entryPointFunc),
+ "Failed to look up entry-point function symbol");
+ if (!fnPtr)
+ throw std::runtime_error("Failed to get entry function");
+
+ auto fn = reinterpret_cast<void (*)()>(fnPtr);
+ simulationStart = std::chrono::high_resolution_clock::now();
+ for (std::size_t i = 0; i < numTimes; ++i) {
+ // Invoke the kernel
+ fn();
+ if (postExecCallback) 
+ postExecCallback(i);
+
  }
  }
  }

diff --git a/targettests/Remote-Sim/return_values.cpp b/targettests/Remote-Sim/return_values.cpp
@@ -0,0 +1,101 @@
+/*******************************************************************************
+ * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. *
+ * All rights reserved. *
+ * *
+ * This source code and the accompanying materials are made available under *
+ * the terms of the Apache License 2.0 which accompanies this distribution. *
+ ******************************************************************************/
+
+// REQUIRES: remote-sim
+// REQUIRES: c++20
+
+// clang-format off
+// RUN: nvq++ %cpp_std --enable-mlir --target remote-mqpu --remote-mqpu-auto-launch 1 %s -o %t && %t
+// clang-format on
+
+#include <cudaq.h>
+#include <iostream>
+
+struct rwpe { // Kernel functor: iterative loop that updates `mu`/`sigma` from mid-circuit measurement results and returns 2 * mu.
+ double operator()(const int n_iter, double mu, double sigma) __qpu__ { // n_iter: number of loop iterations; mu, sigma: running values updated on every iteration
+ int iteration = 0;
+
+ // Allocate the two qubits used by this kernel
+ cudaq::qvector q(2);
+
+ // Alias them: `aux` is the first qubit, `target` is the last one
+ auto &aux = q.front();
+ auto &target = q.back();
+
+ x(q[1]); // q[1] is the same qubit as `target` (last of the 2-qubit register)
+
+ while (iteration < n_iter) {
+ h(aux);
+ rz(1.0 - (mu / sigma), aux); // rotation angle depends on the current mu and sigma
+ rz(.25 / sigma, target);
+ x<cudaq::ctrl>(aux, target);
+ rz(-.25 / sigma, target);
+ x<cudaq::ctrl>(aux, target);
+ h(aux);
+ if (mz(aux)) { // the measurement outcome selects the direction of the mu update
+ x(aux); // flip aux after measuring 1 -- presumably to return it to |0> for the next iteration
+ mu += sigma * .6065;
+ } else {
+ mu -= sigma * .6065;
+ }
+
+ sigma *= .7951; // sigma shrinks by a constant factor on every iteration
+ iteration += 1;
+ }
+
+ return 2. * mu; // the value handed back to the host caller
+ }
+};
+
+struct returnTrue { // Kernel functor returning the measurement of a qubit after an X gate; main() asserts the result is true.
+ bool operator()() __qpu__ {
+ cudaq::qubit q;
+ x(q); // flip the freshly allocated qubit before measuring it
+ return mz(q);
+ }
+};
+
+struct returnFalse { // Kernel functor returning the AND of two measurements; main() asserts the result is false.
+ bool operator()() __qpu__ {
+ cudaq::qubit q, r;
+ x(q); // only q is flipped; r is left untouched
+ return mz(q) && mz(r); // true only if both qubits measure 1
+ }
+};
+
+struct returnInt { // Kernel functor that counts how many of `iters` H-then-measure rounds on one qubit yield 1.
+ int operator()(int iters) __qpu__ { // iters: number of rounds; returns the number of rounds that measured 1
+ cudaq::qubit q;
+ int count = 0;
+ for (int i = 0; i < iters; ++i) {
+ h(q);
+ if (mz(q)) {
+ count++;
+ x(q); // flip q after measuring 1 -- presumably so the next round starts from |0>
+ }
+ }
+ return count;
+ }
+};
+
+int main() { // Calls each value-returning kernel directly and checks the returned value with assert.
+ int n_iterations = 24;
+ double mu = 0.7951, sigma = 0.6065;
+ auto phase = rwpe{}(n_iterations, mu, sigma);
+
+ assert(std::abs(phase - 0.49) < 0.05); // NOTE(review): <cassert>/<cmath> are not included directly here -- presumably provided via cudaq.h; asserts are no-ops if NDEBUG is defined
+
+ assert(returnTrue{}());
+
+ assert(!returnFalse{}());
+ cudaq::set_random_seed(123); // seed is set only before the counting kernel -- presumably to make oneCount reproducible
+ const int oneCount = returnInt{}(1000);
+ std::cout << "One count = " << oneCount << "\n";
+ // We expect ~50% ones (about 500 of 1000); the check below only requires more than 100.
+ assert(oneCount > 100);
+}