
Commit

Docs preview for PR #1969.
cuda-quantum-bot committed Jul 22, 2024
1 parent 697f129 commit 6288c3e
Showing 39 changed files with 357 additions and 170 deletions.
22 changes: 18 additions & 4 deletions pr-1969/CMakeLists.txt
@@ -12,13 +12,17 @@
# SOURCE_LOCATION: location of the source file (relative to 'sphinx/examples/cpp' directory by default)
# Optional keyword args:
# TARGET <TARGET_NAME>: name of the target to use
# TARGET_OPTION <Option>: extra option for the target
# SOURCE_DIR <DIR>: the directory that SOURCE_LOCATION is relative to (if not the default)
# LAUNCH_COMMAND <COMMAND>: the command to launch the test (e.g., mpirun)
function(add_nvqpp_test TEST_NAME SOURCE_LOCATION)
cmake_parse_arguments(PARSED_ARGS "" "TARGET;LABELS;SOURCE_DIR;LAUNCH_COMMAND;APPLICATION_ARGS" "" ${ARGN})
cmake_parse_arguments(PARSED_ARGS "" "TARGET;LABELS;SOURCE_DIR;LAUNCH_COMMAND;APPLICATION_ARGS;TARGET_OPTION" "" ${ARGN})
set(NVQPP_COMPILE_ARGS "")
if(PARSED_ARGS_TARGET)
set(NVQPP_COMPILE_ARGS "${NVQPP_COMPILE_ARGS} --target ${PARSED_ARGS_TARGET}")
if (PARSED_ARGS_TARGET_OPTION)
set(NVQPP_COMPILE_ARGS "${NVQPP_COMPILE_ARGS} --${PARSED_ARGS_TARGET}-option ${PARSED_ARGS_TARGET_OPTION}")
endif()
endif()
if (NOT PARSED_ARGS_SOURCE_DIR)
set(PARSED_ARGS_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/sphinx/examples/cpp")
@@ -68,9 +72,14 @@ if (CUSTATEVEC_ROOT AND CUDA_FOUND)
add_nvqpp_test(QuickStart_nvidia quick_start.cpp TARGET nvidia LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)

# mqpu snippets need custatevec backend and optionally MPI
add_nvqpp_test(SampleAsync using/cudaq/platform/sample_async.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(ObserveMQPU using/cudaq/platform/observe_mqpu.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(StateAsyncMQPU using/cudaq/platform/get_state_async.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(SampleAsync using/cudaq/platform/sample_async.cpp TARGET nvidia TARGET_OPTION mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(ObserveMQPU using/cudaq/platform/observe_mqpu.cpp TARGET nvidia TARGET_OPTION mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(StateAsyncMQPU using/cudaq/platform/get_state_async.cpp TARGET nvidia TARGET_OPTION mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)

# Legacy check for the `nvidia-mqpu` target
add_nvqpp_test(LegacySampleAsync using/cudaq/platform/sample_async.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(LegacyObserveMQPU using/cudaq/platform/observe_mqpu.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(LegacyStateAsyncMQPU using/cudaq/platform/get_state_async.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)

# Add the MPI test if MPI was found and there are more than 2 GPUs
if (MPI_CXX_FOUND)
@@ -81,6 +90,11 @@ if (CUSTATEVEC_ROOT AND CUDA_FOUND)
# Only build this test if we have more than 1 GPU
if (${NGPUS} GREATER_EQUAL 2)
add_nvqpp_test(ObserveMQPU_MPI using/cudaq/platform/observe_mqpu_mpi.cpp
TARGET nvidia
TARGET_OPTION mqpu
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp
LAUNCH_COMMAND "${MPIEXEC} --allow-run-as-root -np 2")
add_nvqpp_test(LegacyObserveMQPU_MPI using/cudaq/platform/observe_mqpu_mpi.cpp
TARGET nvidia-mqpu
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp
LAUNCH_COMMAND "${MPIEXEC} --allow-run-as-root -np 2")
@@ -256,7 +256,7 @@
"\n",
"result = []\n",
"for i in range(4): \n",
" count = cudaq.sample_async(kernel, qubit_num, angle[i], theta[i], shots_count = shots, qpu_id = i) \n",
" count = cudaq.sample_async(kernel, qubit_num, angle[i], theta[i], shots_count = shots, qpu_id = i%qpu_count) \n",
" result.append(count) \n",
"\n",
"mean_val = np.zeros(len(angle))\n",

10 changes: 8 additions & 2 deletions pr-1969/_sources/using/backends/backends.rst.txt
@@ -17,8 +17,9 @@ CUDA-Q Backends
* :ref:`iqm <iqm-backend>`
* :ref:`nvidia <nvidia-backend>`
* :ref:`nvidia-fp64 <nvidia-fp64-backend>`
* :ref:`nvidia-mqpu <nvidia-mgpu-backend>`
* :ref:`nvidia-mqpu-fp64 <nvidia-mgpu-backend>`
* :ref:`nvidia-mgpu <nvidia-mgpu-backend>`
* :ref:`nvidia-mqpu <mqpu-platform>`
* :ref:`nvidia-mqpu-fp64 <mqpu-platform>`
* :doc:`nvqc <nvqc>`
* :ref:`oqc <oqc-backend>`
* :ref:`orca <orca-backend>`
@@ -27,3 +28,8 @@ CUDA-Q Backends
* :ref:`remote-mqpu <mqpu-platform>`
* :ref:`tensornet <tensor-backends>`
* :ref:`tensornet-mps <tensor-backends>`

.. deprecated:: 0.8
The `nvidia-fp64`, `nvidia-mgpu`, `nvidia-mqpu`, and `nvidia-mqpu-fp64` targets can be
enabled as extensions of the unified `nvidia` target.
These target names might be removed in a future release.
22 changes: 12 additions & 10 deletions pr-1969/_sources/using/backends/platform.rst.txt
@@ -21,8 +21,8 @@ NVIDIA `MQPU` Platform

.. _mqpu-platform:

The NVIDIA `MQPU` target (:code:`nvidia-mqpu`) provides a simulated QPU for every available NVIDIA GPU on the underlying system.
Each QPU is simulated via a `cuStateVec` simulator backend. For more information about using multiple GPUs
In the multi-QPU mode (:code:`mqpu` option), the NVIDIA target provides a simulated QPU for every available NVIDIA GPU on the underlying system.
Each QPU is simulated via a `cuStateVec` simulator backend as defined by the NVIDIA target. For more information about using multiple GPUs
to simulate each virtual QPU, or using a different backend for virtual QPUs, please see :ref:`remote MQPU platform <remote-mqpu-platform>`.
This target enables asynchronous parallel execution of quantum kernel tasks.

@@ -42,17 +42,17 @@ Here is a simple example demonstrating its usage.
:end-before: [End Documentation]


One can specify the target multi-QPU architecture (:code:`nvidia-mqpu`) with the :code:`--target` flag:
One can enable the multi-QPU mode of the :code:`nvidia` target with the :code:`--target` and :code:`--target-option` flags:

.. code-block:: console
nvq++ sample_async.cpp -target nvidia-mqpu
nvq++ sample_async.cpp --target nvidia --target-option mqpu
./a.out
CUDA-Q exposes asynchronous versions of the default :code:`cudaq` algorithmic
primitive functions like :code:`sample` and :code:`observe` (e.g., :code:`sample_async` function in the above code snippets).

Depending on the number of GPUs available on the system, the :code:`nvidia-mqpu` platform will create the same number of virtual QPU instances.
Depending on the number of GPUs available on the system, the :code:`nvidia` multi-QPU platform will create the same number of virtual QPU instances.
For example, on a system with 4 GPUs, the above code will distribute the four sampling tasks among those :code:`GPUEmulatedQPU` instances.

The results might look like the following 4 different random samplings:
@@ -67,15 +67,17 @@
.. note::

By default, the :code:`nvidia-mqpu` platform will utilize all available GPUs (number of QPUs instances is equal to the number of GPUs).
By default, the :code:`nvidia` multi-QPU platform will utilize all available GPUs (the number of QPU instances is equal to the number of GPUs).
To specify the number of QPUs to be instantiated, one can set the :code:`CUDAQ_MQPU_NGPUS` environment variable.
For example, use :code:`export CUDAQ_MQPU_NGPUS=2` to specify that only 2 QPUs (GPUs) are needed.

.. deprecated:: 0.8
The :code:`nvidia-mqpu` and :code:`nvidia-mqpu-fp64` targets, which are equivalent to the multi-QPU options `mqpu,fp32` and `mqpu,fp64`, respectively, of the :code:`nvidia` target, are deprecated and will be removed in a future release.
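Putting the above together, here is a minimal, hedged sketch of the multi-QPU workflow in Python (it assumes a multi-GPU system with the CUDA-Q Python API installed; the `ghz` kernel is purely illustrative):

.. code:: python

    import cudaq

    cudaq.set_target("nvidia", option="mqpu")
    qpu_count = cudaq.get_target().num_qpus()

    @cudaq.kernel
    def ghz(qubit_count: int):
        qubits = cudaq.qvector(qubit_count)
        h(qubits[0])
        for i in range(1, qubit_count):
            x.ctrl(qubits[0], qubits[i])
        mz(qubits)

    # Launch one asynchronous sampling task per virtual QPU.
    futures = [cudaq.sample_async(ghz, 10, qpu_id=q) for q in range(qpu_count)]
    for q, f in enumerate(futures):
        print(f"QPU {q}:", f.get())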

Parallel distribution mode
^^^^^^^^^^^^^^^^^^^^^^^^^^

The CUDA-Q :code:`nvidia-mqpu` platform supports two modes of parallel distribution of expectation value computation:
The CUDA-Q :code:`nvidia` multi-QPU platform supports two modes of parallel distribution of expectation value computation:

* MPI: distribute the expectation value computations across available MPI ranks and GPUs for each Hamiltonian term.
* Thread: distribute the expectation value computations among available GPUs via standard C++ threads (each thread handles one GPU).
@@ -106,7 +108,7 @@ An example of MPI distribution mode usage in both C++ and Python is given below:

.. code-block:: console
nvq++ file.cpp -target nvidia-mqpu
nvq++ file.cpp --target nvidia --target-option mqpu
mpiexec -np <N> a.out
In the above example, the parallel distribution mode was set to :code:`mpi` using :code:`cudaq::parallel::mpi` in C++ or :code:`cudaq.parallel.mpi` in Python.
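For illustration, a hedged Python sketch of the MPI distribution mode (launched with `mpiexec` as shown above; the two-qubit ansatz and Hamiltonian are only examples):

.. code:: python

    import cudaq
    from cudaq import spin

    cudaq.mpi.initialize()
    cudaq.set_target("nvidia", option="mqpu")

    @cudaq.kernel
    def ansatz(theta: float):
        qubits = cudaq.qvector(2)
        x(qubits[0])
        ry(theta, qubits[1])
        x.ctrl(qubits[1], qubits[0])

    hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y(0) * spin.y(1) \
                  + 0.21829 * spin.z(0) - 6.125 * spin.z(1)

    # Distribute the per-term expectation value computations across MPI ranks and GPUs.
    result = cudaq.observe(ansatz, hamiltonian, 0.59, execution=cudaq.parallel.mpi)
    if cudaq.mpi.rank() == 0:
        print("Expectation value:", result.expectation())

    cudaq.mpi.finalize()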
@@ -119,7 +121,7 @@ Remote `MQPU` Platform

.. _remote-mqpu-platform:

As shown in the above examples, the :code:`nvidia-mqpu` platform enables
As shown in the above examples, the multi-QPU NVIDIA platform enables
multi-QPU distribution whereby each QPU is simulated by a :ref:`single NVIDIA GPU <cuQuantum single-GPU>`.
To run multi-QPU workloads on different simulator backends, one can use the :code:`remote-mqpu` platform,
which encapsulates simulated QPUs as independent HTTP REST server instances.
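As a rough sketch of what targeting that platform can look like from Python (the `backend` and `auto_launch` keyword arguments are assumptions based on the platform description here and should be checked against the full example below):

.. code:: python

    import cudaq

    # Ask the remote-mqpu platform to auto-launch two local REST server instances,
    # each simulating one virtual QPU with the `tensornet` backend.
    cudaq.set_target("remote-mqpu", backend="tensornet", auto_launch="2")

    @cudaq.kernel
    def sample_kernel():
        qubits = cudaq.qvector(3)
        h(qubits[0])
        x.ctrl(qubits[0], qubits[1])
        x.ctrl(qubits[1], qubits[2])
        mz(qubits)

    futures = [cudaq.sample_async(sample_kernel, qpu_id=q) for q in range(2)]
    print([f.get() for f in futures])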
@@ -201,7 +203,7 @@ With these invocations, each virtual QPU is locally addressable at the URL `loca
Hence, please make sure to either (1) use a non-public TCP/IP port for internal use or
(2) use firewalls or other security mechanisms to manage user access.

User code can then target these QPUs for multi-QPU workloads, such as asynchronous sample or observe shown above for the :code:`nvidia-mqpu` platform.
User code can then target these QPUs for multi-QPU workloads, such as the asynchronous sample or observe calls shown above for the multi-QPU NVIDIA platform.

.. tab:: Python

53 changes: 43 additions & 10 deletions pr-1969/_sources/using/backends/simulators.rst.txt
@@ -42,29 +42,43 @@ To execute a program on the :code:`nvidia` target, use the following commands:
.. _nvidia-fp64-backend:

By default, this will leverage :code:`FP32` floating point types for the simulation. To
switch to :code:`FP64`, specify the :code:`nvidia-fp64` target instead.
switch to :code:`FP64`, specify the :code:`--target-option fp64` `nvq++` command-line option for `C++`, or
use `cudaq.set_target('nvidia', option='fp64')` for Python instead.
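For example, a minimal Python sketch (any kernel works here; `bell` is illustrative):

.. code:: python

    import cudaq

    # Select the double-precision variant of the `nvidia` target.
    cudaq.set_target("nvidia", option="fp64")

    @cudaq.kernel
    def bell():
        qubits = cudaq.qvector(2)
        h(qubits[0])
        x.ctrl(qubits[0], qubits[1])
        mz(qubits)

    print(cudaq.sample(bell))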

.. note::

This backend requires an NVIDIA GPU and CUDA runtime libraries. If you do not have these dependencies installed, you may encounter an error stating `Invalid simulator requested`. See the section :ref:`dependencies-and-compatibility` for more information about how to install dependencies.

.. deprecated:: 0.8
The :code:`nvidia-fp64` target, which is equivalent to setting the `fp64` option on the :code:`nvidia` target,
is deprecated and will be removed in a future release.

Multi-node multi-GPU
++++++++++++++++++++++++++++++++++

.. _nvidia-mgpu-backend:

The :code:`nvidia-mgpu` target provides a state vector simulator accelerated with
The multi-node multi-GPU NVIDIA target provides a state vector simulator accelerated with
the :code:`cuStateVec` library but with support for Multi-Node, Multi-GPU distribution of the
state vector, in addition to a single GPU.

The multi-node multi-GPU simulator expects to run within an MPI context.
To execute a program on the :code:`nvidia-mgpu` target, use the following commands (adjust the value of the :code:`-np` flag as needed to reflect available GPU resources on your system):
To execute a program on the multi-node multi-GPU NVIDIA target, use the following commands
(adjust the value of the :code:`-np` flag as needed to reflect available GPU resources on your system):

.. tab:: Python

Double precision simulation:

.. code:: bash
mpiexec -np 2 python3 program.py [...] --target nvidia --target-option fp64,mgpu
Single precision simulation:

.. code:: bash
mpiexec -np 2 python3 program.py [...] --target nvidia-mgpu
mpiexec -np 2 python3 program.py [...] --target nvidia --target-option fp32,mgpu
.. note::

@@ -76,28 +90,47 @@ To execute a program on the :code:`nvidia-mgpu` target, use the following comman

.. code:: bash
mpiexec -np 2 python3 -m mpi4py program.py [...] --target nvidia-mgpu
mpiexec -np 2 python3 -m mpi4py program.py [...] --target nvidia --target-option fp64,mgpu
The target can also be defined in the application code by calling

.. code:: python
cudaq.set_target('nvidia-mgpu')
cudaq.set_target('nvidia', option='mgpu,fp64')
If a target is set in the application code, this target will override the :code:`--target` command line flag given during program invocation.

.. note::
(1) The order of the option settings is interchangeable.
For example, `cudaq.set_target('nvidia', option='mgpu,fp64')` is equivalent to `cudaq.set_target('nvidia', option='fp64,mgpu')`.

(2) The `nvidia` target has single-precision as the default setting. Thus, using `option='mgpu'` implies `option='mgpu,fp32'`.

.. tab:: C++

Double precision simulation:

.. code:: bash
nvq++ --target nvidia-mgpu program.cpp [...] -o program.x
nvq++ --target nvidia --target-option mgpu,fp64 program.cpp [...] -o program.x
mpiexec -np 2 ./program.x
Single precision simulation:

.. code:: bash
nvq++ --target nvidia --target-option mgpu,fp32 program.cpp [...] -o program.x
mpiexec -np 2 ./program.x
.. note::

This backend requires an NVIDIA GPU, CUDA runtime libraries, as well as an MPI installation. If you do not have these dependencies installed, you may encounter either an error stating `invalid simulator requested` (missing CUDA libraries), or an error along the lines of `failed to launch kernel` (missing MPI installation). See the section :ref:`dependencies-and-compatibility` for more information about how to install dependencies.

The :code:`nvidia-mgpu` backend has additional performance improvements to
.. deprecated:: 0.8
The :code:`nvidia-mgpu` target, which is equivalent to the multi-node multi-GPU double-precision option (`mgpu,fp64`) of the :code:`nvidia` target,
is deprecated and will be removed in a future release.

The :code:`nvidia` backend has additional performance improvements to
help reduce your simulation runtimes, even on a single GPU. One of the
performance improvements is to fuse multiple gates together during runtime. For
example, :code:`x(qubit0)` and :code:`x(qubit1)` can be fused together into a
@@ -114,13 +147,13 @@ environment variable to another integer value as shown below.

.. code:: bash
CUDAQ_MGPU_FUSE=5 mpiexec -np 2 python3 program.py [...] --target nvidia-mgpu
CUDAQ_MGPU_FUSE=5 mpiexec -np 2 python3 program.py [...] --target nvidia --target-option mgpu,fp64
.. tab:: C++

.. code:: bash
nvq++ --target nvidia-mgpu program.cpp [...] -o program.x
nvq++ --target nvidia --target-option mgpu,fp64 program.cpp [...] -o program.x
CUDAQ_MGPU_FUSE=5 mpiexec -np 2 ./program.x
.. _OpenMP CPU-only:
@@ -11,7 +11,7 @@ Available Targets
- **`qpp-cpu`**: The QPP based CPU backend which is multithreaded to
maximize the usage of available cores on your system.

- **`nvidia`**: Single GPU based backend which accelerates quantum circuit
- **`nvidia`**: GPU-accelerated state-vector based backend which accelerates quantum circuit
simulation on NVIDIA GPUs powered by cuQuantum.

- **`nvidia-mgpu`**: Allows for scaling circuit simulation on multiple GPUs.
2 changes: 1 addition & 1 deletion pr-1969/api/languages/python_api.html
@@ -2064,7 +2064,7 @@ <h2>Data Types<a class="headerlink" href="#data-types" title="Permalink to this
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">random</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#cudaq.SpinOperator.random" title="Permalink to this definition"></a></dt>
<dd><dl class="py function">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">random</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">qubit_count</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">term_count</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">seed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1221185055</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#cudaq.SpinOperator" title="cudaq.mlir._mlir_libs._quakeDialects.cudaq_runtime.SpinOperator"><span class="pre">cudaq.mlir._mlir_libs._quakeDialects.cudaq_runtime.SpinOperator</span></a></span></span></dt>
<span class="sig-name descname"><span class="pre">random</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">qubit_count</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">term_count</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">seed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">818229446</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#cudaq.SpinOperator" title="cudaq.mlir._mlir_libs._quakeDialects.cudaq_runtime.SpinOperator"><span class="pre">cudaq.mlir._mlir_libs._quakeDialects.cudaq_runtime.SpinOperator</span></a></span></span></dt>
<dd></dd></dl>

<p>Return a random <a class="reference internal" href="#cudaq.SpinOperator" title="cudaq.SpinOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">SpinOperator</span></code></a> on the given number of qubits (<code class="code docutils literal notranslate"><span class="pre">qubit_count</span></code>) and composed of the given number of terms (<code class="code docutils literal notranslate"><span class="pre">term_count</span></code>). An optional seed value may also be provided.</p>

0 comments on commit 6288c3e
