
Commit

Docs preview for PR #1969.
cuda-quantum-bot committed Jul 22, 2024
1 parent 697f129 commit 6288c3e
Showing 39 changed files with 357 additions and 170 deletions.
22 changes: 18 additions & 4 deletions pr-1969/CMakeLists.txt
@@ -12,13 +12,17 @@
# SOURCE_LOCATION: location of the source file (relative to 'sphinx/examples/cpp' directory by default)
# Optional keyword args:
# TARGET <TARGET_NAME>: name of the target to use
# TARGET_OPTION <Option>: extra option for the target
# SOURCE_DIR <DIR>: the directory that SOURCE_LOCATION is relative to (if not the default)
# LAUNCH_COMMAND <COMMAND>: the command to launch the test (e.g., mpirun)
function(add_nvqpp_test TEST_NAME SOURCE_LOCATION)
cmake_parse_arguments(PARSED_ARGS "" "TARGET;LABELS;SOURCE_DIR;LAUNCH_COMMAND;APPLICATION_ARGS" "" ${ARGN})
cmake_parse_arguments(PARSED_ARGS "" "TARGET;LABELS;SOURCE_DIR;LAUNCH_COMMAND;APPLICATION_ARGS;TARGET_OPTION" "" ${ARGN})
set(NVQPP_COMPILE_ARGS "")
if(PARSED_ARGS_TARGET)
set(NVQPP_COMPILE_ARGS "${NVQPP_COMPILE_ARGS} --target ${PARSED_ARGS_TARGET}")
if (PARSED_ARGS_TARGET_OPTION)
set(NVQPP_COMPILE_ARGS "${NVQPP_COMPILE_ARGS} --${PARSED_ARGS_TARGET}-option ${PARSED_ARGS_TARGET_OPTION}")
endif()
endif()
if (NOT PARSED_ARGS_SOURCE_DIR)
set(PARSED_ARGS_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/sphinx/examples/cpp")
@@ -68,9 +72,14 @@ if (CUSTATEVEC_ROOT AND CUDA_FOUND)
add_nvqpp_test(QuickStart_nvidia quick_start.cpp TARGET nvidia LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)

# mqpu snippets need custatevec backend and optionally MPI
add_nvqpp_test(SampleAsync using/cudaq/platform/sample_async.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(ObserveMQPU using/cudaq/platform/observe_mqpu.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(StateAsyncMQPU using/cudaq/platform/get_state_async.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(SampleAsync using/cudaq/platform/sample_async.cpp TARGET nvidia TARGET_OPTION mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(ObserveMQPU using/cudaq/platform/observe_mqpu.cpp TARGET nvidia TARGET_OPTION mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(StateAsyncMQPU using/cudaq/platform/get_state_async.cpp TARGET nvidia TARGET_OPTION mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)

# Legacy check for the `nvidia-mqpu` target
add_nvqpp_test(LegacySampleAsync using/cudaq/platform/sample_async.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(LegacyObserveMQPU using/cudaq/platform/observe_mqpu.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)
add_nvqpp_test(LegacyStateAsyncMQPU using/cudaq/platform/get_state_async.cpp TARGET nvidia-mqpu LABELS gpu_required SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp)

# Add the MPI test if MPI was found and there are more than 2 GPUs
if (MPI_CXX_FOUND)
@@ -81,6 +90,11 @@ if (CUSTATEVEC_ROOT AND CUDA_FOUND)
# Only build this test if we have more than 1 GPU
if (${NGPUS} GREATER_EQUAL 2)
add_nvqpp_test(ObserveMQPU_MPI using/cudaq/platform/observe_mqpu_mpi.cpp
TARGET nvidia
TARGET_OPTION mqpu
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp
LAUNCH_COMMAND "${MPIEXEC} --allow-run-as-root -np 2")
add_nvqpp_test(LegacyObserveMQPU_MPI using/cudaq/platform/observe_mqpu_mpi.cpp
TARGET nvidia-mqpu
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp
LAUNCH_COMMAND "${MPIEXEC} --allow-run-as-root -np 2")
@@ -256,7 +256,7 @@
"\n",
"result = []\n",
"for i in range(4): \n",
" count = cudaq.sample_async(kernel, qubit_num, angle[i], theta[i], shots_count = shots, qpu_id = i) \n",
" count = cudaq.sample_async(kernel, qubit_num, angle[i], theta[i], shots_count = shots, qpu_id = i%qpu_count) \n",
" result.append(count) \n",
"\n",
"mean_val = np.zeros(len(angle))\n",

10 changes: 8 additions & 2 deletions pr-1969/_sources/using/backends/backends.rst.txt
@@ -17,8 +17,9 @@ CUDA-Q Backends
* :ref:`iqm <iqm-backend>`
* :ref:`nvidia <nvidia-backend>`
* :ref:`nvidia-fp64 <nvidia-fp64-backend>`
* :ref:`nvidia-mqpu <nvidia-mgpu-backend>`
* :ref:`nvidia-mqpu-fp64 <nvidia-mgpu-backend>`
* :ref:`nvidia-mgpu <nvidia-mgpu-backend>`
* :ref:`nvidia-mqpu <mqpu-platform>`
* :ref:`nvidia-mqpu-fp64 <mqpu-platform>`
* :doc:`nvqc <nvqc>`
* :ref:`oqc <oqc-backend>`
* :ref:`orca <orca-backend>`
@@ -27,3 +28,8 @@ CUDA-Q Backends
* :ref:`remote-mqpu <mqpu-platform>`
* :ref:`tensornet <tensor-backends>`
* :ref:`tensornet-mps <tensor-backends>`

.. deprecated:: 0.8
The `nvidia-fp64`, `nvidia-mgpu`, `nvidia-mqpu`, and `nvidia-mqpu-fp64` targets can be
enabled as extensions of the unified `nvidia` target.
These target names might be removed in a future release.
22 changes: 12 additions & 10 deletions pr-1969/_sources/using/backends/platform.rst.txt
@@ -21,8 +21,8 @@ NVIDIA `MQPU` Platform

.. _mqpu-platform:

The NVIDIA `MQPU` target (:code:`nvidia-mqpu`) provides a simulated QPU for every available NVIDIA GPU on the underlying system.
Each QPU is simulated via a `cuStateVec` simulator backend. For more information about using multiple GPUs
In the multi-QPU mode (:code:`mqpu` option), the NVIDIA target provides a simulated QPU for every available NVIDIA GPU on the underlying system.
Each QPU is simulated via a `cuStateVec` simulator backend as defined by the NVIDIA target. For more information about using multiple GPUs
to simulate each virtual QPU, or using a different backend for virtual QPUs, please see :ref:`remote MQPU platform <remote-mqpu-platform>`.
This target enables asynchronous parallel execution of quantum kernel tasks.

@@ -42,17 +42,17 @@ Here is a simple example demonstrating its usage.
:end-before: [End Documentation]


One can specify the target multi-QPU architecture (:code:`nvidia-mqpu`) with the :code:`--target` flag:
One can enable the multi-QPU mode of the :code:`nvidia` target with the :code:`--target` and :code:`--target-option` flags:

.. code-block:: console
nvq++ sample_async.cpp -target nvidia-mqpu
nvq++ sample_async.cpp --target nvidia --target-option mqpu
./a.out
CUDA-Q exposes asynchronous versions of the default :code:`cudaq` algorithmic
primitive functions like :code:`sample` and :code:`observe` (e.g., :code:`sample_async` function in the above code snippets).

Depending on the number of GPUs available on the system, the :code:`nvidia-mqpu` platform will create the same number of virtual QPU instances.
Depending on the number of GPUs available on the system, the :code:`nvidia` multi-QPU platform will create the same number of virtual QPU instances.
For example, on a system with 4 GPUs, the above code will distribute the four sampling tasks among those :code:`GPUEmulatedQPU` instances.

The results might look like the following 4 different random samplings:
@@ -67,15 +67,17 @@
.. note::

By default, the :code:`nvidia-mqpu` platform will utilize all available GPUs (number of QPUs instances is equal to the number of GPUs).
By default, the :code:`nvidia` multi-QPU platform will utilize all available GPUs (the number of QPU instances is equal to the number of GPUs).
To specify the number of QPUs to be instantiated, one can set the :code:`CUDAQ_MQPU_NGPUS` environment variable.
For example, use :code:`export CUDAQ_MQPU_NGPUS=2` to specify that only 2 QPUs (GPUs) are needed.

.. deprecated:: 0.8
The :code:`nvidia-mqpu` and :code:`nvidia-mqpu-fp64` targets, which are equivalent to the multi-QPU options `mqpu,fp32` and `mqpu,fp64`, respectively, of the :code:`nvidia` target, are deprecated and will be removed in a future release.
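Putting the above together, here is a minimal, hedged sketch of the multi-QPU workflow in Python (it assumes a multi-GPU system with the CUDA-Q Python API installed; the `ghz` kernel is purely illustrative):

.. code:: python

    import cudaq

    cudaq.set_target("nvidia", option="mqpu")
    qpu_count = cudaq.get_target().num_qpus()

    @cudaq.kernel
    def ghz(qubit_count: int):
        qubits = cudaq.qvector(qubit_count)
        h(qubits[0])
        for i in range(1, qubit_count):
            x.ctrl(qubits[0], qubits[i])
        mz(qubits)

    # Launch one asynchronous sampling task per virtual QPU.
    futures = [cudaq.sample_async(ghz, 10, qpu_id=q) for q in range(qpu_count)]
    for q, f in enumerate(futures):
        print(f"QPU {q}:", f.get())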

Parallel distribution mode
^^^^^^^^^^^^^^^^^^^^^^^^^^

The CUDA-Q :code:`nvidia-mqpu` platform supports two modes of parallel distribution of expectation value computation:
The CUDA-Q :code:`nvidia` multi-QPU platform supports two modes of parallel distribution of expectation value computation:

* MPI: distribute the expectation value computations across available MPI ranks and GPUs for each Hamiltonian term.
* Thread: distribute the expectation value computations among available GPUs via standard C++ threads (each thread handles one GPU).
@@ -106,7 +108,7 @@ An example of MPI distribution mode usage in both C++ and Python is given below:

.. code-block:: console
nvq++ file.cpp -target nvidia-mqpu
nvq++ file.cpp --target nvidia --target-option mqpu
mpiexec -np <N> a.out
In the above example, the parallel distribution mode was set to :code:`mpi` using :code:`cudaq::parallel::mpi` in C++ or :code:`cudaq.parallel.mpi` in Python.
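For illustration, a hedged Python sketch of the MPI distribution mode (launched with `mpiexec` as shown above; the two-qubit ansatz and Hamiltonian are only examples):

.. code:: python

    import cudaq
    from cudaq import spin

    cudaq.mpi.initialize()
    cudaq.set_target("nvidia", option="mqpu")

    @cudaq.kernel
    def ansatz(theta: float):
        qubits = cudaq.qvector(2)
        x(qubits[0])
        ry(theta, qubits[1])
        x.ctrl(qubits[1], qubits[0])

    hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y(0) * spin.y(1) \
                  + 0.21829 * spin.z(0) - 6.125 * spin.z(1)

    # Distribute the per-term expectation value computations across MPI ranks and GPUs.
    result = cudaq.observe(ansatz, hamiltonian, 0.59, execution=cudaq.parallel.mpi)
    if cudaq.mpi.rank() == 0:
        print("Expectation value:", result.expectation())

    cudaq.mpi.finalize()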
@@ -119,7 +121,7 @@ Remote `MQPU` Platform

.. _remote-mqpu-platform:

As shown in the above examples, the :code:`nvidia-mqpu` platform enables
As shown in the above examples, the multi-QPU NVIDIA platform enables
multi-QPU distribution whereby each QPU is simulated by a :ref:`single NVIDIA GPU <cuQuantum single-GPU>`.
To run multi-QPU workloads on different simulator backends, one can use the :code:`remote-mqpu` platform,
which encapsulates simulated QPUs as independent HTTP REST server instances.
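As a rough sketch of what targeting that platform can look like from Python (the `backend` and `auto_launch` keyword arguments are assumptions based on the platform description here and should be checked against the full example below):

.. code:: python

    import cudaq

    # Ask the remote-mqpu platform to auto-launch two local REST server instances,
    # each simulating one virtual QPU with the `tensornet` backend.
    cudaq.set_target("remote-mqpu", backend="tensornet", auto_launch="2")

    @cudaq.kernel
    def sample_kernel():
        qubits = cudaq.qvector(3)
        h(qubits[0])
        x.ctrl(qubits[0], qubits[1])
        x.ctrl(qubits[1], qubits[2])
        mz(qubits)

    futures = [cudaq.sample_async(sample_kernel, qpu_id=q) for q in range(2)]
    print([f.get() for f in futures])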
@@ -201,7 +203,7 @@ With these invocations, each virtual QPU is locally addressable at the URL `loca
Hence, please make sure to either (1) use a non-public TCP/IP port for internal use or
(2) use firewalls or other security mechanisms to manage user access.

User code can then target these QPUs for multi-QPU workloads, such as asynchronous sample or observe shown above for the :code:`nvidia-mqpu` platform.
User code can then target these QPUs for multi-QPU workloads, such as the asynchronous sample or observe calls shown above for the multi-QPU NVIDIA platform.

.. tab:: Python

53 changes: 43 additions & 10 deletions pr-1969/_sources/using/backends/simulators.rst.txt
@@ -42,29 +42,43 @@ To execute a program on the :code:`nvidia` target, use the following commands:
.. _nvidia-fp64-backend:

By default, this will leverage :code:`FP32` floating point types for the simulation. To
switch to :code:`FP64`, specify the :code:`nvidia-fp64` target instead.
switch to :code:`FP64`, specify the :code:`--target-option fp64` `nvq++` command-line option for `C++`, or
use `cudaq.set_target('nvidia', option='fp64')` for Python instead.
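For example, a minimal Python sketch (any kernel works here; `bell` is illustrative):

.. code:: python

    import cudaq

    # Select the double-precision variant of the `nvidia` target.
    cudaq.set_target("nvidia", option="fp64")

    @cudaq.kernel
    def bell():
        qubits = cudaq.qvector(2)
        h(qubits[0])
        x.ctrl(qubits[0], qubits[1])
        mz(qubits)

    print(cudaq.sample(bell))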

.. note::

This backend requires an NVIDIA GPU and CUDA runtime libraries. If you do not have these dependencies installed, you may encounter an error stating `Invalid simulator requested`. See the section :ref:`dependencies-and-compatibility` for more information about how to install dependencies.

.. deprecated:: 0.8
The :code:`nvidia-fp64` target, which is equivalent to setting the `fp64` option on the :code:`nvidia` target,
is deprecated and will be removed in a future release.

Multi-node multi-GPU
++++++++++++++++++++++++++++++++++

.. _nvidia-mgpu-backend:

The :code:`nvidia-mgpu` target provides a state vector simulator accelerated with
The multi-node multi-GPU NVIDIA target provides a state vector simulator accelerated with
the :code:`cuStateVec` library but with support for Multi-Node, Multi-GPU distribution of the
state vector, in addition to a single GPU.

The multi-node multi-GPU simulator expects to run within an MPI context.
To execute a program on the :code:`nvidia-mgpu` target, use the following commands (adjust the value of the :code:`-np` flag as needed to reflect available GPU resources on your system):
To execute a program on the multi-node multi-GPU NVIDIA target, use the following commands
(adjust the value of the :code:`-np` flag as needed to reflect available GPU resources on your system):

.. tab:: Python

Double precision simulation:

.. code:: bash
mpiexec -np 2 python3 program.py [...] --target nvidia --target-option fp64,mgpu
Single precision simulation:

.. code:: bash
mpiexec -np 2 python3 program.py [...] --target nvidia-mgpu
mpiexec -np 2 python3 program.py [...] --target nvidia --target-option fp32,mgpu
.. note::

@@ -76,28 +90,47 @@ To execute a program on the :code:`nvidia-mgpu` target, use the following comman

.. code:: bash
mpiexec -np 2 python3 -m mpi4py program.py [...] --target nvidia-mgpu
mpiexec -np 2 python3 -m mpi4py program.py [...] --target nvidia --target-option fp64,mgpu
The target can also be defined in the application code by calling

.. code:: python
cudaq.set_target('nvidia-mgpu')
cudaq.set_target('nvidia', option='mgpu,fp64')
If a target is set in the application code, this target will override the :code:`--target` command line flag given during program invocation.

.. note::
(1) The order of the option settings is interchangeable.
For example, `cudaq.set_target('nvidia', option='mgpu,fp64')` is equivalent to `cudaq.set_target('nvidia', option='fp64,mgpu')`.

(2) The `nvidia` target has single-precision as the default setting. Thus, using `option='mgpu'` implies `option='mgpu,fp32'`.

.. tab:: C++

Double precision simulation:

.. code:: bash
nvq++ --target nvidia-mgpu program.cpp [...] -o program.x
nvq++ --target nvidia --target-option mgpu,fp64 program.cpp [...] -o program.x
mpiexec -np 2 ./program.x
Single precision simulation:

.. code:: bash
nvq++ --target nvidia --target-option mgpu,fp32 program.cpp [...] -o program.x
mpiexec -np 2 ./program.x
.. note::

This backend requires an NVIDIA GPU, CUDA runtime libraries, as well as an MPI installation. If you do not have these dependencies installed, you may encounter either an error stating `invalid simulator requested` (missing CUDA libraries), or an error along the lines of `failed to launch kernel` (missing MPI installation). See the section :ref:`dependencies-and-compatibility` for more information about how to install dependencies.

The :code:`nvidia-mgpu` backend has additional performance improvements to
.. deprecated:: 0.8
The :code:`nvidia-mgpu` target, which is equivalent to the multi-node multi-GPU double-precision option (`mgpu,fp64`) of the :code:`nvidia` target,
is deprecated and will be removed in a future release.

The :code:`nvidia` backend has additional performance improvements to
help reduce your simulation runtimes, even on a single GPU. One of the
performance improvements is to fuse multiple gates together during runtime. For
example, :code:`x(qubit0)` and :code:`x(qubit1)` can be fused together into a
@@ -114,13 +147,13 @@ environment variable to another integer value as shown below.

.. code:: bash
CUDAQ_MGPU_FUSE=5 mpiexec -np 2 python3 program.py [...] --target nvidia-mgpu
CUDAQ_MGPU_FUSE=5 mpiexec -np 2 python3 program.py [...] --target nvidia --target-option mgpu,fp64
.. tab:: C++

.. code:: bash
nvq++ --target nvidia-mgpu program.cpp [...] -o program.x
nvq++ --target nvidia --target-option mgpu,fp64 program.cpp [...] -o program.x
CUDAQ_MGPU_FUSE=5 mpiexec -np 2 ./program.x
.. _OpenMP CPU-only:
@@ -11,7 +11,7 @@ Available Targets
- **`qpp-cpu`**: The QPP based CPU backend which is multithreaded to
maximize the usage of available cores on your system.

- **`nvidia`**: Single GPU based backend which accelerates quantum circuit
- **`nvidia`**: GPU-accelerated state-vector based backend which accelerates quantum circuit
simulation on NVIDIA GPUs powered by cuQuantum.

- **`nvidia-mgpu`**: Allows for scaling circuit simulation on multiple GPUs.
2 changes: 1 addition & 1 deletion pr-1969/api/languages/python_api.html
@@ -2064,7 +2064,7 @@ <h2>Data Types<a class="headerlink" href="#data-types" title="Permalink to this
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">random</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#cudaq.SpinOperator.random" title="Permalink to this definition"></a></dt>
<dd><dl class="py function">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">random</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">qubit_count</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">term_count</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">seed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1221185055</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#cudaq.SpinOperator" title="cudaq.mlir._mlir_libs._quakeDialects.cudaq_runtime.SpinOperator"><span class="pre">cudaq.mlir._mlir_libs._quakeDialects.cudaq_runtime.SpinOperator</span></a></span></span></dt>
<span class="sig-name descname"><span class="pre">random</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">qubit_count</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">term_count</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">seed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><span class="pre">int</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">818229446</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#cudaq.SpinOperator" title="cudaq.mlir._mlir_libs._quakeDialects.cudaq_runtime.SpinOperator"><span class="pre">cudaq.mlir._mlir_libs._quakeDialects.cudaq_runtime.SpinOperator</span></a></span></span></dt>
<dd></dd></dl>

<p>Return a random <a class="reference internal" href="#cudaq.SpinOperator" title="cudaq.SpinOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">SpinOperator</span></code></a> on the given number of qubits (<code class="code docutils literal notranslate"><span class="pre">qubit_count</span></code>) and composed of the given number of terms (<code class="code docutils literal notranslate"><span class="pre">term_count</span></code>). An optional seed value may also be provided.</p>

0 comments on commit 6288c3e
