diff --git a/CHANGELOG.md b/CHANGELOG.md index 69d9f78ba9..8d51867d65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ performance of AMD Instinct™ MI300 GPU applications. Notably, we've added: YUV frames in video memory. With decoded frames in video memory, you can run video post-processing using ROCm HIP, avoiding unnecessary data copies via the PCIe bus. - To learn more, refer to our + To learn more, refer to the rocDecode [documentation](https://rocm.docs.amd.com/projects/rocDecode/en/latest/). ### OS and GPU support changes @@ -60,109 +60,6 @@ environments where legacy `DT_RPATH` is the preferred form of linking (instead o do **not** recommend trying to install both sets of packages. ``` -### Library changes in ROCm 6.1.0 - -| Library | Version | -|---------|---------| -| AMDMIGraphX | 2.8 ⇒ [2.9](https://github.com/ROCm/AMDMIGraphX/releases/tag/rocm-6.1.0) | -| hipBLAS | 2.0.0 ⇒ [2.1.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-6.1.0) | -| hipBLASLt | [0.7.0](https://github.com/ROCm/hipBLASLt/releases/tag/rocm-6.1.0) | -| hipCUB | 3.0.0 ⇒ [3.1.0](https://github.com/ROCm/hipCUB/releases/tag/rocm-6.1.0) | -| hipFFT | 1.0.13 ⇒ [1.0.14](https://github.com/ROCm/hipFFT/releases/tag/rocm-6.1.0) | -| hipRAND | [2.10.17](https://github.com/ROCm/hipRAND/releases/tag/rocm-6.1.0) | -| hipSOLVER | 2.0.0 ⇒ [2.1.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-6.1.0) | -| hipSPARSE | 3.0.0 ⇒ [3.0.1](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-6.1.0) | -| hipSPARSELt | [0.2.0](https://github.com/ROCm/hipSPARSELt/releases/tag/rocm-6.1.0) | -| hipTensor | 1.1.0 ⇒ [1.2.0](https://github.com/ROCm/hipTensor/releases/tag/rocm-6.1.0) | -| MIOpen | 2.19.0 ⇒ [3.1.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-6.1.0) | -| rccl | [2.18.6](https://github.com/ROCm/rccl/releases/tag/rocm-6.1.0) | -| rocALUTION | 3.0.3 ⇒ [3.1.1](https://github.com/ROCm/rocALUTION/releases/tag/rocm-6.1.0) | -| rocBLAS | 4.0.0 ⇒ 
[4.1.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-6.1.0) | -| rocDecode | [0.5.0](https://github.com/ROCm/rocDecode/releases/tag/rocm-6.1.0) | -| rocFFT | 1.0.25 ⇒ [1.0.26](https://github.com/ROCm/rocFFT/releases/tag/rocm-6.1.0) | -| rocm-cmake | 0.11.0 ⇒ [0.12.0](https://github.com/ROCm/rocm-cmake/releases/tag/rocm-6.1.0) | -| rocPRIM | 3.0.0 ⇒ [3.1.0](https://github.com/ROCm/rocPRIM/releases/tag/rocm-6.1.0) | -| rocRAND | 3.0.0 ⇒ [3.0.1](https://github.com/ROCm/rocRAND/releases/tag/rocm-6.1.0) | -| rocSOLVER | 3.24.0 ⇒ [3.25.0](https://github.com/ROCm/rocSOLVER/releases/tag/rocm-6.1.0) | -| rocSPARSE | 3.0.2 ⇒ [3.1.2](https://github.com/ROCm/rocSPARSE/releases/tag/rocm-6.1.0) | -| rocThrust | 3.0.0 ⇒ [3.0.1](https://github.com/ROCm/rocThrust/releases/tag/rocm-6.1.0) | -| rocWMMA | 1.3.0 ⇒ [1.4.0](https://github.com/ROCm/rocWMMA/releases/tag/rocm-6.1.0) | -| Tensile | 4.39.0 ⇒ [4.40.0](https://github.com/ROCm/Tensile/releases/tag/rocm-6.1.0) | - - -#### AMDMIGraphX - -MIGraphX 2.9 for ROCm 6.1.0 - -##### Additions - -* Added FP8 support -* Created a Docker file with MIGraphX+ONNX Runtime EP+Torch -* Added support for the `Hardmax`, `DynamicQuantizeLinear`, `Qlinearconcat`, `Unique`, - `QLinearAveragePool`, `QLinearSigmoid`, `QLinearLeakyRelu`, `QLinearMul`, `IsInf` operators -* Created web site examples for `Whisper`, `Llama-2`, and `Stable Diffusion 2.1` -* Created examples of using the ONNX Runtime MIGraphX Execution Provider with the `InceptionV3` - and `Resnet50` models -* Updated operators to support ONNX Opset 19 -* Enable `fuse_pointwise` and `fuse_reduce` in the driver -* Add support for `dot-(mul)-softmax-dot` offloads to MLIR -* Added BLAS auto-tuning for GEMMs -* Added dynamic shape support for the multinomial operator -* Added FP16 to accuracy checker -* Added initial code for running on Windows OS - -##### Optimizations - -* Improved the output of `migraphx-driver` command -* Documentation now shows all environment variables -* Updates needed 
for general stride support -* Enabled asymmetric quantization -* Added ScatterND unsupported reduction modes -* Rewrote Softmax for better performance -* General improvement to how quantization is performed to support INT8 -* Used `problem_cache` for GEMM tuning -* Improved performance by always using rocMLIR for quantized convolution -* Improved group convolutions by using rocMLIR -* Improved accuracy of FP16 models -* ScatterElements unsupported reduction -* Added concat fusions -* Improved INT8 support to include UINT8 -* Allow reshape ops between `dq` and `quant_op` -* Improve dpp reductions on RDNA -* Have the accuracy checker print the whole final buffer -* Added support for handling dynamic `Slice` and `ConstantOfShape` ONNX operators -* Add support for the dilations attribute to `Pooling` operations -* Add layout attribute support for LSTM operator -* Improved performance by removing contiguous for reshapes -* Handle all slice input variations -* Add scales attribute parse in upsample for older opset versions -* Added support for uneven Split operations -* Improved unit testing to run in Python virtual environments - -##### Fixes - -* Fixed outstanding issues in autogenerated documentation -* Updated model zoo paths for examples -* Fixed `promote_literals_test` by using additional if condition -* Fixed export API symbols from dynamic library -* Fixed bug in pad operator from dimension reduction -* Fixed using the LD to embed files and enable by default when building shared libraries on Linux -* Fixed `get_version()` -* Fixed Round operator inaccuracy -* Fixed wrong size check when axes not present for slice -* Set the `.SO` version correctly - -##### Changes - -* Cleanup LSTM and RNN activation functions -* Placed `gemm_pointwise` at a higher priority than `layernorm_pointwise` -* Updated README to mention the need to include `GPU_TARGETS` when building MIGraphX - -##### Removals - -* Removed unused device kernels from Gather and Pad operators -* Removed 
INT8x4 format - #### AMD SMI AMD SMI for ROCm 6.1.0 @@ -235,26 +132,6 @@ AMD SMI for ROCm 6.1.0 * bad pages results with "ValueError: NULL pointer access" * Some RDNA3 cards may enumerate to `Slot type = UNKNOWN` -#### Composable Kernel - -Composable Kernel for ROCm 6.1.0 - -##### Additions - -* Added generic instances for GEMM XDL operations -* Added gamma and beta parameters for the `layernorm` and `groupnorm` bwd operations -* Introduced wrapper sublibrary (limited functionality). -* Added an option to vary the number of warm-up cycles and iterations for ckProfiler - -##### Optimizations - -* New performance optimizations for GEMM operations on MI200 and MI300 architectures - -##### Fixes - -* Reduced the build time for most GPU architectures -* Fixed some conversion issues for FP8 data type - #### HIP HIP 6.1 for ROCm 6.1 @@ -291,424 +168,518 @@ HIP 6.1 for ROCm 6.1 Note that these complex operations are equivalent to corresponding types/functions on an NVIDIA platform. -#### rocRAND +#### ROCm Compiler -rocRAND 3.0.1 for ROCm 6.1.0 +ROCm Compiler for ROCm 6.1.0 + +##### Additions + +* Compiler now generates `.uniform_work_group_size` and records it in the metadata. It indicates if the + kernel requires that each dimension of global size is a multiple of the corresponding dimension of + work-group size. A value of 1 is true, and 0 is false. This metadata is only provided when the value is + 1. +* Added the `rocm-llvm-docs` package. +* Added ROCm Device-Libs, ROCm Compiler Support, and hipCC within the `llvm-project/amd` + subdirectory to AMD’s fork of the LLVM project. +* Added support for C++ Parallel Algorithm Offload via HIP (HIPSTDPAR), which allows parallel + algorithms to run on the GPU. + +##### Changes + +* `rocm-clang-ocl` is now an optional package and will require manual installation. + +##### Deprecations + +* hipCC adds `-mllvm`, `-amdgpu-early-inline-all=true`, and `-mllvm` `-amdgpu-function-calls=false` by + default to compiler invocations. 
These flags will be removed from hipCC in a future ROCm release. ##### Fixes -* Implemented workaround for regressions in XORWOW and LFSR on MI200. +AddressSanitizer (ASan): +* Added `sanitized_padded_global` LLVM IR attribute to identify sanitizer instrumented globals. +* For ASan instrumented global, emit two symbols: one with actual size and the other with + instrumented size. + + [On GitHub](https://github.com/ROCm/ROCm/issues/2551) -#### rocWMMA +##### Known issues -rocWMMA 1.4.0 for ROCm 6.1.0 +* Due to an issue within the `amd-llvm` compiler shipping with ROCm 6.1, HIPSTDPAR's interposition mode, which is enabled by `--hipstdpar-interpose-alloc`, is currently broken. -##### Additions +The temporary workaround is to use the upstream LLVM 18 (or newer) compiler. This issue will be addressed in a future ROCm release. -* Added BF16 support for hipRTC sample +#### ROCm Data Center (RDC) + +RDC for ROCm 6.1.0 ##### Changes -* Changed Clang C++ version to C++17 -* Updated rocwmma_coop API -* Linked rocWMMA to hiprtc +* Added `--address` flag to rdcd +* Upgraded from C++11 to C++17 +* Upgraded gRPC + +#### ROCDebugger (ROCgdb) + +ROCgdb for ROCm 6.1.0 ##### Fixes -* Fixed compile/runtime arch checks -* Built all test in large code model -* Removed inefficient branching in layout loop unrolling +Previously, ROCDebugger encountered hangs and crashes when stepping over the `s_endpgm` +instruction at the end of a HIP kernel entry function, which caused the stepped wave to exit. This issue +is fixed in the ROCm 6.1 release. You can now step over the last instruction of any HIP kernel without +debugger hangs or crashes.
-#### hipBLAS +#### ROCm SMI -hipBLAS 2.1.0 for ROCm 6.1.0 +ROCm SMI for ROCm 6.1.0 ##### Additions -* New build option to automatically use `hipconfig --platform` to determine HIP platform -* Level 1 functions have additional ILP64 API for both C and Fortran (`_64` name suffix) with `int64_t` - function arguments -* New functions: `hipblasGetMathMode`, `hipblasSetMathMode` - -##### Deprecations +* **Added support to set max/min clock level for sclk ('RSMI_CLK_TYPE_SYS') or mclk ('RSMI_CLK_TYPE_MEM')**. + You can now set a maximum or minimum `sclk` or `mclk` value through the + `rsmi_dev_clk_extremum_set()` API, provided the ASIC supports it. Alternatively, you can use our Python CLI + tool (`rocm-smi --setextremum max sclk 1500`). -* `USE_CUDA` build option; instead, use `HIP_PLATFORM=amd` or `HIP_PLATFORM=nvidia` to override - `hipconfig` +* **Added `rsmi_dev_target_graphics_version_get()`**. You can now query through ROCm SMI API + (`rsmi_dev_target_graphics_version_get()`) to retrieve the target graphics version for a GPU device. + Currently, this output is not supplied through our ROCm SMI CLI. ##### Changes -* Some Level 2 function argument names have changed from `m` to `n` to match legacy BLAS; there - was no change in implementation. -* Updated client code to use YAML-based testing -* Renamed `.doxygen` and `.sphinx` folders to `doxygen` and `sphinx`, respectively -* Added CMake support for documentation +* **Removed non-unified API headers: Individual GPU metric APIs are no longer supported**. + The individual metric APIs (`rsmi_dev_metrics_*`) were removed in order to keep updates easier for + new GPU metric support. By providing a simple API (`rsmi_dev_gpu_metrics_info_get()`) with its + reported device metrics, it is worth noting there is a risk for ABI breakage using + `rsmi_dev_gpu_metrics_info_get()`. It is vital to understand that ABI breaks are necessary (in some + cases) in order to support newer ASICs and metrics for our customers.
We will continue to support + `rsmi_dev_gpu_metrics_info_get()` with these considerations and limitations in mind. -#### hipTensor - -hipTensor 1.2.0 for ROCm 6.1.0 - -##### Additions - -* Added API support for permutation of rank 4 tensors: F16 and F32 -* New datatype support in contractions of rank 4: F16, BF16, complex F32, complex F64 -* Added scale and bilinear contraction samples and tests for new supported data types -* Added permutation samples and tests for F16, F32 types - -##### Fixes - -* Fixed bug in contraction calculation with data type F32 +* **Deprecated 'rsmi_dev_power_ave_get()'; use the newer API, 'rsmi_dev_power_get()'**. As + outlined in the change for 6.0.0 (*Added a generic power API: rsmi_dev_power_get*), `rsmi_dev_power_ave_get()` is now + deprecated. You must update your ROCm SMI API calls accordingly. -#### hipBLASLt +##### Fixes -hipBLASLt 0.7.0 for ROCm 6.1.0 +* Fixed `--showpids` reporting `[PID] [PROCESS NAME] 1 UNKNOWN UNKNOWN UNKNOWN`. + Output was failing because the `cu_occupancy debugfs` method is not provided on some graphics cards + by design. `get_compute_process_info_by_pid` was updated to reflect this and returns with the output + needed by the CLI. -##### Additions +* Fixed `rocm-smi --showpower` output, which was inconsistent on some RDNA3 devices. + We updated this to use `rsmi_dev_power_get()` within the CLI to provide a consistent device power + output. This was caused by using the now-deprecated `rsmi_dev_power_ave_get()` API.
-* Extension APIs: - * `hipblasltExtSoftmax` - * `hipblasltExtLayerNorm` - * `hipblasltExtAMax` -* `GemmTuning` extension parameter to set split-k by user -* Support for mixed-precision datatype FP16/FP8 in with FP16 out +* Fixed `rocm-smi --setcomputepartition` and `rocm-smi --resetcomputepartition` to notate if device is + `EBUSY` -#### hipCUB +* Fixed `rocm-smi --setmemorypartition` and `rocm-smi --resetmemorypartition` read only SYSFS to + return `RSMI_STATUS_NOT_SUPPORTED` + The `rsmi_dev_memory_partition_set` API is updated to handle the read-only SYSFS check. + Corresponding tests and CLI (`rocm-smi --setmemorypartition` and + `rocm-smi --resetmemorypartition`) calls were updated accordingly. -hipCUB 3.1.0 for ROCm 6.1.0 +* Fixed `rocm-smi --showclkvolt` and `rocm-smi --showvc`, which were displaying 0 for overdrive and + that the voltage curve is not supported. -##### Changes +#### ROCProfiler -* CUB backend references CUB and Thrust version 2.1.0. -* Updated `HIPCUB_HOST_WARP_THREADS` macro definition to match `host_warp_size` changes from - rocPRIM 3.0. -* Implemented `__int128_t` and `__uint128_t` support for `radix_sort`. +ROCProfiler for ROCm 6.1.0 ##### Fixes -* Fixed build issues with `rmake.py` on Windows when using VS 2017 15.8 or later due to a breaking fix - with extended aligned storage. +* Fixed ROCprofiler to match versioning changes in HIP Runtime +* Fixed plugins race condition +* Updated metrics to MI300 -##### Additions +#### ROCm Validation Suite -* Added interface `DeviceMemcpy::Batched` for batched `memcpy` from rocPRIM and CUB. +##### Known issue -#### hipFFT +* In a future release, the ROCm Validation Suite P2P Benchmark and Qualification Tool (PBQT) tests will be optimized to meet the target bandwidth requirements for MI300X. 
-hipFFT 1.0.14 for ROCm 6.1.0 + [On GitHub](https://github.com/ROCm/ROCm/issues/3027) -##### Changes +#### MI200 SR-IOV -* When building hipFFT from source, rocFFT code no longer needs to be initialized as a Git submodule. +##### Known issue -##### Fixes +* Multimedia applications may encounter compilation errors in the MI200 Single Root Input/Output Virtualization (SR-IOV) environment. This is because MI200 SR-IOV does not currently support multimedia applications. -* Fixed error when creating length-1 plans. + [On GitHub](https://github.com/ROCm/ROCm/issues/3028) -#### HIPIFY +### AMD MI300A RAS -HIPIFY for ROCm 6.1.0 +#### Fixed defect -##### Additions +##### GFX correctable and uncorrectable error inject failures -* CUDA 12.3.2 support -* cuDNN 8.9.7 support -* LLVM 17.0.6 support -* Full `hipSOLVER` support -* Full `rocSPARSE` support -* New option: `--amap`, which will hipify as much as possible, ignoring `--default-preprocessor` - behavior +* Previously, the AMD CPU Reliability, Availability, and Serviceability (RAS) installation encountered correctable and uncorrectable failures while injecting an error. -##### Fixes + This issue is resolved in the ROCm 6.1 release, and users will no longer encounter the GFX correctable error (CE) and uncorrectable error (UE) failures. 
-* Code blocks skipped by the preprocessor are no longer hipified under the `--default-preprocessor` - option +### Library changes in ROCm 6.1.0 -#### hipSOLVER +| Library | Version | +|---------|---------| +| AMDMIGraphX | 2.8 ⇒ [2.9](https://github.com/ROCm/AMDMIGraphX/releases/tag/rocm-6.1.0) | +| composable_kernel | [0.2.0](https://github.com/ROCm/composable_kernel/releases/tag/rocm-6.1.0) | +| hipBLAS | 2.0.0 ⇒ [2.1.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-6.1.0) | +| hipBLASLt | [0.7.0](https://github.com/ROCm/hipBLASLt/releases/tag/rocm-6.1.0) | +| hipCUB | 3.0.0 ⇒ [3.1.0](https://github.com/ROCm/hipCUB/releases/tag/rocm-6.1.0) | +| hipFFT | 1.0.13 ⇒ [1.0.14](https://github.com/ROCm/hipFFT/releases/tag/rocm-6.1.0) | +| hipRAND | [2.10.17](https://github.com/ROCm/hipRAND/releases/tag/rocm-6.1.0) | +| hipSOLVER | 2.0.0 ⇒ [2.1.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-6.1.0) | +| hipSPARSE | 3.0.0 ⇒ [3.0.1](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-6.1.0) | +| hipSPARSELt | [0.2.0](https://github.com/ROCm/hipSPARSELt/releases/tag/rocm-6.1.0) | +| hipTensor | 1.1.0 ⇒ [1.2.0](https://github.com/ROCm/hipTensor/releases/tag/rocm-6.1.0) | +| MIOpen | 2.19.0 ⇒ [3.1.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-6.1.0) | +| MIVisionX | [2.5.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-6.1.0) | +| rccl | [2.18.6](https://github.com/ROCm/rccl/releases/tag/rocm-6.1.0) | +| rocALUTION | 3.0.3 ⇒ [3.1.1](https://github.com/ROCm/rocALUTION/releases/tag/rocm-6.1.0) | +| rocBLAS | 4.0.0 ⇒ [4.1.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-6.1.0) | +| rocDecode | [0.5.0](https://github.com/ROCm/rocDecode/releases/tag/rocm-6.1.0) | +| rocFFT | 1.0.25 ⇒ [1.0.26](https://github.com/ROCm/rocFFT/releases/tag/rocm-6.1.0) | +| rocm-cmake | 0.11.0 ⇒ [0.12.0](https://github.com/ROCm/rocm-cmake/releases/tag/rocm-6.1.0) | +| rocPRIM | 3.0.0 ⇒ [3.1.0](https://github.com/ROCm/rocPRIM/releases/tag/rocm-6.1.0) | +| rocRAND | 3.0.0 ⇒ 
[3.0.1](https://github.com/ROCm/rocRAND/releases/tag/rocm-6.1.0) | +| rocSOLVER | 3.24.0 ⇒ [3.25.0](https://github.com/ROCm/rocSOLVER/releases/tag/rocm-6.1.0) | +| rocSPARSE | 3.0.2 ⇒ [3.1.2](https://github.com/ROCm/rocSPARSE/releases/tag/rocm-6.1.0) | +| rocThrust | 3.0.0 ⇒ [3.0.1](https://github.com/ROCm/rocThrust/releases/tag/rocm-6.1.0) | +| rocWMMA | 1.3.0 ⇒ [1.4.0](https://github.com/ROCm/rocWMMA/releases/tag/rocm-6.1.0) | +| rpp | 1.4.0 ⇒ [1.5.0](https://github.com/ROCm/rpp/releases/tag/rocm-6.1.0) | +| Tensile | 4.39.0 ⇒ [4.40.0](https://github.com/ROCm/Tensile/releases/tag/rocm-6.1.0) | -hipSOLVER 2.1.0 for ROCm 6.1.0 +#### AMDMIGraphX 2.9 + +MIGraphX 2.9 for ROCm 6.1.0 ##### Additions -* Added compatibility API with `hipsolverSp` prefix -* Added compatibility-only functions - * `csrlsvchol` - * `hipsolverSpScsrlsvcholHost`, `hipsolverSpDcsrlsvcholHost` - * `hipsolverSpScsrlsvchol`, `hipsolverSpDcsrlsvchol` -* Added rocSPARSE and SuiteSparse as optional dependencies to hipSOLVER (rocSOLVER backend - only). Use the `BUILD_WITH_SPARSE` CMake option to enable functionality for the `hipsolverSp` API - (on by default). -* Added hipSPARSE as an optional dependency to `hipsolver-test`. Use the `BUILD_WITH_SPARSE` - CMake option to enable tests of the `hipsolverSp` API (on by default). 
+* Added FP8 support +* Created a dockerfile with MIGraphX+ONNX Runtime EP+Torch +* Added support for the `Hardmax`, `DynamicQuantizeLinear`, `Qlinearconcat`, `Unique`, `QLinearAveragePool`, `QLinearSigmoid`, `QLinearLeakyRelu`, `QLinearMul`, `IsInf` operators +* Created web site examples for `Whisper`, `Llama-2`, and `Stable Diffusion 2.1` +* Created examples of using the ONNX Runtime MIGraphX Execution Provider with the `InceptionV3` and `Resnet50` models +* Updated operators to support ONNX Opset 19 +* Enable fuse_pointwise and fuse_reduce in the driver +* Add support for dot-(mul)-softmax-dot offloads to MLIR +* Added Blas auto-tuning for GEMMs +* Added dynamic shape support for the multinomial operator +* Added fp16 to accuracy checker +* Added initial code for running on Windows OS -##### Changes +##### Optimizations -* Relax array length requirements for GESVDA +* Improved the output of migraphx-driver command +* Documentation now shows all environment variables +* Updates needed for general stride support +* Enabled Asymmetric Quantization +* Added ScatterND unsupported reduction modes +* Rewrote softmax for better performance +* General improvement to how quantization is performed to support INT8 +* Used problem_cache for gemm tuning +* Improved performance by always using rocMLIR for quantized convolution +* Improved group convolutions by using rocMLIR +* Improved accuracy of fp16 models +* ScatterElements unsupported reduction +* Added concat fusions +* Improved INT8 support to include UINT8 +* Allow reshape ops between dq and quant_op +* Improve dpp reductions on navi +* Have the accuracy checker print the whole final buffer +* Added support for handling dynamic Slice and ConstantOfShape ONNX operators +* Add support for the dilations attribute to Pooling ops +* Add layout attribute support for LSTM operator +* Improved performance by removing contiguous for reshapes +* Handle all slice input variations +* Add scales attribute parse in upsample for 
older opset versions +* Added support for uneven Split operations +* Improved unit testing to run in python virtual environments ##### Fixes -* Fixed incorrect singular vectors returned from GESVDA +* Fixed outstanding issues in autogenerated documentation +* Update model zoo paths for examples +* Fixed promote_literals_test by using additional if condition +* Fixed export API symbols from dynamic library +* Fixed bug in pad operator from dimension reduction +* Fixed using the LD to embed files and enable by default when building shared libraries on linux +* fixed get_version() +* Fixed Round operator inaccuracy +* Fixed wrong size check when axes not present for slice +* Set the .SO version correctly -#### hipSPARSE +##### Changes -hipSPARSE 3.0.1 for ROCm 6.1.0 +* Cleanup LSTM and RNN activation functions +* Placed gemm_pointwise at a higher priority than layernorm_pointwise +* Updated README to mention the need to include GPU_TARGETS when building MIGraphX -##### Fixes +##### Removals -* Fixes to the build chain +* Removed unused device kernels from Gather and Pad operators +* Removed int8x4 format -#### hipSPARSELt +#### hipBLAS 2.1.0 -hipSPARSELt 0.2.0 for ROCm 6.1.0 +hipBLAS 2.1.0 for ROCm 6.1.0 ##### Additions -* Support Matrix B is a Structured Sparsity Matrix. +* New build option to automatically use hipconfig --platform to determine HIP platform +* Level 1 functions have additional ILP64 API for both C and Fortran (`_64` name + suffix) with int64_t function arguments +* New functions hipblasGetMathMode and hipblasSetMathMode -#### rocm-cmake +##### Deprecations -rocm-cmake 0.12.0 for ROCm 6.1.0 +* USE_CUDA build option; use HIP_PLATFORM=amd or HIP_PLATFORM=nvidia to override hipconfig ##### Changes -* ROCMSphinxDoc: Allow separate source and config directories. -* ROCMCreatePackage: Allow additional `PROVIDES` on header-only packages. -* ROCMInstallTargets: Don't install executable targets by default for ASAN builds. 
-* ROCMTest: Add RPATH for installed tests. -* Finalize rename to ROCmCMakeBuildTools +* Some Level 2 function argument names have changed from `m` to `n` to match legacy BLAS; there + was no change in implementation. +* Updated client code to use YAML-based testing +* Renamed `.doxygen` and `.sphinx` folders to `doxygen` and `sphinx`, respectively +* Added CMake support for documentation -##### Fixes +#### hipBLASLt 0.7.0 -* ROCMClangTidy: Fixed invalid list index. -* Test failures when ROCM_CMAKE_GENERATOR is empty. +hipBLASLt 0.7.0 for ROCm 6.1.0 -#### MIOpen +##### Additions -MIOpen 3.1.0 for ROCm 6.1.0 +* Added `hipblasltExtSoftmax` extension API +* Added `hipblasltExtLayerNorm` extension API +* Added `hipblasltExtAMax` extension API +* Added `GemmTuning` extension parameter to set split-k by user +* Support for mix precision datatype: fp16/fp8 in with fp16 out -##### Additions +#### hipCUB 3.1.0 -* CK-based 2D/3D convolution solvers to support `nchw`/`ncdhw` layout -* Fused solver for Fwd Convolution with residual, bias, and activation -* AI-based parameter prediction model for `conv_hip_igemm_group_fwd_xdlops` Solver -* Forward and backward data, and backward weight convolution solver with FP8/BFP8 -* Check for packed tensors for convolution solvers -* Integrated CK's layer norm -* Combined GoogleTests into single binary +hipCUB 3.1.0 for ROCm 6.1.0 -##### Fixes +##### Changed -* Fix for backward passes bwd/wrw for CK group conv 3D -* Fixed out-of-bounds memory access: `ConvOclDirectFwdGen` -* Fixed build failure due to hipRTC +- CUB backend references CUB and Thrust version 2.1.0. +- Updated `HIPCUB_HOST_WARP_THREADS` macro definition to match `host_warp_size` changes from rocPRIM 3.0. +- Implemented `__int128_t` and `__uint128_t` support for radix_sort. 
-##### Changes +##### Fixed -* Standardized workspace abstraction -* Use split CK libraries +- Fixed build issues with `rmake.py` on Windows when using VS 2017 15.8 or later due to a breaking fix with extended aligned storage. -##### Removals +##### Added -* Clamping to MAX from CastTensor used in Bwd and WrW convolution +- Added interface `DeviceMemcpy::Batched` for batched memcpy from rocPRIM and CUB. -#### RCCL +#### hipFFT 1.0.14 -RCCL 2.18.6 for ROCm 6.1.0 +hipFFT 1.0.14 for ROCm 6.1.0 ##### Changes -* Compatibility with NCCL 2.18.6 +* When building hipFFT from source, rocFFT code no longer needs to be initialized as a git submodule. +##### Fixes -#### rocALUTION +* Fixed error when creating length-1 plans. -rocALUTION 3.1.1 for ROCm 6.1.0 +#### hipSOLVER 2.1.0 -##### Additions +hipSOLVER 2.1.0 for ROCm 6.1.0 -* `TripleMatrixProduct` functionality for `GlobalMatrix` -* Multi-Node/GPU support for `UA-AMG`, `SA-AMG`, and `RS-AMG` -* Iterative ILU0 preconditioner `ItILU0` -* Iterative triangular solve, selectable via `SolverDecr` class +##### Added -##### Deprecations +- Added compatibility API with hipsolverSp prefix +- Added compatibility-only functions + - csrlsvchol + - hipsolverSpScsrlsvcholHost, hipsolverSpDcsrlsvcholHost + - hipsolverSpScsrlsvchol, hipsolverSpDcsrlsvchol +- Added rocSPARSE and SuiteSparse as optional dependencies to hipSOLVER (rocSOLVER backend only). Use the `BUILD_WITH_SPARSE` CMake option to enable + functionality for the hipsolverSp API (on by default). +- Added hipSPARSE as an optional dependency to hipsolver-test. Use the `BUILD_WITH_SPARSE` CMake option to enable tests of the hipsolverSp API (on by default). -* `LocalMatrix::AMGConnect` -* `LocalMatrix::AMGAggregate` -* `LocalMatrix::AMGPMISAggregate` -* `LocalMatrix::AMGSmoothedAggregation` -* `LocalMatrix::AMGAggregation` -* `PairwiseAMG` +##### Changed -##### Known issues +- Relax array length requirements for GESVDA. 
-* `PairwiseAMG` does not currently support matrix sizes that exceed INT32 range -* `PairwiseAMG` might fail when building the hierarchy on certain input matrices +##### Fixed -#### rocBLAS +- Fixed incorrect singular vectors returned from GESVDA. -rocBLAS 4.1.0 for ROCm 6.1.0 +#### hipSPARSE 3.0.1 -##### Additions +hipSPARSE 3.0.1 for ROCm 6.1.0 + +##### Fixes -* Level 1 and Level 1 Extension functions have additional ILP64 API for both C and FORTRAN (`_64` - name suffix) with `int64_t` function arguments. -* Cache flush timing for `gemm_ex`. +* Fixes to the build chain -##### Changes +#### hipSPARSELt 0.2.0 -* Some Level 2 function argument names have changed 'm' to 'n' to match legacy BLAS, there was no - change in implementation. -* Standardized the use of non-blocking streams for copying results from device to host. +hipSPARSELt 0.2.0 for ROCm 6.1.0 + +##### Added + +- Support Matrix B is a Structured Sparsity Matrix. + +#### hipTensor 1.2.0 + +hipTensor 1.2.0 for ROCm 6.1.0 + +##### Additions + +* API support for permutation of rank 4 tensors: f16 and f32 +* New datatype support in contractions of rank 4: f16, bf16, complex f32, complex f64 +* Added scale and bilinear contraction samples and tests for new supported data types +* Added permutation samples and tests for f16, f32 types ##### Fixes -* Fixed host-pointer mode reductions for non-blocking streams. 
+* Fixed bug in contraction calculation with data type f32 -#### rocFFT +#### MIOpen 3.1.0 -rocFFT 1.0.26 for ROCm 6.1.0 +MIOpen 3.1.0 for ROCm 6.1.0 -##### Changes +##### Added -* Multi-device FFTs now allow batch greater than 1 -* Multi-device, real-complex FFTs are now supported -* rocFFT now statically links libstdc++ when only `std::experimental::filesystem` is available (to guard - against ABI incompatibilities with newer libstdc++ libraries that include `std::filesystem`) +- CK-based 2d/3d convolution solvers to support nchw/ncdhw layout +- Fused solver for Fwd Convolution with Residual, Bias and activation +- AI Based Parameter Prediction Model for conv_hip_igemm_group_fwd_xdlops Solver +- Forward, backward data and backward weight convolution solver with fp8/bfp8 +- check for packed tensors for convolution solvers +- Integrate CK's layer norm +- Combine gtests into single binary -#### ROCm Compiler +##### Fixed -ROCm Compiler for ROCm 6.1.0 +- fix for backward passes bwd/wrw for CK group conv 3d +- Fixed out-of-bounds memory access : ConvOclDirectFwdGen +- fixed build failure due to hipRTC -##### Additions +##### Changed -* Compiler now generates `.uniform_work_group_size` and records it in the metadata. It indicates if the - kernel requires that each dimension of global size is a multiple of the corresponding dimension of - work-group size. A value of 1 is true, and 0 is false. This metadata is only provided when the value is - 1. -* Added the `rocm-llvm-docs` package. -* Added ROCm Device-Libs, ROCm Compiler Support, and hipCC within the `llvm-project/amd` - subdirectory to AMD’s fork of the LLVM project. -* Added support for C++ Parallel Algorithm Offload via HIP (HIPSTDPAR), which allows parallel - algorithms to run on the GPU. +- Standardize workspace abstraction +- Use split CK libraries -##### Changes +##### Removed -* `rocm-clang-ocl` is now an optional package and will require manual installation. 
+- clamping to MAX from CastTensor used in Bwd and WrW convolution -##### Deprecations +#### rccl 2.18.6 -* hipCC adds `-mllvm`, `-amdgpu-early-inline-all=true`, and `-mllvm` `-amdgpu-function-calls=false` by - default to compiler invocations. These flags will be removed from hipCC in a future ROCm release. +RCCL 2.18.6 for ROCm 6.1.0 -##### Fixes +##### Changed -AddressSanitizer (ASan): -* Added `sanitized_padded_global` LLVM ir attribute to identify sanitizer instrumented globals. -* For ASan instrumented global, emit two symbols: one with actual size and the other with - instrumented size. +- Compatibility with NCCL 2.18.6 -##### Known issues +#### rocALUTION 3.1.1 -* Due to an issue within the `amd-llvm` compiler shipping with ROCm 6.1, HIPSTDPAR's interposition mode, which is enabled by `--hipstdpar-interpose-alloc` is currently broken. +rocALUTION 3.1.1 for ROCm 6.1.0 -The temporary workaround is to use the upstream LLVM 18 (or newer) compiler. This issue will be addressed in a future ROCm release ." 
+##### Additions -#### ROCm Data Center (RDC) +* `TripleMatrixProduct` functionality for `GlobalMatrix` +* Multi-Node/GPU support for `UA-AMG`, `SA-AMG` and `RS-AMG` +* Iterative ILU0 preconditioner `ItILU0` +* Iterative triangular solve, selectable via `SolverDecr` class -RDC for ROCm 6.1.0 +##### Deprecations + +* `LocalMatrix::AMGConnect` +* `LocalMatrix::AMGAggregate` +* `LocalMatrix::AMGPMISAggregate` +* `LocalMatrix::AMGSmoothedAggregation` +* `LocalMatrix::AMGAggregation` +* `PairwiseAMG` + +##### Known Issues + +* `PairwiseAMG` does not currently support matrix sizes that exceed int32 range +* `PairwiseAMG` might fail when building the hierarchy on certain input matrices + +#### rocBLAS 4.1.0 + +rocBLAS 4.1.0 for ROCm 6.1.0 -##### Changes +##### Additions -* Added `--address` flag to rdcd -* Upgraded from C++11 to C++17 -* Upgraded gRPC +* Level 1 and Level 1 Extension functions have additional ILP64 API for both C and FORTRAN (_64 name suffix) with int64_t function arguments. +* Cache flush timing for gemm_ex. -#### ROCDebugger (ROCgdb) +##### Changes -ROCgdb for ROCm 6.1.0 +* Some Level 2 function argument names have changed from 'm' to 'n' to match legacy BLAS; there was no change in implementation. +* Standardized the use of non-blocking streams for copying results from device to host. ##### Fixes -Previously, ROCDebugger encountered hangs and crashes when stepping over the `s_endpgm` -instruction at the end of a HIP kernel entry function, which caused the stepped wave to exit. This issue -is fixed in the ROCm 6.1 release. You can now step over the last instruction of any HIP kernel without -debugger hangs or crashes. +* Fixed host-pointer mode reductions for non-blocking streams.
-#### rocDecode +#### rocDecode 0.5.0 rocDecode 0.5.0 for ROCm 6.1.0 -##### Additions -* AMDGPU package support - ##### Changes -* Set up updates -* README updates -##### Optimizations -* Package dependencies +* Changed setup updates +* Added AMDGPU package support +* Optimized package dependencies +* Updated README ##### Fixes -* Minor bug fixes and updates -#### ROCm SMI +* Minor bug fix and updates -ROCm SMI for ROCm 6.1.0 +##### Tested Configurations -##### Additions +* Linux distribution + * Ubuntu - `20.04` / `22.04` +* ROCm: + * rocm-core - `6.1.0.60100-28` + * amdgpu-core - `1:6.1.60100-1731559` +* FFMPEG - `4.2.7` / `4.4.2-0` +* rocDecode Setup Script - `V1.4` -* **Added support to set max/min clock level for sclk ('RSMI_CLK_TYPE_SYS') or mclk ('RSMI_CLK_TYPE_MEM')**. - You can now set a maximum or minimum `sclk` or `mclk` value through the - `rsmi_dev_clk_extremum_set()` API provided ASIC support. Alternatively, you can use our Python CLI - tool (`rocm-smi --setextremum max sclk 1500`). +#### rocFFT 1.0.26 -* **Added `rsmi_dev_target_graphics_version_get()`**. You can now query through ROCm SMI API - (`rsmi_dev_target_graphics_version_get()`) to retreive the target graphics version for a GPU device. - Currently, this output is not supplied through our ROCm SMI CLI. +rocFFT 1.0.26 for ROCm 6.1.0 ##### Changes -* **Removed non-unified API headers: Individual GPU metric APIs are no longer supported**. - The individual metric APIs (`rsmi_dev_metrics_*`) were removed in order to keep updates easier for - new GPU metric support. By providing a simple API (`rsmi_dev_gpu_metrics_info_get()`) with its - reported device metrics, it is worth noting there is a risk for ABI break-age using - `rsmi_dev_gpu_metrics_info_get()`. It is vital to understand that ABI breaks are necessary (in some - cases) in order to support newer ASICs and metrics for our customers. 
We will continue to support - `rsmi_dev_gpu_metrics_info_get()` with these considerations and limitations in mind. - -* **Deprecated 'rsmi_dev_power_ave_get()'; use the newer API, 'rsmi_dev_power_get()'**. As - outlined in the change for 6.0.0 (*Added a generic power API: rsmi_dev_power_get*), is now - deprecated. You must update your ROCm SMI API calls accordingly. +* Multi-device FFTs now allow batch greater than 1 +* Multi-device, real-complex FFTs are now supported +* rocFFT now statically links libstdc++ when only `std::experimental::filesystem` is available (to guard + against ABI incompatibilities with newer libstdc++ libraries that include `std::filesystem`) -##### Fixes +#### rocm-cmake 0.12.0 -* Fixed `--showpids` reporting `[PID] [PROCESS NAME] 1 UNKNOWN UNKNOWN UNKNOWN`. - Output was failing because `cu_occupancy debugfs` method is not provided on some graphics cards - by design. `get_compute_process_info_by_pid` was updated to reflect this and returns with the output - needed by the CLI. +rocm-cmake 0.12.0 for ROCm 6.1.0 -* Fixed `rocm-smi --showpower` output, which was inconsistent on some RDNA3 devices. - We updated this to use `rsmi_dev_power_get()` within the CLI to provide a consistent device power - output. This was caused by using the now-deprecated `rsmi_dev_average_power_get()` API. +##### Changed -* Fixed `rocm-smi --setcomputepartition` and `rocm-smi --resetcomputepartition` to notate if device is - `EBUSY` +- ROCMSphinxDoc: Allow separate source and config directories. +- ROCMCreatePackage: Allow additional `PROVIDES` on header-only packages. +- ROCMInstallTargets: Don't install executable targets by default for ASAN builds. +- ROCMTest: Add RPATH for installed tests. +- Finalize rename to ROCmCMakeBuildTools -* Fixed `rocm-smi --setmemorypartition` and `rocm-smi --resetmemorypartition` read only SYSFS to - return `RSMI_STATUS_NOT_SUPPORTED` - The `rsmi_dev_memory_partition_set` API is updated to handle the read-only SYSFS check. 
- Corresponding tests and CLI (`rocm-smi --setmemorypartition` and - `rocm-smi --resetmemorypartition`) calls were updated accordingly. +##### Fixed -* Fixed `rocm-smi --showclkvolt` and `rocm-smi --showvc`, which were displaying 0 for overdrive and - that the voltage curve is not supported. +- ROCMClangTidy: Fixed invalid list index. +- Test failures when ROCM_CMAKE_GENERATOR is empty. -#### rocPRIM +#### rocPRIM 3.1.0 rocPRIM 3.1.0 for ROCm 6.1.0 ##### Additions -* New primitives: `block_run_length_decode` and `batch_memcpy` +* New primitive: `block_run_length_decode` +* New primitive: `batch_memcpy` ##### Changes @@ -723,7 +694,7 @@ rocPRIM 3.1.0 for ROCm 6.1.0 a `stream` parameter to query the proper device and a `device_id` out parameter * The return type is `hipError_t` * Added support for `__int128_t` in `device_radix_sort` and `block_radix_sort` -* Improved the performance of `match_any` (and `block_histogram`, which uses it) +* Improved the performance of `match_any`, and `block_histogram` which uses it ##### Deprecations @@ -735,46 +706,41 @@ rocPRIM 3.1.0 for ROCm 6.1.0 * Build issues with `rmake.py` on Windows when using VS 2017 15.8 or later (due to a breaking fix with extended aligned storage) -#### ROCProfiler +#### rocRAND 3.0.1 -ROCProfiler for ROCm 6.1.0 +rocRAND 3.0.1 for ROCm 6.1.0 ##### Fixes -* Fixed ROCprofiler to match versioning changes in HIP Runtime -* Fixed plugins race condition -* Updated metrics to MI300 +* Implemented workaround for regressions in XORWOW and LFSR on MI200 -#### rocSOLVER +#### rocSOLVER 3.25.0 rocSOLVER 3.25.0 for ROCm 6.1.0 -##### Additions +##### Added -* EigenSolver routines for symmetric/hermitian matrices using Divide & Conquer and Jacobi - algorithm: - * SYEVDJ (with `batched` and `strided_batched` versions) - * HEEVDJ (with `batched` and `strided_batched` versions) -* Generalized symmetric/hermitian-definite EigenSolvers using Divide & Conquer and Jacobi - algorithm: - * SYGVDJ (with `batched` and 
`strided_batched` versions) - * HEGVDJ (with `batched` and `strided_batched` versions) +- Eigensolver routines for symmetric/hermitian matrices using Divide & Conquer and Jacobi algorithm: + - SYEVDJ (with batched and strided\_batched versions) + - HEEVDJ (with batched and strided\_batched versions) +- Generalized symmetric/hermitian-definite eigensolvers using Divide & Conquer and Jacobi algorithm: + - SYGVDJ (with batched and strided\_batched versions) + - HEGVDJ (with batched and strided\_batched versions) -##### Changes +##### Changed -* Relaxed array length requirements for GESVDX with `rocblas_srange_index` +- Relaxed array length requirements for GESVDX with `rocblas_srange_index`. -##### Removals +##### Removed -* Removed gfx803 and gfx900 from default build targets +- Removed gfx803 and gfx900 from default build targets. -##### Fixes +##### Fixed -* Corrected singular vector normalization in `BDSVDX` and `GESVDX` -* Fixed potential memory access fault in `STEIN`, `SYEVX/HEEVX`, `SYGVX/HEGVX`, `BDSVDX`, and - `GESVDX` +- Corrected singular vector normalization in BDSVDX and GESVDX +- Fixed potential memory access fault in STEIN, SYEVX/HEEVX, SYGVX/HEGVX, BDSVDX and GESVDX -#### rocSPARSE +#### rocSPARSE 3.1.2 rocSPARSE 3.1.2 for ROCm 6.1.0 @@ -786,121 +752,106 @@ rocSPARSE 3.1.2 for ROCm 6.1.0 ##### Optimizations -* Triangular solve with multiple rhs (such as SpSM and csrsm) now calls SpSV, csrsv, etcetera when nrhs - equals 1 +* Triangular solve with multiple rhs (SpSM, csrsm, ...) now calls SpSV, csrsv, etcetera when nrhs equals 1 * Improved user manual section *Installation and Building for Linux and Windows* * Improved SpMV in CSR format on MI300 -#### rocThrust +#### rocThrust 3.0.1 rocThrust 3.0.1 for ROCm 6.1.0 ##### Fixes -* Ported a fix from Thrust 2.2 that ensures `thrust::optional` is trivially copyable +* Ported a fix from thrust 2.2 that ensures `thrust::optional` is trivially copyable. 
-#### Tensile +#### rocWMMA 1.4.0 -Tensile 4.40.0 for ROCm 6.1.0 +rocWMMA 1.4.0 for ROCm 6.1.0 ##### Additions -* New `DisableKernelPieces` values to invalidate local read, local write, and global read -* Added Stream-K kernel generation, including two-tile Stream-K algorithm by setting `StreamK=3` -* New feature to allow testing Stream-K grid multipliers -* Added debug output to check occupancy for Stream-K -* Added reject condition for FractionalLoad + DepthU!=power of 2 -* New `TENSILE_DB` debugging value to dump the common kernel parameters -* Added predicate for APU libs -* New parameter (`ClusterLocalRead`) to turn on/off wider local read opt for `TileMajorLDS` -* New parameter (`ExtraLatencyForLR`) to add extra interval between local read and wait -* New logic to check LDS size with auto LdsPad(=1) and change LdsPad to 0 if LDS overflows -* Added initialization type and general batched options to the `rocblas-bench` input creator script - -##### Optimizations - -* Enabled `MFMA` + `LocalSplitU=4` for `MT16x16` -* Enabled (`DirectToVgpr` + `MI4x4`) and supported skinny MacroTile -* Optimized postGSU kernel: separate postGSU kernels for different GSU values, loop unroll for GSU - loop, wider global load depending on array size, and parallel reduction depending on array size -* Auto LdsPad calculation for `TileMajorLds` + `MI16x16` -* Auto LdsPad calculation for `UnrollMajorLds` + `MI16x16` + `VectorWidth` +* Added bf16 support for hipRTC sample ##### Changes -* Cleared `hipErrorNotFound` error since it is an expected part of the search -* Modified hipCC search path for Linux -* Changed PCI ID from 32-bit to 64-bit for ROCm SMI HW monitor -* Changed `LdsBlockSizePerPad` to `LdsBlockSizePerPadA`, B to specify LBSPP separately -* Changed the default value of `LdsPadA`, B, `LdsBlockSizePerPadA`, B from 0 to -1 -* Updated test cases according to parameter changes for LdsPad, LBSPP and ClusterLocalRead -* Replaced `std::regex` with `fnmatch()/PathMatchSpec` as a 
workaround to `std::regex` stack overflow - known bug +* Changed Clang C++ version to C++17 +* Updated rocwmma_coop API +* Linked rocWMMA to hiprtc ##### Fixes -* hipCC compile append flag `parallel-jobs=4` -* Race condition in Stream-K that appeared with large grids and small sizes -* Mismatch issue with `LdsPad` + `LdsBlockSizePerPad!=0` and `TailLoop` -* Mismatch issue with `LdsPad` + `LdsBlockSizePerPad!=0` and `SplitLds` -* Incorrect reject condition check for `DirectToLds` + `LdsBlockSizePerPad=-1` case -* Small fix for `LdsPad` optimization (`LdsElement` calculation) +* Fixed compile/runtime arch checks +* Built all tests in large code model +* Removed inefficient branching in layout loop unrolling -#### ROCm Validation Suite +#### rpp 1.5.0 -##### Known issue +rpp 1.5.0 for ROCm 6.1.0 -* In a future release, the ROCm Validation Suite P2P Benchmark and Qualification Tool (PBQT) tests will be optimized to meet the target bandwidth requirements for MI300X. +##### Changes -#### MI200 SR-IOV +* Prerequisites -##### Known issue +##### Tested Configurations -* Multimedia applications may encounter compilation errors in the MI200 Single Root Input/Output Virtualization (SR-IOV) environment. This is because MI200 SR-IOV does not currently support multimedia applications. +* Linux distribution + * Ubuntu - `20.04` / `22.04` + * CentOS - `7` + * RHEL - `8`/`9` +* ROCm: rocm-core - `5.5.0.50500-63` +* Clang - Version `5.0.1` and above +* CMake - Version `3.22.3` +* IEEE 754-based half-precision floating-point library - Version `1.12.0` -### AMD MI300A RAS +#### Tensile 4.40.0 -#### Fixed defect +Tensile 4.40.0 for ROCm 6.1.0 -##### GFX correctable and uncorrectable error inject failures +##### Additions -* Previously, the AMD CPU Reliability, Availability, and Serviceability (RAS) installation encountered correctable and uncorrectable failures while injecting an error.
+- new DisableKernelPieces values to invalidate local read, local write, and global read +- stream-K kernel generation, including two-tile stream-k algorithm by setting StreamK=3 +- feature to allow testing stream-k grid multipliers +- debug output to check occupancy for Stream-K +- reject condition for FractionalLoad + DepthU!=power of 2 +- new TENSILE_DB debugging value to dump the common kernel parameters +- predicate for APU libs +- new parameter (ClusterLocalRead) to turn on/off wider local read opt for TileMajorLDS +- new parameter (ExtraLatencyForLR) to add extra interval between local read and wait +- new logic to check LDS size with auto LdsPad(=1) and change LdsPad to 0 if LDS overflows +- initialization type and general batched options to the rocblas-bench input creator script - This issue is resolved in the ROCm 6.1 release, and users will no longer encounter the GFX correctable error (CE) and uncorrectable error (UE) failures. +##### Optimizations -## ROCm 6.0.2 +- enabled MFMA + LocalSplitU=4 for MT16x16 +- enabled (DirectToVgpr + MI4x4) and supported skinny MacroTile +- optimized postGSU kernel: separate postGSU kernels for different GSU values, loop unroll for GSU loop, wider global load depending on array size, and parallel reduction depending on array size +- auto LdsPad calculation for TileMajorLds + MI16x16 +- auto LdsPad calculation for UnrollMajorLds + MI16x16 + VectorWidth -The ROCm 6.0.2 point release consists of minor bug fixes to improve the stability of MI300 GPU -applications. This release introduces several new driver features for system qualification on our partner -server offerings. 
+##### Changes -### Library changes in ROCm 6.0.2 +- cleared hipErrorNotFound error since it is an expected part of the search +- modified hipcc search path for Linux +- changed PCI ID from 32bit to 64bit for ROCm SMI HW monitor +- changed LdsBlockSizePerPad to LdsBlockSizePerPadA, B to specify LBSPP separately +- changed the default value of LdsPadA, B, LdsBlockSizePerPadA, B from 0 to -1 +- updated test cases according to parameter changes for LdsPad, LBSPP and ClusterLocalRead +- Replaced std::regex with fnmatch()/PathMatchSpec as a workaround to std::regex stack overflow known bug -| Library | Version | -|---------|---------| -| AMDMIGraphX | ⇒ [2.8](https://github.com/ROCm/AMDMIGraphX/releases/tag/rocm-6.0.2) | -| hipBLAS | ⇒ [2.0.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-6.0.2) | -| hipBLASLt | ⇒ [0.6.0](https://github.com/ROCm/hipBLASLt/releases/tag/rocm-6.0.2) | -| hipCUB | ⇒ [3.0.0](https://github.com/ROCm/hipCUB/releases/tag/rocm-6.0.2) | -| hipFFT | ⇒ [1.0.13](https://github.com/ROCm/hipFFT/releases/tag/rocm-6.0.2) | -| hipRAND | ⇒ [2.10.17](https://github.com/ROCm/hipRAND/releases/tag/rocm-6.0.2) | -| hipSOLVER | ⇒ [2.0.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-6.0.2) | -| hipSPARSE | ⇒ [3.0.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-6.0.2) | -| hipSPARSELt | ⇒ [0.1.0](https://github.com/ROCm/hipSPARSELt/releases/tag/rocm-6.0.2) | -| hipTensor | ⇒ [1.1.0](https://github.com/ROCm/hipTensor/releases/tag/rocm-6.0.2) | -| MIOpen | ⇒ [2.19.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-6.0.2) | -| rccl | ⇒ [2.15.5](https://github.com/ROCm/rccl/releases/tag/rocm-6.0.2) | -| rocALUTION | ⇒ [3.0.3](https://github.com/ROCm/rocALUTION/releases/tag/rocm-6.0.2) | -| rocBLAS | ⇒ [4.0.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-6.0.2) | -| rocFFT | ⇒ [1.0.25](https://github.com/ROCm/rocFFT/releases/tag/rocm-6.0.2) | -| rocm-cmake | ⇒ [0.11.0](https://github.com/ROCm/rocm-cmake/releases/tag/rocm-6.0.2) | -| rocPRIM | ⇒ 
[3.0.0](https://github.com/ROCm/rocPRIM/releases/tag/rocm-6.0.2) | -| rocRAND | ⇒ [3.0.0](https://github.com/ROCm/rocRAND/releases/tag/rocm-6.0.2) | -| rocSOLVER | ⇒ [3.24.0](https://github.com/ROCm/rocSOLVER/releases/tag/rocm-6.0.2) | -| rocSPARSE | ⇒ [3.0.2](https://github.com/ROCm/rocSPARSE/releases/tag/rocm-6.0.2) | -| rocThrust | ⇒ [3.0.0](https://github.com/ROCm/rocThrust/releases/tag/rocm-6.0.2) | -| rocWMMA | ⇒ [1.3.0](https://github.com/ROCm/rocWMMA/releases/tag/rocm-6.0.2) | -| Tensile | ⇒ [4.39.0](https://github.com/ROCm/Tensile/releases/tag/rocm-6.0.2) | +##### Fixes + +- hipcc compile append flag parallel-jobs=4 +- race condition in Stream-K that appeared with large grids and small sizes +- mismatch issue with LdsPad + LdsBlockSizePerPad!=0 and TailLoop +- mismatch issue with LdsPad + LdsBlockSizePerPad!=0 and SplitLds +- incorrect reject condition check for DirectToLds + LdsBlockSizePerPad=-1 case +- small fix for LdsPad optimization (LdsElement calculation) + +------------------- + +## ROCm 6.0.2 +The ROCm 6.0.2 point release consists of minor bug fixes to improve the stability of MI300 GPU applications. This release introduces several new driver features for system qualification on our partner server offerings. 
#### hipFFT 1.0.13 @@ -911,6 +862,66 @@ hipFFT 1.0.13 for ROCm 6.0.2 * Removed the Git submodule for shared files between rocFFT and hipFFT; instead, just copy the files over (this should help simplify downstream builds and packaging) +### Library changes in ROCm 6.0.2 + +| Library | Version | +|---------|---------| +| AMDMIGraphX | [2.8](https://github.com/ROCm/AMDMIGraphX/releases/tag/rocm-6.0.2) | +| composable_kernel | [0.2.0](https://github.com/ROCm/composable_kernel/releases/tag/rocm-6.0.2) | +| hipBLAS | [2.0.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-6.0.2) | +| hipCUB | [3.0.0](https://github.com/ROCm/hipCUB/releases/tag/rocm-6.0.2) | +| hipFFT | [1.0.13](https://github.com/ROCm/hipFFT/releases/tag/rocm-6.0.2) | +| hipRAND | [2.10.17](https://github.com/ROCm/hipRAND/releases/tag/rocm-6.0.2) | +| hipSOLVER | [2.0.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-6.0.2) | +| hipSPARSE | [3.0.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-6.0.2) | +| hipTensor | [1.1.0](https://github.com/ROCm/hipTensor/releases/tag/rocm-6.0.2) | +| MIOpen | [2.19.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-6.0.2) | +| MIVisionX | [2.5.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-6.0.2) | +| rccl | [2.15.5](https://github.com/ROCm/rccl/releases/tag/rocm-6.0.2) | +| rocALUTION | [3.0.3](https://github.com/ROCm/rocALUTION/releases/tag/rocm-6.0.2) | +| rocBLAS | [4.0.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-6.0.2) | +| rocFFT | [1.0.25](https://github.com/ROCm/rocFFT/releases/tag/rocm-6.0.2) | +| rocm-cmake | [0.11.0](https://github.com/ROCm/rocm-cmake/releases/tag/rocm-6.0.2) | +| rocPRIM | [3.0.0](https://github.com/ROCm/rocPRIM/releases/tag/rocm-6.0.2) | +| rocRAND | 2.10.17 ⇒ [3.0.0](https://github.com/ROCm/rocRAND/releases/tag/rocm-6.0.2) | +| rocSOLVER | [3.24.0](https://github.com/ROCm/rocSOLVER/releases/tag/rocm-6.0.2) | +| rocSPARSE | [3.0.2](https://github.com/ROCm/rocSPARSE/releases/tag/rocm-6.0.2) | +| rocThrust | 
[3.0.0](https://github.com/ROCm/rocThrust/releases/tag/rocm-6.0.2) | +| rocWMMA | [1.3.0](https://github.com/ROCm/rocWMMA/releases/tag/rocm-6.0.2) | +| rpp | [1.4.0](https://github.com/ROCm/rpp/releases/tag/rocm-6.0.2) | +| Tensile | [4.39.0](https://github.com/ROCm/Tensile/releases/tag/rocm-6.0.2) | + +#### rocRAND 3.0.0 + +rocRAND 3.0.0 for ROCm 6.0.2 + +##### Changed + +- Generator classes from `rocrand.hpp` are no longer copyable; in previous versions these copies +would copy internal references to the generators and would lead to double free or memory leak errors. + These types should be moved instead of copied, and move constructors and operators are now defined + for them. + +##### Optimized + +- Improved MT19937 initialization and generation performance. + +##### Removed + +- Removed hipRAND submodule from rocRAND. hipRAND is now only available as a separate package. +- Removed references to and workarounds for deprecated hcc + +##### Fixed + +- `mt19937_engine` from `rocrand.hpp` is now move-constructible and move-assignable. Previously the +move constructor and move assignment operator were deleted for this class. +- Various fixes for the C++ wrapper header rocrand.hpp + - fixed the name of `mrg31k3p`; it is now correctly spelled (was incorrectly named `mrg31k3a` in + previous versions). + - added missing `order` setter method for `threefry4x64` + - fixed the default ordering parameter for `lfsr113` +- Build error when using clang++ directly due to unsupported references to amdgpu-target + ------------------- ## ROCm 6.0.0 @@ -958,8 +969,6 @@ We've added a new ROCm meta package for easy installation of all ROCm core packa libraries. For example, the following command will install the full ROCm package: `apt-get install rocm` (Ubuntu), or `yum install rocm` (RHEL). - > To use ROCm on Radeon GPUs, refer to [Install Radeon software for Linux with ROCm](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/install/install-radeon.html).
- ### Filesystem Hierarchy Standard ROCm 6.0 fully adopts the Filesystem Hierarchy Standard (FHS) reorganization goals. We've removed @@ -981,7 +990,7 @@ CMake support has been added for documentation in the ### AMD Instinct™ MI50 end-of-support notice -AMD Instinct MI50, Radeon™ PRO VII, and Radeon™ VII products (collectively gfx906 GPUs) enters +AMD Instinct MI50, Radeon Pro VII, and Radeon VII products (collectively gfx906 GPUs) enter maintenance mode in ROCm 6.0. As outlined in [5.6.0](https://rocm.docs.amd.com/en/docs-5.6.0/release.html), ROCm 5.7 was the @@ -1144,9 +1153,9 @@ HIP 6.0.0 for ROCm 6.0.0 * `char luid[8];` * `unsigned int luidDeviceNodeMask;` -```{note} +:::{note} HIP only supports LUID on Windows OS. -``` +::: ##### Changes @@ -1199,7 +1208,7 @@ Note: These complex operations are equivalent to corresponding types/functions o * `HIP_ROCclr` * NVIDIA platform * `HIP_PLATFORM_NVCC` -* The `hcc_detail` and `nvcc_detail` directories in the clr repository are removed. +* The [hcc_detail](https://github.com/ROCm/clr/tree/1949b1621a802ffb1492616adbae6154bfbe64ef/hipamd/include/hip/hcc_detail) and [nvcc_detail](https://github.com/ROCm/clr/tree/1949b1621a802ffb1492616adbae6154bfbe64ef/hipamd/include/hip/nvcc_detail) directories in the clr repository are removed. * Deprecated gcnArch is removed from hip device struct `hipDeviceProp_t`. * Deprecated `enum hipMemoryType memoryType;` is removed from HIP struct `hipPointerAttribute_t` union.
@@ -1224,25 +1233,6 @@ hipBLAS 2.0.0 for ROCm 6.0.0 * `hipblasXtrmm` (calculates B <- alpha * op(A) * B) has been replaced with `hipblasXtrmm` (calculates C <- alpha * op(A) * B) -#### hipBLASLt 0.6.0 - -hipBLASLt 0.6.0 for ROCm 6.0.0 - -##### Additions - -* Added `UserArguments` for `GroupedGemm` -* Support for datatype: FP16 in with FP32 out -* New samples -* Support for datatype: `Int8` in `Int32` out -* Support for gfx94x platform -* Support for FP8/BF8 datatype (only for gfx94x platform) -* Support Scalar A,B,C,D for FP8/BF8 datatype - -##### Changes - -* Replaced `hipblasDatatype_t` with `hipDataType` -* Replaced `hipblasLtComputeType_t` with `hipblasComputeType_t` -* Deprecated `HIPBLASLT_MATMUL_DESC_D_SCALE_VECTOR_POINTER` #### hipCUB 3.0.0 @@ -1341,41 +1331,15 @@ MIOpen 2.19.0 for ROCm 6.0.0 ##### Fixes -* 3D convolution host API bug -* `[HOTFIX][MI200][FP16]` has been disabled for `ConvHipImplicitGemmBwdXdlops` when `FP16_ALT` is +* 3-D convolution host API bug +* `[HOTFIX][MI200][FP16]` has been disabled for `ConvHipImplicitGemmBwdXdlops` when FP16_ALT is required -#### MIVisionX 2.5.0 - -### Additions - -* CTest: Tests for install verification -* Hardware support updates -* Doxygen support for API documentation +#### MIVisionX -### Optimizations - -* CMakeList Cleanup -* Readme - -### Changes - -* rocAL: PyBind Link to prebuilt library - * PyBind11 - * RapidJSON -* Setup Updates -* RPP - Use package install -* Dockerfiles: Updates & bugfix -* CuPy - No longer installed with setup.py - -### Fixes - -* rocAL bug fix and updates - -### Known issues - -* OpenCV 4.X support for some applications is missing -* MIVisionX package install requires manual prerequisites installation +* Added Comprehensive CTests to aid developers +* Introduced Doxygen support for complete API documentation +* Simplified dependencies for rocAL #### OpenMP @@ -1447,14 +1411,13 @@ RCCL 2.15.5 for ROCm 6.0.0 * Removed TransferBench from tools as it exists in standalone repo: 
[https://github.com/ROCm/TransferBench](https://github.com/ROCm/TransferBench) - #### rocALUTION 3.0.3 rocALUTION 3.0.3 for ROCm 6.0.0 ##### Additions -* Support for 64-bit integer vectors +* Support for 64bit integer vectors * Inclusive and exclusive sum functionality for vector classes * Transpose functionality for `GlobalMatrix` and `LocalMatrix` * `TripleMatrixProduct` functionality for `LocalMatrix` @@ -1493,8 +1456,8 @@ rocBLAS 4.0.0 for ROCm 6.0.0 ##### Additions * Beta API `rocblas_gemm_batched_ex3` and `rocblas_gemm_strided_batched_ex3` -* Input/output type `F16_r`/`BF16_r` and execution type `f32_r` support for Level 2 `gemv_batched` and - `gemv_strided_batched` +* Input/output type f16_r/bf16_r and execution type f32_r support for Level 2 gemv_batched and + gemv_strided_batched * Use of `rocblas_status_excluded_from_build` when calling functions that require Tensile (when using rocBLAS built without Tensile) * System for asynchronous kernel launches that set a `rocblas_status` failure based on a @@ -1514,7 +1477,7 @@ rocBLAS 4.0.0 for ROCm 6.0.0 * `rocblas_gemm_ext2` API function * In-place trmm API from Legacy BLAS is replaced by an API that supports both in-place and out-of-place trmm -* INT8x4 support is removed (INT8 support is unchanged) +* int8x4 support is removed (int8 support is unchanged) * `#define __STDC_WANT_IEC_60559_TYPES_EXT__` is removed from `rocblas-types.h` (if you want ISO/IEC TS 18661-3:2015 functionality, you must define `__STDC_WANT_IEC_60559_TYPES_EXT__` before including `float.h`, `math.h`, and `rocblas.h`) @@ -1571,6 +1534,7 @@ rocFFT 1.0.25 for ROCm 6.0.0 * Built kernels in a solution map to the library kernel cache * Real forward transforms (real-to-complex) no longer overwrite input; rocFFT may still overwrite real inverse (complex-to-real) input, as this allows for faster performance + * `rocfft-rider` and `dyna-rocfft-rider` have been renamed to `rocfft-bench` and `dyna-rocfft-bench`; these are controlled by the 
`BUILD_CLIENTS_BENCH` CMake option * Links for the former file names are installed, and the former `BUILD_CLIENTS_RIDER` CMake option @@ -1742,7 +1706,7 @@ rocSPARSE 3.0.2 for ROCm 6.0.0 * `rocsparse_inverse_permutation` * Mixed-precisions for SpVV -* Uniform INT8 precision for gather and scatter +* Uniform int8 precision for gather and scatter #### rocThrust 3.0.0 @@ -1777,7 +1741,7 @@ rocWMMA 1.3.0 for ROCm 6.0.0 ##### Additions * Support for gfx942 -* Support for F8, BF8, and xfloat32 data types +* Support for f8, bf8, and xfloat32 data types * support for `HIP_NO_HALF`, `__ HIP_NO_HALF_CONVERSIONS__`, and `__ HIP_NO_HALF_OPERATORS__` (e.g., PyTorch environment) @@ -1798,7 +1762,7 @@ Tensile 4.39.0 for ROCm 6.0.0 ##### Additions -* Added Aqua Vanjaram support: gfx942, FP8/BF8 datatype, F32 datatype, and +* Added `aquavanjaram` support: gfx942, fp8/bf8 datatype, xf32 datatype, and stochastic rounding for various datatypes * Added and updated tuning scripts * Added `DirectToLds` support for larger data types with 32-bit global load (old parameter `DirectToLds` @@ -1812,7 +1776,7 @@ Tensile 4.39.0 for ROCm 6.0.0 ##### Optimizations * Enabled `InitAccVgprOpt` for `MatrixInstruction` cases -* Implemented local read-related parameter calculations with `DirectToVgpr` +* Implemented local read related parameter calculations with `DirectToVgpr` * Enabled dedicated vgpr allocation for local read + pack * Optimized code initialization * Optimized sgpr allocation @@ -1841,7 +1805,7 @@ Tensile 4.39.0 for ROCm 6.0.0 ##### Fixes -* Predicate ordering for FP16alt impl round near zero mode to unbreak distance modes +* Predicate ordering for fp16alt impl round near zero mode to unbreak distance modes * Boundary check for mirror dims and re-enable disabled mirror dims test cases * Merge error affecting i8 with WMMA * Mismatch issue with DTLds + TSGR + TailLoop @@ -1853,19 +1817,21 @@ Tensile 4.39.0 for ROCm 6.0.0 * Compiler directive for gfx942 * Formatting for 
`DecisionTree_test.cpp` -### Library changes in ROCM 6.0.0 +### Library changes in ROCm 6.0.0 | Library | Version | |---------|---------| | AMDMIGraphX | 2.7 ⇒ [2.8](https://github.com/ROCm/AMDMIGraphX/releases/tag/rocm-6.0.0) | +| composable_kernel | [0.2.0](https://github.com/ROCm/composable_kernel/releases/tag/rocm-6.0.0) | | hipBLAS | 1.1.0 ⇒ [2.0.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-6.0.0) | | hipCUB | 2.13.1 ⇒ [3.0.0](https://github.com/ROCm/hipCUB/releases/tag/rocm-6.0.0) | | hipFFT | 1.0.12 ⇒ [1.0.13](https://github.com/ROCm/hipFFT/releases/tag/rocm-6.0.0) | | hipRAND | 2.10.16 ⇒ [2.10.17](https://github.com/ROCm/hipRAND/releases/tag/rocm-6.0.0) | | hipSOLVER | 1.8.2 ⇒ [2.0.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-6.0.0) | | hipSPARSE | 2.3.8 ⇒ [3.0.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-6.0.0) | -| hipTensor | ⇒ [1.1.0](https://github.com/ROCm/hipTensor/releases/tag/rocm-6.0.0) | +| hipTensor | [1.1.0](https://github.com/ROCm/hipTensor/releases/tag/rocm-6.0.0) | | MIOpen | [2.19.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-6.0.0) | +| MIVisionX | [2.5.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-6.0.0) | | rccl | [2.15.5](https://github.com/ROCm/rccl/releases/tag/rocm-6.0.0) | | rocALUTION | 2.1.11 ⇒ [3.0.3](https://github.com/ROCm/rocALUTION/releases/tag/rocm-6.0.0) | | rocBLAS | 3.1.0 ⇒ [4.0.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-6.0.0) | @@ -1877,6 +1843,7 @@ Tensile 4.39.0 for ROCm 6.0.0 | rocSPARSE | 2.5.4 ⇒ [3.0.2](https://github.com/ROCm/rocSPARSE/releases/tag/rocm-6.0.0) | | rocThrust | 2.18.0 ⇒ [3.0.0](https://github.com/ROCm/rocThrust/releases/tag/rocm-6.0.0) | | rocWMMA | 1.2.0 ⇒ [1.3.0](https://github.com/ROCm/rocWMMA/releases/tag/rocm-6.0.0) | +| rpp | 1.2.0 ⇒ [1.4.0](https://github.com/ROCm/rpp/releases/tag/rocm-6.0.0) | | Tensile | 4.38.0 ⇒ [4.39.0](https://github.com/ROCm/Tensile/releases/tag/rocm-6.0.0) | #### AMDMIGraphX 2.8 @@ -1937,8 +1904,8 @@ hipBLAS 2.0.0 
for ROCm 6.0.0 ##### Added -- added option to define `HIPBLAS_USE_HIP_BFLOAT16` to switch API to use `hip_bfloat16` type -- added `hipblasGemmExWithFlags` API +- added option to define HIPBLAS_USE_HIP_BFLOAT16 to switch API to use hip_bfloat16 type +- added hipblasGemmExWithFlags API ##### Deprecated @@ -2009,7 +1976,7 @@ hipSPARSE 3.0.0 for ROCm 6.0.0 ##### Changed -- Changed hipsparseSpSV_solve() API function to match cuSPARSE API +- Changed hipsparseSpSV_solve() API function to match cusparse API - Changed generic API functions to use const descriptors - Documentation improved @@ -2079,12 +2046,9 @@ rocBLAS 4.0.0 for ROCm 6.0.0 ##### Added - Addition of beta API rocblas_gemm_batched_ex3 and rocblas_gemm_strided_batched_ex3 -- Added input/output type `F16_r`/`BF16_r` and execution type `f32_r` support for Level 2 - `gemv_batched` and `gemv_strided_batched` -- Added `rocblas_status_excluded_from_build` to be used when calling functions which require Tensile - when using rocBLAS built without Tensile -- Added system for async kernel launches setting a failure `rocblas_status` based on - `hipPeekAtLastError` discrepancy +- Added input/output type f16_r/bf16_r and execution type f32_r support for Level 2 gemv_batched and gemv_strided_batched +- Added rocblas_status_excluded_from_build to be used when calling functions which require Tensile when using rocBLAS built without Tensile +- Added system for async kernel launches setting a failure rocblas_status based on hipPeekAtLastError discrepancy ##### Optimized @@ -2098,7 +2062,7 @@ rocBLAS 4.0.0 for ROCm 6.0.0 - rocblas_gemm_ext2 API function is removed - in-place trmm API from Legacy BLAS is removed. It is replaced by an API that supports both in-place and out-of-place trmm -- INT8x4 support is removed. INT8 support is unchanged +- int8x4 support is removed. int8 support is unchanged - The #define STDC_WANT_IEC_60559_TYPES_EXT has been removed from rocblas-types.h. 
Users who want ISO/IEC TS 18661-3:2015 functionality must define STDC_WANT_IEC_60559_TYPES_EXT before including float.h, math.h, and rocblas.h - The default build removes device code for gfx803 architecture from the fat binary @@ -2123,7 +2087,7 @@ rocFFT 1.0.25 for ROCm 6.0.0 `rocfft_field` is a new type that can be added to a plan description, to describe layout of FFT input or output. `rocfft_field_add_brick` can be called one or more times to describe a brick decomposition of an FFT field, where each brick can be assigned a different device. - These interfaces are still experimental and subject to change. We are interested to hear feedback on them. Questions and concerns may be raised by opening issues on the [rocFFT issue tracker](https://github.com/ROCm/rocFFT/issues). + These interfaces are still experimental and subject to change. We are interested to hear feedback on them. Questions and concerns may be raised by opening issues on the [rocFFT issue tracker](https://github.com/ROCmSoftwarePlatform/rocFFT/issues). Note that at this time, multi-device FFTs have several limitations: @@ -2164,8 +2128,8 @@ rocm-cmake 0.11.0 for ROCm 6.0.0 ##### Fixed -- ROCMClangTidy: Fixed extra make flags passed for Clang-Tidy -- ROCMTest: Fixed issues when using module in a subdirectory +- ROCMClangTidy: Fixed extra make flags passed for clang tidy. +- ROCMTest: Fixed issues when using module in a subdirectory. 
#### rocPRIM 3.0.0 @@ -2204,87 +2168,162 @@ rocSPARSE 3.0.2 for ROCm 6.0.0 ##### Added -- Added rocsparse_inverse_permutation -- Added mixed precisions for SpVV -- Added uniform INT8 precision for Gather and Scatter +- Added rocsparse_inverse_permutation +- Added mixed precisions for SpVV +- Added uniform int8 precision for Gather and Scatter + +##### Optimized + +- Optimization to doti routine +- Optimization to spin-looping algorithms + +##### Changed + +- Changed rocsparse_spmv function arguments +- Changed rocsparse_xbsrmv routines function arguments +- doti, dotci, spvv, and csr2ell now require calling hipStreamSynchronize after when using host pointer mode +- Improved documentation +- Improved verbose output during argument checking on API function calls + +##### Deprecated + +- Deprecated rocsparse_spmv_ex +- Deprecated rocsparse_xbsrmv_ex routines + +##### Removed + +- Removed auto stages from spmv, spmm, spgemm, spsv, spsm, and spitsv. +- Removed rocsparse_spmm_ex routine + +##### Fixed + +- Fixed a bug in rocsparse-bench, where SpMV algorithm was not taken into account in CSR format +- Fixed the BSR/GEBSR routines bsrmv, bsrsv, bsrmm, bsrgeam, gebsrmv, gebsrmm so that block_dim==0 is considered an invalid size +- Fixed bug where passing nnz = 0 to doti or dotci did not always return a dot product of 0 + +#### rocThrust 3.0.0 + +rocThrust 3.0.0 for ROCm 6.0.0 + +##### Added + +- Updated to match upstream Thrust 2.0.1 +- NV_IF_TARGET macro from libcu++ for NVIDIA backend and HIP implementation for HIP backend. + +##### Changed + +- The cmake build system now additionally accepts `GPU_TARGETS` in addition to `AMDGPU_TARGETS` for + setting the targeted gpu architectures. `GPU_TARGETS=all` will compile for all supported architectures. + `AMDGPU_TARGETS` is only provided for backwards compatibility, `GPU_TARGETS` should be preferred. + +##### Removed + +- Removed cub symlink from the root of the repository. 
+- Removed support for deprecated macros (THRUST_DEVICE_BACKEND and THRUST_HOST_BACKEND). + +##### Fixed + +- Fixed a segmentation fault when binary search / upper bound / lower bound / equal range was invoked with `hip_rocprim::execute_on_stream_base` policy. + +##### Known Issues + +- For NVIDIA backend, `NV_IF_TARGET` and `THRUST_RDC_ENABLED` intend to substitute the `THRUST_HAS_CUDART` macro, which is now no longer used in Thrust (provided for legacy support only). However, there is no `THRUST_RDC_ENABLED` macro available for the HIP backend, so some branches in Thrust's code may be unreachable in the HIP backend. + +#### rocWMMA 1.3.0 + +rocWMMA 1.3.0 for ROCm 6.0.0 + +##### Added + +- Added support for gfx940, gfx941 and gfx942 targets +- Added support for f8, bf8 and xfloat32 datatypes +- Added support for HIP_NO_HALF, __ HIP_NO_HALF_CONVERSIONS__ and __ HIP_NO_HALF_OPERATORS__ (e.g. pytorch environment) + +##### Changed + +- rocWMMA with hipRTC now supports bfloat16_t datatype +- gfx11 wmma now uses lane swap instead of broadcast for layout adjustment +- Updated samples GEMM parameter validation on host arch + +##### Fixed + +- Disabled gtest static library deployment +- Extended tests now build in large code model + +#### rpp 1.4.0 + +rpp for ROCm 6.0.0 + +##### Added + +* New Tests + +##### Optimizations + +* Readme Updates + +##### Changed + +* **Backend** - Default Backend set to `HIP` + +##### Fixed + +* Minor bugs and warnings + +##### Tested Configurations + +* Linux distribution + + Ubuntu - `18.04` / `20.04` + + CentOS - `8` +* ROCm: rocm-core - `5.0.0.50000-49` +* Clang - Version `6.0` +* CMake - Version `3.22.3` +* Boost - Version `1.72` +* IEEE 754-based half-precision floating-point library - Version `1.12.0` + +##### Rpp 1.3.0 + -##### Optimized -- Optimization to doti routine -- Optimization to spin-looping algorithms +##### Rpp 1.2.0 -##### Changed -- Changed rocsparse_spmv function arguments -- Changed rocsparse_xbsrmv routines function 
arguments -- doti, dotci, spvv, and csr2ell now require calling hipStreamSynchronize after when using host pointer mode -- Improved documentation -- Improved verbose output during argument checking on API function calls -##### Deprecated +##### Known Issues -- Deprecated rocsparse_spmv_ex -- Deprecated rocsparse_xbsrmv_ex routines +* `CPU` only backend not enabled -##### Removed +##### Rpp 1.1.0 -- Removed auto stages from spmv, spmm, spgemm, spsv, spsm, and spitsv. -- Removed rocsparse_spmm_ex routine -##### Fixed -- Fixed a bug in rocsparse-bench, where SpMV algorithm was not taken into account in CSR format -- Fixed the BSR/GEBSR routines bsrmv, bsrsv, bsrmm, bsrgeam, gebsrmv, gebsrmm so that block_dim==0 is considered an invalid size -- Fixed bug where passing nnz = 0 to doti or dotci did not always return a dot product of 0 +##### Rpp 1.0.0 -#### rocThrust 3.0.0 -rocThrust 3.0.0 for ROCm 6.0.0 -##### Added +##### Rpp 0.99 -- Updated to match upstream Thrust 2.0.1 -- NV_IF_TARGET macro from libcu++ for NVIDIA backend and HIP implementation for HIP backend. -##### Changed -- The cmake build system now additionally accepts `GPU_TARGETS` in addition to `AMDGPU_TARGETS` for - setting the targeted gpu architectures. `GPU_TARGETS=all` will compile for all supported architectures. - `AMDGPU_TARGETS` is only provided for backwards compatibility, `GPU_TARGETS` should be preferred. +##### Rpp 0.98 -##### Removed -- Removed CUB symlink from the root of the repository. -- Removed support for deprecated macros (THRUST_DEVICE_BACKEND and THRUST_HOST_BACKEND). -##### Fixed +##### Rpp 0.97 -- Fixed a segmentation fault when binary search / upper bound / lower bound / equal range was invoked with `hip_rocprim::execute_on_stream_base` policy. -##### Known Issues -- For NVIDIA backend, `NV_IF_TARGET` and `THRUST_RDC_ENABLED` intend to substitute the `THRUST_HAS_CUDART` macro, which is now no longer used in Thrust (provided for legacy support only). 
However, there is no `THRUST_RDC_ENABLED` macro available for the HIP backend, so some branches in Thrust's code may be unreachable in the HIP backend. +##### Rpp 0.96 -#### rocWMMA 1.3.0 -rocWMMA 1.3.0 for ROCm 6.0.0 -##### Added +##### Rpp 0.95 -- Added support for gfx940, gfx941 and gfx942 targets -- Added support for f8, BF8 and xfloat32 datatypes -- Added support for HIP_NO_HALF, __ HIP_NO_HALF_CONVERSIONS__ and __ HIP_NO_HALF_OPERATORS__ (e.g. pytorch environment) -##### Changed -- rocWMMA with hipRTC now supports `bfloat16_t` datatype -- gfx11 WMMA now uses lane swap instead of broadcast for layout adjustment -- Updated samples GEMM parameter validation on host arch +##### Rpp 0.93 -##### Fixed -- Disabled gtest static library deployment -- Extended tests now build in large code model #### Tensile 4.39.0 @@ -2292,7 +2331,7 @@ Tensile 4.39.0 for ROCm 6.0.0 ##### Added -- Added aquavanjaram support: gfx940/gfx941/gfx942, FP8/BF8 datatype, xF32 datatype, and stochastic rounding for various datatypes +- Added aquavanjaram support: gfx940/gfx941/gfx942, fp8/bf8 datatype, xf32 datatype, and stochastic rounding for various datatypes - Added/updated tuning scripts - Added DirectToLds support for larger data types with 32bit global load (old parameter DirectToLds is replaced with DirectToLdsA and DirectToLdsB), and the corresponding test cases - Added the average of frequency, power consumption, and temperature information for the winner kernels to the CSV file @@ -2331,9 +2370,9 @@ Tensile 4.39.0 for ROCm 6.0.0 ##### Fixed -- Fixed predicate ordering for FP16alt impl round near zero mode to unbreak distance modes +- Fixed predicate ordering for fp16alt impl round near zero mode to unbreak distance modes - Fixed boundary check for mirror dims and re-enable disabled mirror dims test cases -- Fixed merge error affecting i8 with WMMA +- Fixed merge error affecting i8 with wmma - Fixed mismatch issue with DTLds + TSGR + TailLoop - Fixed a bug with InitAccVgprOpt + GSU>1 
and a mismatch issue with PGR=0 - Fixed override for unloaded solutions when lazy loading @@ -2389,7 +2428,7 @@ kernels found by setting the environment variable ROCBLAS_TENSILE_GEMM_OVERRIDE_ points to the stored file. For more details, refer to the -[rocBLAS Programmer's Guide](https://rocm.docs.amd.com/projects/rocBLAS/en/latest/Programmers_Guide.html#rocblas-gemm-tune). +[rocBLAS Programmer's Guide](https://rocm.docs.amd.com/projects/rocBLAS/en/docs-5.7.1/Programmers_Guide.html). #### HIP 5.7.1 (for ROCm 5.7.1) @@ -2400,11 +2439,12 @@ ROCm 5.7.1 is a point release with several bug fixes in the HIP runtime. The `hipPointerGetAttributes` API returns the correct HIP memory type as `hipMemoryTypeManaged` for managed memory. -### Library changes in ROCM 5.7.1 +### Library changes in ROCm 5.7.1 | Library | Version | |---------|---------| | AMDMIGraphX | [2.7](https://github.com/ROCm/AMDMIGraphX/releases/tag/rocm-5.7.1) | +| composable_kernel | [0.2.0](https://github.com/ROCm/composable_kernel/releases/tag/rocm-5.7.1) | | hipBLAS | [1.1.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-5.7.1) | | hipCUB | [2.13.1](https://github.com/ROCm/hipCUB/releases/tag/rocm-5.7.1) | | hipFFT | [1.0.12](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.7.1) | @@ -2412,6 +2452,7 @@ for managed memory. | hipSOLVER | 1.8.1 ⇒ [1.8.2](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.7.1) | | hipSPARSE | [2.3.8](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.7.1) | | MIOpen | [2.19.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-5.7.1) | +| MIVisionX | [2.5.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.7.1) | | rocALUTION | [2.1.11](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.7.1) | | rocBLAS | [3.1.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.7.1) | | rocFFT | [1.0.24](https://github.com/ROCm/rocFFT/releases/tag/rocm-5.7.1) | @@ -2422,6 +2463,7 @@ for managed memory. 
| rocSPARSE | [2.5.4](https://github.com/ROCm/rocSPARSE/releases/tag/rocm-5.7.1) | | rocThrust | [2.18.0](https://github.com/ROCm/rocThrust/releases/tag/rocm-5.7.1) | | rocWMMA | [1.2.0](https://github.com/ROCm/rocWMMA/releases/tag/rocm-5.7.1) | +| rpp | [1.2.0](https://github.com/ROCm/rpp/releases/tag/rocm-5.7.1) | | Tensile | [4.38.0](https://github.com/ROCm/Tensile/releases/tag/rocm-5.7.1) | #### hipSOLVER 1.8.2 @@ -2450,15 +2492,15 @@ New features include: Note that ROCm 5.7.0 is EOS for MI50. 5.7 versions of ROCm are the last major releases in the ROCm 5 series. This release is Linux-only. -```{important} +:::{important} The next major ROCm release (ROCm 6.0) will not be backward compatible with the ROCm 5 series. Changes will include: splitting LLVM packages into more manageable sizes, changes to the HIP runtime API, splitting rocRAND and hipRAND into separate packages, and reorganizing our file structure. -``` +::: #### AMD Instinct™ MI50 end-of-support notice -AMD Instinct MI50, Radeon PRO VII, and Radeon VII products (collectively gfx906 GPUs) will enter +AMD Instinct MI50, Radeon Pro VII, and Radeon VII products (collectively gfx906 GPUs) will enter maintenance mode starting Q3 2023. As outlined in [5.6.0](https://rocm.docs.amd.com/en/docs-5.6.0/release.html), ROCm 5.7 will be the @@ -2536,7 +2578,7 @@ the GPU in heterogeneous applications. Ideally, developers should treat heteroge OpenMP applications like pure CPU applications. However, this simplicity has not been achieved yet. Refer to the documentation on LLVM ASan with the GPU at -{doc}`LLVM AddressSanitizer User Guide`. +[LLVM AddressSanitizer User Guide](../conceptual/using-gpu-sanitizer.md). :::{note} The beta release of LLVM ASan for ROCm is currently tested and validated on Ubuntu 20.04. 
@@ -2634,11 +2676,12 @@ The following defects are fixed in ROCm v5.7: * Remove `hiparray*` and make it opaque with `hipArray_t` -### Library changes in ROCM 5.7.0 +### Library changes in ROCm 5.7.0 | Library | Version | |---------|---------| | AMDMIGraphX | 2.5 ⇒ [2.7](https://github.com/ROCm/AMDMIGraphX/releases/tag/rocm-5.7.0) | +| composable_kernel | [0.2.0](https://github.com/ROCm/composable_kernel/releases/tag/rocm-5.7.0) | | hipBLAS | 0.54.0 ⇒ [1.1.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-5.7.0) | | hipCUB | [2.13.1](https://github.com/ROCm/hipCUB/releases/tag/rocm-5.7.0) | | hipFFT | [1.0.12](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.7.0) | @@ -2646,6 +2689,7 @@ The following defects are fixed in ROCm v5.7: | hipSOLVER | 1.8.0 ⇒ [1.8.1](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.7.0) | | hipSPARSE | 2.3.7 ⇒ [2.3.8](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.7.0) | | MIOpen | [2.19.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-5.7.0) | +| MIVisionX | 2.4.0 ⇒ [2.5.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.7.0) | | rocALUTION | 2.1.9 ⇒ [2.1.11](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.7.0) | | rocBLAS | 3.0.0 ⇒ [3.1.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.7.0) | | rocFFT | 1.0.23 ⇒ [1.0.24](https://github.com/ROCm/rocFFT/releases/tag/rocm-5.7.0) | @@ -2656,6 +2700,7 @@ The following defects are fixed in ROCm v5.7: | rocSPARSE | 2.5.2 ⇒ [2.5.4](https://github.com/ROCm/rocSPARSE/releases/tag/rocm-5.7.0) | | rocThrust | [2.18.0](https://github.com/ROCm/rocThrust/releases/tag/rocm-5.7.0) | | rocWMMA | 1.1.0 ⇒ [1.2.0](https://github.com/ROCm/rocWMMA/releases/tag/rocm-5.7.0) | +| rpp | [1.2.0](https://github.com/ROCm/rpp/releases/tag/rocm-5.7.0) | | Tensile | 4.37.0 ⇒ [4.38.0](https://github.com/ROCm/Tensile/releases/tag/rocm-5.7.0) | #### AMDMIGraphX 2.7 @@ -2708,9 +2753,37 @@ MIGraphX 2.7 for ROCm 5.7.0 ##### Removed -- Removed INT8x4 rocBlas calls due to deprecation +- 
Removed int8x4 rocBlas calls due to deprecation - removed std::reduce usage since not all OS' support it +#### composable_kernel 0.2.0 + +CK 0.2.0 for ROCm 5.7.0 + +##### Fixed + +- Fixed a bug in 6-dimensional kernels (#555). +- Fixed grouped ConvBwdWeight test case failure (#524). + +##### Optimizations + +- Improve proformance of normalization kernel + +##### Added + +- Added support on NAVI3x. +- Added user tutorial (#563). +- Added more instances for irregular GEMM sizes (#560). +- Added inter-wave consumer-producer programming model for GEMM kernels (#310). +- Added multi-D GEMM client APIs (#534). +- Added multi-embeddings support (#542). +- Added Navi3x blockwise GEMM and real GEMM support (#541). +- Added Navi grouped ConvBwdWeight support (#505). + +##### Changed + +- Changed ... + #### hipBLAS 1.1.0 hipBLAS 1.1.0 for ROCm 5.7.0 @@ -2737,11 +2810,96 @@ hipSPARSE 2.3.8 for ROCm 5.7.0 ##### Improved -- Fix compilation failures when using cuSPARSE 12.1.0 backend -- Fix compilation failures when using cuSPARSE 12.0.0 backend -- Fix compilation failures when using cuSPARSE 10.1 (non-update versions) as backend +- Fix compilation failures when using cusparse 12.1.0 backend +- Fix compilation failures when using cusparse 12.0.0 backend +- Fix compilation failures when using cusparse 10.1 (non-update versions) as backend - Minor improvements +#### MIVisionX 2.5.0 + +MIVisionX for ROCm 5.7.0 + +##### Added + +* CTest - OpenVX Tests +* Hardware Support + +##### Optimizations + +* CMakeList Cleanup + +##### Changed + +* rocAL - PyBind Link to prebuilt library + + PyBind11 + + RapidJSON +* Setup Updates +* RPP Version - 1.2.0 +* Dockerfiles - Updates & bugfix + +##### Fixed + +* rocAL bug fix and updates + +##### Tested Configurations + +* Windows `10` / `11` +* Linux distribution + + Ubuntu - `20.04` / `22.04` + + CentOS - `7` / `8` + + RHEL - `8` / `9` + + SLES - `15-SP4` +* ROCm: rocm-core - `5.4.3.50403-121` +* miopen-hip - `2.19.0.50403-121` +* miopen-opencl - 
`2.18.0.50300-63` +* migraphx - `2.4.0.50403-121` +* Protobuf - [V3.12.4](https://github.com/protocolbuffers/protobuf/releases/tag/v3.12.4) +* OpenCV - [4.6.0](https://github.com/opencv/opencv/releases/tag/4.6.0) +* RPP - [1.2.0](https://github.com/GPUOpen-ProfessionalCompute-Libraries/rpp/releases/tag/1.2.0) +* FFMPEG - [n4.4.2](https://github.com/FFmpeg/FFmpeg/releases/tag/n4.4.2) +* Dependencies for all the above packages +* MIVisionX Setup Script - `V2.5.4` + +##### Known Issues + +* OpenCV 4.X support for some apps missing + +##### Mivisionx Dependency Map + + + +##### Hip Backend + +**Docker Image:** `sudo docker build -f docker/ubuntu20/{DOCKER_LEVEL_FILE_NAME}.dockerfile -t {mivisionx-level-NUMBER} .` + +- ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `new component added to the level` +- ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `existing component from the previous level` + +| Build Level | MIVisionX Dependencies | Modules | Libraries and Executables | Docker Tag | +| ----------- | -------------------------------------------------- | ------------------------------------------------------------------------- || -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Level_1` | cmake <br> gcc <br> g++ | amd_openvx <br> utilities | ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libopenvx.so` - OpenVX&trade; Lib - CPU <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU <br> 
![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `runvx` - OpenVX&trade; Graph Executor - CPU with Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-1?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_2` | ROCm HIP <br> +Level 1 | amd_openvx <br> amd_openvx_extensions <br> utilities | ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libopenvx.so` - OpenVX&trade; Lib - CPU/GPU <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU/GPU <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `runvx` - OpenVX&trade; Graph Executor - Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-2?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_3` | OpenCV <br> FFMPEG <br> +Level 2 | amd_openvx <br> amd_openvx_extensions <br> utilities | ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> 
![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `mv_compile` - Neural Net Model Compile <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-3?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_4` | MIOpenGEMM <br> MIOpen <br> ProtoBuf <br> +Level 3 | amd_openvx <br> amd_openvx_extensions <br> apps <br> utilities | ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> 
![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_nn.so` - OpenVX&trade; Neural Net Extension | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-4?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_5` | AMD_RPP <br> rocAL deps <br> +Level 4 | amd_openvx <br> amd_openvx_extensions <br> apps <br> rocAL <br> utilities | ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_rpp.so` - OpenVX&trade; RPP Extension <br> 
![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `librocal.so` - Radeon Augmentation Library <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `rocal_pybind.so` - rocAL Pybind Lib | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-5?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | + +##### Opencl Backend + +**Docker Image:** `sudo docker build -f docker/ubuntu20/{DOCKER_LEVEL_FILE_NAME}.dockerfile -t {mivisionx-level-NUMBER} .` + +- ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `new component added to the level` +- ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `existing component from the previous level` + +| Build Level | MIVisionX Dependencies | Modules | Libraries and Executables | Docker Tag | +| ----------- | -------------------------------------------------- | ------------------------------------------------------------------------- || -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Level_1` | cmake <br> gcc <br> g++ | amd_openvx <br> utilities | ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libopenvx.so` - OpenVX&trade; Lib - CPU <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU <br> 
![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `runvx` - OpenVX&trade; Graph Executor - CPU with Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-1?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_2` | ROCm OpenCL <br> +Level 1 | amd_openvx <br> amd_openvx_extensions <br> utilities | ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libopenvx.so` - OpenVX&trade; Lib - CPU/GPU <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU/GPU <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `loom_shell` - 360 Stitch App <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `runvx` - OpenVX&trade; Graph Executor - Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-2?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_3` | OpenCV <br> FFMPEG <br> +Level 2 | amd_openvx <br> amd_openvx_extensions <br> utilities | 
![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `mv_compile` - Neural Net Model Compile <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-3?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_4` | MIOpenGEMM <br> MIOpen <br> ProtoBuf <br> +Level 3 | amd_openvx <br> amd_openvx_extensions <br> apps <br> utilities | 
![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `inference_server_app` - Cloud Inference App | [![Docker Image Version (tag latest 
semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-4?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_5` | AMD_RPP <br> rocAL deps <br> +Level 4 | amd_openvx <br> amd_openvx_extensions <br> apps <br> rocAL <br> utilities | ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> 
![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `inference_server_app` - Cloud Inference App <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_rpp.so` - OpenVX&trade; RPP Extension <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `librocal.so` - Radeon Augmentation Library <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `rocal_pybind.so` - rocAL Pybind Lib | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-5?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | + +**NOTE:** OpenVX and the OpenVX logo are trademarks of the Khronos Group Inc. 
+ #### rocALUTION 2.1.11 rocALUTION 2.1.11 for ROCm 5.7.0 @@ -2875,6 +3033,73 @@ rocWMMA 1.2.0 for ROCm 5.7.0 - Fixed a bug with synchronization - Updated rocWMMA cmake versioning +#### rpp 1.2.0 + +rpp for ROCm 5.7.0 + +##### Added + +* New Tests + +##### Optimizations + +* Readme Updates + +##### Changed + +* **Backend** - Default Backend set to `HIP` + +##### Fixed + +* Minor bugs and warnings + +##### Tested Configurations + +* Linux distribution + + Ubuntu - `18.04` / `20.04` + + CentOS - `8` +* ROCm: rocm-core - `5.0.0.50000-49` +* Clang - Version `6.0` +* CMake - Version `3.22.3` +* Boost - Version `1.72` +* IEEE 754-based half-precision floating-point library - Version `1.12.0` + +##### Known Issues + +* `CPU` only backend not enabled + +##### Rpp 1.1.0 + + + +##### Rpp 1.0.0 + + + +##### Rpp 0.99 + + + +##### Rpp 0.98 + + + +##### Rpp 0.97 + + + +##### Rpp 0.96 + + + +##### Rpp 0.95 + + + +##### Rpp 0.93 + + + #### Tensile 4.38.0 Tensile 4.38.0 for ROCm 5.7.0 @@ -2924,7 +3149,7 @@ ROCm 5.6.1 is a point release with several bug fixes in the HIP runtime. * Memory leak when code object files are loaded/unloaded via hipModuleLoad/hipModuleUnload APIs * Using `hipGraphAddMemFreeNode` no longer results in a crash -### Library changes in ROCM 5.6.1 +### Library changes in ROCm 5.6.1 | Library | Version | |---------|---------| @@ -2936,6 +3161,7 @@ ROCm 5.6.1 is a point release with several bug fixes in the HIP runtime. 
| hipSOLVER | [1.8.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.6.1) | | hipSPARSE | 2.3.6 ⇒ [2.3.7](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.6.1) | | MIOpen | [2.19.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-5.6.1) | +| MIVisionX | [2.4.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.6.1) | | rccl | [2.15.5](https://github.com/ROCm/rccl/releases/tag/rocm-5.6.1) | | rocALUTION | [2.1.9](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.6.1) | | rocBLAS | [3.0.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.6.1) | @@ -2981,7 +3207,7 @@ few examples include: ### OS and GPU support changes * SLES15 SP5 support was added this release. SLES15 SP3 support was dropped. -* AMD Instinct MI50, Radeon PRO VII, and Radeon VII products (collectively referred to as gfx906 GPUs) +* AMD Instinct MI50, Radeon Pro VII, and Radeon VII products (collectively referred to as gfx906 GPUs) will be entering the maintenance mode starting Q3 2023. This will be aligned with ROCm 5.7 GA release date. * No new features and performance optimizations will be supported for the gfx906 GPUs beyond @@ -3023,7 +3249,7 @@ few examples include: #### Additions -* Added hipRTC support for `amd_hip_fp16` +* Added hipRTC support for amd_hip_fp16 * Added hipStreamGetDevice implementation to get the device associated with the stream * Added HIP_AD_FORMAT_SIGNED_INT16 in hipArray formats * hipArrayGetInfo for getting information about the specified array @@ -3163,7 +3389,7 @@ The resulting `a.out` will depend on * rocprof in ROCm/5.4.1 fails to generate kernel info. * rocprof clobbers LD_PRELOAD. 
-### Library changes in ROCM 5.6.0 +### Library changes in ROCm 5.6.0 | Library | Version | |---------|---------| @@ -3175,6 +3401,7 @@ The resulting `a.out` will depend on | hipSOLVER | 1.7.0 ⇒ [1.8.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.6.0) | | hipSPARSE | 2.3.5 ⇒ [2.3.6](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.6.0) | | MIOpen | [2.19.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-5.6.0) | +| MIVisionX | 2.3.0 ⇒ [2.4.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.6.0) | | rccl | [2.15.5](https://github.com/ROCm/rccl/releases/tag/rocm-5.6.0) | | rocALUTION | 2.1.8 ⇒ [2.1.9](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.6.0) | | rocBLAS | 2.47.0 ⇒ [3.0.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.6.0) | @@ -3220,6 +3447,82 @@ hipSPARSE 2.3.6 for ROCm 5.6.0 - For hipsparseXbsr2csr and hipsparseXcsr2bsr, blockDim == 0 now returns HIPSPARSE_STATUS_INVALID_SIZE +#### MIVisionX 2.4.0 + +MIVisionX for ROCm 5.6.0 + +##### Added + +* OpenVX FP16 Support +* rocAL - CPU, HIP, & OCL backends +* AMD RPP - CPU, HIP, & OCL backends +* MIVisionX Setup Support for RHEL +* Extended OS Support +* Docker Support for Ubuntu `22.04` +* Tests + +##### Optimizations + +* CMakeList Cleanup +* MIGraphX Extension Updates +* rocAL - Documentation +* CMakeList Updates & Cleanup + +##### Changed + +* rocAL - Changing Python Lib Path +* Docker Support - Ubuntu 18 Support Dropped +* RPP - Link to Version 1.0.0 +* rocAL - support updates +* Setup Updates + +##### Fixed + +* rocAL bug fix and updates +* AMD RPP - bug fixes +* CMakeLists - Issues +* RPATH - Link Issues + +##### Tested Configurations + +* Windows `10` / `11` +* Linux distribution + + Ubuntu - `20.04` / `22.04` + + CentOS - `7` / `8` + + RHEL - `8` / `9` + + SLES - `15-SP3` +* ROCm: rocm-core - `5.4.3.50403-121` +* miopen-hip - `2.19.0.50403-121` +* miopen-opencl - `2.18.0.50300-63` +* migraphx - `2.4.0.50403-121` +* Protobuf - 
[V3.12.4](https://github.com/protocolbuffers/protobuf/releases/tag/v3.12.4) +* OpenCV - [4.6.0](https://github.com/opencv/opencv/releases/tag/4.6.0) +* RPP - [1.0.0](https://github.com/GPUOpen-ProfessionalCompute-Libraries/rpp/releases/tag/1.0.0) +* FFMPEG - [n4.4.2](https://github.com/FFmpeg/FFmpeg/releases/tag/n4.4.2) +* Dependencies for all the above packages +* MIVisionX Setup Script - `V2.4.2` + +##### Known Issues + +* OpenCV 4.X support for some apps missing + +##### MIVisionX Dependency Map + +**Docker Image:** `sudo docker build -f docker/ubuntu20/{DOCKER_LEVEL_FILE_NAME}.dockerfile -t {mivisionx-level-NUMBER} .` + +- ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `new component added to the level` +- ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `existing component from the previous level` + +| Build Level | MIVisionX Dependencies | Modules | Libraries and Executables | Docker Tag | +| ----------- | -------------------------------------------------- | ------------------------------------------------------------------------- | ------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Level_1` | cmake <br> gcc <br> g++ | amd_openvx <br> utilities | ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libopenvx.so` - OpenVX&trade; Lib - CPU <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `runvx` - 
OpenVX&trade; Graph Executor - CPU with Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-1?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_2` | ROCm OpenCL <br> +Level 1 | amd_openvx <br> amd_openvx_extensions <br> utilities | ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libopenvx.so` - OpenVX&trade; Lib - CPU/GPU <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU/GPU <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `loom_shell` - 360 Stitch App <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `runvx` - OpenVX&trade; Graph Executor - Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-2?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_3` | OpenCV <br> FFMPEG <br> +Level 2 | amd_openvx <br> amd_openvx_extensions <br> utilities | ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> 
![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `mv_compile` - Neural Net Model Compile <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-3?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_4` | MIOpenGEMM <br> MIOpen <br> ProtoBuf <br> +Level 3 | amd_openvx <br> amd_openvx_extensions <br> apps <br> utilities | ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> 
![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `inference_server_app` - Cloud Inference App | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-4?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| 
`Level_5` | AMD_RPP <br> rocAL deps <br> +Level 4 | amd_openvx <br> amd_openvx_extensions <br> apps <br> rocAL <br> utilities | ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> ![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> 
![#1589F0](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/blue_square.png) `inference_server_app` - Cloud Inference App <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `libvx_rpp.so` - OpenVX&trade; RPP Extension <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `librocal.so` - Radeon Augmentation Library <br> ![#c5f015](https://raw.githubusercontent.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/develop/docs/data/green_square.png) `rocal_pybind.so` - rocAL Pybind Lib | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-5?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | + +**NOTE:** OpenVX and the OpenVX logo are trademarks of the Khronos Group Inc. + #### rocALUTION 2.1.9 rocALUTION 2.1.9 for ROCm 5.6.0 @@ -3239,7 +3542,7 @@ rocBLAS 3.0.0 for ROCm 5.6.0 ##### Added -- Added BF16 inputs and F32 compute support to Level 1 rocBLAS Extension functions axpy_ex, scal_ex and nrm2_ex. +- Added bf16 inputs and f32 compute support to Level 1 rocBLAS Extension functions axpy_ex, scal_ex and nrm2_ex. ##### Deprecated @@ -3341,7 +3644,7 @@ rocThrust 2.18.0 for ROCm 5.6.0 ##### Changed -- Updated `docs` directory structure to match the standard of [rocm-docs-core](https://github.com/ROCm/rocm-docs-core). +- Updated `docs` directory structure to match the standard of [rocm-docs-core](https://github.com/RadeonOpenCompute/rocm-docs-core). 
#### rocWMMA 1.1.0 @@ -3360,7 +3663,7 @@ rocWMMA 1.1.0 for ROCm 5.6.0 ##### Changed - Default to GPU rocBLAS validation against rocWMMA -- Re-enabled INT8 gemm tests on gfx9 +- Re-enabled int8 gemm tests on gfx9 - Upgraded to C++17 - Restructured unit test folder for consistency - Consolidated rocWMMA samples common code @@ -3374,7 +3677,7 @@ Tensile 4.37.0 for ROCm 5.6.0 - Added user driven tuning API - Added decision tree fallback feature - Added SingleBuffer + AtomicAdd option for GlobalSplitU -- DirectToVgpr support for FP16 and Int8 with TN orientation +- DirectToVgpr support for fp16 and Int8 with TN orientation - Added new test cases for various functions - Added SingleBuffer algorithm for ZGEMM/CGEMM - Added joblib for parallel map calls @@ -3450,18 +3753,20 @@ The following HIP API is updated in the ROCm 5.5.1 release: * The return value for `hipDeviceSetCacheConfig` is updated from `hipErrorNotSupported` to `hipSuccess` -### Library changes in ROCM 5.5.1 +### Library changes in ROCm 5.5.1 | Library | Version | |---------|---------| | AMDMIGraphX | [2.5](https://github.com/ROCm/AMDMIGraphX/releases/tag/rocm-5.5.1) | | hipBLAS | [0.54.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-5.5.1) | +| hipBLASLt | [0.1.0](https://github.com/ROCm/hipBLASLt/releases/tag/rocm-5.5.1) | | hipCUB | [2.13.1](https://github.com/ROCm/hipCUB/releases/tag/rocm-5.5.1) | | hipFFT | [1.0.11](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.5.1) | | hipRAND | [2.10.16](https://github.com/ROCm/hipRAND/releases/tag/rocm-5.5.1) | | hipSOLVER | [1.7.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.5.1) | | hipSPARSE | [2.3.5](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.5.1) | | MIOpen | [2.19.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-5.5.1) | +| MIVisionX | [2.3.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.5.1) | | rccl | [2.15.5](https://github.com/ROCm/rccl/releases/tag/rocm-5.5.1) | | rocALUTION | 
[2.1.8](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.5.1) | | rocBLAS | [2.47.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.5.1) | @@ -3624,7 +3929,7 @@ The new HIP graph management APIs are as follows: This release consists of the following OpenMP enhancements: * Additional support for OMPT functions `get_device_time` and `get_record_type` -* Added support for min/max fast FP atomics on AMD GPUs +* Added support for min/max fast fp atomics on AMD GPUs * Fixed the use of the abs function in C device regions ### Deprecations and warnings @@ -3802,18 +4107,20 @@ communicators repeatedly, subsequent communicators may fail to initialize. This issue is under investigation and will be resolved in a future release. -### Library changes in ROCM 5.5.0 +### Library changes in ROCm 5.5.0 | Library | Version | |---------|---------| -| AMDMIGraphX | ⇒ [2.5](https://github.com/ROCm/AMDMIGraphX/releases/tag/rocm-5.5.0) | +| AMDMIGraphX | [2.5](https://github.com/ROCm/AMDMIGraphX/releases/tag/rocm-5.5.0) | | hipBLAS | 0.53.0 ⇒ [0.54.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-5.5.0) | +| hipBLASLt | [0.1.0](https://github.com/ROCm/hipBLASLt/releases/tag/rocm-5.5.0) | | hipCUB | 2.13.0 ⇒ [2.13.1](https://github.com/ROCm/hipCUB/releases/tag/rocm-5.5.0) | | hipFFT | 1.0.10 ⇒ [1.0.11](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.5.0) | -| hipRAND | ⇒ [2.10.16](https://github.com/ROCm/hipRAND/releases/tag/rocm-5.5.0) | +| hipRAND | [2.10.16](https://github.com/ROCm/hipRAND/releases/tag/rocm-5.5.0) | | hipSOLVER | 1.6.0 ⇒ [1.7.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.5.0) | | hipSPARSE | 2.3.3 ⇒ [2.3.5](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.5.0) | -| MIOpen | ⇒ [2.19.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-5.5.0) | +| MIOpen | [2.19.0](https://github.com/ROCm/MIOpen/releases/tag/rocm-5.5.0) | +| MIVisionX | [2.3.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.5.0) | | rccl | 2.13.4 ⇒ 
[2.15.5](https://github.com/ROCm/rccl/releases/tag/rocm-5.5.0) | | rocALUTION | 2.1.3 ⇒ [2.1.8](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.5.0) | | rocBLAS | 2.46.0 ⇒ [2.47.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.5.0) | @@ -3834,9 +4141,9 @@ MIGraphX 2.5 for ROCm 5.5.0 ##### Added - Y-Model feature to store tuning information with the optimized model -- Added Python 3.10 bindings +- Added Python 3.10 bindings - Accuracy checker tool based on ONNX Runtime -- ONNX Operators parse_split, and Trilu +- ONNX Operators parse_split, and Trilu - Build support for ROCm MLIR - Added migraphx-driver flag to print optimizations in python (--python) - Added JIT implementation of the Gather and Pad operator which results in better handling of larger tensor sizes. @@ -3850,9 +4157,9 @@ MIGraphX 2.5 for ROCm 5.5.0 ##### Fixed -- Improved parsing Tensorflow Protobuf files +- Improved parsing Tensorflow Protobuf files - Resolved various accuracy issues with some onnx models -- Resolved a gcc-12 issue with MIVisionX +- Resolved a gcc-12 issue with mivisionx - Improved support for larger sized models and batches - Use --offload-arch instead of --cuda-gpu-arch for the HIP compiler - Changes inside JIT to use float accumulator for large reduce ops of half type to avoid overflow. 
@@ -3887,6 +4194,28 @@ hipBLAS 0.54.0 for ROCm 5.5.0 - changed reference code for Windows to OpenBLAS - hipblas client executables all now begin with hipblas- prefix +#### hipBLASLt 0.1.0 + +hipBLASLt 0.1.0 for ROCm 5.5.0 + +##### Added + +- Enable hipBLASLt APIs +- Support gfx90a +- Support problem type: fp32, fp16, bf16 +- Support activation: relu, gelu +- Support bias vector +- Support Scale D vector +- Integrate with tensilelite kernel generator +- Add Gtest: hipblaslt-test +- Add full function tool: hipblaslt-bench +- Add sample app: example_hipblaslt_preference + +##### Optimizations + +- Gridbase solution search algorithm for untuned size +- Tune 10k sizes for each problem type + #### hipCUB 2.13.1 hipCUB 2.13.1 for ROCm 5.5.0 @@ -3951,7 +4280,7 @@ hipSPARSE 2.3.5 for ROCm 5.5.0 ##### Improved - Fixed an issue, where the rocm folder was not removed on upgrade of meta packages -- Fixed a compilation issue with cuSPARSE backend +- Fixed a compilation issue with cusparse backend - Added more detailed messages on unit test failures due to missing input data - Improved documentation - Fixed a bug with deprecation messages when using gcc9 (Thanks @Maetveis) @@ -3998,7 +4327,7 @@ RCCL 2.15.5 for ROCm 5.5.0 ##### Removed -- Removed TransferBench from tools. Exists in standalone repo: https://github.com/ROCm/TransferBench +- Removed TransferBench from tools. Exists in standalone repo: https://github.com/ROCmSoftwarePlatform/TransferBench #### rocALUTION 2.1.8 @@ -4417,7 +4746,7 @@ most `-save-temps` use cases work correctly, this error may appear occasionally. This issue is under investigation, and the known workaround is not to use `-save-temps` when the error appears. -### Library changes in ROCM 5.4.3 +### Library changes in ROCm 5.4.3 | Library | Version | |---------|---------| @@ -4426,6 +4755,7 @@ appears.
| hipFFT | [1.0.10](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.4.3) | | hipSOLVER | [1.6.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.4.3) | | hipSPARSE | [2.3.3](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.4.3) | +| MIVisionX | [2.3.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.4.3) | | rccl | [2.13.4](https://github.com/ROCm/rccl/releases/tag/rocm-5.4.3) | | rocALUTION | [2.1.3](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.4.3) | | rocBLAS | [2.46.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.4.3) | @@ -4490,7 +4820,7 @@ with one of the following error messages: This is a known issue and will be fixed in a future release. -### Library changes in ROCM 5.4.2 +### Library changes in ROCm 5.4.2 | Library | Version | |---------|---------| @@ -4499,6 +4829,7 @@ This is a known issue and will be fixed in a future release. | hipFFT | [1.0.10](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.4.2) | | hipSOLVER | [1.6.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.4.2) | | hipSPARSE | [2.3.3](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.4.2) | +| MIVisionX | [2.3.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.4.2) | | rccl | [2.13.4](https://github.com/ROCm/rccl/releases/tag/rocm-5.4.2) | | rocALUTION | [2.1.3](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.4.2) | | rocBLAS | [2.46.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.4.2) | @@ -4591,7 +4922,7 @@ Instructions on how to download and apply MI200 maintenance updates are availabl Maintenance update #3, combined with ROCm 5.4.1, now provides SRIOV virtualization support for all AMD Instinct™ MI200 devices. -### Library changes in ROCM 5.4.1 +### Library changes in ROCm 5.4.1 | Library | Version | |---------|---------| @@ -4600,6 +4931,7 @@ AMD Instinct™ MI200 devices. 
| hipFFT | [1.0.10](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.4.1) | | hipSOLVER | [1.6.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.4.1) | | hipSPARSE | [2.3.3](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.4.1) | +| MIVisionX | [2.3.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.4.1) | | rccl | [2.13.4](https://github.com/ROCm/rccl/releases/tag/rocm-5.4.1) | | rocALUTION | [2.1.3](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.4.1) | | rocBLAS | [2.46.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.4.1) | @@ -4881,7 +5213,7 @@ globally unique and guaranteed to be consistent across APIs and processes. GPU IDs reported by ROCTracer and ROCProfiler or ROCm Tools are HSA Driver Node ID of that GPU, as it is a unique ID for that device in that particular node. -### Library changes in ROCM 5.4.0 +### Library changes in ROCm 5.4.0 | Library | Version | |---------|---------| @@ -4890,6 +5222,7 @@ as it is a unique ID for that device in that particular node. 
| hipFFT | 1.0.9 ⇒ [1.0.10](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.4.0) | | hipSOLVER | 1.5.0 ⇒ [1.6.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.4.0) | | hipSPARSE | 2.3.1 ⇒ [2.3.3](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.4.0) | +| MIVisionX | [2.3.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.4.0) | | rccl | 2.12.10 ⇒ [2.13.4](https://github.com/ROCm/rccl/releases/tag/rocm-5.4.0) | | rocALUTION | 2.1.0 ⇒ [2.1.3](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.4.0) | | rocBLAS | 2.45.0 ⇒ [2.46.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.4.0) | @@ -4909,7 +5242,7 @@ hipBLAS 0.53.0 for ROCm 5.4.0 ##### Added -- Allow for selection of INT8 datatype +- Allow for selection of int8 datatype - Added support for hipblasXgels and hipblasXgelsStridedBatched operations (with s,d,c,z precisions), only supported with rocBLAS backend - Added support for hipblasXgelsBatched operations (with s,d,c,z precisions) @@ -4963,7 +5296,7 @@ hipSPARSE 2.3.3 for ROCm 5.4.0 ##### Changed -- HIPSPARSE_ORDER_COLUMN has been renamed to HIPSPARSE_ORDER_COL to match cuSPARSE +- HIPSPARSE_ORDER_COLUMN has been renamed to HIPSPARSE_ORDER_COL to match cusparse #### rccl 2.13.4 @@ -5021,7 +5354,7 @@ rocBLAS 2.46.0 for ROCm 5.4.0 - Level 2, Level 1, and Extension functions: argument checking when the handle is set to rocblas_pointer_mode_host now returns the status of rocblas_status_invalid_pointer only for pointers that must be dereferenced based on the alpha and beta argument values. With handle mode rocblas_pointer_mode_device only pointers that are always dereferenced regardless of alpha and beta values are checked and so may lead to a return status of rocblas_status_invalid_pointer. This improves consistency with legacy BLAS behaviour. 
- Add variable to turn on/off ieee16/ieee32 tests for mixed precision gemm -- Allow hipBLAS to select INT8 datatype +- Allow hipBLAS to select int8 datatype - Disallow B == C && ldb != ldc in rocblas_xtrmm_outofplace ##### Fixed @@ -5212,7 +5545,7 @@ This issue is resolved with the following fixes to compilation failures: * rocPRIM: in device_merge if the two key iterators do not match. * rocTHRUST: in thrust::merge if the two key iterators do not match. -### Library changes in ROCM 5.3.3 +### Library changes in ROCm 5.3.3 | Library | Version | |---------|---------| @@ -5221,6 +5554,7 @@ This issue is resolved with the following fixes to compilation failures: | hipFFT | [1.0.9](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.3.3) | | hipSOLVER | [1.5.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.3.3) | | hipSPARSE | [2.3.1](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.3.3) | +| MIVisionX | [2.3.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.3.3) | | rccl | [2.12.10](https://github.com/ROCm/rccl/releases/tag/rocm-5.3.3) | | rocALUTION | [2.1.0](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.3.3) | | rocBLAS | [2.45.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.3.3) | @@ -5290,7 +5624,7 @@ ROCm v5.3.x releases. This issue is currently under investigation and will be resolved in a future release. 
-### Library changes in ROCM 5.3.2 +### Library changes in ROCm 5.3.2 | Library | Version | |---------|---------| @@ -5299,6 +5633,7 @@ This issue is currently under investigation and will be resolved in a future rel | hipFFT | [1.0.9](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.3.2) | | hipSOLVER | [1.5.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.3.2) | | hipSPARSE | [2.3.1](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.3.2) | +| MIVisionX | [2.3.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.3.2) | | rccl | [2.12.10](https://github.com/ROCm/rccl/releases/tag/rocm-5.3.2) | | rocALUTION | [2.1.0](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.3.2) | | rocBLAS | [2.45.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.3.2) | @@ -5504,7 +5839,7 @@ clinfo, and HelloWord.cl and cause a system crash. Workaround: To avoid the system crash, add `amd_iommu=on iommu=pt` as the kernel bootparam, as indicated in the warning message. -### Library changes in ROCM 5.3.0 +### Library changes in ROCm 5.3.0 | Library | Version | |---------|---------| @@ -5513,11 +5848,12 @@ indicated in the warning message. 
| hipFFT | 1.0.8 ⇒ [1.0.9](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.3.0) | | hipSOLVER | 1.4.0 ⇒ [1.5.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.3.0) | | hipSPARSE | 2.2.0 ⇒ [2.3.1](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.3.0) | +| MIVisionX | [2.3.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.3.0) | | rccl | [2.12.10](https://github.com/ROCm/rccl/releases/tag/rocm-5.3.0) | | rocALUTION | 2.0.3 ⇒ [2.1.0](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.3.0) | | rocBLAS | 2.44.0 ⇒ [2.45.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.3.0) | | rocFFT | 1.0.17 ⇒ [1.0.18](https://github.com/ROCm/rocFFT/releases/tag/rocm-5.3.0) | -| rocm-cmake | ⇒ [0.8.0](https://github.com/ROCm/rocm-cmake/releases/tag/rocm-5.3.0) | +| rocm-cmake | [0.8.0](https://github.com/ROCm/rocm-cmake/releases/tag/rocm-5.3.0) | | rocPRIM | 2.10.14 ⇒ [2.11.0](https://github.com/ROCm/rocPRIM/releases/tag/rocm-5.3.0) | | rocRAND | 2.10.14 ⇒ [2.10.15](https://github.com/ROCm/rocRAND/releases/tag/rocm-5.3.0) | | rocSOLVER | 3.18.0 ⇒ [3.19.0](https://github.com/ROCm/rocSOLVER/releases/tag/rocm-5.3.0) | @@ -5675,8 +6011,7 @@ rocBLAS 2.45.0 for ROCm 5.3.0 ##### Removed -- `install.sh` options `--hip-clang`, `--no-hip-clang`, `--merge-files`, and `--no-merge-files` were - removed +- `install.sh` options `--hip-clang`, `--no-hip-clang`, `--merge-files`, and `--no-merge-files` were removed.
#### rocFFT 1.0.18 @@ -5745,16 +6080,16 @@ rocSOLVER 3.19.0 for ROCm 5.3.0 ##### Added -- Partial EigenSolver routines for symmetric/hermitian matrices: +- Partial eigensolver routines for symmetric/hermitian matrices: - SYEVX (with batched and strided\_batched versions) - HEEVX (with batched and strided\_batched versions) -- Generalized symmetric- and hermitian-definite partial EigenSolvers: +- Generalized symmetric- and hermitian-definite partial eigensolvers: - SYGVX (with batched and strided\_batched versions) - HEGVX (with batched and strided\_batched versions) -- EigenSolver routines for symmetric/hermitian matrices using Jacobi algorithm: +- Eigensolver routines for symmetric/hermitian matrices using Jacobi algorithm: - SYEVJ (with batched and strided\_batched versions) - HEEVJ (with batched and strided\_batched versions) -- Generalized symmetric- and hermitian-definite EigenSolvers using Jacobi algorithm: +- Generalized symmetric- and hermitian-definite eigensolvers using Jacobi algorithm: - SYGVJ (with batched and strided\_batched versions) - HEGVJ (with batched and strided\_batched versions) - Added --profile_kernels option to rocsolver-bench, which will include kernel calls in the @@ -5902,7 +6237,7 @@ No notable changes in this release for deployment and management tools. 
For release information for older ROCm releases, refer to -### Library changes in ROCM 5.2.3 +### Library changes in ROCm 5.2.3 | Library | Version | |---------|---------| @@ -5911,6 +6246,7 @@ For release information for older ROCm releases, refer to | hipFFT | [1.0.8](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.2.3) | | hipSOLVER | [1.4.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.2.3) | | hipSPARSE | [2.2.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.2.3) | +| MIVisionX | [2.3.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.2.3) | | rccl | 2.11.4 ⇒ [2.12.10](https://github.com/ROCm/rccl/releases/tag/rocm-5.2.3) | | rocALUTION | [2.0.3](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.2.3) | | rocBLAS | [2.44.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.2.3) | @@ -5949,7 +6285,7 @@ RCCL 2.12.10 for ROCm 5.2.3 ## ROCm 5.2.1 -### Library changes in ROCM 5.2.1 +### Library changes in ROCm 5.2.1 | Library | Version | |---------|---------| @@ -5958,6 +6294,7 @@ RCCL 2.12.10 for ROCm 5.2.3 | hipFFT | [1.0.8](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.2.1) | | hipSOLVER | [1.4.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.2.1) | | hipSPARSE | [2.2.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.2.1) | +| MIVisionX | 2.2.0 ⇒ [2.3.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.2.1) | | rccl | [2.11.4](https://github.com/ROCm/rccl/releases/tag/rocm-5.2.1) | | rocALUTION | [2.0.3](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.2.1) | | rocBLAS | [2.44.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.2.1) | @@ -5970,6 +6307,63 @@ RCCL 2.12.10 for ROCm 5.2.3 | rocWMMA | [0.7](https://github.com/ROCm/rocWMMA/releases/tag/rocm-5.2.1) | | Tensile | [4.33.0](https://github.com/ROCm/Tensile/releases/tag/rocm-5.2.1) | +#### MIVisionX 2.3.0 + +MIVisionX for ROCm 5.2.1 + +##### Added + +* Docker Support for ROCm `5.2.X` + +##### Optimizations + +* + +##### Changed + +* + +##### 
Fixed + +* + +##### Tested Configurations + +* Windows `10` / `11` +* Linux distribution + + Ubuntu - `18.04` / `20.04` + + CentOS - `7` / `8` + + SLES - `15-SP2` +* ROCm: rocm-core - `5.2.0.50200-65` +* miopen-hip - `2.16.0.50101-48` +* miopen-opencl - `2.16.0.50101-48` +* migraphx - `2.1.0.50101-48` +* Protobuf - [V3.12.4](https://github.com/protocolbuffers/protobuf/releases/tag/v3.12.4) +* OpenCV - [4.5.5](https://github.com/opencv/opencv/releases/tag/4.5.5) +* RPP - [0.93](https://github.com/GPUOpen-ProfessionalCompute-Libraries/rpp/releases/tag/0.93) +* FFMPEG - [n4.4.2](https://github.com/FFmpeg/FFmpeg/releases/tag/n4.4.2) +* Dependencies for all the above packages +* MIVisionX Setup Script - `V2.3.4` + +##### Known Issues + +* OpenCV 4.X support is missing for some apps + +##### MIVisionX Dependency Map + +**Docker Image:** `sudo docker build -f docker/ubuntu20/{DOCKER_LEVEL_FILE_NAME}.dockerfile -t {mivisionx-level-NUMBER} .` + +- ![#c5f015](docs/images/green_square.png) `new component added to the level` +- ![#1589F0](docs/images/blue_square.png) `existing component from the previous level` + +| Build Level | MIVisionX Dependencies | Modules | Libraries and Executables | Docker Tag | +| ----------- | -------------------------------------------------- | ------------------------------------------------------------------------- | ------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Level_1` | cmake <br> gcc <br> g++ | amd_openvx <br> utilities | ![#c5f015](docs/images/green_square.png) `libopenvx.so` - OpenVX&trade; Lib - CPU <br> ![#c5f015](docs/images/green_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU <br> ![#c5f015](docs/images/green_square.png) `runvx` - OpenVX&trade; Graph Executor - CPU with Display OFF | [![Docker Image Version (tag latest
semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-1?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_2` | ROCm OpenCL <br> +Level 1 | amd_openvx <br> amd_openvx_extensions <br> utilities | ![#c5f015](docs/images/green_square.png) `libopenvx.so` - OpenVX&trade; Lib - CPU/GPU <br> ![#c5f015](docs/images/green_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU/GPU <br> ![#c5f015](docs/images/green_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#c5f015](docs/images/green_square.png) `loom_shell` - 360 Stitch App <br> ![#c5f015](docs/images/green_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](docs/images/green_square.png) `runvx` - OpenVX&trade; Graph Executor - Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-2?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_3` | OpenCV <br> FFMPEG <br> +Level 2 | amd_openvx <br> amd_openvx_extensions <br> utilities | ![#1589F0](docs/images/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](docs/images/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](docs/images/blue_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](docs/images/blue_square.png) `loom_shell` - 360 Stitch App <br> ![#1589F0](docs/images/blue_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](docs/images/green_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#c5f015](docs/images/green_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#c5f015](docs/images/green_square.png) `mv_compile` - Neural Net Model Compile <br> ![#c5f015](docs/images/green_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON | [![Docker Image Version (tag latest 
semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-3?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_4` | MIOpenGEMM <br> MIOpen <br> ProtoBuf <br> +Level 3 | amd_openvx <br> amd_openvx_extensions <br> apps <br> utilities | ![#1589F0](docs/images/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](docs/images/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](docs/images/blue_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](docs/images/blue_square.png) `loom_shell` - 360 Stitch App <br> ![#1589F0](docs/images/blue_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](docs/images/blue_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](docs/images/blue_square.png) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](docs/images/blue_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](docs/images/blue_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> ![#c5f015](docs/images/green_square.png) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#c5f015](docs/images/green_square.png) `inference_server_app` - Cloud Inference App | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-4?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_5` | AMD_RPP <br> rocAL deps <br> +Level 4 | amd_openvx <br> amd_openvx_extensions <br> apps <br> rocAL <br> utilities | ![#1589F0](docs/images/blue_square.png) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](docs/images/blue_square.png) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](docs/images/blue_square.png) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](docs/images/blue_square.png) `loom_shell` - 360 Stitch App <br> 
![#1589F0](docs/images/blue_square.png) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](docs/images/blue_square.png) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](docs/images/blue_square.png) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](docs/images/blue_square.png) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](docs/images/blue_square.png) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> ![#1589F0](docs/images/blue_square.png) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#1589F0](docs/images/blue_square.png) `inference_server_app` - Cloud Inference App <br> ![#c5f015](docs/images/green_square.png) `libvx_rpp.so` - OpenVX&trade; RPP Extension <br> ![#c5f015](docs/images/green_square.png) `librali.so` - Radeon Augmentation Library <br> ![#c5f015](docs/images/green_square.png) `rali_pybind.so` - rocAL Pybind Lib | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-5?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | + ------------------- ## ROCm 5.2.0 @@ -6515,7 +6909,7 @@ ambiguous kernel execution. `noundef` attribute, if it finds that argument is tagged with shuffle attribute. Refer to for more information. -* Introduce Clang builtin for `__shfl` to identify it and skip adding `noundef` attribute. +* Introduce clang builtin for `__shfl` to identify it and skip adding `noundef` attribute. * Introduce `__builtin_freeze` to use on the relevant arguments in library wrappers. The library/header need to insert freezes on the relevant inputs. @@ -6527,7 +6921,7 @@ ROCgdb is used on AMD Instinct™ MI50 and MI100 systems. This issue is under investigation and will be fixed in a future release. 
-### Library changes in ROCM 5.2.0 +### Library changes in ROCm 5.2.0 | Library | Version | |---------|---------| @@ -6536,6 +6930,7 @@ This issue is under investigation and will be fixed in a future release. | hipFFT | 1.0.7 ⇒ [1.0.8](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.2.0) | | hipSOLVER | 1.3.0 ⇒ [1.4.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.2.0) | | hipSPARSE | 2.1.0 ⇒ [2.2.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.2.0) | +| MIVisionX | [2.2.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.2.0) | | rccl | [2.11.4](https://github.com/ROCm/rccl/releases/tag/rocm-5.2.0) | | rocALUTION | 2.0.2 ⇒ [2.0.3](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.2.0) | | rocBLAS | 2.43.0 ⇒ [2.44.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.2.0) | @@ -6545,7 +6940,7 @@ This issue is under investigation and will be fixed in a future release. | rocSOLVER | 3.17.0 ⇒ [3.18.0](https://github.com/ROCm/rocSOLVER/releases/tag/rocm-5.2.0) | | rocSPARSE | 2.1.0 ⇒ [2.2.0](https://github.com/ROCm/rocSPARSE/releases/tag/rocm-5.2.0) | | rocThrust | 2.14.0 ⇒ [2.15.0](https://github.com/ROCm/rocThrust/releases/tag/rocm-5.2.0) | -| rocWMMA | ⇒ [0.7](https://github.com/ROCm/rocWMMA/releases/tag/rocm-5.2.0) | +| rocWMMA | [0.7](https://github.com/ROCm/rocWMMA/releases/tag/rocm-5.2.0) | | Tensile | 4.32.0 ⇒ [4.33.0](https://github.com/ROCm/Tensile/releases/tag/rocm-5.2.0) | #### hipBLAS 0.51.0 @@ -6755,25 +7150,25 @@ rocWMMA 0.7 for ROCm 5.2.0 - Added GEMM sample - Added DLRM sample - Added SGEMV sample -- Added unit tests for cooperative WMMA load and stores +- Added unit tests for cooperative wmma load and stores - Added unit tests for IOBarrier.h -- Added WMMA load/ store tests for different matrix types (A, B and Accumulator) +- Added wmma load/ store tests for different matrix types (A, B and Accumulator) - Added more block sizes 1, 2, 4, 8 to test MmaSyncMultiTest - Added block sizes 4, 8 to test MmaSynMultiLdsTest -- 
Added support for WMMA load / store layouts with block dimension greater than 64 -- Added IOShape structure to define the attributes of mapping and layouts for all WMMA matrix types +- Added support for wmma load / store layouts with block dimension greater than 64 +- Added IOShape structure to define the attributes of mapping and layouts for all wmma matrix types - Added CI testing for rocWMMA ##### Changed -- Renamed WMMA to rocWMMA in cmake, header files and documentation +- Renamed wmma to rocwmma in cmake, header files and documentation - Renamed library files - Modified Layout.h to use different matrix offset calculations (base offset, incremental offset and cumulative offset) - Opaque load/store continue to use incrementatl offsets as they fill the entire block - Cooperative load/store use cumulative offsets as they fill only small portions for the entire block - Increased Max split counts to 64 for cooperative load/store -- Moved all the WMMA definitions, API headers to rocWMMA namespace -- Modified WMMA fill unit tests to validate all matrix types (A, B, Accumulator) +- Moved all the wmma definitions, API headers to rocwmma namespace +- Modified wmma fill unit tests to validate all matrix types (A, B, Accumulator) #### Tensile 4.33.0 @@ -6807,7 +7202,7 @@ Tensile 4.33.0 for ROCm 5.2.0 ## ROCm 5.1.3 -### Library changes in ROCM 5.1.3 +### Library changes in ROCm 5.1.3 | Library | Version | |---------|---------| @@ -6817,6 +7212,7 @@ Tensile 4.33.0 for ROCm 5.2.0 | hipRAND | [2.10.13](https://github.com/ROCm/hipRAND/releases/tag/rocm-5.1.3) | | hipSOLVER | [1.3.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.1.3) | | hipSPARSE | [2.1.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.1.3) | +| MIVisionX | 2.1.0 ⇒ [2.2.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.1.3) | | rccl | [2.11.4](https://github.com/ROCm/rccl/releases/tag/rocm-5.1.3) | | rocALUTION | [2.0.2](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.1.3) | | 
rocBLAS | [2.43.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.1.3) | @@ -6828,12 +7224,69 @@ Tensile 4.33.0 for ROCm 5.2.0 | rocThrust | [2.14.0](https://github.com/ROCm/rocThrust/releases/tag/rocm-5.1.3) | | Tensile | [4.32.0](https://github.com/ROCm/Tensile/releases/tag/rocm-5.1.3) | +#### MIVisionX 2.2.0 + +MIVisionX for ROCm 5.1.3 + +##### Added + +* + +##### Optimizations + +* + +##### Changed + +* DockerFiles - Updates to install ROCm 5.1.1 Plus + +##### Fixed + +* + +##### Tested Configurations + +* Windows `10` / `11` +* Linux distribution + + Ubuntu - `18.04` / `20.04` + + CentOS - `7` / `8` + + SLES - `15-SP2` +* ROCm: rocm-core - `5.1.1.50101-48` +* miopen-hip - `2.16.0.50101-48` +* miopen-opencl - `2.16.0.50101-48` +* migraphx - `2.1.0.50101-48` +* Protobuf - [V3.12.0](https://github.com/protocolbuffers/protobuf/releases/tag/v3.12.0) +* OpenCV - [4.5.5](https://github.com/opencv/opencv/releases/tag/4.5.5) +* RPP - [0.93](https://github.com/GPUOpen-ProfessionalCompute-Libraries/rpp/releases/tag/0.93) +* FFMPEG - [n4.0.4](https://github.com/FFmpeg/FFmpeg/releases/tag/n4.0.4) +* Dependencies for all the above packages +* MIVisionX Setup Script - `V2.3.0` + +##### Known Issues + +* + +##### MIVisionX Dependency Map + +**Docker Image:** `sudo docker build -f docker/ubuntu20/{DOCKER_LEVEL_FILE_NAME}.dockerfile -t {mivisionx-level-NUMBER} .` + +- ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `new component added to the level` +- ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `existing component from the previous level` + +| Build Level | MIVisionX Dependencies | Modules | Libraries and Executables | Docker Tag | +| ----------- | -------------------------------------------------- | ------------------------------------------------------------------------- | ------------------------- |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Level_1` | cmake <br> gcc <br> g++ | amd_openvx <br> utilities | ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib - CPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - CPU with Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-1?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_2` | ROCm OpenCL <br> +Level 1 | amd_openvx <br> amd_openvx_extensions <br> utilities | ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib - CPU/GPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU/GPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `loom_shell` - 360 Stitch App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-2?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_3` | OpenCV <br> FFMPEG <br> +Level 2 | amd_openvx <br> amd_openvx_extensions <br> utilities | 
![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `mv_compile` - Neural Net Model Compile <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - Display ON | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-3?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_4` | MIOpenGEMM <br> MIOpen <br> ProtoBuf <br> +Level 3 | amd_openvx <br> amd_openvx_extensions <br> apps <br> utilities | ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_opencv.so` - 
OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `inference_server_app` - Cloud Inference App | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-4?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_5` | AMD_RPP <br> rocAL deps <br> +Level 4 | amd_openvx <br> amd_openvx_extensions <br> apps <br> rocAL <br> utilities | ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> 
![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `inference_server_app` - Cloud Inference App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_rpp.so` - OpenVX&trade; RPP Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `librali.so` - Radeon Augmentation Library <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `rali_pybind.so` - rocAL Pybind Lib | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-5?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | + ------------------- ## ROCm 5.1.1 -### Library changes in ROCM 5.1.1 +### Library changes in ROCm 5.1.1 | Library | Version | |---------|---------| @@ -6843,6 +7296,7 @@ Tensile 4.33.0 for ROCm 5.2.0 | hipRAND | [2.10.13](https://github.com/ROCm/hipRAND/releases/tag/rocm-5.1.1) | | hipSOLVER | [1.3.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.1.1) | | hipSPARSE | [2.1.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.1.1) | +| MIVisionX | [2.1.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.1.1) | | rccl | [2.11.4](https://github.com/ROCm/rccl/releases/tag/rocm-5.1.1) | | rocALUTION | [2.0.2](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.1.1) | | rocBLAS | [2.43.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.1.1) | @@ -6940,7 +7394,7 @@ new inferior. #### MIOpen support for RDNA GPUs -This release includes support for AMD Radeon PRO W6800, in addition to other bug fixes and +This release includes support for AMD Radeon™ Pro W6800, in addition to other bug fixes and performance improvements as listed below: * MIOpen now supports RDNA GPUs!! 
(via MIOpen PRs 973, 780, 764, 740, 739, 677, 660, 653, 493, 498) @@ -7086,7 +7540,7 @@ As a workaround, use an older version of the kernel. For example, Ubuntu 5.11.0- Workloads that use the cooperative groups function to ensure all waves can be resident at the same time may fail to restore correctly. This issue is under investigation and will be fixed in a future release. -#### Radeon PRO V620 and W6800 workstation GPUs +#### Radeon Pro V620 and W6800 workstation GPUs ##### No support for ROCDebugger on SRIOV @@ -7100,16 +7554,17 @@ Random error messages are generated by unsupported functions or commands. This is a known issue and will be fixed in a future release. -### Library changes in ROCM 5.1.0 +### Library changes in ROCm 5.1.0 | Library | Version | |---------|---------| | hipBLAS | 0.49.0 ⇒ [0.50.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-5.1.0) | | hipCUB | 2.10.13 ⇒ [2.11.0](https://github.com/ROCm/hipCUB/releases/tag/rocm-5.1.0) | | hipFFT | 1.0.4 ⇒ [1.0.7](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.1.0) | -| hipRAND | ⇒ [2.10.13](https://github.com/ROCm/hipRAND/releases/tag/rocm-5.1.0) | +| hipRAND | [2.10.13](https://github.com/ROCm/hipRAND/releases/tag/rocm-5.1.0) | | hipSOLVER | 1.2.0 ⇒ [1.3.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.1.0) | | hipSPARSE | 2.0.0 ⇒ [2.1.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.1.0) | +| MIVisionX | [2.1.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.1.0) | | rccl | 2.10.3 ⇒ [2.11.4](https://github.com/ROCm/rccl/releases/tag/rocm-5.1.0) | | rocALUTION | 2.0.1 ⇒ [2.0.2](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.1.0) | | rocBLAS | 2.42.0 ⇒ [2.43.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.1.0) | @@ -7153,7 +7608,7 @@ hipCUB 2.11.0 for ROCm 5.1.0 ##### Added - Device segmented sort -- Warp merge sort, WarpMask and thread sort from CUB 1.15.0 supported in hipCUB +- Warp merge sort, WarpMask and thread sort from cub 1.15.0 supported in hipCUB - 
Device three way partition ##### Changed @@ -7318,7 +7773,7 @@ rocFFT 1.0.16 for ROCm 5.1.0 ##### Removed -- The hipFFT API (header) has been removed from after a long deprecation period. Please use the [hipFFT](https://github.com/ROCm/hipFFT) package/repository to obtain the hipFFT API. +- The hipFFT API (header) has been removed from rocFFT after a long deprecation period. Please use the [hipFFT](https://github.com/ROCmSoftwarePlatform/hipFFT) package/repository to obtain the hipFFT API. #### rocPRIM 2.10.13 @@ -7356,7 +7811,7 @@ rocRAND 2.10.13 for ROCm 5.1.0 ##### Changed -- [hipRAND](https://github.com/ROCm/hipRAND.git) split into a separate package +- [hipRAND](https://github.com/ROCmSoftwarePlatform/hipRAND.git) split into a separate package - Header file installation location changed to match other libraries. - Using the `rocrand.h` header file should now use `#include <rocrand/rocrand.h>`, rather than `#include <rocrand/rocrand.h>` - rocRAND still includes hipRAND using a submodule @@ -7473,10 +7928,10 @@ This fix may lead to breakage in some OpenMP offload use cases, which use print and result in an abort in device code. The issue will be fixed in a future release. ::: -The compatibility matrix in the {doc}`Deep-learning guide` is updated for +The compatibility matrix in the [Deep-learning guide](./how-to/deep-learning-rocm.md) is updated for ROCm v5.0.2. -### Library changes in ROCM 5.0.2 +### Library changes in ROCm 5.0.2 | Library | Version | |---------|---------| @@ -7485,6 +7940,7 @@ ROCm v5.0.2.
| hipFFT | [1.0.4](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.0.2) | | hipSOLVER | [1.2.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.0.2) | | hipSPARSE | [2.0.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.0.2) | +| MIVisionX | 2.0.1 ⇒ [2.1.0](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.0.2) | | rccl | [2.10.3](https://github.com/ROCm/rccl/releases/tag/rocm-5.0.2) | | rocALUTION | [2.0.1](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.0.2) | | rocBLAS | [2.42.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.0.2) | @@ -7496,6 +7952,65 @@ ROCm v5.0.2. | rocThrust | [2.13.0](https://github.com/ROCm/rocThrust/releases/tag/rocm-5.0.2) | | Tensile | [4.31.0](https://github.com/ROCm/Tensile/releases/tag/rocm-5.0.2) | +#### MIVisionX 2.1.0 + +MIVisionX for ROCm 5.0.2 + +##### Added + +* New Tests - AMD_MEDIA + +##### Optimizations + +* Readme Updates +* HIP Buffer Transfer - Eliminate cupy usage + +##### Changed + +* **Backend** - Default Backend set to `HIP` + +##### Fixed + +* Minor bugs and warnings +* AMD_MEDIA - Bug Fixes + +##### Tested Configurations + +* Windows 10 +* Linux distribution + + Ubuntu - `18.04` / `20.04` + + CentOS - `7` / `8` + + SLES - `15-SP2` +* ROCm: rocm-dev - `4.5.2.40502-164` +* rocm-cmake - [rocm-4.2.0](https://github.com/RadeonOpenCompute/rocm-cmake/releases/tag/rocm-4.2.0) +* MIOpenGEMM - [1.1.5](https://github.com/ROCmSoftwarePlatform/MIOpenGEMM/releases/tag/1.1.5) +* MIOpen - [2.14.0](https://github.com/ROCmSoftwarePlatform/MIOpen/releases/tag/2.14.0) +* Protobuf - [V3.12.0](https://github.com/protocolbuffers/protobuf/releases/tag/v3.12.0) +* OpenCV - [4.5.5](https://github.com/opencv/opencv/releases/tag/4.5.5) +* RPP - [0.92](https://github.com/GPUOpen-ProfessionalCompute-Libraries/rpp/releases/tag/0.92) +* FFMPEG - [n4.0.4](https://github.com/FFmpeg/FFmpeg/releases/tag/n4.0.4) +* Dependencies for all the above packages +* MIVisionX Setup Script - `V2.0.0` + +##### Known Issues + +* 
`TBD` + +##### MIVisionX Dependency Map + +**Docker Image:** `sudo docker build -f docker/ubuntu20/{DOCKER_LEVEL_FILE_NAME}.dockerfile -t {mivisionx-level-NUMBER} .` + +- ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `new component added to the level` +- ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `existing component from the previous level` + +| Build Level | MIVisionX Dependencies | Modules | Libraries and Executables | Docker Tag | +| ----------- | -------------------------------------------------- | ------------------------------------------------------------------------- | ------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Level_1` | cmake <br> gcc <br> g++ | amd_openvx <br> utilities | ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib - CPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - CPU with Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-1?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_2` | ROCm OpenCL <br> +Level 1 | amd_openvx <br> amd_openvx_extensions <br> utilities | ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib - CPU/GPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU/GPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `loom_shell` -
360 Stitch App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-2?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_3` | OpenCV <br> FFMPEG <br> +Level 2 | amd_openvx <br> amd_openvx_extensions <br> utilities | ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `mv_compile` - Neural Net Model Compile <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - Display ON | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-3?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_4` | MIOpenGEMM <br> MIOpen <br> ProtoBuf <br> +Level 3 | amd_openvx <br> amd_openvx_extensions <br> apps <br> utilities | ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) 
`libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `inference_server_app` - Cloud Inference App | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-4?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_5` | AMD_RPP <br> rocAL deps <br> +Level 4 | amd_openvx <br> amd_openvx_extensions <br> apps <br> rocAL <br> utilities | ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `loom_shell` - 360 Stitch App <br> 
![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `inference_server_app` - Cloud Inference App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_rpp.so` - OpenVX&trade; RPP Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `librali.so` - Radeon Augmentation Library <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `rali_pybind.so` - rocAL Pybind Lib | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-5?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | + ------------------- ## ROCm 5.0.1 @@ -7519,7 +8034,7 @@ scripts. Subsequent Perl scripts will no longer be available in ROCm in a future release. -### Library changes in ROCM 5.0.1 +### Library changes in ROCm 5.0.1 | Library | Version | |---------|---------| @@ -7528,6 +8043,7 @@ Subsequent Perl scripts will no longer be available in ROCm in a future release. 
| hipFFT | [1.0.4](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.0.1) | | hipSOLVER | [1.2.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.0.1) | | hipSPARSE | [2.0.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.0.1) | +| MIVisionX | [2.0.1](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.0.1) | | rccl | [2.10.3](https://github.com/ROCm/rccl/releases/tag/rocm-5.0.1) | | rocALUTION | [2.0.1](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.0.1) | | rocBLAS | [2.42.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.0.1) | @@ -7869,7 +8385,7 @@ follows: 5. Users can find the values of the collected counters in the output file generated in step 2. -#### Radeon PRO V620 and W6800 workstation GPUs +#### Radeon Pro V620 and W6800 workstation GPUs ##### No support for SMI and ROCDebugger on SRIOV @@ -7933,7 +8449,7 @@ In this release, arithmetic operators of HIP complex and vector types are deprec `std::complex` types. * As alternatives to arithmetic operators of HIP vector types, users can use the operators of the native - Clang vector type associated with the data member of HIP vector types. + clang vector type associated with the data member of HIP vector types. During the deprecation, two macros `_HIP_ENABLE_COMPLEX_OPERATORS` and `_HIP_ENABLE_VECTOR_OPERATORS` are provided to allow users to conditionally enable arithmetic @@ -7956,25 +8472,26 @@ The current default is code object version 4. MIOpenTensile will be deprecated in a future release. 
-### Library changes in ROCM 5.0.0 +### Library changes in ROCm 5.0.0 | Library | Version | |---------|---------| -| hipBLAS | ⇒ [0.49.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-5.0.0) | -| hipCUB | ⇒ [2.10.13](https://github.com/ROCm/hipCUB/releases/tag/rocm-5.0.0) | -| hipFFT | ⇒ [1.0.4](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.0.0) | -| hipSOLVER | ⇒ [1.2.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.0.0) | -| hipSPARSE | ⇒ [2.0.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.0.0) | -| rccl | ⇒ [2.10.3](https://github.com/ROCm/rccl/releases/tag/rocm-5.0.0) | -| rocALUTION | ⇒ [2.0.1](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.0.0) | -| rocBLAS | ⇒ [2.42.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.0.0) | -| rocFFT | ⇒ [1.0.13](https://github.com/ROCm/rocFFT/releases/tag/rocm-5.0.0) | -| rocPRIM | ⇒ [2.10.12](https://github.com/ROCm/rocPRIM/releases/tag/rocm-5.0.0) | -| rocRAND | ⇒ [2.10.12](https://github.com/ROCm/rocRAND/releases/tag/rocm-5.0.0) | -| rocSOLVER | ⇒ [3.16.0](https://github.com/ROCm/rocSOLVER/releases/tag/rocm-5.0.0) | -| rocSPARSE | ⇒ [2.0.0](https://github.com/ROCm/rocSPARSE/releases/tag/rocm-5.0.0) | -| rocThrust | ⇒ [2.13.0](https://github.com/ROCm/rocThrust/releases/tag/rocm-5.0.0) | -| Tensile | ⇒ [4.31.0](https://github.com/ROCm/Tensile/releases/tag/rocm-5.0.0) | +| hipBLAS | [0.49.0](https://github.com/ROCm/hipBLAS/releases/tag/rocm-5.0.0) | +| hipCUB | [2.10.13](https://github.com/ROCm/hipCUB/releases/tag/rocm-5.0.0) | +| hipFFT | [1.0.4](https://github.com/ROCm/hipFFT/releases/tag/rocm-5.0.0) | +| hipSOLVER | [1.2.0](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-5.0.0) | +| hipSPARSE | [2.0.0](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-5.0.0) | +| MIVisionX | [2.0.1](https://github.com/ROCm/MIVisionX/releases/tag/rocm-5.0.0) | +| rccl | [2.10.3](https://github.com/ROCm/rccl/releases/tag/rocm-5.0.0) | +| rocALUTION | 
[2.0.1](https://github.com/ROCm/rocALUTION/releases/tag/rocm-5.0.0) | +| rocBLAS | [2.42.0](https://github.com/ROCm/rocBLAS/releases/tag/rocm-5.0.0) | +| rocFFT | [1.0.13](https://github.com/ROCm/rocFFT/releases/tag/rocm-5.0.0) | +| rocPRIM | [2.10.12](https://github.com/ROCm/rocPRIM/releases/tag/rocm-5.0.0) | +| rocRAND | [2.10.12](https://github.com/ROCm/rocRAND/releases/tag/rocm-5.0.0) | +| rocSOLVER | [3.16.0](https://github.com/ROCm/rocSOLVER/releases/tag/rocm-5.0.0) | +| rocSPARSE | [2.0.0](https://github.com/ROCm/rocSPARSE/releases/tag/rocm-5.0.0) | +| rocThrust | [2.13.0](https://github.com/ROCm/rocThrust/releases/tag/rocm-5.0.0) | +| Tensile | [4.31.0](https://github.com/ROCm/Tensile/releases/tag/rocm-5.0.0) | #### hipBLAS 0.49.0 @@ -8002,7 +8519,7 @@ hipCUB 2.10.13 for ROCm 5.0.0 ##### Fixed -- Added missing includes to `hipcub.hpp` +- Added missing includes to hipcub.hpp ##### Added @@ -8052,6 +8569,69 @@ hipSPARSE 2.0.0 for ROCm 5.0.0 - Added (conjugate) transpose support for csrmv, hybmv and spmv routines +#### MIVisionX 2.0.1 + +MIVisionX for ROCm 5.0.0 + +##### Added + +* Support for cmake 3.22.X +* Support for OpenCV 4.X.X +* Support for mv_compile with the HIP GPU backend +* Support for tensor_compare node (less/greater/less_than/greater_than/equal onnx operators) + +##### Optimizations + +* Code Cleanup +* Readme Updates + +##### Changed + +* License Updates + +##### Fixed + +* Minor bugs and warnings +* Inference server application - OpenCL Backend +* vxCreateThreshold Fix - Apps & Sample + +##### Tested Configurations + +* Windows 10 +* Linux distribution + + Ubuntu - `18.04` / `20.04` + + CentOS - `7` / `8` + + SLES - `15-SP2` +* ROCm: rocm-dev - `4.5.2.40502-164` +* rocm-cmake - [rocm-4.2.0](https://github.com/RadeonOpenCompute/rocm-cmake/releases/tag/rocm-4.2.0) +* MIOpenGEMM - [1.1.5](https://github.com/ROCmSoftwarePlatform/MIOpenGEMM/releases/tag/1.1.5) +* MIOpen - [2.14.0](https://github.com/ROCmSoftwarePlatform/MIOpen/releases/tag/2.14.0) 
+* Protobuf - [V3.12.0](https://github.com/protocolbuffers/protobuf/releases/tag/v3.12.0) +* OpenCV - [3.4.0](https://github.com/opencv/opencv/releases/tag/3.4.0) +* RPP - [0.92](https://github.com/GPUOpen-ProfessionalCompute-Libraries/rpp/releases/tag/0.92) +* FFMPEG - [n4.0.4](https://github.com/FFmpeg/FFmpeg/releases/tag/n4.0.4) +* Dependencies for all the above packages +* MIVisionX Setup Script - `V2.0.0` + +##### Known Issues + +* Package install requires **OpenCV** `v3.4.X` to execute `AMD OpenCV extensions` + +##### MIVisionX Dependency Map + +**Docker Image:** `docker pull kiritigowda/ubuntu-18.04:{TAGNAME}` + +- ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `new component added to the level` +- ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `existing component from the previous level` + +| Build Level | MIVisionX Dependencies | Modules | Libraries and Executables | Docker Tag | +|-------------|----------------------------------------------------|--------------------------------------------------------------------------|---------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Level_1` | cmake <br> gcc <br> g++ | amd_openvx <br> utilities | ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib - CPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - CPU with Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-1?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_2` | ROCm OpenCL <br> +Level 1 | amd_openvx <br>
amd_openvx_extensions <br> utilities | ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib - CPU/GPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib - CPU/GPU <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `loom_shell` - 360 Stitch App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - Display OFF | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-2?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_3` | OpenCV <br> FFMPEG <br> +Level 2 | amd_openvx <br> amd_openvx_extensions <br> utilities | ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `mv_compile` - Neural Net Model Compile <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) 
`runvx` - OpenVX&trade; Graph Executor - Display ON | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-3?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_4` | MIOpenGEMM <br> MIOpen <br> ProtoBuf <br> +Level 3 | amd_openvx <br> amd_openvx_extensions <br> apps <br> utilities | ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `inference_server_app` - Cloud Inference App | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-4?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | +| `Level_5` | AMD_RPP <br> rocAL deps <br> +Level 4 | amd_openvx <br> amd_openvx_extensions <br> apps <br> rocAL <br> utilities | 
![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libopenvx.so` - OpenVX&trade; Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvxu.so` - OpenVX&trade; immediate node Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_loomsl.so` - Loom 360 Stitch Lib <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `loom_shell` - 360 Stitch App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_amd_media.so` - OpenVX&trade; Media Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_opencv.so` - OpenVX&trade; OpenCV InterOp Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `mv_compile` - Neural Net Model Compile <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runcl` - OpenCL&trade; program debug App <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `runvx` - OpenVX&trade; Graph Executor - Display ON <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `libvx_nn.so` - OpenVX&trade; Neural Net Extension <br> ![#1589F0](https://via.placeholder.com/15/1589F0/000000?text=+) `inference_server_app` - Cloud Inference App <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `libvx_rpp.so` - OpenVX&trade; RPP Extension <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `librali.so` - Radeon Augmentation Library <br> ![#c5f015](https://via.placeholder.com/15/c5f015/000000?text=+) `rali_pybind.so` - rocAL Pybind Lib | [![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/kiritigowda/ubuntu-18.04/mivisionx-level-5?style=flat-square)](https://hub.docker.com/repository/docker/kiritigowda/ubuntu-18.04) | + #### rccl 2.10.3 RCCL 2.10.3 for ROCm 5.0.0 diff --git a/tools/autotag/tag_script.py b/tools/autotag/tag_script.py index 219d4ef992..8fd0f72287 100755 --- a/tools/autotag/tag_script.py +++ 
b/tools/autotag/tag_script.py @@ -84,11 +84,9 @@ def exclude(self) -> List[str]: "MIOpenGEMM", "MIOpenKernels", "MIOpenTensile", - "ROCmValidationSuite", + "MLSEQA_TestRepo", "half", - "hipFORT", "rccl-rdma-sharp-plugins", - "MLSEQA_TestRepo", ] return defaults + (self._exclude if self._exclude is not None else []) @@ -237,10 +235,15 @@ def run_tagging(): # Find all the math libraries and their remotes. included_names = [ "AMDMIGraphX", + "HIPIFY", # "MIOpen", + "MIVisionX", + "ROCmValidationSuite", # + "composable_kernel", + "hipfort", "rocDecode", "rocm-cmake", - "rocprofiler" + "rpp", ] included_groups = [ "mathlibs" diff --git a/tools/autotag/templates/rocm_changes/5.7.1.md b/tools/autotag/templates/rocm_changes/5.7.1.md index bfed61206d..cb46786501 100644 --- a/tools/autotag/templates/rocm_changes/5.7.1.md +++ b/tools/autotag/templates/rocm_changes/5.7.1.md @@ -41,7 +41,7 @@ kernels found by setting the environment variable ROCBLAS_TENSILE_GEMM_OVERRIDE_ points to the stored file. For more details, refer to the -[rocBLAS Programmer's Guide](https://rocm.docs.amd.com/projects/rocBLAS/en/latest/Programmers_Guide.html#rocblas-gemm-tune). +[rocBLAS Programmer's Guide](https://rocm.docs.amd.com/projects/rocBLAS/en/docs-5.7.1/Programmers_Guide.html). #### HIP 5.7.1 (for ROCm 5.7.1) diff --git a/tools/autotag/templates/rocm_changes/6.1.0.md b/tools/autotag/templates/rocm_changes/6.1.0.md new file mode 100644 index 0000000000..0395990e77 --- /dev/null +++ b/tools/autotag/templates/rocm_changes/6.1.0.md @@ -0,0 +1,319 @@ + +The ROCm™ 6.1 release consists of new features and fixes to improve the stability and +performance of AMD Instinct™ MI300 GPU applications. Notably, we've added: + +* Full support for Ubuntu 22.04.4. + +* **rocDecode**, a new ROCm component that provides high-performance video decode support for + AMD GPUs. With rocDecode, you can decode compressed video streams while keeping the resulting + YUV frames in video memory. 
With decoded frames in video memory, you can run video + post-processing using ROCm HIP, avoiding unnecessary data copies via the PCIe bus. + + To learn more, refer to the rocDecode + [documentation](https://rocm.docs.amd.com/projects/rocDecode/en/latest/). + +### OS and GPU support changes + +ROCm 6.1 adds the following operating system support: + +* MI300A: Ubuntu 22.04.4 and RHEL 9.3 +* MI300X: Ubuntu 22.04.4 + +Future releases will add additional operating systems to match our general offering. For older +generations of supported AMD Instinct products, we’ve added Ubuntu 22.04.4 support. + +```{tip} +To view the complete list of supported GPUs and operating systems, refer to the system requirements +page for +[Linux](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) +and +[Windows](https://rocm.docs.amd.com/projects/install-on-windows/en/latest/reference/system-requirements.html). +``` + +### Installation packages + +This release includes a new set of packages for every module (all libraries and binaries default to +`DT_RPATH`). Package names have the suffix `rpath`; for example, the `rpath` variant of `rocminfo` is +`rocminfo-rpath`. + +```{warning} +The new `rpath` packages will conflict with the default packages; they are meant to be used only in +environments where legacy `DT_RPATH` is the preferred form of linking (instead of `DT_RUNPATH`). We +do **not** recommend trying to install both sets of packages. +``` + +#### AMD SMI + +AMD SMI for ROCm 6.1.0 + +##### Additions + +* **Added Monitor command**. This provides users the ability to customize GPU metrics to capture, + collect, and observe. Output is provided in a table view. This aligns closer to ROCm SMI `rocm-smi` + (no argument), and allows you to customize per the data that are helpful for your use-case. + +* **Integrated ESMI Tool**. You can get CPU metrics and telemetry through our API and CLI tools. 
+ You can get this information using the `amd-smi static` and `amd-smi metric` commands. This is only + available for limited target processors. As of ROCm 6.0.2, this is listed as: + * AMD Zen3 based CPU Family 19h Models 0h-Fh and 30h-3Fh + * AMD Zen4 based CPU Family 19h Models 10h-1Fh and A0h-AFh + +* **Added support for new metrics: VCN, JPEG engines, and PCIe errors**. Using the AMD SMI + tool, you can retrieve VCN, JPEG engines, and PCIe errors by calling `amd-smi metric -P` or + `amd-smi metric --usage`. Depending on device support, `VCN_ACTIVITY` will update for MI3x ASICs + (with 4 separate VCN engine activities); for older ASICs, `MM_ACTIVITY` will update with UVD/VCN engine activity + (average of all engines). `JPEG_ACTIVITY` is a new field for MI3x ASICs, where the device can support up + to 32 JPEG engine activities. See our documentation for more in-depth understanding of these new + fields. + +* **Added AMDSMI Tool version**. AMD SMI will report *three versions*: AMDSMI Tool, AMDSMI + Library version, and ROCm version. + + The AMDSMI Tool version is the CLI/tool version number with commit ID appended after the `+` sign. + The AMDSMI Library version is the library package version number. The ROCm version is the system's + installed ROCm version; if ROCm is not installed, it reports N/A. + +* **Added XGMI table**. Displays XGMI information for AMD GPU devices in a table format. This is + only available on supported ASICs (e.g., MI300). Here, users can view read/write data XGMI or PCIe + accumulated data transfer size (in KiloBytes). + +* **Added units of measure to JSON output**. We added units of measure to the JSON/CSV output of the + `amd-smi metric`, `amd-smi static`, and `amd-smi monitor` commands. + +##### Changes + +* **Topology is now left-aligned with BDF for each device listed individual table's row/columns**. + We provided each device's BDF for every table's row/columns, then left-aligned data. We want AMD + SMI Tool output to be easy to understand and digest.
+ Having to scroll up to find this information + made it difficult to follow, especially for devices that have many devices associated with one ASIC. + +##### Fixes + +* **Fix for RDNA3/RDNA2/MI100 'amdsmi_get_gpu_pci_bandwidth()' in 'frequencies_read' tests**. + For devices that do not report (e.g., RDNA3/RDNA2/MI100), we have added checks to confirm that + these devices return `AMDSMI_STATUS_NOT_SUPPORTED`. Otherwise, tests now display a return + string. + +* **Fix for devices that have an older PyYAML installed**. For platforms that are identified as having + an older PyYAML version or pip, we now manually update both pip and PyYAML as needed. This + fix impacts the following CLI commands: + * `amd-smi list` + * `amd-smi static` + * `amd-smi firmware` + * `amd-smi metric` + * `amd-smi topology` + +* **Fix for crash when user is not a member of video/render groups**. AMD SMI now uses the + same mutex handler for devices as ROCm SMI. This helps avoid crashes when DRM/device data are + inaccessible to the logged-in user. + +##### Known issues + +* There is an `AttributeError` while running `amd-smi process --csv` +* GPU reset results in an "*Unable to reset non-amd GPU*" error +* Bad pages results in a "ValueError: NULL pointer access" error +* Some RDNA3 cards may enumerate to `Slot type = UNKNOWN` + +#### HIP + +HIP 6.1 for ROCm 6.1 + +##### Additions + +* New environment variable, `HIP_LAUNCH_BLOCKING`, which is used for serialization on kernel + execution. +* The default value is 0 (disable): kernel runs normally, as defined in the queue +* When set as 1 (enable): HIP runtime serializes the kernel enqueue and behaves the same as + `AMD_SERIALIZE_KERNEL` +* Added HIPRTC support for hip headers `driver_types`, `math_functions`, `library_types`, + `hip_math_constants`, `channel_descriptor`, `device_functions`, `hip_complex`, + `surface_types`, `texture_types` + +##### Changes + +* HIPRTC now assumes WGP mode for gfx10+.
You can enable CU mode by passing `-mcumode` to the + compile options from `hiprtcCompileProgram`. + +##### Fixes + +* HIP complex vector type multiplication and division operations. + On an AMD platform, some duplicated complex operators are removed to avoid compilation failures. + In HIP, `hipFloatComplex` and `hipDoubleComplex` are defined as complex datatypes: + * `typedef float2 hipFloatComplex` + * `typedef double2 hipDoubleComplex` + + Any application that uses complex multiplication and division operations must replace `*` and `/` + operators with the following: + * `hipCmulf() and hipCdivf() for hipFloatComplex` + * `hipCmul() and hipCdiv() for hipDoubleComplex` + + Note that these complex operations are equivalent to corresponding types/functions on an NVIDIA + platform. + +#### HIPIFY + +HIPIFY for ROCm 6.1.0 + +##### Additions + +* CUDA 12.3.2 support +* cuDNN 8.9.7 support +* LLVM 17.0.6 support +* Full `hipSOLVER` support +* Full `rocSPARSE` support +* New option: `--amap`, which will hipify as much as possible, ignoring `--default-preprocessor` + behavior + +##### Fixes + +* Code blocks skipped by the preprocessor are no longer hipified under the `--default-preprocessor` + option + +#### ROCm Compiler + +ROCm Compiler for ROCm 6.1.0 + +##### Additions + +* Compiler now generates `.uniform_work_group_size` and records it in the metadata. It indicates if the + kernel requires that each dimension of global size is a multiple of the corresponding dimension of + work-group size. A value of 1 is true, and 0 is false. This metadata is only provided when the value is + 1. +* Added the `rocm-llvm-docs` package. +* Added ROCm Device-Libs, ROCm Compiler Support, and hipCC within the `llvm-project/amd` + subdirectory to AMD’s fork of the LLVM project. +* Added support for C++ Parallel Algorithm Offload via HIP (HIPSTDPAR), which allows parallel + algorithms to run on the GPU. 
+ +##### Changes + +* `rocm-clang-ocl` is now an optional package and will require manual installation. + +##### Deprecations + +* hipCC adds `-mllvm -amdgpu-early-inline-all=true` and `-mllvm -amdgpu-function-calls=false` by + default to compiler invocations. These flags will be removed from hipCC in a future ROCm release. + +##### Fixes + +AddressSanitizer (ASan): +* Added `sanitized_padded_global` LLVM IR attribute to identify sanitizer instrumented globals. +* For ASan instrumented global, emit two symbols: one with actual size and the other with + instrumented size. + + [On GitHub](https://github.com/ROCm/ROCm/issues/2551) + +##### Known issues + +* Due to an issue within the `amd-llvm` compiler shipping with ROCm 6.1, HIPSTDPAR's interposition mode, which is enabled by `--hipstdpar-interpose-alloc`, is currently broken. + +The temporary workaround is to use the upstream LLVM 18 (or newer) compiler. This issue will be addressed in a future ROCm release. + +#### ROCm Data Center (RDC) + +RDC for ROCm 6.1.0 + +##### Changes + +* Added `--address` flag to rdcd +* Upgraded from C++11 to C++17 +* Upgraded gRPC + +#### ROCDebugger (ROCgdb) + +ROCgdb for ROCm 6.1.0 + +##### Fixes + +Previously, ROCDebugger encountered hangs and crashes when stepping over the `s_endpgm` +instruction at the end of a HIP kernel entry function, which caused the stepped wave to exit. This issue +is fixed in the ROCm 6.1 release. You can now step over the last instruction of any HIP kernel without +debugger hangs or crashes. + +#### ROCm SMI + +ROCm SMI for ROCm 6.1.0 + +##### Additions + +* **Added support to set max/min clock level for sclk ('RSMI_CLK_TYPE_SYS') or mclk ('RSMI_CLK_TYPE_MEM')**. + You can now set a maximum or minimum `sclk` or `mclk` value through the + `rsmi_dev_clk_extremum_set()` API, provided the ASIC supports it. Alternatively, you can use our Python CLI + tool (`rocm-smi --setextremum max sclk 1500`). + +* **Added `rsmi_dev_target_graphics_version_get()`**.
+ You can now query through ROCm SMI API + (`rsmi_dev_target_graphics_version_get()`) to retrieve the target graphics version for a GPU device. + Currently, this output is not supplied through our ROCm SMI CLI. + +##### Changes + +* **Removed non-unified API headers: Individual GPU metric APIs are no longer supported**. + The individual metric APIs (`rsmi_dev_metrics_*`) were removed in order to keep updates easier for + new GPU metric support. By providing a simple API (`rsmi_dev_gpu_metrics_info_get()`) with its + reported device metrics, it is worth noting there is a risk for ABI breakage using + `rsmi_dev_gpu_metrics_info_get()`. It is vital to understand that ABI breaks are necessary (in some + cases) in order to support newer ASICs and metrics for our customers. We will continue to support + `rsmi_dev_gpu_metrics_info_get()` with these considerations and limitations in mind. + +* **Deprecated 'rsmi_dev_power_ave_get()'; use the newer API, 'rsmi_dev_power_get()'**. As + outlined in the change for 6.0.0 (*Added a generic power API: rsmi_dev_power_get*), + `rsmi_dev_power_ave_get()` is now deprecated. You must update your ROCm SMI API calls accordingly. + +##### Fixes + +* Fixed `--showpids` reporting `[PID] [PROCESS NAME] 1 UNKNOWN UNKNOWN UNKNOWN`. + Output was failing because `cu_occupancy debugfs` method is not provided on some graphics cards + by design. `get_compute_process_info_by_pid` was updated to reflect this and returns with the output + needed by the CLI. + +* Fixed `rocm-smi --showpower` output, which was inconsistent on some RDNA3 devices. + We updated this to use `rsmi_dev_power_get()` within the CLI to provide a consistent device power + output. This was caused by using the now-deprecated `rsmi_dev_average_power_get()` API.
+ +* Fixed `rocm-smi --setcomputepartition` and `rocm-smi --resetcomputepartition` to report when the device is + `EBUSY`. + +* Fixed `rocm-smi --setmemorypartition` and `rocm-smi --resetmemorypartition` to + return `RSMI_STATUS_NOT_SUPPORTED` when SYSFS is read-only. + The `rsmi_dev_memory_partition_set` API is updated to handle the read-only SYSFS check. + Corresponding tests and CLI (`rocm-smi --setmemorypartition` and + `rocm-smi --resetmemorypartition`) calls were updated accordingly. + +* Fixed `rocm-smi --showclkvolt` and `rocm-smi --showvc`, which were displaying 0 for overdrive and + reporting that the voltage curve is not supported. + +#### ROCProfiler + +ROCProfiler for ROCm 6.1.0 + +##### Fixes + +* Fixed ROCProfiler to match versioning changes in HIP Runtime +* Fixed plugins race condition +* Updated metrics to MI300 + +#### ROCm Validation Suite + +##### Known issue + +* In a future release, the ROCm Validation Suite P2P Benchmark and Qualification Tool (PBQT) tests will be optimized to meet the target bandwidth requirements for MI300X. + + [On GitHub](https://github.com/ROCm/ROCm/issues/3027) + +#### MI200 SR-IOV + +##### Known issue + +* Multimedia applications may encounter compilation errors in the MI200 Single Root Input/Output Virtualization (SR-IOV) environment. This is because MI200 SR-IOV does not currently support multimedia applications. + + [On GitHub](https://github.com/ROCm/ROCm/issues/3028) + +### AMD MI300A RAS + +#### Fixed defect + +##### GFX correctable and uncorrectable error inject failures + +* Previously, the AMD CPU Reliability, Availability, and Serviceability (RAS) installation encountered correctable and uncorrectable failures while injecting an error. + + This issue is resolved in the ROCm 6.1 release, and users will no longer encounter the GFX correctable error (CE) and uncorrectable error (UE) failures.
diff --git a/tools/autotag/util/__init__.py b/tools/autotag/util/__init__.py index 8bfc2a8f20..40a4fdbd4f 100755 --- a/tools/autotag/util/__init__.py +++ b/tools/autotag/util/__init__.py @@ -1,2 +1,2 @@ from .defaults import TEMPLATES, PROCESSORS -from . import mivisionx +from .custom_templates import hipfort, mivisionx, rpp, rvs diff --git a/tools/autotag/util/custom_templates/__init__.py b/tools/autotag/util/custom_templates/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/autotag/util/custom_templates/ck.py b/tools/autotag/util/custom_templates/ck.py new file mode 100644 index 0000000000..b53fc4f87b --- /dev/null +++ b/tools/autotag/util/custom_templates/ck.py @@ -0,0 +1,41 @@ +import re + +from util.release_data import ReleaseLib +from util.defaults import TEMPLATES, PROCESSORS + +TEMPLATES['composable_kernel'] = ( + ( + r"## (\(Unreleased\))? CK (?P\d+\.\d+(?:\.\d+))?" + r"(?P for ROCm )?" + r"(?P(?(for_rocm)\d+\.\d+(?:\.\d+)?|.*))?" + r"\n" + r"(?P(?:(?!## ).*(?:(?!\n## )\n|(?=\n## )))*)" + ) +) + + +def composable_kernel_processor(data: ReleaseLib, template: str, _, __) -> bool: + """Processor for releases.""" + changelog = data.repo.get_contents("CHANGELOG.md", data.commit) + changelog = changelog.decoded_content.decode() + pattern = re.compile(template) + match = pattern.search(changelog) + lib_version = match["lib_version"] + data.message = ( + f"composable_kernel for ROCm" + f" {data.full_version}" + ) + + data.lib_version = lib_version + data.notes = f"""{match["body"]}""" + + change_pattern = re.compile( + r"^#+ +(?P[^\n]+)$\n*(?P(^(?!#).*\n*)*)", + re.RegexFlag.MULTILINE + ) + for match in change_pattern.finditer(data.notes): + data.data.changes[match["type"]] = match["change"] + + return True + +PROCESSORS['composable_kernel'] = composable_kernel_processor diff --git a/tools/autotag/util/custom_templates/hipfort.py b/tools/autotag/util/custom_templates/hipfort.py new file mode 100644 index 0000000000..65d6876f64 --- 
/dev/null +++ b/tools/autotag/util/custom_templates/hipfort.py @@ -0,0 +1,42 @@ +import re + +from util.release_data import ReleaseLib +from util.defaults import TEMPLATES, PROCESSORS + +TEMPLATES['hipfort'] = ( + ( + r"## hipfort (?P\d+\.\d+(?:\.\d+))?" + r"(?P for ROCm )?" + r"(?P(?(for_rocm)\d+\.\d+(?:\.\d+)?|.*))?" + r"( \(Unreleased\))?" + r"\n" + r"(?P(?:(?!## ).*(?:(?!\n## )\n|(?=\n## )))*)" + ) +) + + +def hipfort_processor(data: ReleaseLib, template: str, _, __) -> bool: + """Processor for releases.""" + changelog = data.repo.get_contents("CHANGELOG.md", data.commit) + changelog = changelog.decoded_content.decode() + pattern = re.compile(template) + match = pattern.search(changelog) + lib_version = match["lib_version"] + data.message = ( + f"hipfort for ROCm" + f" {data.full_version}" + ) + + data.lib_version = lib_version + data.notes = f"""{match["body"]}""" + + change_pattern = re.compile( + r"^#+ +(?P[^\n]+)$\n*(?P(^(?!#).*\n*)*)", + re.RegexFlag.MULTILINE + ) + for match in change_pattern.finditer(data.notes): + data.data.changes[match["type"]] = match["change"] + + return True + +PROCESSORS['hipfort'] = hipfort_processor diff --git a/tools/autotag/util/mivisionx.py b/tools/autotag/util/custom_templates/mivisionx.py old mode 100755 new mode 100644 similarity index 67% rename from tools/autotag/util/mivisionx.py rename to tools/autotag/util/custom_templates/mivisionx.py index 98bfa4dbe8..81280913bb --- a/tools/autotag/util/mivisionx.py +++ b/tools/autotag/util/custom_templates/mivisionx.py @@ -13,12 +13,13 @@ ) -def mivisionx_processor(data: ReleaseLib, template: str, _) -> bool: +def mivisionx_processor(data: ReleaseLib, template: str, _, __) -> bool: """Processor for MIVisionX releases.""" changelog = data.repo.get_contents("CHANGELOG.md", data.commit) changelog = changelog.decoded_content.decode() pattern = re.compile(template) match = pattern.search(changelog) + lib_version = match["lib_version"] data.message = ( f"MIVisionX for ROCm" f" 
{data.full_version}" @@ -27,19 +28,18 @@ def mivisionx_processor(data: ReleaseLib, template: str, _) -> bool: readme = data.repo.get_contents("README.md", data.commit) readme = readme.decoded_content.decode() dependency_map = readme[readme.find("## MIVisionX Dependency Map"):] - data.notes = f""" -

- -

- -## Online Documentation -[MIVisionX Documentation](https://rocm.docs.amd.com/projects/MIVisionX/en/latest/doxygen/html/index.html) -## MIVisionX {match['lib_version']} -{match["body"]} + data.lib_version = lib_version + data.notes = f"""{match["body"]} {dependency_map} """ + + change_pattern = re.compile( + r"^#+ +(?P[^\n]+)$\n*(?P(^(?!#).*\n*)*)", + re.RegexFlag.MULTILINE + ) + for match in change_pattern.finditer(data.notes): + data.data.changes[match["type"]] = match["change"] + return True - PROCESSORS['MIVisionX'] = mivisionx_processor diff --git a/tools/autotag/util/custom_templates/rpp.py b/tools/autotag/util/custom_templates/rpp.py new file mode 100644 index 0000000000..22dc9dde13 --- /dev/null +++ b/tools/autotag/util/custom_templates/rpp.py @@ -0,0 +1,42 @@ +import re + +from util.release_data import ReleaseLib +from util.defaults import TEMPLATES, PROCESSORS + +TEMPLATES['rpp'] = ( + ( + r"## RPP (?P\d+\.\d+(?:\.\d+))?" + r"(?P for ROCm )?" + r"(?P(?(for_rocm)\d+\.\d+(?:\.\d+)?|.*))?" + r"( \(Unreleased\))?" + r"\n" + r"(?P(?:(?!## ).*(?:(?!\n## )\n|(?=\n## )))*)" + ) +) + + +def rpp_processor(data: ReleaseLib, template: str, _, __) -> bool: + """Processor for releases.""" + changelog = data.repo.get_contents("CHANGELOG.md", data.commit) + changelog = changelog.decoded_content.decode() + pattern = re.compile(template) + match = pattern.search(changelog) + lib_version = match["lib_version"] + data.message = ( + f"rpp for ROCm" + f" {data.full_version}" + ) + + data.lib_version = lib_version + data.notes = f"""{match["body"]}""" + + change_pattern = re.compile( + r"^#+ +(?P[^\n]+)$\n*(?P(^(?!#).*\n*)*)", + re.RegexFlag.MULTILINE + ) + for match in change_pattern.finditer(data.notes): + data.data.changes[match["type"]] = match["change"] + + return True + +PROCESSORS['rpp'] = rpp_processor