diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml index e356e0bdb4..67f9b3a7ee 100644 --- a/.github/workflows/bandit.yml +++ b/.github/workflows/bandit.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Clone the git repo - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index e697dd6aaf..1c9f740b91 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -36,7 +36,7 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install apt packages run: | @@ -122,7 +122,7 @@ jobs: runs-on: 'ubuntu-22.04' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt @@ -174,7 +174,7 @@ jobs: runs-on: ${{matrix.adapter.name}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt @@ -240,13 +240,13 @@ jobs: runs-on: ${{matrix.adapter.name}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt - name: Init conda env - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@9f54435e0e72c53962ee863144e47a4b094bfd35 # v2.3.0 with: miniconda-version: "latest" activate-environment: examples @@ -306,9 +306,9 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - - uses: actions/setup-python@v4 + - uses: 
actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1 with: python-version: 3.9 @@ -357,9 +357,9 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1 with: python-version: 3.9 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index bf312be396..5e00191ced 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -18,10 +18,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@1500a131381b66de0c52ac28abb13cd79f4b7ecc # v2.22.12 with: languages: cpp, python @@ -35,7 +35,7 @@ jobs: run: cmake --build ${{github.workspace}}/build -j $(nproc) - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@1500a131381b66de0c52ac28abb13cd79f4b7ecc # v2.22.12 analyze-windows: name: Analyze on Windows @@ -48,10 +48,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@1500a131381b66de0c52ac28abb13cd79f4b7ecc # v2.22.12 with: languages: cpp, python @@ -65,4 +65,4 @@ jobs: run: cmake --build ${{github.workspace}}/build -j $(nproc) --config Release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@1500a131381b66de0c52ac28abb13cd79f4b7ecc # v2.22.12 diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 731f7ea320..6f2cb38aab 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml 
@@ -16,7 +16,7 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install apt packages run: | @@ -72,7 +72,7 @@ jobs: run: ctest -T Coverage - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d # v3.1.4 with: gcov: true gcov_include: source diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index ab065ee77e..7e3dae32dd 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -31,7 +31,7 @@ jobs: steps: - name: Clone the git repo - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 9e51af24bc..53734a1d80 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -26,9 +26,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1 with: python-version: 3.9 @@ -41,14 +41,14 @@ jobs: run: python3 -m pip install -r third_party/requirements.txt - name: Setup Pages - uses: actions/configure-pages@v2 + uses: actions/configure-pages@c5a3e1159e0cbdf0845eb8811bd39e39fc3099c2 # v2.1.3 - name: Build Documentation working-directory: ${{github.workspace}}/scripts run: python3 run.py --core - name: Upload artifact - uses: actions/upload-pages-artifact@v1 + uses: actions/upload-pages-artifact@84bb4cd4b733d5c320c9c9cfbc354937524f4d64 # v1.0.10 with: path: ${{github.workspace}}/docs/html @@ -62,4 +62,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: 
actions/deploy-pages@f27bcc15848fdcdcc02f01754eb838e44bcf389b # v1.2.9 diff --git a/.github/workflows/e2e_nightly.yml b/.github/workflows/e2e_nightly.yml index e3cda49245..4a3999fc5c 100644 --- a/.github/workflows/e2e_nightly.yml +++ b/.github/workflows/e2e_nightly.yml @@ -29,12 +29,12 @@ jobs: rm -rf ./* || true - name: Checkout UR - uses: actions/checkout@v4 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: path: ur-repo - name: Checkout SYCL - uses: actions/checkout@v4 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: repository: intel/llvm ref: sycl diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 4a81c94e8f..38d3dcef04 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -16,7 +16,7 @@ jobs: runs-on: 'ubuntu-22.04' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 882b06985a..fe0790cc46 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -12,7 +12,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Create weekly prerelease run: diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 0000000000..033e796b24 --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,73 @@ +# This workflow uses actions that are not certified by GitHub. They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecard supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. 
See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + workflow_dispatch: + schedule: + - cron: '45 22 * * 4' + push: + branches: [ "main" ] + +# Declare default permissions as read only. +permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@e38b1902ae4f44df626f11ba0734b14fb91f8f86 # v2.1.2 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # Upload the results as artifacts (optional). 
Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. + - name: "Upload artifact" + uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # v3.1.0 + with: + name: SARIF file + path: results.sarif + retention-days: 5 + + # Upload the results to GitHub's code scanning dashboard. + - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@17573ee1cc1b9d061760f3a006fc4aac4f944fd5 # v2.2.4 + with: + sarif_file: results.sarif diff --git a/README.md b/README.md index 226dbfbfe5..1e9c47b755 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![Bandit](https://github.com/oneapi-src/unified-runtime/actions/workflows/bandit.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/bandit.yml) [![Coverity](https://scan.coverity.com/projects/28213/badge.svg)](https://scan.coverity.com/projects/oneapi-src-unified-runtime) [![codecov.io](https://codecov.io/github/oneapi-src/unified-runtime/coverage.svg?branch=main)](https://codecov.io/github/oneapi-src/unified-runtime?branch=master) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/oneapi-src/unified-runtime/badge)](https://securityscorecards.dev/viewer/?uri=github.com/oneapi-src/unified-runtime) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 7875650b85..68e3e665d2 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -84,6 +84,62 @@ void simpleGuessLocalWorkSize(size_t *ThreadsPerBlock, --ThreadsPerBlock[0]; } } + +ur_result_t setHipMemAdvise(const void *DevPtr, const size_t Size, + ur_usm_advice_flags_t URAdviceFlags, + hipDevice_t Device) { + // Handle unmapped memory advice flags + if (URAdviceFlags & + (UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY | + UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY | + UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED)) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + using 
ur_to_hip_advice_t = std::pair<ur_usm_advice_flags_t, hipMemoryAdvise>; + + static constexpr std::array<ur_to_hip_advice_t, 6> + URToHIPMemAdviseDeviceFlags{ + std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, + hipMemAdviseSetReadMostly), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, + hipMemAdviseUnsetReadMostly), + std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, + hipMemAdviseSetPreferredLocation), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, + hipMemAdviseUnsetPreferredLocation), + std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE, + hipMemAdviseSetAccessedBy), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE, + hipMemAdviseUnsetAccessedBy), + }; + for (auto &FlagPair : URToHIPMemAdviseDeviceFlags) { + if (URAdviceFlags & FlagPair.first) { + UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, FlagPair.second, Device)); + } + } + + static constexpr std::array<ur_to_hip_advice_t, 4> URToHIPMemAdviseHostFlags{ + std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST, + hipMemAdviseSetPreferredLocation), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST, + hipMemAdviseUnsetPreferredLocation), + std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST, + hipMemAdviseSetAccessedBy), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST, + hipMemAdviseUnsetAccessedBy), + }; + + for (auto &FlagPair : URToHIPMemAdviseHostFlags) { + if (URAdviceFlags & FlagPair.first) { + UR_CHECK_ERROR( + hipMemAdvise(DevPtr, Size, FlagPair.second, hipCpuDeviceId)); + } + } + + return UR_RESULT_SUCCESS; +} + } // namespace UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( @@ -1403,34 +1459,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_queue_handle_t hQueue, const void *pMem, size_t size, ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = flags; + void *HIPDevicePtr = const_cast<void *>(pMem); ur_device_handle_t Device = hQueue->getDevice(); - // If the device does not support
managed memory access, we can't set - // mem_advise. - if (!getAttribute(Device, hipDeviceAttributeManagedMemory)) { - setErrorMessage("mem_advise ignored as device does not support " - " managed memory access", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - hipPointerAttribute_t attribs; - // TODO: hipPointerGetAttributes will fail if pMem is non-HIP allocated - // memory, as it is neither registered as host memory, nor into the address - // space for the current device, meaning the pMem ptr points to a - // system-allocated memory. This means we may need to check system-alloacted - // memory and handle the failure more gracefully. - UR_CHECK_ERROR(hipPointerGetAttributes(&attribs, pMem)); - // async prefetch requires USM pointer (or hip SVM) to work. - if (!attribs.isManaged) { - setErrorMessage("Prefetch hint ignored as prefetch only works with USM", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - // HIP_POINTER_ATTRIBUTE_RANGE_SIZE is not an attribute in ROCM < 5, - // so we can't perform this check for such cases. +// HIP_POINTER_ATTRIBUTE_RANGE_SIZE is not an attribute in ROCM < 5, +// so we can't perform this check for such cases. 
#if HIP_VERSION_MAJOR >= 5 unsigned int PointerRangeSize = 0; UR_CHECK_ERROR(hipPointerGetAttribute(&PointerRangeSize, @@ -1438,29 +1473,60 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( (hipDeviceptr_t)HIPDevicePtr)); UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); #endif - // flags is currently unused so fail if set - if (flags != 0) - return UR_RESULT_ERROR_INVALID_VALUE; + ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr<ur_event_handle_t_> EventPtr{nullptr}; try { ScopedContext Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList); + + std::unique_ptr<ur_event_handle_t_> EventPtr{nullptr}; + if (phEvent) { EventPtr = std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( UR_COMMAND_USM_PREFETCH, hQueue, HIPStream)); UR_CHECK_ERROR(EventPtr->start()); } + + // Helper to ensure returning a valid event on early exit. + auto releaseEvent = [&EventPtr, &phEvent]() -> void { + if (phEvent) { + UR_CHECK_ERROR(EventPtr->record()); + *phEvent = EventPtr.release(); + } + }; + + // If the device does not support managed memory access, we can't set + // mem_advise. + if (!getAttribute(Device, hipDeviceAttributeManagedMemory)) { + releaseEvent(); + setErrorMessage("mem_advise ignored as device does not support " + "managed memory access", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + + hipPointerAttribute_t attribs; + // TODO: hipPointerGetAttributes will fail if pMem is non-HIP allocated + // memory, as it is neither registered as host memory, nor into the address + // space for the current device, meaning the pMem ptr points to a + // system-allocated memory. This means we may need to check system-allocated + // memory and handle the failure more gracefully. + UR_CHECK_ERROR(hipPointerGetAttributes(&attribs, pMem)); + // async prefetch requires USM pointer (or hip SVM) to work.
+ if (!attribs.isManaged) { + releaseEvent(); + setErrorMessage("Prefetch hint ignored as prefetch only works with USM", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + UR_CHECK_ERROR( hipMemPrefetchAsync(pMem, size, hQueue->getDevice()->get(), HIPStream)); - if (phEvent) { - UR_CHECK_ERROR(EventPtr->record()); - *phEvent = EventPtr.release(); - } + releaseEvent(); } catch (ur_result_t Err) { Result = Err; } @@ -1468,22 +1534,109 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( return Result; } +/// USM: memadvise API to govern behavior of automatic migration mechanisms UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_advice_flags_t, ur_event_handle_t *phEvent) { + ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { + UR_ASSERT(pMem && size > 0, UR_RESULT_ERROR_INVALID_VALUE); void *HIPDevicePtr = const_cast<void *>(pMem); -// HIP_POINTER_ATTRIBUTE_RANGE_SIZE is not an attribute in ROCM < 5, -// so we can't perform this check for such cases. + ur_device_handle_t Device = hQueue->getDevice(); + #if HIP_VERSION_MAJOR >= 5 - unsigned int PointerRangeSize = 0; - UR_CHECK_ERROR(hipPointerGetAttribute(&PointerRangeSize, - HIP_POINTER_ATTRIBUTE_RANGE_SIZE, - (hipDeviceptr_t)HIPDevicePtr)); + // NOTE: The hipPointerGetAttribute API is marked as beta, meaning, while this + // is feature complete, it is still open to changes and outstanding issues.
+ size_t PointerRangeSize = 0; + UR_CHECK_ERROR(hipPointerGetAttribute( + &PointerRangeSize, HIP_POINTER_ATTRIBUTE_RANGE_SIZE, + static_cast<hipDeviceptr_t>(HIPDevicePtr))); UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); #endif - // TODO implement a mapping to hipMemAdvise once the expected behaviour - // of urEnqueueUSMAdvise is detailed in the USM extension - return urEnqueueEventsWait(hQueue, 0, nullptr, phEvent); + + ur_result_t Result = UR_RESULT_SUCCESS; + + try { + ScopedContext Active(Device); + std::unique_ptr<ur_event_handle_t_> EventPtr{nullptr}; + + if (phEvent) { + EventPtr = + std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( + UR_COMMAND_USM_ADVISE, hQueue, hQueue->getNextTransferStream())); + EventPtr->start(); + } + + // Helper to ensure returning a valid event on early exit. + auto releaseEvent = [&EventPtr, &phEvent]() -> void { + if (phEvent) { + UR_CHECK_ERROR(EventPtr->record()); + *phEvent = EventPtr.release(); + } + }; + + // If the device does not support managed memory access, we can't set + // mem_advise. + if (!getAttribute(Device, hipDeviceAttributeManagedMemory)) { + releaseEvent(); + setErrorMessage("mem_advise ignored as device does not support " + "managed memory access", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + + // Passing MEM_ADVICE_SET/MEM_ADVICE_CLEAR_PREFERRED_LOCATION to + // hipMemAdvise on a GPU device requires the GPU device to report a non-zero + // value for hipDeviceAttributeConcurrentManagedAccess. Therefore, ignore + // the mem advice if concurrent managed memory access is not available.
+ if (advice & (UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION | + UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION | + UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE | + UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE | + UR_USM_ADVICE_FLAG_DEFAULT)) { + if (!getAttribute(Device, hipDeviceAttributeConcurrentManagedAccess)) { + releaseEvent(); + setErrorMessage("mem_advise ignored as device does not support " + "concurrent managed access", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + + // TODO: If pMem points to valid system-allocated pageable memory, we + // should check that the device also has the + // hipDeviceAttributePageableMemoryAccess property, so that a valid + // read-only copy can be created on the device. This also applies for + // UR_USM_MEM_ADVICE_SET/MEM_ADVICE_CLEAR_READ_MOSTLY. + } + + const auto DeviceID = Device->get(); + if (advice & UR_USM_ADVICE_FLAG_DEFAULT) { + UR_CHECK_ERROR( + hipMemAdvise(pMem, size, hipMemAdviseUnsetReadMostly, DeviceID)); + UR_CHECK_ERROR(hipMemAdvise( + pMem, size, hipMemAdviseUnsetPreferredLocation, DeviceID)); + UR_CHECK_ERROR( + hipMemAdvise(pMem, size, hipMemAdviseUnsetAccessedBy, DeviceID)); + } else { + Result = setHipMemAdvise(HIPDevicePtr, size, advice, DeviceID); + // UR_RESULT_ERROR_INVALID_ENUMERATION is returned when using a valid but + // currently unmapped advice argument not supported by this platform. + // Therefore, warn the user instead of throwing and aborting the runtime. + if (Result == UR_RESULT_ERROR_INVALID_ENUMERATION) { + releaseEvent(); + setErrorMessage("mem_advise is ignored as the advice argument is not " + "supported by this device", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + } + + releaseEvent(); + } catch (ur_result_t err) { + Result = err; + } catch (...)
{ + Result = UR_RESULT_ERROR_UNKNOWN; + } + + return Result; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( diff --git a/source/adapters/hip/queue.cpp b/source/adapters/hip/queue.cpp index f01fc0e180..6e6496fec1 100644 --- a/source/adapters/hip/queue.cpp +++ b/source/adapters/hip/queue.cpp @@ -38,8 +38,8 @@ hipStream_t ur_queue_handle_t_::getNextComputeStream(uint32_t *StreamToken) { // The second check is done after mutex is locked so other threads can not // change NumComputeStreams after that if (NumComputeStreams < ComputeStreams.size()) { - UR_CHECK_ERROR(hipStreamCreateWithFlags( - &ComputeStreams[NumComputeStreams++], Flags)); + UR_CHECK_ERROR(hipStreamCreateWithPriority( + &ComputeStreams[NumComputeStreams++], Flags, Priority)); } } Token = ComputeStreamIdx++; @@ -97,8 +97,8 @@ hipStream_t ur_queue_handle_t_::getNextTransferStream() { // The second check is done after mutex is locked so other threads can not // change NumTransferStreams after that if (NumTransferStreams < TransferStreams.size()) { - UR_CHECK_ERROR(hipStreamCreateWithFlags( - &TransferStreams[NumTransferStreams++], Flags)); + UR_CHECK_ERROR(hipStreamCreateWithPriority( + &TransferStreams[NumTransferStreams++], Flags, Priority)); } } uint32_t Stream_i = TransferStreamIdx++ % TransferStreams.size(); @@ -118,6 +118,19 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, std::unique_ptr<ur_queue_handle_t_> QueueImpl{nullptr}; unsigned int Flags = 0; + ur_queue_flags_t URFlags = 0; + int Priority = 0; // Not guaranteed, but, in ROCm 5.0-6.0, 0 is the default + + if (pProps && pProps->stype == UR_STRUCTURE_TYPE_QUEUE_PROPERTIES) { + URFlags = pProps->flags; + if (URFlags & UR_QUEUE_FLAG_PRIORITY_HIGH) { + ScopedContext Active(hDevice); + UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(nullptr, &Priority)); + } else if (URFlags & UR_QUEUE_FLAG_PRIORITY_LOW) { + ScopedContext Active(hDevice); + UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(&Priority, nullptr)); + } + } const bool
IsOutOfOrder = pProps ? pProps->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE @@ -130,7 +143,7 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, QueueImpl = std::unique_ptr(new ur_queue_handle_t_{ std::move(ComputeHipStreams), std::move(TransferHipStreams), hContext, - hDevice, Flags, pProps ? pProps->flags : 0}); + hDevice, Flags, pProps ? pProps->flags : 0, Priority}); *phQueue = QueueImpl.release(); @@ -293,6 +306,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( hDevice, HIPFlags, Flags, + /*priority*/ 0, /*backend_owns*/ pProperties->isNativeHandleOwned}; (*phQueue)->NumComputeStreams = 1; diff --git a/source/adapters/hip/queue.hpp b/source/adapters/hip/queue.hpp index c79bd293a3..ad2f0f016e 100644 --- a/source/adapters/hip/queue.hpp +++ b/source/adapters/hip/queue.hpp @@ -44,6 +44,7 @@ struct ur_queue_handle_t_ { unsigned int LastSyncTransferStreams; unsigned int Flags; ur_queue_flags_t URFlags; + int Priority; // When ComputeStreamSyncMutex and ComputeStreamMutex both need to be // locked at the same time, ComputeStreamSyncMutex should be locked first // to avoid deadlocks @@ -56,7 +57,7 @@ struct ur_queue_handle_t_ { ur_queue_handle_t_(std::vector &&ComputeStreams, std::vector &&TransferStreams, ur_context_handle_t Context, ur_device_handle_t Device, - unsigned int Flags, ur_queue_flags_t URFlags, + unsigned int Flags, ur_queue_flags_t URFlags, int Priority, bool BackendOwns = true) : ComputeStreams{std::move(ComputeStreams)}, TransferStreams{std::move( TransferStreams)}, @@ -66,7 +67,7 @@ struct ur_queue_handle_t_ { Device{Device}, RefCount{1}, EventCount{0}, ComputeStreamIdx{0}, TransferStreamIdx{0}, NumComputeStreams{0}, NumTransferStreams{0}, LastSyncComputeStreams{0}, LastSyncTransferStreams{0}, Flags(Flags), - URFlags(URFlags), HasOwnership{BackendOwns} { + URFlags(URFlags), Priority(Priority), HasOwnership{BackendOwns} { urContextRetain(Context); urDeviceRetain(Device); } diff --git 
a/third_party/requirements.txt b/third_party/requirements.txt index 5308c3554a..e2bb3bdcd3 100644 --- a/third_party/requirements.txt +++ b/third_party/requirements.txt @@ -1,10 +1,10 @@ alabaster==0.7.12 -Babel==2.7.0 +Babel==2.14.0 bandit==1.6.2 beautifulsoup4==4.11.1 breathe==4.33.1 bs4==0.0.1 -certifi==2019.11.28 +certifi==2023.07.22 chardet==3.0.4 clang-format==15.0.7 colorama==0.4.1 @@ -14,15 +14,15 @@ idna==2.8 imagesize==1.1.0 Jinja2==2.11.3 lxml==4.9.3 -Mako==1.1.0 +Mako==1.3.0 MarkupSafe==1.1.1 packaging==19.2 -Pygments==2.5.2 +Pygments==2.17.2 pyparsing==2.4.5 pytest>=7.0 pytz==2019.3 -PyYAML==5.2 -requests==2.22.0 +PyYAML==6.0.1 +requests==2.31.0 rst2pdf==0.98 six==1.13.0 snowballstemmer==2.0.0 @@ -37,4 +37,4 @@ sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 sphinxcontrib-websupport==1.2.4 sphinx-rtd-theme==1.0.0 -urllib3==1.25.7 +urllib3==2.1.0