diff --git a/.ci/env/apt.sh b/.ci/env/apt.sh index 4b8c2edb80c..591c87b4297 100755 --- a/.ci/env/apt.sh +++ b/.ci/env/apt.sh @@ -32,7 +32,7 @@ function add_repo { } function install_dpcpp { - sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-2024.1 + sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-2024.2 sudo bash -c 'echo libintelocl.so > /etc/OpenCL/vendors/intel-cpu.icd' } @@ -63,10 +63,33 @@ function install_qemu_emulation_apt { } function install_qemu_emulation_deb { - qemu_deb=qemu-user-static_8.2.2+ds-2+b1_amd64.deb - wget http://ftp.de.debian.org/debian/pool/main/q/qemu/${qemu_deb} - sudo dpkg -i ${qemu_deb} + set +e + + versions=(9.0.2 9.0.1 8.2.4) + found_version="" + for version in ${versions[@]}; do + qemu_deb="qemu-user-static_${version}+ds-1_amd64.deb" + echo "Checking for http://ftp.debian.org/debian/pool/main/q/qemu/${qemu_deb}" + if wget -q --method=HEAD http://ftp.debian.org/debian/pool/main/q/qemu/${qemu_deb} &> /dev/null; + then + echo "Found qemu version ${version}" + found_version=${qemu_deb} + break + fi + done + + set -eo pipefail + if [[ -z "${found_version}" ]] ; then + # If nothing is found, error out and fail + echo "None of the requested qemu versions ${versions[*]} are available." 
+ false + fi + + wget http://ftp.debian.org/debian/pool/main/q/qemu/${found_version} + sudo dpkg -i ${found_version} + sudo systemctl restart systemd-binfmt.service + set +eo pipefail } function install_llvm_version { diff --git a/.ci/env/environment.yml b/.ci/env/environment.yml index 0a704f66f14..61655fe9e76 100644 --- a/.ci/env/environment.yml +++ b/.ci/env/environment.yml @@ -1,7 +1,6 @@ name: ci-env channels: - conda-forge - - intel - defaults dependencies: - - impi-devel=2021.10.0 + - impi-devel=2021.12.0 diff --git a/.ci/env/openblas.sh b/.ci/env/openblas.sh index 613885a641e..dc86b1d96d3 100755 --- a/.ci/env/openblas.sh +++ b/.ci/env/openblas.sh @@ -37,6 +37,7 @@ show_help() { --prefix:The path where OpenBLAS will be installed --version:The version of OpenBLAS to install. This is a git reference from the OpenBLAS repo, and defaults to ${BLAS_DEFAULT_VERSION} --sysroot:If cross-compiling with LLVM, determines the location of the target architecture sysroot +--ilp64 : whether or not to use the ILP64 build ' } @@ -74,6 +75,9 @@ while [[ $# -gt 0 ]]; do --sysroot) sysroot="$2" shift;; + --ilp64) + ilp64=on + shift;; --help) show_help exit 0 @@ -89,6 +93,7 @@ done target=${target:-ARMV8} host_compiler=${host_compiler:-gcc} compiler=${compiler:-aarch64-linux-gnu-gcc} +openblas_ilp64=${ilp64:-on} target_arch=${target_arch:-$(uname -m)} OPENBLAS_DEFAULT_PREFIX="${ONEDAL_DIR}/__deps/openblas_${target_arch}" @@ -160,6 +165,9 @@ pushd "${blas_src_dir}" USE_THREAD=0 USE_LOCKING=1) fi + if [ "${openblas_ilp64}" == "on" ]; then + make_options+=( 'BINARY=64' 'INTERFACE64=1' ) + fi # Clean echo make "${make_options[@]}" clean make "${make_options[@]}" clean diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index 9b33760202d..788839d68b6 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -114,7 +114,7 @@ jobs: displayName: 'System info' - task: Cache@2 inputs: - key: '"gcc" | "aarch64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)"' + key: '"gcc" | 
"aarch64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)" | "ILP64"' path: $(OPENBLAS_CACHE_DIR) cacheHitVar: OPENBLAS_RESTORED - script: | @@ -201,7 +201,7 @@ jobs: displayName: 'System info' - task: Cache@2 inputs: - key: '"clang" | "18" | "aarch64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)"' + key: '"clang" | "18" | "aarch64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)" | "ILP64"' path: $(OPENBLAS_CACHE_DIR) cacheHitVar: OPENBLAS_RESTORED - script: | @@ -293,7 +293,7 @@ jobs: displayName: 'System info' - task: Cache@2 inputs: - key: '"clang" | "riscv64" | "openblas" | "$(OPENBLAS_VERSION)"' + key: '"clang" | "riscv64" | "openblas" | "$(OPENBLAS_VERSION)" | "ILP64"' path: $(OPENBLAS_CACHE_DIR) cacheHitVar: OPENBLAS_RESTORED - script: | @@ -363,7 +363,7 @@ jobs: displayName: 'System info' - task: Cache@2 inputs: - key: '"gnu" | "x86_64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)"' + key: '"gcc" | "x86_64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)" | "ILP64"' path: $(OPENBLAS_CACHE_DIR) cacheHitVar: OPENBLAS_RESTORED - script: | @@ -558,7 +558,7 @@ jobs: fi displayName: 'bazel-cache-limit' -- job: LinuxDaal4py +- job: LinuxSklearnex dependsOn: LinuxMakeGNU_MKL timeoutInMinutes: 0 variables: @@ -583,57 +583,50 @@ jobs: displayName: 'System info' - script: | conda update -y -q conda - conda create -q -y -n CB -c intel python=$(python.version) tbb mpich + conda create -q -y -n CB -c conda-forge python=$(python.version) tbb mpich displayName: 'Conda create' - script: | - git clone https://github.com/intel/scikit-learn-intelex.git daal4py - displayName: Clone daal4py + git clone https://github.com/intel/scikit-learn-intelex.git sklearnex + displayName: Clone sklearnex - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB - pip install -r daal4py/dependencies-dev - pip install -r daal4py/requirements-test.txt + pip install -r sklearnex/dependencies-dev + pip install -r sklearnex/requirements-test.txt 
displayName: Create python environment - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB export DALROOT=$(Pipeline.Workspace)/daal/latest source ${DALROOT}/env/vars.sh - cd daal4py + cd sklearnex export PYTHON=python ./conda-recipe/build.sh - displayName: daal4py build + displayName: sklearnex build - task: PublishPipelineArtifact@1 inputs: - artifactName: '$(platform.type) daal4py build' - targetPath: '$(Build.Repository.LocalPath)/daal4py' - displayName: 'Upload daal4py build artifacts' + artifactName: '$(platform.type) sklearnex build' + targetPath: '$(Build.Repository.LocalPath)/sklearnex' + displayName: 'Upload sklearnex build artifacts' continueOnError: true - - script: | - . /usr/share/miniconda/etc/profile.d/conda.sh - conda activate CB - export DALROOT=$(Pipeline.Workspace)/daal/latest - cd daal4py - python setup_sklearnex.py install --single-version-externally-managed --record=record.txt - displayName: sklearnex build - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB source $(Pipeline.Workspace)/daal/latest/env/vars.sh - ./daal4py/conda-recipe/run_test.sh + ./sklearnex/conda-recipe/run_test.sh timeoutInMinutes: 15 - displayName: daal4py test + displayName: sklearnex test - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB source $(Pipeline.Workspace)/daal/latest/env/vars.sh ret_code=0 - python -m daal4py daal4py/tests/run_examples.py + python -m sklearnex sklearnex/tests/run_examples.py ret_code=$(($ret_code + $?)) - python -m daal4py daal4py/tests/daal4py/sycl/sklearn_sycl.py + python -m sklearnex sklearnex/tests/daal4py/sycl/sklearn_sycl.py ret_code=$(($ret_code + $?)) exit $ret_code - displayName: daal4py examples + displayName: sklearnex examples - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB diff --git a/.ci/scripts/build.bat b/.ci/scripts/build.bat index d218464707c..76c78c620cd 100644 --- a/.ci/scripts/build.bat +++ 
b/.ci/scripts/build.bat @@ -19,8 +19,8 @@ rem %1 - Make target rem %2 - Compiler rem %3 - Instruction set -for /f "tokens=*" %%i in ('python -c "from multiprocessing import cpu_count; print(cpu_count())"') do set CPUCOUNT=%%i -echo CPUCOUNT=%CPUCOUNT% +set errorcode=0 +echo CPUCOUNT=%NUMBER_OF_PROCESSORS% echo PATH=C:\msys64\usr\bin;%PATH% set PATH=C:\msys64\usr\bin;%PATH% @@ -29,15 +29,16 @@ echo pacman -S --noconfirm msys/make msys/dos2unix pacman -S --noconfirm msys/make msys/dos2unix echo call .ci\env\tbb.bat -call .ci\env\tbb.bat +if "%TBBROOT%"=="" if not exist .\__deps\tbb\win\tbb call .ci\env\tbb.bat || set errorcode=1 echo call .\dev\download_micromkl.bat -call .\dev\download_micromkl.bat +if "%MKLGPUFPKROOT%"=="" if not exist .\__deps\mklgpufpk\win call .\dev\download_micromkl.bat || set errorcode=1 echo call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 -call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 +if "%VISUALSTUDIOVERSION%"=="" call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 || set errorcode=1 -echo make %1 -j%CPUCOUNT% COMPILER=%2 PLAT=win32e REQCPU=%3 -make %1 -j%CPUCOUNT% COMPILER=%2 PLAT=win32e REQCPU=%3 +echo make %1 -j%NUMBER_OF_PROCESSORS% COMPILER=%2 PLAT=win32e REQCPU=%3 +make %1 -j%NUMBER_OF_PROCESSORS% COMPILER=%2 PLAT=win32e REQCPU=%3 || set errorcode=1 -cmake -DINSTALL_DIR=__release_win_vc\daal\latest\lib\cmake\oneDAL -DARCH_DIR=intel64 -P cmake\scripts\generate_config.cmake +cmake -DINSTALL_DIR=__release_win_vc\daal\latest\lib\cmake\oneDAL -DARCH_DIR=intel64 -P cmake\scripts\generate_config.cmake || set errorcode=1 +EXIT /B %errorcode% diff --git a/.ci/scripts/test.bat b/.ci/scripts/test.bat index 74909f3652b..aaced0689be 100644 --- a/.ci/scripts/test.bat +++ b/.ci/scripts/test.bat @@ -35,10 +35,10 @@ echo PATH=C:\msys64\usr\bin;%PATH% set PATH=C:\msys64\usr\bin;%PATH% echo call "C:\Program 
Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 -call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 +call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 || set errorcode=1 echo call __release_win_vc\daal\latest\env\vars.bat -call __release_win_vc\daal\latest\env\vars.bat +call __release_win_vc\daal\latest\env\vars.bat || set errorcode=1 echo set LIB=%~dp0..\..\__release_win_vc\tbb\latest\lib\intel64\vc_mt;%LIB% set LIB=%~dp0..\..\__release_win_vc\tbb\latest\lib\intel64\vc_mt;%LIB% @@ -69,9 +69,9 @@ if "%build_system%"=="cmake" ( set results_dir=_cmake_results\intel_intel64_%cmake_link_mode_short%\Release echo cmake -B Build -S . -DONEDAL_LINK=%cmake_link_mode% -DTBB_DIR=%TBB_DIR% - cmake -B Build -S . -DONEDAL_LINK=%cmake_link_mode% -DTBB_DIR=%TBB_DIR% + cmake -B Build -S . -DONEDAL_LINK=%cmake_link_mode% -DTBB_DIR=%TBB_DIR% || set errorcode=1 set solution_name=%examples:\=_% - msbuild.exe "Build\!solution_name!_examples.sln" /p:Configuration=Release + msbuild.exe "Build\!solution_name!_examples.sln" /p:Configuration=Release || set errorcode=1 for /f "delims=." 
%%F in ('dir /B !results_dir!\*.exe 2^> nul') do ( set example=%%F @@ -100,3 +100,4 @@ if "%build_system%"=="cmake" ( if "%examples%"=="daal\cpp" nmake %linking% compiler=%compiler% if "%examples%"=="oneapi\cpp" nmake %linking% compiler=%compiler% ) +EXIT /B %errorcode% diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5871fb84661..d6a47717935 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,6 +1,6 @@ # Documentation owners and reviewers -/docs/ @Vika-F @maria-Petrova @Alexsandruss @aepanchi -*.md @Vika-F @maria-Petrova @Alexsandruss @aepanchi +/docs/ @Vika-F @maria-Petrova @Alexsandruss @bdmoore1 +*.md @Vika-F @maria-Petrova @Alexsandruss @bdmoore1 # TTP files third-party* @maria-Petrova @@ -22,7 +22,7 @@ deploy/ @Alexsandruss @napetrov @homksei @ahuber21 @ethanglaser dev/ @Alexsandruss @napetrov @homksei @ahuber21 @ethanglaser # C++ code -cpp/ @Alexsandruss @samir-nasibli @KulikovNikita @Alexandr-Solovev +cpp/ @Alexsandruss @samir-nasibli @Alexandr-Solovev # Tree based methods dtrees @razdoburdin @ahuber21 @avolkov-intel @icfaust diff --git a/.github/workflows/docker-validation-ci.yml b/.github/workflows/docker-validation-ci.yml index ce4c16c8bbc..380172f82a5 100644 --- a/.github/workflows/docker-validation-ci.yml +++ b/.github/workflows/docker-validation-ci.yml @@ -17,6 +17,6 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 - name: Build docker image run: docker build . 
--file dev/docker/onedal-dev.Dockerfile --tag onedal-dev:latest diff --git a/.github/workflows/docker-validation-nightly.yml b/.github/workflows/docker-validation-nightly.yml index ecaab5ed10f..ea6ec499025 100644 --- a/.github/workflows/docker-validation-nightly.yml +++ b/.github/workflows/docker-validation-nightly.yml @@ -30,7 +30,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 - name: Build docker image run: docker build . --file dev/docker/onedal-dev.Dockerfile --tag onedal-dev:latest - name: Building oneDAL diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml new file mode 100644 index 00000000000..42deb5ad780 --- /dev/null +++ b/.github/workflows/openssf-scorecard.yml @@ -0,0 +1,38 @@ +name: OpenSSF Scorecard +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '0 21 * * 5' + push: + branches: [ "main" ] + +# Declare default permissions as read only. +permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + if: github.repository == 'oneapi-src/oneDAL' + runs-on: ubuntu-latest + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). 
+ id-token: write + + steps: + - name: "Checkout code" + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0 + with: + results_file: results.sarif + results_format: sarif + publish_results: true diff --git a/.github/workflows/renovate-validation.yml b/.github/workflows/renovate-validation.yml index 3241cb94564..83825305388 100644 --- a/.github/workflows/renovate-validation.yml +++ b/.github/workflows/renovate-validation.yml @@ -23,7 +23,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 - name: Validate uses: suzuki-shunsuke/github-action-renovate-config-validator@v1.0.1 with: diff --git a/.github/workflows/slack-pr-notification.yml b/.github/workflows/slack-pr-notification.yml new file mode 100644 index 00000000000..09e1c854ada --- /dev/null +++ b/.github/workflows/slack-pr-notification.yml @@ -0,0 +1,27 @@ +name: Slack PR Notification +on: + # use pull_request_target to run on PRs from forks and have access to secrets + pull_request_target: + types: [labeled] + +env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + channel: "onedal" + +permissions: + pull-requests: read + +jobs: + rfc: + name: RFC Notification + runs-on: ubuntu-latest + # Trigger when labeling a PR with "RFC" + if: | + github.event.action == 'labeled' && + contains(toJson(github.event.pull_request.labels.*.name), '"RFC"') + steps: + - name: Notify Slack + uses: slackapi/slack-github-action@70cd7be8e40a46e8b0eced40b0de447bdb42f68e # v1.26.0 + with: + channel-id: ${{ env.channel }} + slack-message: "${{ github.actor }} posted a RFC: ${{ github.event.pull_request.title }}. 
URL: ${{ github.event.pull_request.html_url }}" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9e3d04f3eed..9fc5122a5c7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -23,6 +23,14 @@ We welcome community contributions to Intel(R) oneAPI Data Analytics Library. Yo Refer to our guidelines on [pull requests](#pull-requests) and [issues](#issues) before you proceed. +## Contacting maintainers +You may reach out to Intel project maintainers privately at onedal.maintainers@intel.com. +[Codeowners](https://github.com/oneapi-src/oneDAL/blob/main/.github/CODEOWNERS) configuration defines specific maintainers for corresponding code sections, however it's currently limited to Intel members. With further migration to UXL we will be changing this, but here are non-Intel contacts: + +For ARM specifics you may contact: [@rakshithgb-fujitsu](https://github.com/rakshithgb-fujitsu/) + +For RISC-V specifics you may contact: [@keeranroth](https://github.com/keeranroth/) + ## Issues Use [GitHub issues](https://github.com/oneapi-src/oneDAL/issues) to: @@ -35,8 +43,9 @@ Use [GitHub issues](https://github.com/oneapi-src/oneDAL/issues) to: To contribute your changes directly to the repository, do the following: - Make sure you can build the product and run all the examples with your patch. +- Product uses bazel for validation and your changes should pass tests. Please add new tests as necessary. [Bazel Guide for oneDAL](https://github.com/oneapi-src/oneDAL/tree/main/dev/bazel) - Make sure your code is in line with our [coding style](#code-style) as `clang-format` is one of the checks in our public CI. -- For a larger feature, provide a relevant example. +- For a larger feature, provide a relevant example, and tests. - [Document](#documentation-guidelines) your code. - [Submit](https://github.com/oneapi-src/oneDAL/pulls) a pull request into the `main` branch. 
diff --git a/MODULE.bazel b/MODULE.bazel index 2eb13463dbc..14f7c53ec9b 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -1,3 +1,3 @@ module(name = "onedal") -bazel_dep(name = "bazel_skylib", version = "1.6.1") +bazel_dep(name = "bazel_skylib", version = "1.7.1") diff --git a/README.md b/README.md index e90f3028d47..2c19f382da3 100644 --- a/README.md +++ b/README.md @@ -15,17 +15,21 @@ * limitations under the License. *******************************************************************************/--> -# oneAPI Data Analytics Library +# oneAPI Data Analytics Library [Installation](#installation)   |   [Documentation](#documentation)   |   [Support](#support)   |   [Examples](#examples)   |   [How to Contribute](CONTRIBUTING.md)    -[![Build Status](https://dev.azure.com/daal/DAAL/_apis/build/status/oneapi-src.oneDAL?branchName=main)](https://dev.azure.com/daal/DAAL/_build/latest?definitionId=5&branchName=main) [![License](https://img.shields.io/github/license/oneapi-src/oneDAL.svg)](https://github.com/oneapi-src/oneDAL/blob/main/LICENSE) [![Join the community on GitHub Discussions](https://badgen.net/badge/join%20the%20discussion/on%20github/black?icon=github)](https://github.com/oneapi-src/oneDAL/discussions) +[![Build Status](https://dev.azure.com/daal/DAAL/_apis/build/status/oneapi-src.oneDAL?branchName=main)](https://dev.azure.com/daal/DAAL/_build/latest?definitionId=5&branchName=main) +[![License](https://img.shields.io/github/license/oneapi-src/oneDAL.svg)](https://github.com/oneapi-src/oneDAL/blob/main/LICENSE) +[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8859/badge)](https://www.bestpractices.dev/projects/8859) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/oneapi-src/oneDAL/badge)](https://securityscorecards.dev/viewer/?uri=github.com/oneapi-src/oneDAL) +[![Join the community on GitHub 
Discussions](https://badgen.net/badge/join%20the%20discussion/on%20github/black?icon=github)](https://github.com/oneapi-src/oneDAL/discussions) oneAPI Data Analytics Library (oneDAL) is a powerful machine learning library that helps you accelerate big data analysis at all stages: **preprocessing**, **transformation**, **analysis**, **modeling**, **validation**, and **decision making**. The library implements classical machine learning algorithms. The boost in their performance is achieved by leveraging the capabilities of Intel® hardware. -oneDAL is part of [oneAPI](https://oneapi.io). The current branch implements version 1.1 of oneAPI Specification. +The oneDAL is part of the [UXL Foundation](http://www.uxlfoundation.org) and is an implementation of the [oneAPI specification](https://spec.oneapi.io) for oneDAL component. ## Usage @@ -38,9 +42,24 @@ Deprecation Notice: The Java interfaces are removed from the oneDAL library. ## Installation -Check [System Requirements](https://oneapi-src.github.io/oneDAL/system-requirements.html) before installing oneDAL. +Check the [System Requirements](https://oneapi-src.github.io/oneDAL/system-requirements.html) before installing to ensure compatibility with your system. 
+ +There are several options available for installing oneDAL: + +- **Binary Distribution**: You can download pre-built binary packages from the following sources: + - Intel® oneAPI: + - Download as Part of the [Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onedal.html#gs.8xrue2) + - Download as the Stand-Alone [Intel® oneAPI Data Analytics Library](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onedal.html#gs.8xrue2) + - Anaconda: + | Channel | Version | + |:-------:|:-------:| + | intel | [![Anaconda-Server Intel Badge](https://anaconda.org/intel/dal-devel/badges/version.svg)](https://anaconda.org/intel/dal-devel) | + | conda-forge | [![Anaconda-Server Conda-forge Badge](https://anaconda.org/conda-forge/dal-devel/badges/version.svg)](https://anaconda.org/conda-forge/dal-devel) | + + - [NuGet](https://www.nuget.org/packages/inteldal.devel.linux-x64) + +- **Source Distribution**: You can build the library from source. To do this, [download the specific version of oneDAL](https://github.com/oneapi-src/oneDAL/releases) from the official GitHub repository and follow the instructions in the [INSTALL.md](INSTALL.md). -You can [download the specific version of oneDAL](https://github.com/oneapi-src/oneDAL/releases) or [install it from sources](INSTALL.md). ## Examples @@ -93,6 +112,15 @@ oneDAL K-Means fit, strong scaling result | oneDAL K-Means fit, weak scaling res >*Technical details: FPType: float32; HW: Intel Xeon Processor E5-2698 v3 @2.3GHz, 2 sockets, 16 cores per socket; SW: Intel® DAAL (2019.3), MPI4Py (3.0.0), Intel® Distribution Of Python (IDP) 3.6.8; Details available in the article https://arxiv.org/abs/1909.11822* +## Governance + +The oneDAL project is governed by the UXL Foundation and you can get involved in this project in multiple ways. 
It is possible to join the [AI Special Interest Group (SIG)](https://github.com/uxlfoundation/foundation/tree/main/ai) meetings where the group discusses and demonstrates work using this project. Members can also join the Open Source and Specification Working Group meetings. + +You can also join the mailing lists for the [UXL Foundation](https://lists.uxlfoundation.org/g/main/subgroups) to be informed of when meetings are happening and receive the latest information and discussions. + +You can contribute to this project and also contribute to the specification for this project, read the [CONTRIBUTING](CONTRIBUTING.md) page for more information. + + ## Support Ask questions and engage in discussions with oneDAL developers, contributers, and other users through the following channels: @@ -108,7 +136,11 @@ To report a vulnerability, refer to [Intel vulnerability reporting policy](https ### Contribute -We welcome community contributions. Check our [contributing guidelines](CONTRIBUTING.md) to learn more. +We welcome community contributions. Check our [contributing guidelines](CONTRIBUTING.md) to learn more. You can also contact the oneDAL team via [UXL Foundation Slack] using +[#onedal] channel. + +[UXL Foundation Slack]: https://slack-invite.uxlfoundation.org/ +[#onedal]: https://uxlfoundation.slack.com/channels/onedal ## License diff --git a/SECURITY.md b/SECURITY.md index eb482d90983..3fa4dfa8799 100755 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,12 +1,93 @@ + + # Security Policy +As an open-source project, we understand the importance of and responsibility +for security. This Security Policy outlines our guidelines and procedures to +ensure the highest level of security and trust for oneDAL. + +## Supported Versions + +Security vulnerabilities are fixed in the [latest version][1] +and delivered as a patch release. We don't guarantee security fixes to be +back-ported to older oneDAL versions. 
+ ## Report a Vulnerability -Please report security issues or vulnerabilities to the [Intel® Security Center]. +We are very grateful to the security researchers and users that report back +security vulnerabilities. We investigate every report thoroughly. +We strongly encourage you to report security vulnerabilities to us privately, +before disclosing them on public forums or opening a public GitHub* issue. + +Report a vulnerability to us in one of two ways: + +* Open a draft **[GitHub* Security Advisory][2]** +* Send an e-mail to: **security@uxlfoundation.org**. + +Along with the report, provide the following info: + + * A descriptive title. + * Your name and affiliation (if any). + * A description of the technical details of the vulnerabilities. + * A minimal example of the vulnerability so we can reproduce your findings. + * An explanation of who can exploit this vulnerability, and what they gain + doing so. + * Whether this vulnerability is public or known to third parties. If it is, + provide details. + +### When Should I Report a Vulnerability? + +* You think you discovered a potential security vulnerability in oneDAL +* You are unsure how the potential vulnerability affects oneDAL +* You think you discovered a vulnerability in another project or 3rd party +component on which oneDAL depends. If the issue is not fixed in the 3rd party +component, try to report directly there first. + +### When Should I NOT Report a Vulnerability? + +* You got an automated scan hit and are unable to provide details. +* You need help using oneDAL for security. +* You need help applying security-related updates. +* Your issue is not security-related. + +## Security Reports Review Process + +We aim to respond quickly to your inquiry and coordinate a fix and +disclosure with you. All confirmed security vulnerabilities will be addressed +according to severity level and impact on oneDAL. Normally, security issues are fixed in the next planned release. 
+ +## Disclosure Policy + +We will publish security advisories using the +[**GitHub Security Advisories feature**][3] +to keep our community well-informed, and will credit you for your findings +unless you prefer to stay anonymous. We request that you refrain from +exploiting the vulnerability or making it public before the official disclosure. + +We will disclose the vulnerabilities and bugs as soon as possible once +mitigation is implemented and available. -For more information on how Intel® works to resolve security issues, see -[Vulnerability Handling Guidelines]. +## Feedback on This Policy -[Intel® Security Center]:https://www.intel.com/security +If you have any suggestions on how this Policy could be improved, submit +an issue or a pull request to this repository. **Do not** report +potential vulnerabilities or security flaws via a pull request. -[Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html +[1]: https://github.com/oneapi-src/oneDAL/releases +[2]: https://github.com/oneapi-src/oneDAL/security/advisories/new +[3]: https://github.com/oneapi-src/oneDAL/security/advisories diff --git a/WORKSPACE b/WORKSPACE index 48cfa890abe..3cba5bcd224 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -33,8 +33,8 @@ micromkl_repo( micromkl_dpc_repo( name = "micromkl_dpc", root_env_var = "MKLGPUFPKROOT", - url = "https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/mklgpufpk_lnx_2024-02-20.tgz", - sha256 = "1c60914461aafa5e5512181c7d5c1fdbdeff83746dbd980fe97074a3b65fc1ed", + url = "https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/mklgpufpk_lnx_20240605.tgz", + sha256 = "0787a92e9580ed6b9fb97d054a0ed77994dbc18b4b3fb099451cb1e6ebdf4f16", ) load("@onedal//dev/bazel/deps:openblas.bzl", "openblas_repo") @@ -108,15 +108,15 @@ onedal_repo( http_archive( name = "catch2", - url = "https://github.com/catchorg/Catch2/archive/v3.5.4.tar.gz", - sha256 = 
"b7754b711242c167d8f60b890695347f90a1ebc95949a045385114165d606dbb", - strip_prefix = "Catch2-3.5.4", + url = "https://github.com/catchorg/Catch2/archive/v3.6.0.tar.gz", + sha256 = "485932259a75c7c6b72d4b874242c489ea5155d17efa345eb8cc72159f49f356", + strip_prefix = "Catch2-3.6.0", ) http_archive( name = "fmt", - url = "https://github.com/fmtlib/fmt/archive/10.2.1.tar.gz", - sha256 = "1250e4cc58bf06ee631567523f48848dc4596133e163f02615c97f78bab6c811", - strip_prefix = "fmt-10.2.1", + url = "https://github.com/fmtlib/fmt/archive/11.0.2.tar.gz", + sha256 = "6cb1e6d37bdcb756dbbe59be438790db409cdb4868c66e888d5df9f13f7c027f", + strip_prefix = "fmt-11.0.2", build_file = "@onedal//dev/bazel/deps:fmt.tpl.BUILD", ) diff --git a/cpp/daal/BUILD b/cpp/daal/BUILD index 7463af41c55..4f15e2b0c3a 100644 --- a/cpp/daal/BUILD +++ b/cpp/daal/BUILD @@ -143,7 +143,6 @@ daal_module( name = "threading_tbb", srcs = glob(["src/threading/**/*.cpp"]), local_defines = [ - "__DO_TBB_LAYER__", "__TBB_NO_IMPLICIT_LINKAGE", "__TBB_LEGACY_MODE", "TBB_SUPPRESS_DEPRECATED_MESSAGES", diff --git a/cpp/daal/include/algorithms/pca/pca_types.h b/cpp/daal/include/algorithms/pca/pca_types.h index 8c8472b140b..9a9daa4f987 100644 --- a/cpp/daal/include/algorithms/pca/pca_types.h +++ b/cpp/daal/include/algorithms/pca/pca_types.h @@ -670,7 +670,7 @@ class DAAL_EXPORT BaseBatchParameter : public daal::algorithms::Parameter BaseBatchParameter(); DAAL_UINT64 resultsToCompute; /*!< 64 bit integer flag that indicates the results to compute */ - size_t nComponents; /*!< number of components for reduced implementation */ + size_t nComponents; /*!< number of components for reduced implementation (applicable for batch mode only) */ bool isDeterministic; /*!< sign flip if required */ bool doScale; /*!< scaling if required */ bool isCorrelation; /*!< correlation is provided */ diff --git a/cpp/daal/include/data_management/features/internal/helpers.h b/cpp/daal/include/data_management/features/internal/helpers.h index 
6826da1588b..f26ccfa8284 100644 --- a/cpp/daal/include/data_management/features/internal/helpers.h +++ b/cpp/daal/include/data_management/features/internal/helpers.h @@ -1,6 +1,7 @@ /* file: helpers.h */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -225,7 +226,7 @@ inline IndexNumType getIndexNumType() } #endif -#if !(defined(_WIN32) || defined(_WIN64)) && defined(__x86_64__) +#if !(defined(_WIN32) || defined(_WIN64)) && (defined(__x86_64__) || defined(TARGET_ARM) || defined(TARGET_RISCV64)) template <> inline IndexNumType getIndexNumType() { diff --git a/cpp/daal/include/services/env_detect.h b/cpp/daal/include/services/env_detect.h index f561de5ae2c..d132c55794d 100644 --- a/cpp/daal/include/services/env_detect.h +++ b/cpp/daal/include/services/env_detect.h @@ -198,6 +198,10 @@ class DAAL_EXPORT Environment : public Base void initNumberOfThreads(); env _env; + // Pointer to the oneapi::tbb::task_scheduler_handle class object, global for oneDAL. + // The oneapi::tbb::task_scheduler_handle and the oneapi::tbb::finalize function + // allow user to wait for completion of worker threads. 
+ void * _schedulerHandle; void * _globalControl; SharedPtr _executionContext; }; diff --git a/cpp/daal/src/algorithms/linear_model/linear_model_train_normeq_merge_impl.i b/cpp/daal/src/algorithms/linear_model/linear_model_train_normeq_merge_impl.i index e7e7ac56f31..e76c65cd094 100644 --- a/cpp/daal/src/algorithms/linear_model/linear_model_train_normeq_merge_impl.i +++ b/cpp/daal/src/algorithms/linear_model/linear_model_train_normeq_merge_impl.i @@ -43,22 +43,6 @@ using namespace daal::data_management; using namespace daal::internal; using namespace daal::services::internal; -template -void conditional_threader_for(bool condition, size_t n, size_t threadsRequest, const F & processIteration) -{ - if (condition) - { - daal::threader_for(n, threadsRequest, processIteration); - } - else - { - for (size_t i = 0; i < n; i++) - { - processIteration(i); - } - } -} - template Status MergeKernel::merge(const NumericTable & partialTable, algorithmFPType * result, bool threadingCondition) { @@ -69,7 +53,7 @@ Status MergeKernel::merge(const NumericTable & partialTabl algorithmFPType * partialResult = const_cast(block.get()); size_t resultSize = nRows * partialTable.getNumberOfColumns(); - conditional_threader_for(threadingCondition, resultSize, resultSize, [=](size_t i) { result[i] += partialResult[i]; }); + daal::conditional_threader_for(threadingCondition, resultSize, [=](size_t i) { result[i] += partialResult[i]; }); return Status(); } diff --git a/cpp/daal/src/algorithms/linear_regression/linear_regression_train_dense_normeq_batch_fpt_cpu.cpp b/cpp/daal/src/algorithms/linear_regression/linear_regression_train_dense_normeq_batch_fpt_cpu.cpp index ef9ab58c256..bb0a22d089e 100644 --- a/cpp/daal/src/algorithms/linear_regression/linear_regression_train_dense_normeq_batch_fpt_cpu.cpp +++ b/cpp/daal/src/algorithms/linear_regression/linear_regression_train_dense_normeq_batch_fpt_cpu.cpp @@ -39,7 +39,7 @@ template class BatchContainer; } namespace internal { -template class 
BatchKernel; +template class DAAL_EXPORT BatchKernel; } } // namespace training } // namespace linear_regression diff --git a/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_batch_fpt_cpu.cpp b/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_batch_fpt_cpu.cpp index 2c71f4d64a0..e1ed3085861 100644 --- a/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_batch_fpt_cpu.cpp +++ b/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_batch_fpt_cpu.cpp @@ -41,7 +41,7 @@ template class BatchContainer; namespace internal { -template class BatchKernel; +template class DAAL_EXPORT BatchKernel; } // namespace internal } // namespace training diff --git a/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_online_fpt_cpu.cpp b/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_online_fpt_cpu.cpp index 867f3a23b56..c82553c834a 100644 --- a/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_online_fpt_cpu.cpp +++ b/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_online_fpt_cpu.cpp @@ -40,7 +40,7 @@ template class OnlineContainer; namespace internal { -template class OnlineKernel; +template class DAAL_EXPORT OnlineKernel; } // namespace internal } // namespace training diff --git a/cpp/daal/src/externals/core_threading_win_dll.cpp b/cpp/daal/src/externals/core_threading_win_dll.cpp index bfd7ac01a32..37c4f7d0e2b 100644 --- a/cpp/daal/src/externals/core_threading_win_dll.cpp +++ b/cpp/daal/src/externals/core_threading_win_dll.cpp @@ -143,7 +143,9 @@ typedef void (*_daal_wait_task_group_t)(void * taskGroupPtr); typedef bool (*_daal_is_in_parallel_t)(); typedef void (*_daal_tbb_task_scheduler_free_t)(void *& globalControl); +typedef void (*_daal_tbb_task_scheduler_handle_free_t)(void *& schedulerHandle); typedef size_t (*_setNumberOfThreads_t)(const size_t, void **); 
+typedef size_t (*_setSchedulerHandle_t)(void **); typedef void * (*_daal_threader_env_t)(); typedef void (*_daal_parallel_sort_int32_t)(int *, int *); @@ -205,10 +207,12 @@ static _daal_del_task_group_t _daal_del_task_group_ptr = NULL; static _daal_run_task_group_t _daal_run_task_group_ptr = NULL; static _daal_wait_task_group_t _daal_wait_task_group_ptr = NULL; -static _daal_is_in_parallel_t _daal_is_in_parallel_ptr = NULL; -static _daal_tbb_task_scheduler_free_t _daal_tbb_task_scheduler_free_ptr = NULL; -static _setNumberOfThreads_t _setNumberOfThreads_ptr = NULL; -static _daal_threader_env_t _daal_threader_env_ptr = NULL; +static _daal_is_in_parallel_t _daal_is_in_parallel_ptr = NULL; +static _daal_tbb_task_scheduler_free_t _daal_tbb_task_scheduler_free_ptr = NULL; +static _daal_tbb_task_scheduler_handle_free_t _daal_tbb_task_scheduler_handle_free_ptr = NULL; +static _setNumberOfThreads_t _setNumberOfThreads_ptr = NULL; +static _setSchedulerHandle_t _setSchedulerHandle_ptr = NULL; +static _daal_threader_env_t _daal_threader_env_ptr = NULL; static _daal_parallel_sort_int32_t _daal_parallel_sort_int32_ptr = NULL; static _daal_parallel_sort_uint64_t _daal_parallel_sort_uint64_ptr = NULL; @@ -657,6 +661,16 @@ DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& init) return _daal_tbb_task_scheduler_free_ptr(init); } +DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& init) +{ + load_daal_thr_dll(); + if (_daal_tbb_task_scheduler_handle_free_ptr == NULL) + { + _daal_tbb_task_scheduler_handle_free_ptr = (_daal_tbb_task_scheduler_handle_free_t)load_daal_thr_func("_daal_tbb_task_scheduler_handle_free"); + } + return _daal_tbb_task_scheduler_handle_free_ptr(init); +} + DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** init) { load_daal_thr_dll(); @@ -667,6 +681,16 @@ DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** init) return _setNumberOfThreads_ptr(numThreads, init); } +DAAL_EXPORT size_t 
_setSchedulerHandle(void ** init) +{ + load_daal_thr_dll(); + if (_setSchedulerHandle_ptr == NULL) + { + _setSchedulerHandle_ptr = (_setSchedulerHandle_t)load_daal_thr_func("_setSchedulerHandle"); + } + return _setSchedulerHandle_ptr(init); +} + DAAL_EXPORT void * _daal_threader_env() { load_daal_thr_dll(); diff --git a/cpp/daal/src/externals/service_spblas_ref.h b/cpp/daal/src/externals/service_spblas_ref.h index a42c888aee6..2106e689a0e 100644 --- a/cpp/daal/src/externals/service_spblas_ref.h +++ b/cpp/daal/src/externals/service_spblas_ref.h @@ -1,6 +1,7 @@ /* file: service_spblas_ref.h */ /******************************************************************************* * Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +25,8 @@ #ifndef __SERVICE_SPBLAS_REF_H__ #define __SERVICE_SPBLAS_REF_H__ +#include "src/externals/service_memory.h" // required for memset + namespace daal { namespace internal @@ -38,13 +41,125 @@ struct RefSpBlas static void xcsrmultd(const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, fpType * a, DAAL_INT * ja, DAAL_INT * ia, fpType * b, DAAL_INT * jb, DAAL_INT * ib, fpType * c, DAAL_INT * ldc) { - services::throwIfPossible(services::Status(services::ErrorMethodNotImplemented)); + if (*transa == 'n' || *transa == 'N') + { + csrmultd(m, n, k, a, ja, ia, b, jb, ib, c, ldc); + } + else + { + csrmultd_transpose(m, n, k, a, ja, ia, b, jb, ib, c, ldc); + } + } + + static void csrmultd(const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, fpType * a, DAAL_INT * ja, DAAL_INT * ia, fpType * b, + DAAL_INT * jb, DAAL_INT * ib, fpType * c, DAAL_INT * ldc) + { + DAAL_INT indexing = 1; // 1-based indexing + DAAL_INT row_b, row_c, col_c, val_ptr_a, val_ptr_b; + fpType a_elt, b_elt; + DAAL_OVERFLOW_CHECK_BY_MULTIPLICATION_THROW_IF_POSSIBLE(DAAL_INT, 
*ldc, (*k) - 1); + for (DAAL_INT col_c = 0; col_c < *k; col_c++) //flush the matrix c + { + services::internal::service_memset(c + col_c * (*ldc), fpType(0), *m); + } + for (row_c = 0; row_c < *m; row_c++) // row_a = row_c + { + for (val_ptr_a = ia[row_c] - indexing; val_ptr_a < ia[row_c + 1] - indexing; val_ptr_a++) + { + row_b = ja[val_ptr_a] - indexing; + a_elt = a[val_ptr_a]; + for (val_ptr_b = ib[row_b] - indexing; val_ptr_b < ib[row_b + 1] - indexing; val_ptr_b++) + { + col_c = jb[val_ptr_b] - indexing; + b_elt = b[val_ptr_b]; + c[col_c * (*ldc) + row_c] += a_elt * b_elt; + } + } + } + } + + static void csrmultd_transpose(const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, fpType * a, DAAL_INT * ja, DAAL_INT * ia, fpType * b, + DAAL_INT * jb, DAAL_INT * ib, fpType * c, DAAL_INT * ldc) + { + DAAL_INT indexing = 1; + DAAL_INT row_a, row_b, row_c, col_c, val_ptr_a, val_ptr_b; + fpType a_elt, b_elt; + DAAL_OVERFLOW_CHECK_BY_MULTIPLICATION_THROW_IF_POSSIBLE(DAAL_INT, *ldc, (*n) - 1); + for (DAAL_INT col_c = 0; col_c < *k; col_c++) //flush the matrix c + { + services::internal::service_memset(c + col_c * (*ldc), fpType(0), *n); + } + for (row_a = 0; row_a < *m; row_a++) + { + row_b = row_a; + for (val_ptr_b = ib[row_b] - indexing; val_ptr_b < ib[row_b + 1] - indexing; val_ptr_b++) + { + b_elt = b[val_ptr_b]; + col_c = jb[val_ptr_b] - indexing; //col_c = col_b + for (val_ptr_a = ia[row_a] - indexing; val_ptr_a < ia[row_a + 1] - indexing; val_ptr_a++) + { + row_c = ja[val_ptr_a] - indexing; //row_c = col_a + a_elt = a[val_ptr_a]; + c[col_c * (*ldc) + row_c] += a_elt * b_elt; + } + } + } } static void xcsrmv(const char * transa, const DAAL_INT * m, const DAAL_INT * k, const fpType * alpha, const char * matdescra, const fpType * val, const DAAL_INT * indx, const DAAL_INT * pntrb, const DAAL_INT * pntre, const fpType * x, const fpType * beta, fpType * y) { - services::throwIfPossible(services::Status(services::ErrorMethodNotImplemented)); + if (*transa == 'n' || 
*transa == 'N') + { + csrmv(m, k, alpha, matdescra, val, indx, pntrb, pntre, x, beta, y); + } + else + { + csrmv_transpose(m, k, alpha, matdescra, val, indx, pntrb, pntre, x, beta, y); + } + } + + static void csrmv(const DAAL_INT * m, const DAAL_INT * k, const fpType * alpha, const char * matdescra, const fpType * val, const DAAL_INT * indx, + const DAAL_INT * pntrb, const DAAL_INT * pntre, const fpType * x, const fpType * beta, fpType * y) + { + DAAL_INT indexing = 1; + if (matdescra[3] == 'C') indexing = 0; // if fourth entry is 'C' zero based + DAAL_INT curr_row_start, curr_row_end, i, k_ind; + for (DAAL_INT row_num = 0; row_num < *m; row_num++) + { + y[row_num] *= (*beta); + curr_row_start = pntrb[row_num] - indexing; + curr_row_end = pntre[row_num] - indexing; + for (i = curr_row_start; i < curr_row_end; i++) + { + k_ind = indx[i] - indexing; + y[row_num] += (*alpha) * x[k_ind] * val[i]; + } + } + } + + static void csrmv_transpose(const DAAL_INT * m, const DAAL_INT * k, const fpType * alpha, const char * matdescra, const fpType * val, + const DAAL_INT * indx, const DAAL_INT * pntrb, const DAAL_INT * pntre, const fpType * x, const fpType * beta, + fpType * y) + { + DAAL_INT indexing = 1; + if (matdescra[3] == 'C') indexing = 0; // if fourth entry is 'C' zero based + for (DAAL_INT _i = 0; _i < *k; _i++) + { + y[_i] *= *beta; + } + fpType coeff; + DAAL_INT row_num, i, curr_row_start, curr_row_end; + for (row_num = 0; row_num < *m; row_num++) + { + coeff = (*alpha) * x[row_num]; + curr_row_start = pntrb[row_num] - indexing; + curr_row_end = pntre[row_num] - indexing; + for (i = curr_row_start; i < curr_row_end; i++) + { + y[indx[i] - indexing] += coeff * val[i]; + } + } } static void xcsrmm(const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const fpType * alpha, const char * matdescra, diff --git a/cpp/daal/src/externals/service_stat_ref.h b/cpp/daal/src/externals/service_stat_ref.h index 81a44ce1434..d01eef06d55 100644 --- 
a/cpp/daal/src/externals/service_stat_ref.h +++ b/cpp/daal/src/externals/service_stat_ref.h @@ -1,6 +1,7 @@ /* file: service_stat_ref.h */ /******************************************************************************* * Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,8 +25,9 @@ #ifndef __SERVICE_STAT_REF_H__ #define __SERVICE_STAT_REF_H__ -typedef void (*func_type)(DAAL_INT, DAAL_INT, DAAL_INT, void *); +#include "src/externals/service_memory.h" +typedef void (*func_type)(DAAL_INT, DAAL_INT, DAAL_INT, void *); extern "C" { #define __DAAL_VSL_SS_MATRIX_STORAGE_COLS 0x00020000 @@ -194,8 +196,37 @@ struct RefStatistics static int x2c_mom(const double * data, const __int64 nFeatures, const __int64 nVectors, double * variance, const __int64 method) { - int errcode = 0; - + // E(x-\mu)^2 = E(x^2) - \mu^2 + int errcode = 0; + double * sum = (double *)daal::services::internal::service_calloc(nFeatures, sizeof(double)); + if (!sum) return -4; + daal::services::internal::service_memset(variance, double(0), nFeatures); + DAAL_INT feature_ptr, vec_ptr; + double wtInv = (double)1 / nVectors; + double wtInvMinus = (double)1 / (nVectors - 1); + double pt = 0; + for (vec_ptr = 0; vec_ptr < nVectors; ++vec_ptr) + { +#pragma omp simd + for (feature_ptr = 0; feature_ptr < nFeatures; ++feature_ptr) + { + pt = data[vec_ptr * nFeatures + feature_ptr]; + sum[feature_ptr] += pt; + variance[feature_ptr] += (pt * pt); // 2RSum + } + } + double sumSqDivN; // S^2/n = n*\mu^2 +#pragma omp simd + for (feature_ptr = 0; feature_ptr < nFeatures; ++feature_ptr) + { + sumSqDivN = sum[feature_ptr]; + sumSqDivN *= sumSqDivN; + sumSqDivN *= wtInv; + variance[feature_ptr] -= sumSqDivN; // (2RSum-S^2/n) + variance[feature_ptr] *= wtInvMinus; + } + daal::services::internal::service_free(sum); + sum = NULL; return errcode; } @@ 
-276,8 +307,37 @@ struct RefStatistics static int x2c_mom(const float * data, const __int64 nFeatures, const __int64 nVectors, float * variance, const __int64 method) { + // E(x-\mu)^2 = E(x^2) - \mu^2 int errcode = 0; - + float * sum = (float *)daal::services::internal::service_calloc(nFeatures, sizeof(float)); + if (!sum) return -4; + daal::services::internal::service_memset(variance, float(0), nFeatures); + DAAL_INT feature_ptr, vec_ptr; + float wtInv = (float)1 / nVectors; + float wtInvMinus = (float)1 / (nVectors - 1); + float pt = 0; + for (vec_ptr = 0; vec_ptr < nVectors; ++vec_ptr) + { +#pragma omp simd + for (feature_ptr = 0; feature_ptr < nFeatures; ++feature_ptr) + { + pt = data[vec_ptr * nFeatures + feature_ptr]; + sum[feature_ptr] += pt; + variance[feature_ptr] += (pt * pt); // 2RSum + } + } + float sumSqDivN; // S^2/n = n*\mu^2 +#pragma omp simd + for (feature_ptr = 0; feature_ptr < nFeatures; ++feature_ptr) + { + sumSqDivN = sum[feature_ptr]; + sumSqDivN *= sumSqDivN; + sumSqDivN *= wtInv; + variance[feature_ptr] -= sumSqDivN; // (2RSum-S^2/n) + variance[feature_ptr] *= wtInvMinus; + } + daal::services::internal::service_free(sum); + sum = NULL; return errcode; } diff --git a/cpp/daal/src/services/env_detect.cpp b/cpp/daal/src/services/env_detect.cpp index 6698ede0d3a..286416ed571 100644 --- a/cpp/daal/src/services/env_detect.cpp +++ b/cpp/daal/src/services/env_detect.cpp @@ -125,7 +125,7 @@ DAAL_EXPORT void daal::services::Environment::setDynamicLibraryThreadingTypeOnWi initNumberOfThreads(); } -DAAL_EXPORT daal::services::Environment::Environment() : _globalControl {} +DAAL_EXPORT daal::services::Environment::Environment() : _schedulerHandle {}, _globalControl {} { _env.cpuid_init_flag = false; _env.cpuid = -1; @@ -137,7 +137,14 @@ DAAL_EXPORT daal::services::Environment::Environment(const Environment & e) : da DAAL_EXPORT void daal::services::Environment::initNumberOfThreads() { if (isInit) return; - + // Initializes global 
oneapi::tbb::task_scheduler_handle object in oneDAL to prevent the unexpected + // destruction of the calling thread. + // When the oneapi::tbb::finalize function is called with an oneapi::tbb::task_scheduler_handle + // instance, it blocks the calling thread until the completion of all worker + // threads that were implicitly created by the library. +#if defined(TARGET_X86_64) + daal::setSchedulerHandle(&_schedulerHandle); +#endif /* if HT enabled - set _numThreads to physical cores num */ if (daal::internal::ServiceInst::serv_get_ht()) { @@ -156,7 +163,6 @@ DAAL_EXPORT void daal::services::Environment::initNumberOfThreads() DAAL_EXPORT daal::services::Environment::~Environment() { daal::services::daal_free_buffers(); - _daal_tbb_task_scheduler_free(_globalControl); } void daal::services::Environment::_cpu_detect(int enable) @@ -171,6 +177,9 @@ void daal::services::Environment::_cpu_detect(int enable) DAAL_EXPORT void daal::services::Environment::setNumberOfThreads(const size_t numThreads) { isInit = true; +#if defined(TARGET_X86_64) + daal::setSchedulerHandle(&_schedulerHandle); +#endif daal::setNumberOfThreads(numThreads, &_globalControl); } diff --git a/cpp/daal/src/threading/service_thread_pinner.cpp b/cpp/daal/src/threading/service_thread_pinner.cpp old mode 100755 new mode 100644 index 069a163c0a5..786a589946a --- a/cpp/daal/src/threading/service_thread_pinner.cpp +++ b/cpp/daal/src/threading/service_thread_pinner.cpp @@ -27,39 +27,37 @@ #include "services/daal_memory.h" #include "src/threading/threading.h" - #if defined(__DO_TBB_LAYER__) - - #define USE_TASK_ARENA_CURRENT_SLOT 1 - #define LOG_PINNING 1 - #define TBB_PREVIEW_TASK_ARENA 1 - #define TBB_PREVIEW_LOCAL_OBSERVER 1 - - #include "tbb/tbb.h" - #include - #include - #include - #include - #include - #include - #include "services/daal_atomic_int.h" + #define USE_TASK_ARENA_CURRENT_SLOT 1 + #define LOG_PINNING 1 + #define TBB_PREVIEW_TASK_ARENA 1 + #define TBB_PREVIEW_LOCAL_OBSERVER 1 + + #include 
"tbb/tbb.h" + #include + #include + #include + #include + #include + #include + #include "services/daal_atomic_int.h" using namespace daal::services; - #if defined(_WIN32) || defined(_WIN64) - #include - #define __PINNER_WINDOWS__ + #if defined(_WIN32) || defined(_WIN64) + #include + #define __PINNER_WINDOWS__ - #if defined(_WIN64) - #define MASK_WIDTH 64 - #else - #define MASK_WIDTH 32 - #endif + #if defined(_WIN64) + #define MASK_WIDTH 64 + #else + #define MASK_WIDTH 32 + #endif - #else // LINUX - #include - #define __PINNER_LINUX__ + #else // LINUX + #include + #define __PINNER_LINUX__ - #ifdef __FreeBSD__ - #include + #ifdef __FreeBSD__ + #include cpu_set_t * __sched_cpualloc(size_t count) { @@ -73,25 +71,25 @@ int sched_getaffinity(pid_t pid, size_t cpusetsize, cpu_set_t * mask) { return cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, pid == 0 ? -1 : pid, cpusetsize, mask); } - #endif - #endif + #endif + struct cpu_mask_t { int status; - #if defined(_WIN32) || defined(_WIN64) + #if defined(_WIN32) || defined(_WIN64) GROUP_AFFINITY ga; - #else + #else int ncpus; int bit_parts_size; cpu_set_t * cpu_set; - #endif + #endif cpu_mask_t() { status = 0; - #if defined __PINNER_LINUX__ + #if defined __PINNER_LINUX__ ncpus = 0; bit_parts_size = 0; @@ -113,10 +111,10 @@ struct cpu_mask_t } if (cpu_set == NULL) - #else // defined __PINNER_WINDOWS__ + #else // defined __PINNER_WINDOWS__ bool retval = GetThreadGroupAffinity(GetCurrentThread(), &ga); if (!retval) - #endif + #endif { status--; } @@ -128,13 +126,13 @@ struct cpu_mask_t { if (status == 0) { - #if defined __PINNER_LINUX__ + #if defined __PINNER_LINUX__ int err = pthread_getaffinity_np(pthread_self(), bit_parts_size, cpu_set); if (err) - #else // defined __PINNER_WINDOWS__ + #else // defined __PINNER_WINDOWS__ bool retval = GetThreadGroupAffinity(GetCurrentThread(), &ga); if (!retval) - #endif + #endif { status--; } @@ -147,15 +145,15 @@ struct cpu_mask_t { if (status == 0) { - #if defined __PINNER_LINUX__ + 
#if defined __PINNER_LINUX__ int err = pthread_setaffinity_np(pthread_self(), bit_parts_size, cpu_set); if (err) - #else // defined __PINNER_WINDOWS__ + #else // defined __PINNER_WINDOWS__ bool retval = SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL); if (!retval) - #endif + #endif { status--; } @@ -168,13 +166,13 @@ struct cpu_mask_t { if (status == 0) { - #if defined __PINNER_LINUX__ + #if defined __PINNER_LINUX__ CPU_ZERO_S(bit_parts_size, cpu_set); CPU_SET_S(cpu_idx, bit_parts_size, cpu_set); - #else // defined __PINNER_WINDOWS__ + #else // defined __PINNER_WINDOWS__ ga.Group = cpu_idx / MASK_WIDTH; ga.Mask = cpu_idx % MASK_WIDTH; - #endif + #endif } return status; @@ -184,12 +182,12 @@ struct cpu_mask_t ~cpu_mask_t() { - #if defined __PINNER_LINUX__ + #if defined __PINNER_LINUX__ if (cpu_set != NULL) { CPU_FREE(cpu_set); } - #endif + #endif return; } // ~cpu_mask_t() @@ -236,6 +234,9 @@ class thread_pinner_impl_t : public tbb::task_scheduler_observer thread_pinner_impl_t::thread_pinner_impl_t(void (*read_topo)(int &, int &, int &, int **), void (*deleter)(void *)) : pinner_arena(nthreads = daal::threader_get_threads_number()), tbb::task_scheduler_observer(pinner_arena), topo_deleter(deleter) { + #if defined(TARGET_X86_64) + pinner_arena.initialize(); + #endif do_pinning = (nthreads > 0) ? 
true : false; is_pinning.set(0); @@ -388,34 +389,4 @@ DAAL_EXPORT void _thread_pinner_on_scheduler_exit(bool p) IMPL->on_scheduler_exit(p); } - #else /* if __DO_TBB_LAYER__ is not defined */ - -DAAL_EXPORT void * _getThreadPinner(bool create_pinner, void (*read_topo)(int &, int &, int &, int **), void (*deleter)(void *)) -{ - return NULL; -} - -DAAL_EXPORT void _thread_pinner_thread_pinner_init(void (*f)(int &, int &, int &, int **), void (*deleter)(void *)) {} -DAAL_EXPORT void _thread_pinner_execute(daal::services::internal::thread_pinner_task_t & task) -{ - task(); -} -DAAL_EXPORT bool _thread_pinner_get_pinning() -{ - return false; -} -DAAL_EXPORT bool _thread_pinner_set_pinning(bool p) -{ - return true; -} -DAAL_EXPORT int _thread_pinner_get_status() -{ - return 0; -} - -DAAL_EXPORT void _thread_pinner_on_scheduler_entry(bool p) {} -DAAL_EXPORT void _thread_pinner_on_scheduler_exit(bool p) {} - - #endif /* if __DO_TBB_LAYER__ is not defined */ - #endif /* #if !defined (DAAL_THREAD_PINNING_DISABLED) */ diff --git a/cpp/daal/src/threading/threading.cpp b/cpp/daal/src/threading/threading.cpp index 7fa0127a5ab..15c39368238 100644 --- a/cpp/daal/src/threading/threading.cpp +++ b/cpp/daal/src/threading/threading.cpp @@ -23,61 +23,73 @@ #include "src/threading/threading.h" #include "services/daal_memory.h" +#include "src/algorithms/service_qsort.h" -#if defined(__DO_TBB_LAYER__) - #define TBB_PREVIEW_GLOBAL_CONTROL 1 - #define TBB_PREVIEW_TASK_ARENA 1 +#define TBB_PREVIEW_GLOBAL_CONTROL 1 +#define TBB_PREVIEW_TASK_ARENA 1 - #include // malloc and free - #include - #include - #include - #include - #include - #include "services/daal_atomic_int.h" +#include // malloc and free +#include +#include +#include +#include +#include +#include "services/daal_atomic_int.h" - #if defined(TBB_INTERFACE_VERSION) && TBB_INTERFACE_VERSION >= 12002 - #include - #endif +#if defined(TBB_INTERFACE_VERSION) && TBB_INTERFACE_VERSION >= 12002 + #include +#endif using namespace 
daal::services; -#else - #include "src/externals/service_service.h" - #include "src/algorithms/service_qsort.h" -#endif DAAL_EXPORT void * _threaded_scalable_malloc(const size_t size, const size_t alignment) { -#if defined(__DO_TBB_LAYER__) return scalable_aligned_malloc(size, alignment); -#else - return daal::internal::Service<>::serv_malloc(size, alignment); -#endif } DAAL_EXPORT void _threaded_scalable_free(void * ptr) { -#if defined(__DO_TBB_LAYER__) scalable_aligned_free(ptr); -#else - daal::internal::Service<>::serv_free(ptr); -#endif } DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& globalControl) { -#if defined(__DO_TBB_LAYER__) if (globalControl) { delete reinterpret_cast(globalControl); globalControl = nullptr; } +} + +DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& schedulerHandle) +{ + // Note: TBB 13 deletes task_scheduler_handle itself during the destruction of thread context + + // #if defined(TARGET_X86_64) + // if (schedulerHandle) + // { + // delete reinterpret_cast(schedulerHandle); + // schedulerHandle = nullptr; + // } + // #endif +} + +DAAL_EXPORT size_t _setSchedulerHandle(void ** schedulerHandle) +{ +#if defined(TARGET_X86_64) + #if (TBB_INTERFACE_VERSION < 12120) + schedulerHandle = nullptr; + #else + *schedulerHandle = reinterpret_cast(new tbb::task_scheduler_handle(tbb::attach {})); + #endif + // It is necessary for initializing tbb in cases where DAAL does not use it. 
+ tbb::task_arena {}.initialize(); #endif + return 0; } DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalControl) { -#if defined(__DO_TBB_LAYER__) static tbb::spin_mutex mt; tbb::spin_mutex::scoped_lock lock(mt); if (numThreads != 0) @@ -87,180 +99,209 @@ DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalCo daal::threader_env()->setNumberOfThreads(numThreads); return numThreads; } -#endif daal::threader_env()->setNumberOfThreads(1); return 1; } DAAL_EXPORT void _daal_threader_for(int n, int threads_request, const void * a, daal::functype func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { + int i; + for (i = r.begin(); i < r.end(); i++) + { + func(i, a); + } + }); + } + else + { int i; - for (i = r.begin(); i < r.end(); i++) + for (i = 0; i < n; i++) { func(i, a); } - }); -#elif defined(__DO_SEQ_LAYER__) - int i; - for (i = 0; i < n; i++) - { - func(i, a); } -#endif } DAAL_EXPORT void _daal_threader_for_int64(int64_t n, const void * a, daal::functype_int64 func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { + int64_t i; + for (i = r.begin(); i < r.end(); i++) + { + func(i, a); + } + }); + } + else + { int64_t i; - for (i = r.begin(); i < r.end(); i++) + for (i = 0; i < n; i++) { func(i, a); } - }); -#elif defined(__DO_SEQ_LAYER__) - int64_t i; - for (i = 0; i < n; i++) - { - func(i, a); } -#endif } DAAL_EXPORT void _daal_threader_for_blocked_size(size_t n, size_t block, const void * a, daal::functype_blocked_size func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for(tbb::blocked_range(0ul, n, block), 
[=](tbb::blocked_range r) -> void { return func(r.begin(), r.end(), a); }); -#elif defined(__DO_SEQ_LAYER__) - func(0ul, n, a); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_for(tbb::blocked_range(0ul, n, block), + [=](tbb::blocked_range r) -> void { return func(r.begin(), r.end(), a); }); + } + else + { + func(0ul, n, a); + } } DAAL_EXPORT void _daal_threader_for_simple(int n, int threads_request, const void * a, daal::functype func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for( - tbb::blocked_range(0, n, 1), - [&](tbb::blocked_range r) { - int i; - for (i = r.begin(); i < r.end(); i++) - { - func(i, a); - } - }, - tbb::simple_partitioner {}); -#elif defined(__DO_SEQ_LAYER__) - int i; - for (i = 0; i < n; i++) + if (daal::threader_env()->getNumberOfThreads() > 1) { - func(i, a); + tbb::parallel_for( + tbb::blocked_range(0, n, 1), + [&](tbb::blocked_range r) { + int i; + for (i = r.begin(); i < r.end(); i++) + { + func(i, a); + } + }, + tbb::simple_partitioner {}); + } + else + { + int i; + for (i = 0; i < n; i++) + { + func(i, a); + } } -#endif } DAAL_EXPORT void _daal_threader_for_int32ptr(const int * begin, const int * end, const void * a, daal::functype_int32ptr func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for(tbb::blocked_range(begin, end, 1), [&](tbb::blocked_range r) { + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_for(tbb::blocked_range(begin, end, 1), [&](tbb::blocked_range r) { + const int * i; + for (i = r.begin(); i != r.end(); i++) + { + func(i, a); + } + }); + } + else + { const int * i; - for (i = r.begin(); i != r.end(); i++) + for (i = begin; i != end; ++i) { func(i, a); } - }); -#elif defined(__DO_SEQ_LAYER__) - const int * i; - for (i = begin; i != end; ++i) - { - func(i, a); } -#endif } DAAL_EXPORT int64_t _daal_parallel_reduce_int32_int64(int32_t n, int64_t init, const void * a, daal::loop_functype_int32_int64 loop_func, const void * b, 
daal::reduction_functype_int64 reduction_func) { -#if defined(__DO_TBB_LAYER__) - return tbb::parallel_reduce( - tbb::blocked_range(0, n), init, - [&](const tbb::blocked_range & r, int64_t value_for_reduce) { return loop_func(r.begin(), r.end(), value_for_reduce, a); }, - [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::auto_partitioner {}); - -#elif defined(__DO_SEQ_LAYER__) - int64_t value_for_reduce = init; - return loop_func(0, n, value_for_reduce, a); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + return tbb::parallel_reduce( + tbb::blocked_range(0, n), init, + [&](const tbb::blocked_range & r, int64_t value_for_reduce) { return loop_func(r.begin(), r.end(), value_for_reduce, a); }, + [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::auto_partitioner {}); + } + else + { + int64_t value_for_reduce = init; + return loop_func(0, n, value_for_reduce, a); + } } DAAL_EXPORT int64_t _daal_parallel_reduce_int32_int64_simple(int32_t n, int64_t init, const void * a, daal::loop_functype_int32_int64 loop_func, const void * b, daal::reduction_functype_int64 reduction_func) { -#if defined(__DO_TBB_LAYER__) - return tbb::parallel_reduce( - tbb::blocked_range(0, n), init, - [&](const tbb::blocked_range & r, int64_t value_for_reduce) { return loop_func(r.begin(), r.end(), value_for_reduce, a); }, - [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::simple_partitioner {}); - -#elif defined(__DO_SEQ_LAYER__) - int64_t value_for_reduce = init; - return loop_func(0, n, value_for_reduce, a); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + return tbb::parallel_reduce( + tbb::blocked_range(0, n), init, + [&](const tbb::blocked_range & r, int64_t value_for_reduce) { return loop_func(r.begin(), r.end(), value_for_reduce, a); }, + [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::simple_partitioner {}); + } + else + { + int64_t value_for_reduce = init; + return loop_func(0, 
n, value_for_reduce, a); + } } DAAL_EXPORT int64_t _daal_parallel_reduce_int32ptr_int64_simple(const int32_t * begin, const int32_t * end, int64_t init, const void * a, daal::loop_functype_int32ptr_int64 loop_func, const void * b, daal::reduction_functype_int64 reduction_func) { -#if defined(__DO_TBB_LAYER__) - return tbb::parallel_reduce( - tbb::blocked_range(begin, end), init, - [&](const tbb::blocked_range & r, int64_t value_for_reduce) { return loop_func(r.begin(), r.end(), value_for_reduce, a); }, - [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::simple_partitioner {}); - -#elif defined(__DO_SEQ_LAYER__) - int64_t value_for_reduce = init; - return loop_func(begin, end, value_for_reduce, a); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + return tbb::parallel_reduce( + tbb::blocked_range(begin, end), init, + [&](const tbb::blocked_range & r, int64_t value_for_reduce) { + return loop_func(r.begin(), r.end(), value_for_reduce, a); + }, + [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::simple_partitioner {}); + } + else + { + int64_t value_for_reduce = init; + return loop_func(begin, end, value_for_reduce, a); + } } DAAL_EXPORT void _daal_static_threader_for(size_t n, const void * a, daal::functype_static func) { -#if defined(__DO_TBB_LAYER__) - const size_t nthreads = _daal_threader_get_max_threads(); - const size_t nblocks_per_thread = n / nthreads + !!(n % nthreads); - - tbb::parallel_for( - tbb::blocked_range(0, nthreads, 1), - [&](tbb::blocked_range r) { - const size_t tid = r.begin(); - const size_t begin = tid * nblocks_per_thread; - const size_t end = n < begin + nblocks_per_thread ? 
n : begin + nblocks_per_thread; - - for (size_t i = begin; i < end; ++i) - { - func(i, tid, a); - } - }, - tbb::static_partitioner()); -#elif defined(__DO_SEQ_LAYER__) - for (size_t i = 0; i < n; i++) + if (daal::threader_env()->getNumberOfThreads() > 1) { - func(i, 0, a); + const size_t nthreads = _daal_threader_get_max_threads(); + const size_t nblocks_per_thread = n / nthreads + !!(n % nthreads); + + tbb::parallel_for( + tbb::blocked_range(0, nthreads, 1), + [&](tbb::blocked_range r) { + const size_t tid = r.begin(); + const size_t begin = tid * nblocks_per_thread; + const size_t end = n < begin + nblocks_per_thread ? n : begin + nblocks_per_thread; + + for (size_t i = begin; i < end; ++i) + { + func(i, tid, a); + } + }, + tbb::static_partitioner()); + } + else + { + for (size_t i = 0; i < n; i++) + { + func(i, 0, a); + } } -#endif } template DAAL_EXPORT void _daal_parallel_sort_template(F * begin_p, F * end_p) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_sort(begin_p, end_p); -#elif defined(__DO_SEQ_LAYER__) - daal::algorithms::internal::qSort(end_p - begin_p, begin_p); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_sort(begin_p, end_p); + } + else + { + daal::algorithms::internal::qSort(end_p - begin_p, begin_p); + } } #define DAAL_PARALLEL_SORT_IMPL(TYPE, NAMESUFFIX) \ @@ -279,124 +320,109 @@ DAAL_PARALLEL_SORT_IMPL(daal::IdxValType, pair_fp64_uint64) DAAL_EXPORT void _daal_threader_for_blocked(int n, int threads_request, const void * a, daal::functype2 func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { func(r.begin(), r.end() - r.begin(), a); }); -#elif defined(__DO_SEQ_LAYER__) - func(0, n, a); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { func(r.begin(), r.end() - r.begin(), a); }); + } + else + { + func(0, n, a); + } } DAAL_EXPORT void 
_daal_threader_for_optional(int n, int threads_request, const void * a, daal::functype func) { -#if defined(__DO_TBB_LAYER__) - if (_daal_is_in_parallel()) + if (daal::threader_env()->getNumberOfThreads() > 1) { - int i; - for (i = 0; i < n; i++) + if (_daal_is_in_parallel()) { - func(i, a); + int i; + for (i = 0; i < n; i++) + { + func(i, a); + } + } + else + { + _daal_threader_for(n, threads_request, a, func); } } else { _daal_threader_for(n, threads_request, a, func); } -#elif defined(__DO_SEQ_LAYER__) - _daal_threader_for(n, threads_request, a, func); -#endif } DAAL_EXPORT void _daal_threader_for_break(int n, int threads_request, const void * a, daal::functype_break func) { -#if defined(__DO_TBB_LAYER__) - tbb::task_group_context context; - tbb::parallel_for( - tbb::blocked_range(0, n, 1), - [&](tbb::blocked_range r) { - int i; - for (i = r.begin(); i < r.end(); ++i) - { - bool needBreak = false; - func(i, needBreak, a); - if (needBreak) context.cancel_group_execution(); - } - }, - context); -#elif defined(__DO_SEQ_LAYER__) - int i; - for (i = 0; i < n; ++i) + if (daal::threader_env()->getNumberOfThreads() > 1) { - bool needBreak = false; - func(i, needBreak, a); - if (needBreak) break; + tbb::task_group_context context; + tbb::parallel_for( + tbb::blocked_range(0, n, 1), + [&](tbb::blocked_range r) { + int i; + for (i = r.begin(); i < r.end(); ++i) + { + bool needBreak = false; + func(i, needBreak, a); + if (needBreak) context.cancel_group_execution(); + } + }, + context); + } + else + { + int i; + for (i = 0; i < n; ++i) + { + bool needBreak = false; + func(i, needBreak, a); + if (needBreak) break; + } } -#endif } DAAL_EXPORT int _daal_threader_get_max_threads() { -#if defined(__DO_TBB_LAYER__) return tbb::this_task_arena::max_concurrency(); -#elif defined(__DO_SEQ_LAYER__) - return 1; -#endif } DAAL_EXPORT int _daal_threader_get_current_thread_index() { -#if defined(__DO_TBB_LAYER__) return tbb::this_task_arena::current_thread_index(); -#elif 
defined(__DO_SEQ_LAYER__) - return 0; -#endif } DAAL_EXPORT void * _daal_get_tls_ptr(void * a, daal::tls_functype func) { -#if defined(__DO_TBB_LAYER__) tbb::enumerable_thread_specific * p = new tbb::enumerable_thread_specific([=]() -> void * { return func(a); }); return (void *)p; -#elif defined(__DO_SEQ_LAYER__) - return func(a); -#endif } DAAL_EXPORT void _daal_del_tls_ptr(void * tlsPtr) { -#if defined(__DO_TBB_LAYER__) tbb::enumerable_thread_specific * p = static_cast *>(tlsPtr); delete p; -#elif defined(__DO_SEQ_LAYER__) -#endif } DAAL_EXPORT void * _daal_get_tls_local(void * tlsPtr) { -#if defined(__DO_TBB_LAYER__) tbb::enumerable_thread_specific * p = static_cast *>(tlsPtr); return p->local(); -#elif defined(__DO_SEQ_LAYER__) - return tlsPtr; -#endif } DAAL_EXPORT void _daal_reduce_tls(void * tlsPtr, void * a, daal::tls_reduce_functype func) { -#if defined(__DO_TBB_LAYER__) tbb::enumerable_thread_specific * p = static_cast *>(tlsPtr); for (auto it = p->begin(); it != p->end(); ++it) { func((*it), a); } -#elif defined(__DO_SEQ_LAYER__) - func(tlsPtr, a); -#endif } DAAL_EXPORT void _daal_parallel_reduce_tls(void * tlsPtr, void * a, daal::tls_reduce_functype func) { -#if defined(__DO_TBB_LAYER__) size_t n = 0; tbb::enumerable_thread_specific * p = static_cast *>(tlsPtr); @@ -416,51 +442,34 @@ DAAL_EXPORT void _daal_parallel_reduce_tls(void * tlsPtr, void * a, daal::tls_re ::free(aDataPtr); } } -#elif defined(__DO_SEQ_LAYER__) - func(tlsPtr, a); -#endif } DAAL_EXPORT void * _daal_new_mutex() { -#if defined(__DO_TBB_LAYER__) return new tbb::spin_mutex(); -#elif defined(__DO_SEQ_LAYER__) - return NULL; -#endif } DAAL_EXPORT void _daal_lock_mutex(void * mutexPtr) { -#if defined(__DO_TBB_LAYER__) static_cast(mutexPtr)->lock(); -#endif } DAAL_EXPORT void _daal_unlock_mutex(void * mutexPtr) { -#if defined(__DO_TBB_LAYER__) static_cast(mutexPtr)->unlock(); -#endif } DAAL_EXPORT void _daal_del_mutex(void * mutexPtr) { -#if defined(__DO_TBB_LAYER__) delete 
static_cast(mutexPtr); -#endif } DAAL_EXPORT bool _daal_is_in_parallel() { -#if defined(__DO_TBB_LAYER__) - #if defined(TBB_INTERFACE_VERSION) && TBB_INTERFACE_VERSION >= 12002 +#if defined(TBB_INTERFACE_VERSION) && TBB_INTERFACE_VERSION >= 12002 return tbb::task::current_context() != nullptr; - #else - return tbb::task::self().state() == tbb::task::executing; - #endif #else - return false; + return tbb::task::self().state() == tbb::task::executing; #endif } @@ -470,7 +479,6 @@ DAAL_EXPORT void * _daal_threader_env() return &env; } -#if defined(__DO_TBB_LAYER__) template //Returns an index of the first element in the range[ar, ar + n) that is not less than(i.e.greater or equal to) value. size_t lower_bound(size_t n, const T * ar, const Key & value) @@ -630,19 +638,19 @@ class Collection size_t _capacity; }; - #if _WIN32 || _WIN64 +#if _WIN32 || _WIN64 typedef DWORD ThreadId; ThreadId getCurrentThreadId() { return ::GetCurrentThreadId(); } - #else +#else typedef pthread_t ThreadId; ThreadId getCurrentThreadId() { return pthread_self(); } - #endif // _WIN32||_WIN64 +#endif // _WIN32||_WIN64 class LocalStorage { @@ -826,38 +834,5 @@ DAAL_EXPORT void _daal_wait_task_group(void * taskGroupPtr) ((tbb::task_group *)taskGroupPtr)->wait(); } -#else -DAAL_EXPORT void * _daal_get_ls_ptr(void * a, daal::tls_functype func) -{ - return func(a); -} - -DAAL_EXPORT void * _daal_get_ls_local(void * lsPtr) -{ - return lsPtr; -} - -DAAL_EXPORT void _daal_reduce_ls(void * lsPtr, void * a, daal::tls_reduce_functype func) -{ - func(lsPtr, a); -} - -DAAL_EXPORT void _daal_del_ls_ptr(void * lsPtr) {} - -DAAL_EXPORT void _daal_release_ls_local(void * lsPtr, void * p) {} - -DAAL_EXPORT void * _daal_new_task_group() -{ - return nullptr; -} - -DAAL_EXPORT void _daal_del_task_group(void * taskGroupPtr) {} - -DAAL_EXPORT void _daal_run_task_group(void * taskGroupPtr, daal::task * task) {} - -DAAL_EXPORT void _daal_wait_task_group(void * taskGroupPtr) {} - -#endif - namespace daal {} diff --git 
a/cpp/daal/src/threading/threading.h b/cpp/daal/src/threading/threading.h index 4d00c789494..0b4a9881b97 100644 --- a/cpp/daal/src/threading/threading.h +++ b/cpp/daal/src/threading/threading.h @@ -102,7 +102,9 @@ extern "C" DAAL_EXPORT void _daal_wait_task_group(void * taskGroupPtr); DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& globalControl); + DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& schedulerHandle); DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalControl); + DAAL_EXPORT size_t _setSchedulerHandle(void ** schedulerHandle); DAAL_EXPORT void * _daal_threader_env(); @@ -183,6 +185,11 @@ inline size_t threader_get_threads_number() return threader_env()->getNumberOfThreads(); } +inline size_t setSchedulerHandle(void ** schedulerHandle) +{ + return _setSchedulerHandle(schedulerHandle); +} + inline size_t setNumberOfThreads(const size_t numThreads, void ** globalControl) { return _setNumberOfThreads(numThreads, globalControl); diff --git a/cpp/oneapi/dal/BUILD b/cpp/oneapi/dal/BUILD index 0bd5a48269d..ff6d770cc7c 100644 --- a/cpp/oneapi/dal/BUILD +++ b/cpp/oneapi/dal/BUILD @@ -65,6 +65,7 @@ dal_public_includes( ":optional", "@onedal//cpp/oneapi/dal/detail/mpi", "@onedal//cpp/oneapi/dal/detail/ccl", + "@onedal//cpp/oneapi/dal/detail/parameters", "@onedal//cpp/oneapi/dal/algo:parameters", ], ) @@ -84,6 +85,7 @@ dal_static_lib( dal_deps = [ ":static", "@onedal//cpp/oneapi/dal/algo:parameters", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) @@ -102,6 +104,7 @@ dal_dynamic_lib( dal_deps = [ ":dynamic", "@onedal//cpp/oneapi/dal/algo:parameters", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) diff --git a/cpp/oneapi/dal/algo/BUILD b/cpp/oneapi/dal/algo/BUILD index ab93feb58c0..e93804d2e7e 100644 --- a/cpp/oneapi/dal/algo/BUILD +++ b/cpp/oneapi/dal/algo/BUILD @@ -18,6 +18,7 @@ ALGOS = [ "cosine_distance", "dbscan", "decision_tree", + "finiteness_checker", "jaccard", "kmeans", "kmeans_init", diff --git 
a/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel.cpp b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel.cpp index 4c02a680251..3e88891de82 100644 --- a/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel.cpp +++ b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel.cpp @@ -155,25 +155,19 @@ result_t call_daal_kernel_with_weights(const context_cpu& ctx, daal_result.set(daal_lom::ResultId::sumSquaresCentered, daal_partial.get(daal_lom::PartialResultId::partialSumSquaresCentered)); } - - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_lom_online_kernel_t{}.finalizeCompute( - daal_partial.get(daal_lom::PartialResultId::nObservations).get(), - daal_partial.get(daal_lom::PartialResultId::partialSum).get(), - daal_partial.get(daal_lom::PartialResultId::partialSumSquares).get(), - daal_partial.get(daal_lom::PartialResultId::partialSumSquaresCentered).get(), - daal_result.get(daal_lom::ResultId::mean).get(), - daal_result.get(daal_lom::ResultId::secondOrderRawMoment).get(), - daal_result.get(daal_lom::ResultId::variance).get(), - daal_result.get(daal_lom::ResultId::standardDeviation).get(), - daal_result.get(daal_lom::ResultId::variation).get(), - &daal_parameter); - }); - - interop::status_to_exception(status); - } + interop::status_to_exception( + interop::call_daal_kernel_finalize_compute( + ctx, + daal_partial.get(daal_lom::PartialResultId::nObservations).get(), + daal_partial.get(daal_lom::PartialResultId::partialSum).get(), + daal_partial.get(daal_lom::PartialResultId::partialSumSquares).get(), + daal_partial.get(daal_lom::PartialResultId::partialSumSquaresCentered).get(), + daal_result.get(daal_lom::ResultId::mean).get(), + daal_result.get(daal_lom::ResultId::secondOrderRawMoment).get(), + daal_result.get(daal_lom::ResultId::variance).get(), + daal_result.get(daal_lom::ResultId::standardDeviation).get(), + 
daal_result.get(daal_lom::ResultId::variation).get(), + &daal_parameter)); auto result = get_result(desc, daal_result).set_result_options(desc.get_result_options()); diff --git a/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp b/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp index eee7eed8c41..3ba569a6e5a 100644 --- a/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp +++ b/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp @@ -107,7 +107,8 @@ class basic_statistics_test : public te::crtp_algo_fixture { check_for_exception_for_non_requested_results(compute_mode, compute_result); } - void csr_general_checks(const te::csr_table_builder& data, bs::result_option_id compute_mode) { + void csr_general_checks(const te::csr_table_builder<>& data, + bs::result_option_id compute_mode) { const auto desc = bs::descriptor{}.set_result_options( compute_mode); @@ -121,7 +122,7 @@ class basic_statistics_test : public te::crtp_algo_fixture { // TODO: Fix DAAL code. On big datasets there is an error in computing. 
// To reproduce it remove this check from test case in batch.cpp - bool not_cpu_friendly(const te::csr_table_builder& data) { + bool not_cpu_friendly(const te::csr_table_builder<>& data) { auto policy = this->get_policy(); return (data.row_count_ > 100 || data.column_count_ > 100) && policy.is_cpu(); } diff --git a/cpp/oneapi/dal/algo/covariance/BUILD b/cpp/oneapi/dal/algo/covariance/BUILD index 2770e6fc9aa..3a5f16a283e 100644 --- a/cpp/oneapi/dal/algo/covariance/BUILD +++ b/cpp/oneapi/dal/algo/covariance/BUILD @@ -9,6 +9,7 @@ dal_module( auto = True, dal_deps = [ "@onedal//cpp/oneapi/dal:core", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) @@ -16,6 +17,7 @@ dal_module( name = "parameters", dal_deps = [ "@onedal//cpp/oneapi/dal/algo/covariance/parameters", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) diff --git a/cpp/oneapi/dal/algo/covariance/compute_types.hpp b/cpp/oneapi/dal/algo/covariance/compute_types.hpp index 5800272ffaf..33f7e3d73e0 100644 --- a/cpp/oneapi/dal/algo/covariance/compute_types.hpp +++ b/cpp/oneapi/dal/algo/covariance/compute_types.hpp @@ -17,6 +17,7 @@ #pragma once #include "oneapi/dal/algo/covariance/common.hpp" +#include "oneapi/dal/detail/parameters/system_parameters.hpp" namespace oneapi::dal::covariance { @@ -35,7 +36,7 @@ template class partial_compute_result_impl; template -class compute_parameters : public base { +class compute_parameters : public dal::detail::system_parameters { public: explicit compute_parameters(); compute_parameters(compute_parameters&&) = default; diff --git a/cpp/oneapi/dal/algo/covariance/test/compute_parameters.cpp b/cpp/oneapi/dal/algo/covariance/test/compute_parameters.cpp index 4c9a13d0baf..6563b7f26c4 100644 --- a/cpp/oneapi/dal/algo/covariance/test/compute_parameters.cpp +++ b/cpp/oneapi/dal/algo/covariance/test/compute_parameters.cpp @@ -88,4 +88,29 @@ TEMPLATE_LIST_TEST_M(covariance_params_test, this->general_checks(input, input_data_table_id); } +TEST("can dump system-related parameters") { 
+ detail::compute_parameters hp{}; + std::string hp_dump; +#ifdef ONEDAL_DATA_PARALLEL + DECLARE_TEST_POLICY(policy); + auto& q = policy.get_queue(); + hp_dump = hp.dump(q); +#else + hp_dump = hp.dump(); +#endif + std::cout << "System-related parameters: " << hp_dump << std::endl; + REQUIRE(hp_dump.size() > 0); +} + +TEST("can retrieve system-related parameters") { + detail::compute_parameters hp{}; + REQUIRE(static_cast(hp.get_top_enabled_cpu_extension()) >= 0); + REQUIRE(hp.get_max_number_of_threads() > 0); +#ifdef ONEDAL_DATA_PARALLEL + DECLARE_TEST_POLICY(policy); + auto& q = policy.get_queue(); + REQUIRE(hp.get_max_workgroup_size(q) > 0); +#endif +} + } // namespace oneapi::dal::covariance::test diff --git a/cpp/oneapi/dal/algo/decision_forest/BUILD b/cpp/oneapi/dal/algo/decision_forest/BUILD index 159f9351ef9..0a47587af06 100644 --- a/cpp/oneapi/dal/algo/decision_forest/BUILD +++ b/cpp/oneapi/dal/algo/decision_forest/BUILD @@ -10,6 +10,7 @@ dal_module( dal_deps = [ "@onedal//cpp/oneapi/dal:core", "@onedal//cpp/oneapi/dal/algo/decision_forest/backend:model_impl", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) @@ -17,6 +18,7 @@ dal_module( name = "parameters", dal_deps = [ "@onedal//cpp/oneapi/dal/algo/decision_forest/parameters", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) diff --git a/cpp/oneapi/dal/algo/decision_forest/infer_types.hpp b/cpp/oneapi/dal/algo/decision_forest/infer_types.hpp index fcb45b1dd45..ab962b4831d 100644 --- a/cpp/oneapi/dal/algo/decision_forest/infer_types.hpp +++ b/cpp/oneapi/dal/algo/decision_forest/infer_types.hpp @@ -18,6 +18,7 @@ #pragma once #include "oneapi/dal/algo/decision_forest/common.hpp" +#include "oneapi/dal/detail/parameters/system_parameters.hpp" namespace oneapi::dal::decision_forest { @@ -33,7 +34,7 @@ template struct infer_parameters_impl; template -class infer_parameters : public base { +class infer_parameters : public dal::detail::system_parameters { public: explicit infer_parameters(); 
infer_parameters(infer_parameters&&) = default; diff --git a/cpp/oneapi/dal/algo/finiteness_checker/BUILD b/cpp/oneapi/dal/algo/finiteness_checker/BUILD new file mode 100644 index 00000000000..de390465f9d --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/BUILD @@ -0,0 +1,61 @@ +package(default_visibility = ["//visibility:public"]) +load("@onedal//dev/bazel:dal.bzl", + "dal_module", + "dal_test_suite", +) + +dal_module( + name = "finiteness_checker", + auto = True, + dal_deps = [ + "@onedal//cpp/oneapi/dal:core", + "@onedal//cpp/oneapi/dal/backend/primitives:reduction", + ], + extra_deps = [ + "@onedal//cpp/daal:data_management", + ] +) + +dal_test_suite( + name = "cpu_tests", + private = True, + compile_as = [ "c++" ], + srcs = glob([ + "backend/cpu/test/*.cpp", + ]), + dal_deps = [ + ":finiteness_checker", + ], +) + +dal_test_suite( + name = "gpu_tests", + private = True, + compile_as = [ "dpc++" ], + srcs = glob([ + "backend/gpu/test/*.cpp", + ]), + dal_deps = [ + ":finiteness_checker", + ], +) + +dal_test_suite( + name = "interface_tests", + framework = "catch2", + srcs = glob([ + "test/*.cpp", + ]), + dal_deps = [ + ":finiteness_checker", + ], +) + +dal_test_suite( + name = "tests", + tests = [ + ":cpu_tests", + ":gpu_tests", + ":interface_tests", + ], +) diff --git a/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp b/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp new file mode 100644 index 00000000000..c1a1b8fc3c4 --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp @@ -0,0 +1,39 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/algo/finiteness_checker/compute_types.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/table/homogen.hpp" + +namespace oneapi::dal::finiteness_checker::backend { + +template +struct compute_kernel_cpu { + compute_result operator()(const dal::backend::context_cpu& ctx, + const detail::descriptor_base& params, + const compute_input& input) const; + +#ifdef ONEDAL_DATA_PARALLEL + void operator()(const dal::backend::context_cpu& ctx, + const detail::descriptor_base& params, + const table& data, + bool& res) const; +#endif +}; + +} // namespace oneapi::dal::finiteness_checker::backend diff --git a/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel_dense.cpp b/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel_dense.cpp new file mode 100644 index 00000000000..60d965046ec --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel_dense.cpp @@ -0,0 +1,73 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include + +#include "oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp" +#include "oneapi/dal/backend/interop/common.hpp" +#include "oneapi/dal/backend/interop/error_converter.hpp" +#include "oneapi/dal/backend/interop/table_conversion.hpp" +#include "oneapi/dal/exceptions.hpp" + +#include "oneapi/dal/table/row_accessor.hpp" + +namespace oneapi::dal::finiteness_checker::backend { + +using dal::backend::context_cpu; +using input_t = compute_input; +using result_t = compute_result; +using descriptor_t = detail::descriptor_base; + +namespace interop = dal::backend::interop; + +template +static result_t call_daal_kernel(const context_cpu& ctx, + const descriptor_t& desc, + const table& data) { + const auto daal_data = interop::convert_to_daal_table(data); + + return result_t().set_finite( + daal::data_management::internal::allValuesAreFinite(*daal_data.get(), + desc.get_allow_NaN())); +} + +template +static result_t compute(const context_cpu& ctx, const descriptor_t& desc, const input_t& input) { + return call_daal_kernel(ctx, desc, input.get_data()); +} + +template +struct compute_kernel_cpu { + result_t operator()(const context_cpu& ctx, + const descriptor_t& desc, + const input_t& input) const { + return compute(ctx, desc, input); + } + +#ifdef ONEDAL_DATA_PARALLEL + void operator()(const context_cpu& ctx, + const descriptor_t& desc, + const table& data, + bool& res) const { + throw unimplemented(dal::detail::error_messages::method_not_implemented()); + } 
+#endif +}; + +template struct compute_kernel_cpu; +template struct compute_kernel_cpu; + +} // namespace oneapi::dal::finiteness_checker::backend diff --git a/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel.hpp b/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel.hpp new file mode 100644 index 00000000000..51fc6d4a35e --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel.hpp @@ -0,0 +1,39 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/algo/finiteness_checker/compute_types.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/table/homogen.hpp" + +namespace oneapi::dal::finiteness_checker::backend { + +template +struct compute_kernel_gpu { + compute_result operator()(const dal::backend::context_gpu& ctx, + const detail::descriptor_base& params, + const compute_input& input) const; + +#ifdef ONEDAL_DATA_PARALLEL + void operator()(const dal::backend::context_gpu& ctx, + const detail::descriptor_base& params, + const table& data, + bool& res); +#endif +}; + +} // namespace oneapi::dal::finiteness_checker::backend diff --git a/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel_dense_dpc.cpp b/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel_dense_dpc.cpp new file mode 100644 index 00000000000..09389a2c122 --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel_dense_dpc.cpp @@ -0,0 +1,81 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel.hpp" +#include "oneapi/dal/backend/primitives/reduction.hpp" +#include "oneapi/dal/backend/primitives/utils.hpp" +#include "oneapi/dal/detail/profiler.hpp" + +namespace oneapi::dal::finiteness_checker::backend { + +using dal::backend::context_gpu; +using input_t = compute_input; +using result_t = compute_result; +using descriptor_t = detail::descriptor_base; + +namespace pr = dal::backend::primitives; + +template +bool compute_finiteness(sycl::queue& queue, + const pr::ndview& data_1d, + bool allowNaN, + const dal::backend::event_vector& deps = {}) { + Float out; + + if (allowNaN) { + ONEDAL_PROFILER_TASK(finiteness_checker.reduce, queue); + out = pr::reduce_1d(queue, data_1d, pr::logical_or{}, pr::isinf{}, deps); + } + else { + ONEDAL_PROFILER_TASK(finiteness_checker.reduce, queue); + out = pr::reduce_1d(queue, data_1d, pr::logical_or{}, pr::isinfornan{}, deps); + } + // invert out to match daal implementation (assert result is finite) + return !static_cast(out); +} + +template +static result_t compute(const context_gpu& ctx, const descriptor_t& desc, const input_t& input) { + auto& queue = ctx.get_queue(); + const auto data = input.get_data(); + const auto data_1d = pr::table2ndarray_1d(queue, data, sycl::usm::alloc::device); + return result_t{}.set_finite(compute_finiteness(queue, data_1d, desc.get_allow_NaN())); +} + +template +struct compute_kernel_gpu { + result_t operator()(const context_gpu& ctx, + const descriptor_t& desc, + const input_t& input) const { + return compute(ctx, desc, input); + } + +#ifdef ONEDAL_DATA_PARALLEL + void operator()(const context_gpu& ctx, + const descriptor_t& desc, + const table& data, + bool& res) { + auto& queue = ctx.get_queue(); + const auto data_1d = pr::table2ndarray_1d(queue, data, sycl::usm::alloc::device); + res = compute_finiteness(queue, data_1d, 
desc.get_allow_NaN()); + } +#endif +}; + +template struct compute_kernel_gpu; +template struct compute_kernel_gpu; + +} // namespace oneapi::dal::finiteness_checker::backend diff --git a/cpp/oneapi/dal/algo/finiteness_checker/common.cpp b/cpp/oneapi/dal/algo/finiteness_checker/common.cpp new file mode 100644 index 00000000000..29b8ea6aa21 --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/common.cpp @@ -0,0 +1,45 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/finiteness_checker/common.hpp" +#include "oneapi/dal/exceptions.hpp" + +namespace oneapi::dal::finiteness_checker::detail { +namespace v1 { + +template +class descriptor_impl : public base { +public: + bool allowNaN = false; +}; + +template +descriptor_base::descriptor_base() : impl_(new descriptor_impl{}) {} + +template +bool descriptor_base::get_allow_NaN() const { + return impl_->allowNaN; +} + +template +void descriptor_base::set_allow_NaN(bool value) { + impl_->allowNaN = value; +} + +template class ONEDAL_EXPORT descriptor_base; + +} // namespace v1 +} // namespace oneapi::dal::finiteness_checker::detail diff --git a/cpp/oneapi/dal/algo/finiteness_checker/common.hpp b/cpp/oneapi/dal/algo/finiteness_checker/common.hpp new file mode 100644 index 00000000000..e9e5b36930c --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/common.hpp @@ -0,0 +1,140 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/detail/common.hpp" +#include "oneapi/dal/table/common.hpp" + +namespace oneapi::dal::finiteness_checker { + +namespace task { +namespace v1 { + +/// Tag-type that parameterizes entities that are used to compute statistics, distance, and so on. +struct compute {}; +/// Alias tag-type for the dense method. +using by_default = compute; +} // namespace v1 + +using v1::compute; +using v1::by_default; + +} // namespace task + +namespace method { +namespace v1 { +struct dense {}; +using by_default = dense; +} // namespace v1 + +using v1::dense; +using v1::by_default; + +} // namespace method + +namespace detail { +namespace v1 { +struct descriptor_tag {}; + +template +class descriptor_impl; + +template +constexpr bool is_valid_float_v = dal::detail::is_one_of_v; + +template +constexpr bool is_valid_method_v = dal::detail::is_one_of_v; + +template +constexpr bool is_valid_task_v = dal::detail::is_one_of_v; + +template +class descriptor_base : public base { + static_assert(is_valid_task_v); + +public: + using tag_t = descriptor_tag; + using float_t = float; + using method_t = method::by_default; + using task_t = Task; + + descriptor_base(); + + bool get_allow_NaN() const; + +protected: + void set_allow_NaN(bool); + +private: + dal::detail::pimpl> impl_; +}; + +} // namespace v1 + +using v1::descriptor_tag; +using v1::descriptor_impl; +using v1::descriptor_base; + +using v1::is_valid_float_v; +using v1::is_valid_method_v; +using v1::is_valid_task_v; + +} // namespace detail + +namespace v1 { + +/// @tparam Float The floating-point type that the algorithm uses for +/// intermediate computations. Can be :expr:`float` or +/// :expr:`double`. +/// @tparam Method Tag-type that specifies an implementation of algorithm. Can +/// be :expr:`method::dense`. +/// @tparam Task Tag-type that specifies the type of the problem to solve. 
Can +/// be :expr:`task::compute`. +template +class descriptor : public detail::descriptor_base { + static_assert(detail::is_valid_float_v); + static_assert(detail::is_valid_method_v); + static_assert(detail::is_valid_task_v); + + using base_t = detail::descriptor_base; + +public: + using float_t = Float; + using method_t = Method; + using task_t = Task; + + /// Creates a new instance of the class with the default property values. + descriptor() = default; + + /// @remark default = False + bool get_allow_NaN() const { + return base_t::get_allow_NaN(); + } + + auto& set_allow_NaN(bool value) { + base_t::set_allow_NaN(value); + return *this; + } +}; + +} // namespace v1 + +using v1::descriptor; + +} // namespace oneapi::dal::finiteness_checker diff --git a/cpp/oneapi/dal/algo/finiteness_checker/compute.hpp b/cpp/oneapi/dal/algo/finiteness_checker/compute.hpp new file mode 100644 index 00000000000..92252303610 --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/compute.hpp @@ -0,0 +1,31 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/algo/finiteness_checker/compute_types.hpp" +#include "oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp" +#include "oneapi/dal/compute.hpp" + +namespace oneapi::dal::detail { +namespace v1 { + +template +struct compute_ops + : dal::finiteness_checker::detail::compute_ops {}; + +} // namespace v1 +} // namespace oneapi::dal::detail diff --git a/cpp/oneapi/dal/algo/finiteness_checker/compute_types.cpp b/cpp/oneapi/dal/algo/finiteness_checker/compute_types.cpp new file mode 100644 index 00000000000..65c29d6630a --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/compute_types.cpp @@ -0,0 +1,71 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/finiteness_checker/compute_types.hpp" +#include "oneapi/dal/detail/common.hpp" + +namespace oneapi::dal::finiteness_checker { + +template +class detail::v1::compute_input_impl : public base { +public: + compute_input_impl(const table& data) : data(data) {} + table data; +}; + +template +class detail::v1::compute_result_impl : public base { +public: + bool finite; +}; + +using detail::v1::compute_input_impl; +using detail::v1::compute_result_impl; + +namespace v1 { + +template +compute_input::compute_input(const table& data) : impl_(new compute_input_impl(data)) {} + +template +const table& compute_input::get_data() const { + return impl_->data; +} + +template +void compute_input::set_data_impl(const table& value) { + impl_->data = value; +} + +template class ONEDAL_EXPORT compute_input; + +template +compute_result::compute_result() : impl_(new compute_result_impl{}) {} + +template +bool compute_result::get_finite() const { + return impl_->finite; +} + +template +void compute_result::set_finite_impl(const bool& value) { + impl_->finite = value; +} + +template class ONEDAL_EXPORT compute_result; + +} // namespace v1 +} // namespace oneapi::dal::finiteness_checker diff --git a/cpp/oneapi/dal/algo/finiteness_checker/compute_types.hpp b/cpp/oneapi/dal/algo/finiteness_checker/compute_types.hpp new file mode 100644 index 00000000000..30091328498 --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/compute_types.hpp @@ -0,0 +1,98 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/algo/finiteness_checker/common.hpp" + +namespace oneapi::dal::finiteness_checker { + +namespace detail { +namespace v1 { +template +class compute_input_impl; + +template +class compute_result_impl; +} // namespace v1 + +using v1::compute_input_impl; +using v1::compute_result_impl; + +} // namespace detail + +namespace v1 { + +/// @tparam Task Tag-type that specifies the type of the problem to solve. Can +/// be :expr:`task::compute`. +template +class compute_input : public base { + static_assert(detail::is_valid_task_v); + +public: + using task_t = Task; + + /// Creates a new instance of the class with the given :literal:`data`. + compute_input(const table& data); + + /// @remark default = table{} + const table& get_data() const; + + auto& set_data(const table& data) { + set_data_impl(data); + return *this; + } + +protected: + void set_data_impl(const table& data); + +private: + dal::detail::pimpl> impl_; +}; + +/// @tparam Task Tag-type that specifies the type of the problem to solve. Can +/// be :expr:`task::compute`. +template +class compute_result : public base { + static_assert(detail::is_valid_task_v); + +public: + using task_t = Task; + + /// Creates a new instance of the class with the default property values. + compute_result(); + + /// A boolean with the result finiteness. 
+ bool get_finite() const; + + auto& set_finite(const bool& value) { + set_finite_impl(value); + return *this; + } + +protected: + void set_finite_impl(const bool&); + +private: + dal::detail::pimpl> impl_; +}; + +} // namespace v1 + +using v1::compute_input; +using v1::compute_result; + +} // namespace oneapi::dal::finiteness_checker diff --git a/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.cpp b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.cpp new file mode 100644 index 00000000000..03822b8ca9e --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.cpp @@ -0,0 +1,44 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp" +#include "oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" + +namespace oneapi::dal::finiteness_checker::detail { +namespace v1 { + +using dal::detail::host_policy; + +template +struct compute_ops_dispatcher { + compute_result operator()(const host_policy& ctx, + const descriptor_base& desc, + const compute_input& input) const { + using kernel_dispatcher_t = dal::backend::kernel_dispatcher< // + KERNEL_SINGLE_NODE_CPU(backend::compute_kernel_cpu)>; + return kernel_dispatcher_t()(ctx, desc, input); + } +}; + +#define INSTANTIATE(F, M, T) \ + template struct ONEDAL_EXPORT compute_ops_dispatcher; + +INSTANTIATE(float, method::dense, task::compute) +INSTANTIATE(double, method::dense, task::compute) + +} // namespace v1 +} // namespace oneapi::dal::finiteness_checker::detail diff --git a/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp new file mode 100644 index 00000000000..a974d9dc57f --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp @@ -0,0 +1,77 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/algo/finiteness_checker/compute_types.hpp" +#include "oneapi/dal/detail/error_messages.hpp" +#include "oneapi/dal/table/homogen.hpp" + +namespace oneapi::dal::finiteness_checker::detail { +namespace v1 { + +template +struct compute_ops_dispatcher { + compute_result operator()(const Context&, + const descriptor_base& desc, + const compute_input&) const; + +#ifdef ONEDAL_DATA_PARALLEL + void operator()(const Context&, + const descriptor_base& desc, + const table& data, + const bool&); +#endif +}; + +template +struct compute_ops { + using float_t = typename Descriptor::float_t; + using method_t = typename Descriptor::method_t; + using task_t = typename Descriptor::task_t; + using input_t = compute_input; + using result_t = compute_result; + using descriptor_base_t = descriptor_base; + + void check_preconditions(const Descriptor& params, const input_t& input) const { + using msg = dal::detail::error_messages; + + if (!input.get_data().has_data()) { + throw domain_error(msg::input_data_is_empty()); + } + } + + template + auto operator()(const Context& ctx, const Descriptor& desc, const input_t& input) const { + check_preconditions(desc, input); + const auto result = + compute_ops_dispatcher()(ctx, desc, input); + return result; + } + +#ifdef ONEDAL_DATA_PARALLEL + template + void operator()(const Context& ctx, const Descriptor& desc, const table& data, bool& res) { + compute_ops_dispatcher()(ctx, desc, data, res); + } +#endif +}; + +} // namespace v1 + +using v1::compute_ops; + +} // namespace oneapi::dal::finiteness_checker::detail diff --git a/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops_dpc.cpp b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops_dpc.cpp new file mode 100644 index 00000000000..6e2b210043d --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops_dpc.cpp @@ -0,0 +1,58 @@ 
+/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp" +#include "oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel.hpp" +#include "oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" + +namespace oneapi::dal::finiteness_checker::detail { +namespace v1 { + +using dal::detail::data_parallel_policy; + +template +struct compute_ops_dispatcher { + compute_result operator()(const data_parallel_policy& ctx, + const descriptor_base& params, + const compute_input& input) const { + using kernel_dispatcher_t = dal::backend::kernel_dispatcher< + KERNEL_SINGLE_NODE_CPU(backend::compute_kernel_cpu), + KERNEL_SINGLE_NODE_GPU(backend::compute_kernel_gpu)>; + return kernel_dispatcher_t{}(ctx, params, input); + } + +#ifdef ONEDAL_DATA_PARALLEL + void operator()(const data_parallel_policy& ctx, + const descriptor_base& params, + const table& data, + bool& res) { + using kernel_dispatcher_t = dal::backend::kernel_dispatcher< + KERNEL_SINGLE_NODE_CPU(backend::compute_kernel_cpu), + KERNEL_SINGLE_NODE_GPU(backend::compute_kernel_gpu)>; + kernel_dispatcher_t{}(ctx, params, data, res); + } +#endif +}; + +#define INSTANTIATE(F, M, T) \ + template 
struct ONEDAL_EXPORT compute_ops_dispatcher; + +INSTANTIATE(float, method::dense, task::compute) +INSTANTIATE(double, method::dense, task::compute) + +} // namespace v1 +} // namespace oneapi::dal::finiteness_checker::detail diff --git a/cpp/oneapi/dal/algo/finiteness_checker/test/batch.cpp b/cpp/oneapi/dal/algo/finiteness_checker/test/batch.cpp new file mode 100644 index 00000000000..e99b65cda38 --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/test/batch.cpp @@ -0,0 +1,104 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include +#include + +#include "oneapi/dal/algo/finiteness_checker/compute.hpp" + +#include "oneapi/dal/test/engine/fixtures.hpp" +#include "oneapi/dal/test/engine/math.hpp" + +namespace oneapi::dal::finiteness_checker::test { + +namespace te = dal::test::engine; + +template +class finite_checker_batch_test : public te::float_algo_fixture> { +public: + using Float = std::tuple_element_t<0, TestType>; + using Method = std::tuple_element_t<1, TestType>; + + void check_finiteness(const te::dataframe& x_data, + bool allowNaN, + double value, + const te::table_id& x_data_table_id) { + const table x = x_data.get_table(this->get_policy(), x_data_table_id); + + INFO("create descriptor"); + const auto finiteness_desc = + finiteness_checker::descriptor{}.set_allow_NaN(allowNaN); + + INFO("run compute"); + const bool compute_result = this->compute(finiteness_desc, x).get_finite(); + if (compute_result == (std::isinf(value) || (std::isnan(value) && !allowNaN))) { + CAPTURE(compute_result, value, allowNaN); + FAIL(); + } + SUCCEED(); + } +}; + +using finiteness_types = COMBINE_TYPES((float, double), (finiteness_checker::method::dense)); + +TEMPLATE_LIST_TEST_M(finite_checker_batch_test, + "finiteness checker typical", + "[finiteness_checker][integration][batch]", + finiteness_types) { + SKIP_IF(this->not_float64_friendly()); + + // Initialize values + const te::dataframe x_data = + GENERATE_DATAFRAME(te::dataframe_builder{ 50, 50 }.fill_normal(0, 1, 7777), + te::dataframe_builder{ 100, 50 }.fill_normal(0, 1, 7777), + te::dataframe_builder{ 250, 50 }.fill_normal(0, 1, 7777), + te::dataframe_builder{ 1100, 50 }.fill_normal(0, 1, 7777)); + auto x_data_mutable = x_data.get_array().get_mutable_data(); + const double value = GENERATE(0.0, + -std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN()); + const bool allowNaN = GENERATE(0, 1); + 
x_data_mutable[45] = value; + + // Homogen floating point type is the same as algorithm's floating point type + const auto x_data_table_id = this->get_homogen_table_id(); + + this->check_finiteness(x_data, allowNaN, value, x_data_table_id); +} + +TEMPLATE_LIST_TEST_M(finite_checker_batch_test, + "finiteness_checker compute one element matrix", + "[finiteness_checker][integration][batch]", + finiteness_types) { + SKIP_IF(this->not_float64_friendly()); + + // Initialize values to doubles + const double value = GENERATE(0.0, + -std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN()); + const bool allowNaN = GENERATE(0, 1); + + const te::dataframe x_data = GENERATE_DATAFRAME(te::dataframe_builder{ 1, 1 }.fill(value)); + + // Homogen floating point type is the same as algorithm's floating point type + const auto x_data_table_id = this->get_homogen_table_id(); + + this->check_finiteness(x_data, allowNaN, value, x_data_table_id); +} + +} // namespace oneapi::dal::finiteness_checker::test diff --git a/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc.hpp b/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc.hpp index 9a31ef369ae..1ba10bf8737 100644 --- a/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc.hpp +++ b/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc.hpp @@ -286,12 +286,12 @@ class knn_callback { const auto& [first, last] = bnds; ONEDAL_ASSERT(last > first); - auto& queue = this->queue_; bk::event_vector ndeps{ deps.cbegin(), deps.cend() }; - auto sq_event = copy_with_sqrt(queue, inp_dts, inp_dts, deps); - if (this->compute_sqrt_) - ndeps.push_back(sq_event); + if (this->compute_sqrt_) { + auto sqrt_event = copy_with_sqrt(this->queue_, inp_dts, inp_dts, deps); + ndeps.push_back(sqrt_event); + } auto out_rps = this->responses_.get_slice(first, last); ONEDAL_ASSERT((last - first) == out_rps.get_count()); @@ -310,12 +310,12 @@ class knn_callback { const auto& [first, last] = bnds; 
ONEDAL_ASSERT(last > first); - auto& queue = this->queue_; bk::event_vector ndeps{ deps.cbegin(), deps.cend() }; - auto sq_event = copy_with_sqrt(queue, inp_dts, inp_dts, deps); - if (this->compute_sqrt_) - ndeps.push_back(sq_event); + if (this->compute_sqrt_) { + auto sqrt_event = copy_with_sqrt(this->queue_, inp_dts, inp_dts, deps); + ndeps.push_back(sqrt_event); + } auto out_rps = this->responses_.get_slice(first, last); ONEDAL_ASSERT((last - first) == out_rps.get_count()); diff --git a/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc_distr.hpp b/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc_distr.hpp index e67d555616a..daf3caa9187 100644 --- a/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc_distr.hpp +++ b/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc_distr.hpp @@ -347,16 +347,10 @@ class knn_callback_distr { const auto& [first, last] = bnds; ONEDAL_ASSERT(last > first); - auto& queue = this->queue_; - - bk::event_vector ndeps{ deps.cbegin(), deps.cend() }; - auto sq_event = copy_with_sqrt(queue, inp_dts, inp_dts, deps); - if (this->compute_sqrt_) - ndeps.push_back(sq_event); auto out_rps = this->responses_.get_slice(first, last); ONEDAL_ASSERT((last - first) == out_rps.get_count()); - return (*(this->distance_voting_))(tmp_rps, inp_dts, out_rps, ndeps); + return (*(this->distance_voting_))(tmp_rps, inp_dts, out_rps, deps); } template > @@ -371,16 +365,10 @@ class knn_callback_distr { const auto& [first, last] = bnds; ONEDAL_ASSERT(last > first); - auto& queue = this->queue_; - - bk::event_vector ndeps{ deps.cbegin(), deps.cend() }; - auto sq_event = copy_with_sqrt(queue, inp_dts, inp_dts, deps); - if (this->compute_sqrt_) - ndeps.push_back(sq_event); auto out_rps = this->responses_.get_slice(first, last); ONEDAL_ASSERT((last - first) == out_rps.get_count()); - return (*(this->distance_regression_))(tmp_rps, inp_dts, out_rps, ndeps); + return (*(this->distance_regression_))(tmp_rps, inp_dts, out_rps, deps); } 
sycl::event output_responses(const std::pair& bnds, diff --git a/cpp/oneapi/dal/algo/linear_regression/BUILD b/cpp/oneapi/dal/algo/linear_regression/BUILD index e6ab83d5f19..58a3ebf1ea4 100644 --- a/cpp/oneapi/dal/algo/linear_regression/BUILD +++ b/cpp/oneapi/dal/algo/linear_regression/BUILD @@ -10,6 +10,7 @@ dal_module( dal_deps = [ "@onedal//cpp/oneapi/dal:core", "@onedal//cpp/oneapi/dal/algo/linear_regression/backend:model_impl", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) @@ -17,6 +18,7 @@ dal_module( name = "parameters", dal_deps = [ "@onedal//cpp/oneapi/dal/algo/linear_regression/parameters", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/BUILD b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/BUILD index 55adfee47a9..7bd3d6e679d 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/BUILD +++ b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/BUILD @@ -15,5 +15,6 @@ dal_module( "@onedal//cpp/daal:core", "@onedal//cpp/daal/src/algorithms/linear_model:kernel", "@onedal//cpp/daal/src/algorithms/linear_regression:kernel", + "@onedal//cpp/daal/src/algorithms/ridge_regression:kernel" ], ) diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp index 5540641d8fd..c29f9d8cdfe 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp @@ -16,6 +16,7 @@ #include #include +#include #include "oneapi/dal/backend/interop/common.hpp" #include "oneapi/dal/backend/interop/error_converter.hpp" @@ -30,28 +31,32 @@ namespace oneapi::dal::linear_regression::backend { -using daal::services::Status; using dal::backend::context_cpu; -namespace be = dal::backend; -namespace pr = be::primitives; +namespace bk = dal::backend; +namespace pr = 
bk::primitives; namespace interop = dal::backend::interop; namespace daal_lr = daal::algorithms::linear_regression; +namespace daal_rr = daal::algorithms::ridge_regression; -using daal_hyperparameters_t = daal_lr::internal::Hyperparameter; +using daal_lr_hyperparameters_t = daal_lr::internal::Hyperparameter; -constexpr auto daal_method = daal_lr::training::normEqDense; +constexpr auto daal_lr_method = daal_lr::training::normEqDense; +constexpr auto daal_rr_method = daal_rr::training::normEqDense; template -using online_kernel_t = daal_lr::training::internal::OnlineKernel; +using online_lr_kernel_t = daal_lr::training::internal::OnlineKernel; + +template +using online_rr_kernel_t = daal_rr::training::internal::OnlineKernel; template -static daal_hyperparameters_t convert_parameters(const detail::train_parameters& params) { +static daal_lr_hyperparameters_t convert_parameters(const detail::train_parameters& params) { using daal_lr::internal::HyperparameterId; const std::int64_t block = params.get_cpu_macro_block(); - daal_hyperparameters_t daal_hyperparameter; + daal_lr_hyperparameters_t daal_hyperparameter; auto status = daal_hyperparameter.set(HyperparameterId::denseUpdateStepBlockSize, block); interop::status_to_exception(status); @@ -59,45 +64,56 @@ static daal_hyperparameters_t convert_parameters(const detail::train_parameters< } template -static train_result call_daal_kernel(const context_cpu& ctx, - const detail::descriptor_base& desc, - const detail::train_parameters& params, - const partial_train_result& input) { +static train_result call_daal_kernel_finalize(const context_cpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const partial_train_result& input) { using dal::detail::check_mul_overflow; using model_t = model; using model_impl_t = detail::model_impl; - const bool beta = desc.get_compute_intercept(); + const bool compute_intercept = desc.get_compute_intercept(); const auto response_count = 
input.get_partial_xty().get_row_count(); const auto ext_feature_count = input.get_partial_xty().get_column_count(); - const auto feature_count = ext_feature_count - beta; + const auto feature_count = ext_feature_count - compute_intercept; const auto betas_size = check_mul_overflow(response_count, feature_count + 1); auto betas_arr = array::zeros(betas_size); - const daal_hyperparameters_t& hp = convert_parameters(params); - auto xtx_daal_table = interop::convert_to_daal_table(input.get_partial_xtx()); auto xty_daal_table = interop::convert_to_daal_table(input.get_partial_xty()); auto betas_daal_table = interop::convert_to_daal_homogen_table(betas_arr, response_count, feature_count + 1); - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return online_kernel_t().finalizeCompute(*xtx_daal_table, - *xty_daal_table, - *xtx_daal_table, - *xty_daal_table, - *betas_daal_table, - beta, - &hp); - }); - - interop::status_to_exception(status); + double alpha = desc.get_alpha(); + if (alpha != 0.0) { + auto ridge_matrix_array = array::full(1, static_cast(alpha)); + auto ridge_matrix = interop::convert_to_daal_homogen_table(ridge_matrix_array, 1, 1); + + interop::status_to_exception( + interop::call_daal_kernel_finalize_compute(ctx, + *xtx_daal_table, + *xty_daal_table, + *xtx_daal_table, + *xty_daal_table, + *betas_daal_table, + compute_intercept, + *ridge_matrix)); + } + else { + const daal_lr_hyperparameters_t& hp = convert_parameters(params); + interop::status_to_exception( + interop::call_daal_kernel_finalize_compute(ctx, + *xtx_daal_table, + *xty_daal_table, + *xtx_daal_table, + *xty_daal_table, + *betas_daal_table, + compute_intercept, + &hp)); } auto betas_table = homogen_table::wrap(betas_arr, response_count, feature_count + 1); @@ -139,11 +155,11 @@ static train_result call_daal_kernel(const context_cpu& ctx, } template -static train_result train(const context_cpu& ctx, - const 
detail::descriptor_base& desc, - const detail::train_parameters& params, - const partial_train_result& input) { - return call_daal_kernel(ctx, desc, params, input); +static train_result finalize_train(const context_cpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const partial_train_result& input) { + return call_daal_kernel_finalize(ctx, desc, params, input); } template @@ -152,7 +168,7 @@ struct finalize_train_kernel_cpu { const detail::descriptor_base& desc, const detail::train_parameters& params, const partial_train_result& input) const { - return train(ctx, desc, params, input); + return finalize_train(ctx, desc, params, input); } }; diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/partial_train_kernel_norm_eq.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/partial_train_kernel_norm_eq.cpp index 7cac1aa47b7..d5d9f61003c 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/partial_train_kernel_norm_eq.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/partial_train_kernel_norm_eq.cpp @@ -62,14 +62,14 @@ static partial_train_result call_daal_kernel(const context_cpu& ctx, const partial_train_input& input) { using dal::detail::check_mul_overflow; - const bool beta = desc.get_compute_intercept(); + const bool compute_intercept = desc.get_compute_intercept(); const auto feature_count = input.get_data().get_column_count(); const auto response_count = input.get_responses().get_column_count(); const daal_hyperparameters_t& hp = convert_parameters(params); - const auto ext_feature_count = feature_count + beta; + const auto ext_feature_count = feature_count + compute_intercept; const bool has_xtx_data = input.get_prev().get_partial_xtx().has_data(); if (has_xtx_data) { @@ -85,7 +85,7 @@ static partial_train_result call_daal_kernel(const context_cpu& ctx, *y_daal_table, *daal_xtx, *daal_xty, - beta, + compute_intercept, &hp); interop::status_to_exception(status); @@ -117,7 +117,7 @@ 
static partial_train_result call_daal_kernel(const context_cpu& ctx, *y_daal_table, *xtx_daal_table, *xty_daal_table, - beta, + compute_intercept, &hp); interop::status_to_exception(status); diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/train_kernel_norm_eq.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/train_kernel_norm_eq.cpp index dbea53a33f6..0e6e1f8cd10 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/train_kernel_norm_eq.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/train_kernel_norm_eq.cpp @@ -16,6 +16,7 @@ #include #include +#include #include "oneapi/dal/backend/interop/common.hpp" #include "oneapi/dal/backend/interop/error_converter.hpp" @@ -39,21 +40,26 @@ namespace be = dal::backend; namespace pr = be::primitives; namespace interop = dal::backend::interop; namespace daal_lr = daal::algorithms::linear_regression; +namespace daal_rr = daal::algorithms::ridge_regression; -using daal_hyperparameters_t = daal_lr::internal::Hyperparameter; +using daal_lr_hyperparameters_t = daal_lr::internal::Hyperparameter; -constexpr auto daal_method = daal_lr::training::normEqDense; +constexpr auto daal_lr_method = daal_lr::training::normEqDense; +constexpr auto daal_rr_method = daal_rr::training::normEqDense; template -using online_kernel_t = daal_lr::training::internal::OnlineKernel; +using batch_lr_kernel_t = daal_lr::training::internal::BatchKernel; + +template +using batch_rr_kernel_t = daal_rr::training::internal::BatchKernel; template -static daal_hyperparameters_t convert_parameters(const detail::train_parameters& params) { +static daal_lr_hyperparameters_t convert_parameters(const detail::train_parameters& params) { using daal_lr::internal::HyperparameterId; const std::int64_t block = params.get_cpu_macro_block(); - daal_hyperparameters_t daal_hyperparameter; + daal_lr_hyperparameters_t daal_hyperparameter; auto status = daal_hyperparameter.set(HyperparameterId::denseUpdateStepBlockSize, block); 
interop::status_to_exception(status); @@ -97,33 +103,41 @@ static train_result call_daal_kernel(const context_cpu& ctx, auto x_daal_table = interop::convert_to_daal_table(data); auto y_daal_table = interop::convert_to_daal_table(resp); - const daal_hyperparameters_t& hp = convert_parameters(params); - - { - const auto status = interop::call_daal_kernel(ctx, - *x_daal_table, - *y_daal_table, - *xtx_daal_table, - *xty_daal_table, - intp, - &hp); - - interop::status_to_exception(status); + double alpha = desc.get_alpha(); + if (alpha != 0.0) { + auto ridge_matrix_array = array::full(1, static_cast(alpha)); + auto ridge_matrix = interop::convert_to_daal_homogen_table(ridge_matrix_array, 1, 1); + + { + const auto status = + interop::call_daal_kernel(ctx, + *x_daal_table, + *y_daal_table, + *xtx_daal_table, + *xty_daal_table, + *betas_daal_table, + intp, + *ridge_matrix); + + interop::status_to_exception(status); + } } - - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return online_kernel_t().finalizeCompute(*xtx_daal_table, - *xty_daal_table, - *xtx_daal_table, - *xty_daal_table, - *betas_daal_table, - intp, - &hp); - }); - - interop::status_to_exception(status); + else { + const daal_lr_hyperparameters_t& hp = convert_parameters(params); + + { + const auto status = + interop::call_daal_kernel(ctx, + *x_daal_table, + *y_daal_table, + *xtx_daal_table, + *xty_daal_table, + *betas_daal_table, + intp, + &hp); + + interop::status_to_exception(status); + } } auto betas_table = homogen_table::wrap(betas_arr, response_count, feature_count + 1); diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp index 733bb46b0b3..a74723e1b00 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp +++ 
b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp @@ -14,116 +14,32 @@ * limitations under the License. *******************************************************************************/ -#include "oneapi/dal/detail/common.hpp" -#include "oneapi/dal/backend/dispatcher.hpp" -#include "oneapi/dal/backend/primitives/ndarray.hpp" -#include "oneapi/dal/backend/primitives/lapack.hpp" -#include "oneapi/dal/backend/primitives/utils.hpp" - -#include "oneapi/dal/table/row_accessor.hpp" - -#include "oneapi/dal/algo/linear_regression/common.hpp" -#include "oneapi/dal/algo/linear_regression/train_types.hpp" -#include "oneapi/dal/algo/linear_regression/backend/model_impl.hpp" #include "oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel.hpp" -#include "oneapi/dal/algo/linear_regression/backend/gpu/update_kernel.hpp" - -namespace oneapi::dal::linear_regression::backend { - -using dal::backend::context_gpu; - -namespace be = dal::backend; -namespace pr = be::primitives; - -template -static train_result call_dal_kernel(const context_gpu& ctx, - const detail::descriptor_base& desc, - const detail::train_parameters& params, - const partial_train_result& input) { - using dal::detail::check_mul_overflow; - - using model_t = model; - using model_impl_t = detail::model_impl; - - auto& queue = ctx.get_queue(); - - const bool beta = desc.get_compute_intercept(); - - constexpr auto uplo = pr::mkl::uplo::upper; - constexpr auto alloc = sycl::usm::alloc::device; - - const auto response_count = input.get_partial_xty().get_row_count(); - const auto ext_feature_count = input.get_partial_xty().get_column_count(); - const auto feature_count = ext_feature_count - beta; - - const pr::ndshape<2> xtx_shape{ ext_feature_count, ext_feature_count }; - - const auto xtx_nd = - pr::table2ndarray(queue, input.get_partial_xtx(), sycl::usm::alloc::device); - const auto xty_nd = pr::table2ndarray(queue, - input.get_partial_xty(), - sycl::usm::alloc::device); 
+#include "oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl.hpp" - const pr::ndshape<2> betas_shape{ response_count, feature_count + 1 }; - - const auto betas_size = check_mul_overflow(response_count, feature_count + 1); - auto betas_arr = array::zeros(queue, betas_size, alloc); - - auto nxtx = pr::ndarray::empty(queue, xtx_shape, alloc); - auto nxty = pr::ndview::wrap_mutable(betas_arr, betas_shape); - auto solve_event = pr::solve_system(queue, beta, xtx_nd, xty_nd, nxtx, nxty, {}); - sycl::event::wait_and_throw({ solve_event }); - - auto betas = homogen_table::wrap(betas_arr, response_count, feature_count + 1); - - const auto model_impl = std::make_shared(betas); - const auto model = dal::detail::make_private(model_impl); - - const auto options = desc.get_result_options(); - auto result = train_result().set_model(model).set_result_options(options); - - if (options.test(result_options::intercept)) { - auto arr = array::zeros(queue, response_count, alloc); - auto dst = pr::ndview::wrap_mutable(arr, { 1l, response_count }); - const auto src = nxty.get_col_slice(0l, 1l).t(); - - pr::copy(queue, dst, src).wait_and_throw(); - - auto intercept = homogen_table::wrap(arr, 1l, response_count); - result.set_intercept(intercept); - } - - if (options.test(result_options::coefficients)) { - const auto size = check_mul_overflow(response_count, feature_count); - - auto arr = array::zeros(queue, size, alloc); - const auto src = nxty.get_col_slice(1l, feature_count + 1); - auto dst = pr::ndview::wrap_mutable(arr, { response_count, feature_count }); +#include "oneapi/dal/detail/common.hpp" - pr::copy(queue, dst, src).wait_and_throw(); +#include "oneapi/dal/backend/dispatcher.hpp" - auto coefficients = homogen_table::wrap(arr, response_count, feature_count); - result.set_coefficients(coefficients); - } +namespace oneapi::dal::linear_regression::backend { - return result; -} +namespace bk = dal::backend; template -static train_result train(const 
context_gpu& ctx, - const detail::descriptor_base& desc, - const detail::train_parameters& params, - const partial_train_result& input) { - return call_dal_kernel(ctx, desc, params, input); +static train_result finalize_train(const bk::context_gpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const partial_train_result& input) { + return finalize_train_kernel_norm_eq_impl(ctx)(desc, params, input); } template struct finalize_train_kernel_gpu { - train_result operator()(const context_gpu& ctx, + train_result operator()(const bk::context_gpu& ctx, const detail::descriptor_base& desc, const detail::train_parameters& params, const partial_train_result& input) const { - return train(ctx, desc, params, input); + return finalize_train(ctx, desc, params, input); } }; diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl.hpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl.hpp new file mode 100644 index 00000000000..6eeaf17c0da --- /dev/null +++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl.hpp @@ -0,0 +1,51 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel.hpp" +#include "oneapi/dal/backend/primitives/utils.hpp" + +#ifdef ONEDAL_DATA_PARALLEL + +namespace oneapi::dal::linear_regression::backend { + +namespace bk = dal::backend; + +template +class finalize_train_kernel_norm_eq_impl { + using comm_t = bk::communicator; + using input_t = partial_train_result; + using result_t = train_result; + using descriptor_t = detail::descriptor_base; + using train_parameters_t = detail::train_parameters; + +public: + finalize_train_kernel_norm_eq_impl(const bk::context_gpu& ctx) + : q(ctx.get_queue()), + comm_(ctx.get_communicator()) {} + result_t operator()(const descriptor_t& desc, + const train_parameters_t& params, + const input_t& input); + +private: + sycl::queue q; + comm_t comm_; +}; + +} // namespace oneapi::dal::linear_regression::backend + +#endif // ONEDAL_DATA_PARALLEL diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl_dpc.cpp new file mode 100644 index 00000000000..c470f45403e --- /dev/null +++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl_dpc.cpp @@ -0,0 +1,127 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl.hpp" +#include "oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp" +#include "oneapi/dal/algo/linear_regression/backend/model_impl.hpp" + +#include "oneapi/dal/backend/primitives/lapack.hpp" + +namespace oneapi::dal::linear_regression::backend { + +namespace be = dal::backend; +namespace pr = be::primitives; + +using be::context_gpu; + +template +train_result finalize_train_kernel_norm_eq_impl::operator()( + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const partial_train_result& input) { + using dal::detail::check_mul_overflow; + + using model_t = model; + using model_impl_t = detail::model_impl; + + const bool compute_intercept = desc.get_compute_intercept(); + + constexpr auto uplo = pr::mkl::uplo::upper; + constexpr auto alloc = sycl::usm::alloc::device; + + const auto response_count = input.get_partial_xty().get_row_count(); + const auto ext_feature_count = input.get_partial_xty().get_column_count(); + const auto feature_count = ext_feature_count - compute_intercept; + + const pr::ndshape<2> xtx_shape{ ext_feature_count, ext_feature_count }; + + const auto xtx_nd = + pr::table2ndarray(q, input.get_partial_xtx(), sycl::usm::alloc::device); + const auto xty_nd = pr::table2ndarray(q, + input.get_partial_xty(), + sycl::usm::alloc::device); + + const pr::ndshape<2> betas_shape{ response_count, feature_count + 1 }; + + const auto betas_size = check_mul_overflow(response_count, feature_count + 1); + auto betas_arr = array::zeros(q, betas_size, alloc); + + if (comm_.get_rank_count() > 1) { + { + ONEDAL_PROFILER_TASK(xtx_allreduce); + auto xtx_arr = + dal::array::wrap(q, xtx_nd.get_mutable_data(), xtx_nd.get_count()); + comm_.allreduce(xtx_arr).wait(); 
+ } + { + ONEDAL_PROFILER_TASK(xty_allreduce); + auto xty_arr = + dal::array::wrap(q, xty_nd.get_mutable_data(), xty_nd.get_count()); + comm_.allreduce(xty_arr).wait(); + } + } + + double alpha = desc.get_alpha(); + sycl::event ridge_event; + if (alpha != 0.0) { + ridge_event = add_ridge_penalty(q, xtx_nd, compute_intercept, alpha); + } + + auto nxtx = pr::ndarray::empty(q, xtx_shape, alloc); + auto nxty = pr::ndview::wrap_mutable(betas_arr, betas_shape); + auto solve_event = + pr::solve_system(q, compute_intercept, xtx_nd, xty_nd, nxtx, nxty, { ridge_event }); + sycl::event::wait_and_throw({ solve_event }); + + auto betas = homogen_table::wrap(betas_arr, response_count, feature_count + 1); + + const auto model_impl = std::make_shared(betas); + const auto model = dal::detail::make_private(model_impl); + + const auto options = desc.get_result_options(); + auto result = train_result().set_model(model).set_result_options(options); + + if (options.test(result_options::intercept)) { + auto arr = array::zeros(q, response_count, alloc); + auto dst = pr::ndview::wrap_mutable(arr, { 1l, response_count }); + const auto src = nxty.get_col_slice(0l, 1l).t(); + + pr::copy(q, dst, src).wait_and_throw(); + + auto intercept = homogen_table::wrap(arr, 1l, response_count); + result.set_intercept(intercept); + } + + if (options.test(result_options::coefficients)) { + const auto size = check_mul_overflow(response_count, feature_count); + + auto arr = array::zeros(q, size, alloc); + const auto src = nxty.get_col_slice(1l, feature_count + 1); + auto dst = pr::ndview::wrap_mutable(arr, { response_count, feature_count }); + + pr::copy(q, dst, src).wait_and_throw(); + + auto coefficients = homogen_table::wrap(arr, response_count, feature_count); + result.set_coefficients(coefficients); + } + + return result; +} + +template class finalize_train_kernel_norm_eq_impl; +template class finalize_train_kernel_norm_eq_impl; + +} // namespace oneapi::dal::linear_regression::backend diff --git 
a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp new file mode 100644 index 00000000000..723fde68fb9 --- /dev/null +++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp @@ -0,0 +1,66 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/detail/profiler.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" + +namespace oneapi::dal::linear_regression::backend { + +#ifdef ONEDAL_DATA_PARALLEL + +using alloc = sycl::usm::alloc; +namespace bk = dal::backend; +namespace pr = dal::backend::primitives; + +/// Adds ridge penalty to the diagonal elements of the xtx matrix + +/// +/// @tparam Float Floating-point type used to perform computations +/// +/// @param[in] q The SYCL queue +/// @param[in] xtx The input matrix to which the ridge penalty is added +/// @param[in] compute_intercept Flag indicating whether the intercept term is used in the matrix, extending it with extra dimension if true +/// @param[in] alpha The regularization parameter +/// @param[in] deps Events indicating the availability of the `xtx` for reading or writing +/// +/// @return A SYCL event indicating the availability of the matrix for reading and writing +template +sycl::event 
add_ridge_penalty(sycl::queue& q, + const pr::ndarray& xtx, + bool compute_intercept, + Float alpha, + const bk::event_vector& deps = {}) { + ONEDAL_ASSERT(xtx.has_mutable_data()); + ONEDAL_ASSERT(bk::is_known_usm(q, xtx.get_mutable_data())); + ONEDAL_ASSERT(xtx.get_dimension(0) == xtx.get_dimension(1)); + + Float* xtx_ptr = xtx.get_mutable_data(); + std::int64_t feature_count = xtx.get_dimension(0); + std::int64_t original_feature_count = feature_count - compute_intercept; + + return q.submit([&](sycl::handler& cgh) { + const auto range = bk::make_range_1d(original_feature_count); + cgh.depends_on(deps); + std::int64_t step = feature_count + 1; + cgh.parallel_for(range, [=](sycl::id<1> idx) { + xtx_ptr[idx * step] += alpha; + }); + }); +} + +} // namespace oneapi::dal::linear_regression::backend + +#endif // ONEDAL_DATA_PARALLEL diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/partial_train_kernel_norm_eq_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/partial_train_kernel_norm_eq_dpc.cpp index dff0548afe4..a9aa7c373e4 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/partial_train_kernel_norm_eq_dpc.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/partial_train_kernel_norm_eq_dpc.cpp @@ -45,11 +45,11 @@ static partial_train_result call_dal_kernel(const context_gpu& ctx, constexpr auto alloc = sycl::usm::alloc::device; - const bool beta = desc.get_compute_intercept(); + const bool compute_intercept = desc.get_compute_intercept(); const auto feature_count = input.get_data().get_column_count(); const auto response_count = input.get_responses().get_column_count(); - const std::int64_t ext_feature_count = feature_count + beta; + const std::int64_t ext_feature_count = feature_count + compute_intercept; const pr::ndshape<2> xty_shape{ response_count, ext_feature_count }; const pr::ndshape<2> xtx_shape{ ext_feature_count, ext_feature_count }; @@ -74,8 +74,10 @@ static partial_train_result call_dal_kernel(const 
context_gpu& ctx, input_.get_partial_xty(), sycl::usm::alloc::device); auto copy_xty_event = copy(queue, xty, xty_nd, { fill_xty_event }); - auto last_xtx_event = update_xtx(queue, beta, data_nd, xtx, { copy_xtx_event }); - auto last_xty_event = update_xty(queue, beta, data_nd, res_nd, xty, { copy_xty_event }); + auto last_xtx_event = + update_xtx(queue, compute_intercept, data_nd, xtx, { copy_xtx_event }); + auto last_xty_event = + update_xty(queue, compute_intercept, data_nd, res_nd, xty, { copy_xty_event }); result.set_partial_xtx(homogen_table::wrap(xtx.flatten(queue, { last_xtx_event }), ext_feature_count, @@ -97,8 +99,10 @@ static partial_train_result call_dal_kernel(const context_gpu& ctx, auto [xtx, fill_xtx_event] = pr::ndarray::zeros(queue, xtx_shape, alloc); - auto last_xty_event = update_xty(queue, beta, data_nd, res_nd, xty, { fill_xty_event }); - auto last_xtx_event = update_xtx(queue, beta, data_nd, xtx, { fill_xtx_event }); + auto last_xty_event = + update_xty(queue, compute_intercept, data_nd, res_nd, xty, { fill_xty_event }); + auto last_xtx_event = + update_xtx(queue, compute_intercept, data_nd, xtx, { fill_xtx_event }); result.set_partial_xtx(homogen_table::wrap(xtx.flatten(queue, { last_xtx_event }), ext_feature_count, diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp index bf0cd04c00e..04d76fe86b7 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp @@ -29,6 +29,7 @@ #include "oneapi/dal/algo/linear_regression/backend/model_impl.hpp" #include "oneapi/dal/algo/linear_regression/backend/gpu/train_kernel.hpp" #include "oneapi/dal/algo/linear_regression/backend/gpu/update_kernel.hpp" +#include "oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp" namespace oneapi::dal::linear_regression::backend { @@ 
-62,8 +63,8 @@ static train_result call_dal_kernel(const context_gpu& ctx, const auto feature_count = data.get_column_count(); const auto response_count = resp.get_column_count(); ONEDAL_ASSERT(sample_count == resp.get_row_count()); - const bool beta = desc.get_compute_intercept(); - const std::int64_t ext_feature_count = feature_count + beta; + const bool compute_intercept = desc.get_compute_intercept(); + const std::int64_t ext_feature_count = feature_count + compute_intercept; const auto betas_size = check_mul_overflow(response_count, feature_count + 1); auto betas_arr = array::zeros(queue, betas_size, alloc); @@ -95,19 +96,17 @@ static train_result call_dal_kernel(const context_gpu& ctx, auto y_arr = y_accessor.pull(queue, { first, last }, alloc); auto y = pr::ndview::wrap(y_arr.get_data(), { length, response_count }); - last_xty_event = update_xty(queue, beta, x, y, xty, { last_xty_event }); - last_xtx_event = update_xtx(queue, beta, x, xtx, { last_xtx_event }); + last_xty_event = update_xty(queue, compute_intercept, x, y, xty, { last_xty_event }); + last_xtx_event = update_xtx(queue, compute_intercept, x, xtx, { last_xtx_event }); // We keep the latest slice of data up to date because of pimpl - // it virtually extend lifetime of pulled arrays old_x_arr = std::move(x_arr), old_y_arr = std::move(y_arr); } - const be::event_vector solve_deps{ last_xty_event, last_xtx_event }; - auto& comm = ctx.get_communicator(); if (comm.get_rank_count() > 1) { - sycl::event::wait_and_throw(solve_deps); + sycl::event::wait_and_throw({ last_xty_event, last_xtx_event }); { ONEDAL_PROFILER_TASK(xtx_allreduce); auto xtx_arr = dal::array::wrap(queue, xtx.get_mutable_data(), xtx.get_count()); @@ -120,9 +119,17 @@ static train_result call_dal_kernel(const context_gpu& ctx, } } + double alpha = desc.get_alpha(); + if (alpha != 0.0) { + last_xtx_event = + add_ridge_penalty(queue, xtx, compute_intercept, alpha, { last_xtx_event }); + } + const be::event_vector solve_deps{ 
last_xty_event, last_xtx_event }; + auto nxtx = pr::ndarray::empty(queue, xtx_shape, alloc); auto nxty = pr::ndview::wrap_mutable(betas_arr, betas_shape); - auto solve_event = pr::solve_system(queue, beta, xtx, xty, nxtx, nxty, solve_deps); + auto solve_event = + pr::solve_system(queue, compute_intercept, xtx, xty, nxtx, nxty, solve_deps); sycl::event::wait_and_throw({ solve_event }); auto betas = homogen_table::wrap(betas_arr, response_count, feature_count + 1); diff --git a/cpp/oneapi/dal/algo/linear_regression/common.cpp b/cpp/oneapi/dal/algo/linear_regression/common.cpp index 70fd04f221e..949898f3524 100644 --- a/cpp/oneapi/dal/algo/linear_regression/common.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/common.cpp @@ -42,6 +42,8 @@ class descriptor_impl : public base { explicit descriptor_impl() = default; bool compute_intercept = true; + double alpha = 0; + result_option_id result_options = get_default_result_options(); }; @@ -81,6 +83,16 @@ void descriptor_base::set_compute_intercept_impl(bool compute_intercept) { impl_->compute_intercept = compute_intercept; } +template +double descriptor_base::get_alpha() const { + return impl_->alpha; +} + +template +void descriptor_base::set_alpha_impl(double value) { + impl_->alpha = value; +} + template class ONEDAL_EXPORT descriptor_base; } // namespace v1 diff --git a/cpp/oneapi/dal/algo/linear_regression/common.hpp b/cpp/oneapi/dal/algo/linear_regression/common.hpp index 633e919f1bb..57d597a984d 100644 --- a/cpp/oneapi/dal/algo/linear_regression/common.hpp +++ b/cpp/oneapi/dal/algo/linear_regression/common.hpp @@ -112,10 +112,12 @@ class descriptor_base : public base { descriptor_base(bool compute_intercept); bool get_compute_intercept() const; + double get_alpha() const; result_option_id get_result_options() const; protected: void set_compute_intercept_impl(bool compute_intercept); + void set_alpha_impl(double alpha); void set_result_options_impl(const result_option_id& value); private: @@ -165,6 +167,14 @@ class 
descriptor : public detail::descriptor_base { /// Creates a new instance of the class with default parameters explicit descriptor() : base_t(true) {} + explicit descriptor(bool compute_intercept, double alpha) : base_t(compute_intercept) { + set_alpha(alpha); + } + + explicit descriptor(double alpha) : base_t(true) { + set_alpha(alpha); + } + /// Defines should intercept be taken into consideration. bool get_compute_intercept() const { return base_t::get_compute_intercept(); @@ -175,6 +185,16 @@ class descriptor : public detail::descriptor_base { return *this; } + /// Defines regularization term alpha used in Ridge Regression + double get_alpha() const { + return base_t::get_alpha(); + } + + auto& set_alpha(double value) { + base_t::set_alpha_impl(value); + return *this; + } + /// Choose which results should be computed and returned. result_option_id get_result_options() const { return base_t::get_result_options(); diff --git a/cpp/oneapi/dal/algo/linear_regression/detail/finalize_train_ops_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/detail/finalize_train_ops_dpc.cpp index 3592aeefccb..21a5ce8108d 100644 --- a/cpp/oneapi/dal/algo/linear_regression/detail/finalize_train_ops_dpc.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/detail/finalize_train_ops_dpc.cpp @@ -38,7 +38,7 @@ struct finalize_train_ops_dispatcher { const partial_train_result& input) const { using kernel_dispatcher_t = dal::backend::kernel_dispatcher< KERNEL_SINGLE_NODE_CPU(parameters::train_parameters_cpu), - KERNEL_SINGLE_NODE_GPU(parameters::train_parameters_gpu)>; + KERNEL_UNIVERSAL_SPMD_GPU(parameters::train_parameters_gpu)>; return kernel_dispatcher_t{}(ctx, desc, input); } @@ -56,14 +56,16 @@ struct finalize_train_ops_dispatcher { const partial_train_result& input) const { using kernel_dispatcher_t = dal::backend::kernel_dispatcher< KERNEL_SINGLE_NODE_CPU(backend::finalize_train_kernel_cpu), - KERNEL_SINGLE_NODE_GPU(backend::finalize_train_kernel_gpu)>; + 
KERNEL_UNIVERSAL_SPMD_GPU(backend::finalize_train_kernel_gpu)>; return kernel_dispatcher_t{}(ctx, desc, params, input); } }; -#define INSTANTIATE(F, M, T) \ - template struct ONEDAL_EXPORT \ - finalize_train_ops_dispatcher; +#define INSTANTIATE(F, M, T) \ + template struct ONEDAL_EXPORT \ + finalize_train_ops_dispatcher; \ + template struct ONEDAL_EXPORT \ + finalize_train_ops_dispatcher; INSTANTIATE(float, method::norm_eq, task::regression) INSTANTIATE(double, method::norm_eq, task::regression) diff --git a/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp b/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp index 270b34b9ddc..00ec7babbb9 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp @@ -47,7 +47,15 @@ TEMPLATE_LIST_TEST_M(lr_batch_test, "LR common flow", "[lr][batch]", lr_types) { this->generate(777); - this->run_and_check(); + this->run_and_check_linear(); +} + +TEMPLATE_LIST_TEST_M(lr_batch_test, "RR common flow", "[rr][batch]", lr_types) { + SKIP_IF(this->not_float64_friendly()); + + this->generate(777); + + this->run_and_check_ridge(); } } // namespace oneapi::dal::linear_regression::test diff --git a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp index a8994a7c704..fb935174cfe 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp @@ -54,6 +54,9 @@ class lr_test : public te::crtp_algo_fixture { using test_input_t = infer_input; using test_result_t = infer_result; + using partial_input_t = partial_train_input<>; + using partial_result_t = partial_train_result<>; + te::table_id get_homogen_table_id() const { return te::table_id::homogen(); } @@ -123,6 +126,17 @@ class lr_test : public te::crtp_algo_fixture { return result; } + double generate_alpha(std::int64_t seed) const { + std::mt19937 gen(seed); + + double alpha_min = 1; + double 
alpha_max = 5; + + std::uniform_real_distribution dist(alpha_min, alpha_max); + + return dist(gen); + } + void check_table_dimensions(const table& x_train, const table& y_train, const table& x_test, @@ -144,13 +158,14 @@ class lr_test : public te::crtp_algo_fixture { this->bias_ = std::move(bias); this->beta_ = std::move(beta); + this->alpha_ = generate_alpha(seed); } - auto get_descriptor() const { + auto get_descriptor(double alpha = 0.0) const { result_option_id resopts = result_options::coefficients; if (this->intercept_) resopts = resopts | result_options::intercept; - return linear_regression::descriptor(intercept_) + return linear_regression::descriptor(intercept_, alpha) .set_result_options(resopts); } @@ -191,7 +206,25 @@ class lr_test : public te::crtp_algo_fixture { } } - void run_and_check(std::int64_t seed = 888, double tol = 1e-2) { + void check_coefficient_shrinkage(const table& lr_coeffs, + const table& rr_coeffs, + double tol = 1e-3) { + row_accessor lr_acc(lr_coeffs); + row_accessor rr_acc(rr_coeffs); + const auto lr_arr = lr_acc.pull({ 0, -1 }); + const auto rr_arr = rr_acc.pull({ 0, -1 }); + + double lr_norm_squared = 0, rr_norm_squared = 0; + for (std::int64_t i = 0; i < lr_arr.get_count(); ++i) { + lr_norm_squared += lr_arr[i] * lr_arr[i]; + rr_norm_squared += rr_arr[i] * rr_arr[i]; + } + + REQUIRE(rr_norm_squared <= lr_norm_squared + tol); + } + + std::tuple prepare_inputs(std::int64_t seed = 888, + double tol = 1e-2) { using namespace ::oneapi::dal::detail; std::mt19937 meta_gen(seed); @@ -214,6 +247,29 @@ class lr_test : public te::crtp_algo_fixture { auto y_test = compute_responses(this->beta_, this->bias_, x_test); check_table_dimensions(x_train, y_train, x_test, y_test); + return { x_train, y_train, x_test, y_test }; + } + + void run_and_check_ridge(std::int64_t seed = 888, double tol = 1e-2) { + table x_train, y_train, x_test, y_test; + std::tie(x_train, y_train, x_test, y_test) = prepare_inputs(seed, tol); + + const auto linear_desc = 
this->get_descriptor(); + const auto linear_train_res = this->train(linear_desc, x_train, y_train); + + const auto ridge_desc = this->get_descriptor(this->alpha_); + const auto ridge_train_res = this->train(ridge_desc, x_train, y_train); + + SECTION("Checking coefficient shrinkage") { + this->check_coefficient_shrinkage(linear_train_res.get_coefficients(), + ridge_train_res.get_coefficients(), + tol); + } + } + + void run_and_check_linear(std::int64_t seed = 888, double tol = 1e-2) { + table x_train, y_train, x_test, y_test; + std::tie(x_train, y_train, x_test, y_test) = prepare_inputs(seed, tol); const auto desc = this->get_descriptor(); const auto train_res = this->train(desc, x_train, y_train); @@ -234,6 +290,7 @@ class lr_test : public te::crtp_algo_fixture { check_if_close(infer_res.get_responses(), y_test, tol); } } + template std::vector split_table_by_rows(const dal::table& t, std::int64_t split_count) { ONEDAL_ASSERT(0l < split_count); @@ -259,31 +316,12 @@ class lr_test : public te::crtp_algo_fixture { return result; } - void run_and_check_online(std::int64_t nBlocks) { - using namespace ::oneapi::dal::detail; + void run_and_check_linear_online(std::int64_t nBlocks) { std::int64_t seed = 888; double tol = 1e-2; - - std::mt19937 meta_gen(seed); - const std::int64_t train_seed = meta_gen(); - const auto train_dataframe = GENERATE_DATAFRAME( - te::dataframe_builder{ this->s_count_, this->f_count_ }.fill_uniform(-5.5, - 3.5, - train_seed)); - auto x_train = train_dataframe.get_table(this->get_homogen_table_id()); - - const std::int64_t test_seed = meta_gen(); - const auto test_dataframe = GENERATE_DATAFRAME( - te::dataframe_builder{ this->t_count_, this->f_count_ }.fill_uniform(-3.5, - 5.5, - test_seed)); - auto x_test = test_dataframe.get_table(this->get_homogen_table_id()); - - auto y_train = compute_responses(this->beta_, this->bias_, x_train); - auto y_test = compute_responses(this->beta_, this->bias_, x_test); - - check_table_dimensions(x_train, y_train, 
x_test, y_test); + table x_train, y_train, x_test, y_test; + std::tie(x_train, y_train, x_test, y_test) = prepare_inputs(seed, tol); const auto desc = this->get_descriptor(); dal::linear_regression::partial_train_result<> partial_result; @@ -312,8 +350,45 @@ class lr_test : public te::crtp_algo_fixture { } } + void run_and_check_ridge_online(std::int64_t nBlocks) { + std::int64_t seed = 888; + double tol = 1e-2; + table x_train, y_train, x_test, y_test; + std::tie(x_train, y_train, x_test, y_test) = prepare_inputs(seed, tol); + + auto input_table_x = split_table_by_rows(x_train, nBlocks); + auto input_table_y = split_table_by_rows(y_train, nBlocks); + + const auto linear_desc = this->get_descriptor(); + dal::linear_regression::partial_train_result<> linear_partial_result; + for (std::int64_t i = 0; i < nBlocks; i++) { + linear_partial_result = this->partial_train(linear_desc, + linear_partial_result, + input_table_x[i], + input_table_y[i]); + } + auto linear_train_res = this->finalize_train(linear_desc, linear_partial_result); + + const auto ridge_desc = this->get_descriptor(this->alpha_); + dal::linear_regression::partial_train_result<> ridge_partial_result; + for (std::int64_t i = 0; i < nBlocks; i++) { + ridge_partial_result = this->partial_train(ridge_desc, + ridge_partial_result, + input_table_x[i], + input_table_y[i]); + } + auto ridge_train_res = this->finalize_train(ridge_desc, ridge_partial_result); + + SECTION("Checking coefficient shrinkage") { + this->check_coefficient_shrinkage(linear_train_res.get_coefficients(), + ridge_train_res.get_coefficients(), + tol); + } + } + protected: bool intercept_ = true; + float_t alpha_; std::int64_t t_count_; std::int64_t s_count_; std::int64_t f_count_; diff --git a/cpp/oneapi/dal/algo/linear_regression/test/online.cpp b/cpp/oneapi/dal/algo/linear_regression/test/online.cpp index 2724768491b..c16e1c06f26 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/online.cpp +++ 
b/cpp/oneapi/dal/algo/linear_regression/test/online.cpp @@ -47,7 +47,15 @@ TEMPLATE_LIST_TEST_M(lr_online_test, "LR common flow", "[lr][online]", lr_types) this->generate(777); const int64_t nBlocks = GENERATE(1, 3, 5, 8); - this->run_and_check_online(nBlocks); + this->run_and_check_linear_online(nBlocks); +} + +TEMPLATE_LIST_TEST_M(lr_online_test, "RR common flow", "[rr][online]", lr_types) { + SKIP_IF(this->not_float64_friendly()); + this->generate(777); + const int64_t nBlocks = GENERATE(1, 3, 5, 8); + + this->run_and_check_ridge_online(nBlocks); } } // namespace oneapi::dal::linear_regression::test diff --git a/cpp/oneapi/dal/algo/linear_regression/test/online_spmd.cpp b/cpp/oneapi/dal/algo/linear_regression/test/online_spmd.cpp new file mode 100644 index 00000000000..c0f7968adfc --- /dev/null +++ b/cpp/oneapi/dal/algo/linear_regression/test/online_spmd.cpp @@ -0,0 +1,126 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/linear_regression/test/fixture.hpp" +#include "oneapi/dal/test/engine/tables.hpp" +#include "oneapi/dal/test/engine/io.hpp" + +namespace oneapi::dal::linear_regression::test { + +namespace te = dal::test::engine; +namespace la = te::linalg; +namespace linear_regression = oneapi::dal::linear_regression; + +template +class lr_online_spmd_test : public lr_test> { +public: + using base_t = lr_test>; + using float_t = typename base_t::float_t; + using input_t = typename base_t::train_input_t; + using partial_input_t = typename base_t::partial_input_t; + using partial_result_t = typename base_t::partial_result_t; + using result_t = typename base_t::train_result_t; + + void set_rank_count(std::int64_t rank_count) { + n_rank = rank_count; + } + + std::int64_t get_rank_count() { + return n_rank; + } + + void generate_dimensions() { + this->t_count_ = GENERATE(307, 12999); + this->s_count_ = GENERATE(10000); + this->f_count_ = GENERATE(2, 17); + this->r_count_ = GENERATE(2, 15); + this->intercept_ = GENERATE(0, 1); + } + + template + result_t finalize_train_override(Args&&... args) { + return this->finalize_train_via_spmd_threads_and_merge(n_rank, std::forward(args)...); + } + + result_t merge_finalize_train_result_override(const std::vector& results) { + return results[0]; + } + + template + std::vector split_finalize_train_input_override(std::int64_t split_count, + Args&&... args) { + ONEDAL_ASSERT(split_count == n_rank); + const std::vector input{ std::forward(args)... 
}; + + return input; + } + + void run_and_check_linear_online_spmd(std::int64_t n_rank, + std::int64_t n_blocks, + std::int64_t seed = 888, + double tol = 1e-2) { + table x_train, y_train, x_test, y_test; + std::tie(x_train, y_train, x_test, y_test) = this->prepare_inputs(seed, tol); + + const auto desc = this->get_descriptor(); + std::vector partial_results; + auto input_table_x = base_t::template split_table_by_rows(x_train, n_rank); + auto input_table_y = base_t::template split_table_by_rows(y_train, n_rank); + for (int64_t i = 0; i < n_rank; i++) { + partial_result_t partial_result; + auto input_table_x_blocks = + base_t::template split_table_by_rows(input_table_x[i], n_blocks); + auto input_table_y_blocks = + base_t::template split_table_by_rows(input_table_y[i], n_blocks); + for (int64_t j = 0; j < n_blocks; j++) { + partial_result = this->partial_train(desc, + partial_result, + input_table_x_blocks[j], + input_table_y_blocks[j]); + } + partial_results.push_back(partial_result); + } + + const auto train_result = this->finalize_train_override(desc, partial_results); + + SECTION("Checking intercept values") { + if (desc.get_result_options().test(result_options::intercept)) + base_t::check_if_close(train_result.get_intercept(), base_t::bias_, tol); + } + + SECTION("Checking coefficient values") { + if (desc.get_result_options().test(result_options::coefficients)) + base_t::check_if_close(train_result.get_coefficients(), base_t::beta_, tol); + } + } + +private: + std::int64_t n_rank; +}; + +TEMPLATE_LIST_TEST_M(lr_online_spmd_test, "lr common flow", "[lr][integration][spmd]", lr_types) { + SKIP_IF(this->get_policy().is_cpu()); + SKIP_IF(this->not_float64_friendly()); + + this->generate(777); + + this->set_rank_count(GENERATE(1, 2, 4)); + std::int64_t n_blocks = GENERATE(1, 3, 10); + + this->run_and_check_linear_online_spmd(this->get_rank_count(), n_blocks); +} + +} // namespace oneapi::dal::linear_regression::test diff --git 
a/cpp/oneapi/dal/algo/linear_regression/test/spmd.cpp b/cpp/oneapi/dal/algo/linear_regression/test/spmd.cpp index d0cca4e943c..62223f03fdd 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/spmd.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/spmd.cpp @@ -25,7 +25,7 @@ TEMPLATE_LIST_TEST_M(lr_spmd_test, "LR common flow", "[lr][spmd]", lr_types) { this->generate(777); this->set_rank_count(GENERATE(2, 3)); - this->run_and_check(); + this->run_and_check_linear(); } } // namespace oneapi::dal::linear_regression::test diff --git a/cpp/oneapi/dal/algo/linear_regression/test/train_parameters.cpp b/cpp/oneapi/dal/algo/linear_regression/test/train_parameters.cpp index 48f9ead5d3a..835b8ecc1b4 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/train_parameters.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/train_parameters.cpp @@ -89,7 +89,7 @@ TEMPLATE_LIST_TEST_M(lr_train_params_test, "LR train params", "[lr][train][param this->generate(999); this->generate_parameters(); - this->run_and_check(); + this->run_and_check_linear(); } } // namespace oneapi::dal::linear_regression::test diff --git a/cpp/oneapi/dal/algo/linear_regression/train_types.hpp b/cpp/oneapi/dal/algo/linear_regression/train_types.hpp index d0b748333c3..cc96db9ccfb 100644 --- a/cpp/oneapi/dal/algo/linear_regression/train_types.hpp +++ b/cpp/oneapi/dal/algo/linear_regression/train_types.hpp @@ -17,6 +17,7 @@ #pragma once #include "oneapi/dal/algo/linear_regression/common.hpp" +#include "oneapi/dal/detail/parameters/system_parameters.hpp" namespace oneapi::dal::linear_regression { @@ -35,7 +36,7 @@ template class partial_train_result_impl; template -class train_parameters : public base { +class train_parameters : public dal::detail::system_parameters { public: explicit train_parameters(); train_parameters(train_parameters&&) = default; diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/infer_kernel_dense_batch.cpp 
b/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/infer_kernel_dense_batch.cpp index 0bc8ddb219d..88f04248e38 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/infer_kernel_dense_batch.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/infer_kernel_dense_batch.cpp @@ -34,7 +34,20 @@ struct infer_kernel_cpu { } }; +template +struct infer_kernel_cpu { + infer_result operator()(const context_cpu& ctx, + const detail::descriptor_base& desc, + const infer_input& input) const { + throw unimplemented( + dal::detail::error_messages::log_reg_sparse_method_is_not_implemented_for_cpu()); + } +}; + template struct infer_kernel_cpu; template struct infer_kernel_cpu; +template struct infer_kernel_cpu; +template struct infer_kernel_cpu; + } // namespace oneapi::dal::logistic_regression::backend diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/train_kernel_dense_batch.cpp b/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/train_kernel_dense_batch.cpp index e5728534b1d..b2f7dc0e951 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/train_kernel_dense_batch.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/train_kernel_dense_batch.cpp @@ -32,7 +32,21 @@ struct train_kernel_cpu { } }; +template +struct train_kernel_cpu { + train_result operator()(const context_cpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const train_input& input) const { + throw unimplemented( + dal::detail::error_messages::log_reg_sparse_method_is_not_implemented_for_cpu()); + } +}; + template struct train_kernel_cpu; template struct train_kernel_cpu; +template struct train_kernel_cpu; +template struct train_kernel_cpu; + } // namespace oneapi::dal::logistic_regression::backend diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/BUILD b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/BUILD index c22cb27010c..4d9e7679fe7 100644 --- 
a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/BUILD +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/BUILD @@ -13,6 +13,7 @@ dal_module( "@onedal//cpp/oneapi/dal/backend/primitives:lapack", "@onedal//cpp/oneapi/dal/backend/primitives:objective_function", "@onedal//cpp/oneapi/dal/backend/primitives:optimizers", + "@onedal//cpp/oneapi/dal/backend/primitives:sparse_blas", "@onedal//cpp/oneapi/dal/algo/logistic_regression:core", ], ) diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/infer_kernel_sparse_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/infer_kernel_sparse_dpc.cpp new file mode 100644 index 00000000000..51fab5a66e1 --- /dev/null +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/infer_kernel_sparse_dpc.cpp @@ -0,0 +1,138 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/logistic_regression/backend/model_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/gpu/infer_kernel.hpp" +#include "oneapi/dal/algo/logistic_regression/common.hpp" +#include "oneapi/dal/algo/logistic_regression/train_types.hpp" + +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/backend/interop/common.hpp" +#include "oneapi/dal/backend/interop/common_dpc.hpp" +#include "oneapi/dal/backend/interop/error_converter.hpp" +#include "oneapi/dal/backend/interop/table_conversion.hpp" +#include "oneapi/dal/backend/primitives/blas.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include "oneapi/dal/backend/primitives/ndindexer.hpp" +#include "oneapi/dal/backend/primitives/objective_function.hpp" +#include "oneapi/dal/backend/primitives/sparse_blas.hpp" + +#include "oneapi/dal/detail/profiler.hpp" +#include "oneapi/dal/table/csr_accessor.hpp" + +namespace oneapi::dal::logistic_regression::backend { + +using dal::backend::context_gpu; + +namespace be = dal::backend; +namespace pr = be::primitives; +namespace interop = dal::backend::interop; + +template +static infer_result call_dal_kernel(const context_gpu& ctx, + const detail::descriptor_base& desc, + const table& infer, + const model& m) { + auto queue = ctx.get_queue(); + ONEDAL_PROFILER_TASK(logreg_infer_kernel, queue); + + constexpr auto alloc = sycl::usm::alloc::device; + + const auto& betas = m.get_packed_coefficients(); + + const auto sample_count = infer.get_row_count(); + const auto feature_count = infer.get_column_count(); + const bool fit_intercept = desc.get_compute_intercept(); + ONEDAL_ASSERT((feature_count + 1) == betas.get_column_count()); + ONEDAL_ASSERT(1 == betas.get_row_count()); + ONEDAL_ASSERT(infer.get_kind() == dal::csr_table::kind()); + + pr::ndarray params = pr::table2ndarray_1d(queue, betas, alloc); + pr::ndview params_suf = fit_intercept ? 
params : params.slice(1, feature_count); + + pr::ndarray probs = pr::ndarray::empty(queue, { sample_count }, alloc); + pr::ndarray responses = + pr::ndarray::empty(queue, { sample_count }, alloc); + + auto [csr_data, column_indices, row_offsets] = + csr_accessor(static_cast(infer)) + .pull(queue, { 0, -1 }, sparse_indexing::zero_based); + + auto csr_data_gpu = + pr::ndarray::wrap(csr_data.get_data(), csr_data.get_count()).to_device(queue); + auto column_indices_gpu = + pr::ndarray::wrap(column_indices.get_data(), column_indices.get_count()) + .to_device(queue); + auto row_offsets_gpu = + pr::ndarray::wrap(row_offsets.get_data(), row_offsets.get_count()) + .to_device(queue); + + table data_gpu = csr_table::wrap(queue, + csr_data_gpu.get_data(), + column_indices_gpu.get_data(), + row_offsets_gpu.get_data(), + sample_count, + feature_count, + sparse_indexing::zero_based); + + dal::backend::primitives::sparse_matrix_handle sp_handle(queue); + set_csr_data(queue, sp_handle, static_cast(data_gpu)); + + sycl::event probabilities_event = + compute_probabilities_sparse(queue, params_suf, sp_handle, probs, fit_intercept, {}); + + const auto* const prob_ptr = probs.get_data(); + auto* const resp_ptr = responses.get_mutable_data(); + + auto fill_resp_event = queue.submit([&](sycl::handler& cgh) { + cgh.depends_on(probabilities_event); + const auto range = be::make_range_1d(sample_count); + cgh.parallel_for(range, [=](sycl::id<1> idx) { + constexpr Float half = 0.5f; + resp_ptr[idx] = prob_ptr[idx] < half ? 
0 : 1; + }); + }); + + auto resp_table = + homogen_table::wrap(responses.flatten(queue, { fill_resp_event }), sample_count, 1); + auto prob_table = + homogen_table::wrap(probs.flatten(queue, { probabilities_event }), sample_count, 1); + + auto result = infer_result().set_responses(resp_table).set_probabilities(prob_table); + + return result; +} + +template +static infer_result infer(const context_gpu& ctx, + const detail::descriptor_base& desc, + const infer_input& input) { + return call_dal_kernel(ctx, desc, input.get_data(), input.get_model()); +} + +template +struct infer_kernel_gpu { + infer_result operator()(const context_gpu& ctx, + const detail::descriptor_base& desc, + const infer_input& input) const { + return infer(ctx, desc, input); + } +}; + +template struct infer_kernel_gpu; +template struct infer_kernel_gpu; + +} // namespace oneapi::dal::logistic_regression::backend diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp new file mode 100644 index 00000000000..baf648c12dc --- /dev/null +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp @@ -0,0 +1,34 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/logistic_regression/common.hpp" + +#include "oneapi/dal/detail/common.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/algo/logistic_regression/train_types.hpp" + +namespace oneapi::dal::logistic_regression::backend { + +using dal::backend::context_gpu; + +template +train_result call_dal_kernel(const context_gpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const table& data, + const table& resp); + +} // namespace oneapi::dal::logistic_regression::backend diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common_dpc.cpp new file mode 100644 index 00000000000..d409ce3816d --- /dev/null +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common_dpc.cpp @@ -0,0 +1,128 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/model_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/optimizer_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/common.hpp" +#include "oneapi/dal/algo/logistic_regression/train_types.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include "oneapi/dal/backend/primitives/lapack.hpp" +#include "oneapi/dal/backend/primitives/utils.hpp" +#include "oneapi/dal/backend/primitives/objective_function.hpp" +#include "oneapi/dal/backend/primitives/optimizers.hpp" +#include "oneapi/dal/detail/profiler.hpp" +#include "oneapi/dal/detail/common.hpp" + +namespace oneapi::dal::logistic_regression::backend { + +using dal::backend::context_gpu; + +namespace be = dal::backend; +namespace pr = be::primitives; + +template +train_result call_dal_kernel(const context_gpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const table& data, + const table& resp) { + using dal::detail::check_mul_overflow; + + auto queue = ctx.get_queue(); + + ONEDAL_PROFILER_TASK(log_reg_train_kernel, queue); + + using model_t = model; + using model_impl_t = detail::model_impl; + + auto opt_impl = detail::get_optimizer_impl(desc); + + if (!opt_impl) { + throw internal_error{ dal::detail::error_messages::unknown_optimizer() }; + } + + const auto sample_count = data.get_row_count(); + const auto feature_count = data.get_column_count(); + ONEDAL_ASSERT(sample_count == resp.get_row_count()); + const auto responses_nd = + pr::table2ndarray_1d(queue, resp, sycl::usm::alloc::device); + + const std::int64_t bsize = params.get_gpu_macro_block(); + + const Float l2 = Float(1.0) / 
desc.get_inverse_regularization(); + const bool fit_intercept = desc.get_compute_intercept(); + + auto& comm = ctx.get_communicator(); + + pr::logloss_function loss_func = + pr::logloss_function(queue, comm, data, responses_nd, l2, fit_intercept, bsize); + + auto [x, fill_event] = + pr::ndarray::zeros(queue, { feature_count + 1 }, sycl::usm::alloc::device); + + pr::ndview x_suf = fit_intercept ? x : x.slice(1, feature_count); + + auto [train_event, iter_num] = opt_impl->minimize(queue, loss_func, x_suf, { fill_event }); + + auto all_coefs = homogen_table::wrap(x.flatten(queue, { train_event }), 1, feature_count + 1); + + const auto model_impl = std::make_shared(all_coefs); + const auto model = dal::detail::make_private(model_impl); + + const auto options = desc.get_result_options(); + auto result = train_result().set_model(model).set_result_options(options); + + if (options.test(result_options::intercept)) { + ONEDAL_ASSERT(fit_intercept); + table intercept_table = + homogen_table::wrap(x.slice(0, 1).flatten(queue, { train_event }), 1, 1); + result.set_intercept(intercept_table); + } + + if (options.test(result_options::coefficients)) { + auto coefs_array = x.slice(1, feature_count).flatten(queue, { train_event }); + auto coefs_table = homogen_table::wrap(coefs_array, 1, feature_count); + result.set_coefficients(coefs_table); + } + + if (options.test(result_options::iterations_count)) { + result.set_iterations_count(iter_num); + } + + if (options.test(result_options::inner_iterations_count)) { + result.set_inner_iterations_count(opt_impl->get_inner_iter()); + } + + return result; +} + +template train_result call_dal_kernel( + const context_gpu&, + const detail::descriptor_base&, + const detail::train_parameters&, + const table&, + const table&); +template train_result call_dal_kernel( + const context_gpu&, + const detail::descriptor_base&, + const detail::train_parameters&, + const table&, + const table&); + +} // namespace oneapi::dal::logistic_regression::backend 
diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_dense_batch_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_dense_batch_dpc.cpp index e24cdb02539..778e3330841 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_dense_batch_dpc.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_dense_batch_dpc.cpp @@ -31,6 +31,7 @@ #include "oneapi/dal/backend/primitives/objective_function.hpp" #include "oneapi/dal/backend/primitives/optimizers.hpp" #include "oneapi/dal/algo/logistic_regression/backend/optimizer_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp" namespace oneapi::dal::logistic_regression::backend { @@ -39,93 +40,20 @@ using dal::backend::context_gpu; namespace be = dal::backend; namespace pr = be::primitives; -template -static train_result call_dal_kernel(const context_gpu& ctx, - const detail::descriptor_base& desc, - const detail::train_parameters& params, - const table& data, - const table& resp) { - using dal::detail::check_mul_overflow; - - auto queue = ctx.get_queue(); - - ONEDAL_PROFILER_TASK(log_reg_train_kernel, queue); - - using model_t = model; - using model_impl_t = detail::model_impl; - - auto opt_impl = detail::get_optimizer_impl(desc); - - if (!opt_impl) { - throw internal_error{ dal::detail::error_messages::unknown_optimizer() }; - } - - const auto sample_count = data.get_row_count(); - const auto feature_count = data.get_column_count(); - ONEDAL_ASSERT(sample_count == resp.get_row_count()); - const auto responses_nd = - pr::table2ndarray_1d(queue, resp, sycl::usm::alloc::device); - - const std::int64_t bsize = params.get_gpu_macro_block(); - - const Float l2 = Float(1.0) / desc.get_inverse_regularization(); - const bool fit_intercept = desc.get_compute_intercept(); - - // TODO: add check if the dataset can be moved to gpu - // Move data to gpu - pr::ndarray data_nd = pr::table2ndarray(queue, data, 
sycl::usm::alloc::device); - table data_gpu = homogen_table::wrap(data_nd.flatten(queue, {}), sample_count, feature_count); - - auto& comm = ctx.get_communicator(); - - pr::logloss_function loss_func = - pr::logloss_function(queue, comm, data_gpu, responses_nd, l2, fit_intercept, bsize); - - auto [x, fill_event] = - pr::ndarray::zeros(queue, { feature_count + 1 }, sycl::usm::alloc::device); - - pr::ndview x_suf = fit_intercept ? x : x.slice(1, feature_count); - - auto [train_event, iter_num] = opt_impl->minimize(queue, loss_func, x_suf, { fill_event }); - - auto all_coefs = homogen_table::wrap(x.flatten(queue, { train_event }), 1, feature_count + 1); - - const auto model_impl = std::make_shared(all_coefs); - const auto model = dal::detail::make_private(model_impl); - - const auto options = desc.get_result_options(); - auto result = train_result().set_model(model).set_result_options(options); - - if (options.test(result_options::intercept)) { - ONEDAL_ASSERT(fit_intercept); - table intercept_table = - homogen_table::wrap(x.slice(0, 1).flatten(queue, { train_event }), 1, 1); - result.set_intercept(intercept_table); - } - - if (options.test(result_options::coefficients)) { - auto coefs_array = x.slice(1, feature_count).flatten(queue, { train_event }); - auto coefs_table = homogen_table::wrap(coefs_array, 1, feature_count); - result.set_coefficients(coefs_table); - } - - if (options.test(result_options::iterations_count)) { - result.set_iterations_count(iter_num); - } - - if (options.test(result_options::inner_iterations_count)) { - result.set_inner_iterations_count(opt_impl->get_inner_iter()); - } - - return result; -} - template static train_result train(const context_gpu& ctx, const detail::descriptor_base& desc, const detail::train_parameters& params, const train_input& input) { - return call_dal_kernel(ctx, desc, params, input.get_data(), input.get_responses()); + // TODO: add check if the dataset can be moved to gpu + // Move data to gpu + const auto sample_count 
= input.get_data().get_row_count(); + const auto feature_count = input.get_data().get_column_count(); + auto queue = ctx.get_queue(); + pr::ndarray data_nd = + pr::table2ndarray(queue, input.get_data(), sycl::usm::alloc::device); + table data_gpu = homogen_table::wrap(data_nd.flatten(queue, {}), sample_count, feature_count); + return call_dal_kernel(ctx, desc, params, data_gpu, input.get_responses()); } template diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_sparse_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_sparse_dpc.cpp new file mode 100644 index 00000000000..6b19c32c37d --- /dev/null +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_sparse_dpc.cpp @@ -0,0 +1,91 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/detail/profiler.hpp" + +#include "oneapi/dal/detail/common.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include "oneapi/dal/backend/primitives/lapack.hpp" +#include "oneapi/dal/backend/primitives/utils.hpp" + +#include "oneapi/dal/table/csr_accessor.hpp" + +#include "oneapi/dal/algo/logistic_regression/common.hpp" +#include "oneapi/dal/algo/logistic_regression/train_types.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/model_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel.hpp" +#include "oneapi/dal/backend/primitives/objective_function.hpp" +#include "oneapi/dal/backend/primitives/optimizers.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/optimizer_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp" + +namespace oneapi::dal::logistic_regression::backend { + +using dal::backend::context_gpu; + +namespace be = dal::backend; +namespace pr = be::primitives; + +template +static train_result train(const context_gpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const train_input& input) { + // TODO: add check if the dataset can be moved to gpu + // Move data to gpu + const auto sample_count = input.get_data().get_row_count(); + const auto feature_count = input.get_data().get_column_count(); + auto queue = ctx.get_queue(); + + auto [csr_data, column_indices, row_offsets] = + csr_accessor(static_cast(input.get_data())) + .pull(queue, { 0, -1 }, sparse_indexing::zero_based); + + auto csr_data_gpu = + pr::ndarray::wrap(csr_data.get_data(), csr_data.get_count()).to_device(queue); + auto column_indices_gpu = + pr::ndarray::wrap(column_indices.get_data(), column_indices.get_count()) + .to_device(queue); + auto row_offsets_gpu = + pr::ndarray::wrap(row_offsets.get_data(), 
row_offsets.get_count()) + .to_device(queue); + + table data_gpu = csr_table::wrap(queue, + csr_data_gpu.get_data(), + column_indices_gpu.get_data(), + row_offsets_gpu.get_data(), + sample_count, + feature_count, + sparse_indexing::zero_based); + + return call_dal_kernel(ctx, desc, params, data_gpu, input.get_responses()); +} + +template +struct train_kernel_gpu { + train_result operator()(const context_gpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const train_input& input) const { + return train(ctx, desc, params, input); + } +}; + +template struct train_kernel_gpu; +template struct train_kernel_gpu; + +} // namespace oneapi::dal::logistic_regression::backend diff --git a/cpp/oneapi/dal/algo/logistic_regression/common.hpp b/cpp/oneapi/dal/algo/logistic_regression/common.hpp index b817b5b1101..664705d260c 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/common.hpp +++ b/cpp/oneapi/dal/algo/logistic_regression/common.hpp @@ -44,12 +44,15 @@ namespace method { namespace v1 { /// Tag-type that denotes :ref:`dense_batch ` computational method. struct dense_batch {}; +/// Tag-type that denotes :ref:`sparse ` computational method. +struct sparse {}; /// Alias tag-type for the dense_batch method using by_default = dense_batch; } // namespace v1 using v1::dense_batch; +using v1::sparse; using v1::by_default; } // namespace method @@ -105,7 +108,8 @@ template constexpr bool is_valid_float_v = dal::detail::is_one_of_v; template -constexpr bool is_valid_method_v = dal::detail::is_one_of_v; +constexpr bool is_valid_method_v = + dal::detail::is_one_of_v; template constexpr bool is_valid_task_v = dal::detail::is_one_of_v; @@ -168,7 +172,7 @@ namespace v1 { /// intermediate computations. Can be :expr:`float` or /// :expr:`double`. /// @tparam Method Tag-type that specifies an implementation of algorithm. Can -/// be :expr:`method::dense_batch`. +/// be :expr:`method::dense_batch` or :expr:`method::sparse`. 
/// @tparam Task Tag-type that specifies type of the problem to solve. Can /// be :expr:`task::classification`. /// @tparam Optimizer The descriptor of the optimizer used for minimization. Can diff --git a/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops.cpp b/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops.cpp index 935bd6ab9af..2d462694bbe 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops.cpp @@ -40,5 +40,8 @@ struct infer_ops_dispatcher { INSTANTIATE(float, method::dense_batch, task::classification) INSTANTIATE(double, method::dense_batch, task::classification) +INSTANTIATE(float, method::sparse, task::classification) +INSTANTIATE(double, method::sparse, task::classification) + } // namespace v1 } // namespace oneapi::dal::logistic_regression::detail diff --git a/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops_dpc.cpp index e28a5b722b2..9bb1d7c35ad 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops_dpc.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops_dpc.cpp @@ -43,5 +43,8 @@ struct infer_ops_dispatcher { INSTANTIATE(float, method::dense_batch, task::classification) INSTANTIATE(double, method::dense_batch, task::classification) +INSTANTIATE(float, method::sparse, task::classification) +INSTANTIATE(double, method::sparse, task::classification) + } // namespace v1 } // namespace oneapi::dal::logistic_regression::detail diff --git a/cpp/oneapi/dal/algo/logistic_regression/detail/optimizer.cpp b/cpp/oneapi/dal/algo/logistic_regression/detail/optimizer.cpp index b56df6bf0e9..c051591e8ec 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/detail/optimizer.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/detail/optimizer.cpp @@ -37,7 +37,7 @@ class newton_cg_optimizer_impl : public optimizer_impl { tol_(tol), inner_iter_(0) {} - optimizer_type 
get_optimizer_type() override { + optimizer_type get_optimizer_type() final { return optimizer_type::newton_cg; } @@ -45,12 +45,12 @@ class newton_cg_optimizer_impl : public optimizer_impl { return tol_; } - std::int64_t get_max_iter() override { + std::int64_t get_max_iter() final { return max_iter_; } // this parameter is set after minimize function was called - std::int64_t get_inner_iter() override { + std::int64_t get_inner_iter() final { return inner_iter_; } diff --git a/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops.cpp b/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops.cpp index ab8b385154e..28663fffa3d 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops.cpp @@ -65,5 +65,8 @@ struct train_ops_dispatcher { INSTANTIATE(float, method::dense_batch, task::classification) INSTANTIATE(double, method::dense_batch, task::classification) +INSTANTIATE(float, method::sparse, task::classification) +INSTANTIATE(double, method::sparse, task::classification) + } // namespace v1 } // namespace oneapi::dal::logistic_regression::detail diff --git a/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops_dpc.cpp index 603f509578c..8ea686e5ebd 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops_dpc.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops_dpc.cpp @@ -70,5 +70,8 @@ struct train_ops_dispatcher { INSTANTIATE(float, method::dense_batch, task::classification) INSTANTIATE(double, method::dense_batch, task::classification) +INSTANTIATE(float, method::sparse, task::classification) +INSTANTIATE(double, method::sparse, task::classification) + } // namespace v1 } // namespace oneapi::dal::logistic_regression::detail diff --git a/cpp/oneapi/dal/algo/logistic_regression/parameters/cpu/train_parameters.cpp 
b/cpp/oneapi/dal/algo/logistic_regression/parameters/cpu/train_parameters.cpp index 96e7d8e0ddb..523fbba91a0 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/parameters/cpu/train_parameters.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/parameters/cpu/train_parameters.cpp @@ -55,9 +55,22 @@ struct train_parameters_cpu { } }; +template +struct train_parameters_cpu { + using params_t = detail::train_parameters; + params_t operator()(const context_cpu& ctx, + const detail::descriptor_base& desc, + const train_input& input) const { + return params_t{}; + } +}; + template struct ONEDAL_EXPORT train_parameters_cpu; template struct ONEDAL_EXPORT train_parameters_cpu; +template struct ONEDAL_EXPORT train_parameters_cpu; +template struct ONEDAL_EXPORT train_parameters_cpu; + } // namespace oneapi::dal::logistic_regression::parameters diff --git a/cpp/oneapi/dal/algo/logistic_regression/parameters/gpu/train_parameters_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/parameters/gpu/train_parameters_dpc.cpp index f2537443c68..d83f8a2888e 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/parameters/gpu/train_parameters_dpc.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/parameters/gpu/train_parameters_dpc.cpp @@ -55,9 +55,22 @@ struct train_parameters_gpu { } }; +template +struct train_parameters_gpu { + using params_t = detail::train_parameters; + params_t operator()(const context_gpu& ctx, + const detail::descriptor_base& desc, + const train_input& input) const { + return params_t{}; + } +}; + template struct ONEDAL_EXPORT train_parameters_gpu; template struct ONEDAL_EXPORT train_parameters_gpu; +template struct ONEDAL_EXPORT train_parameters_gpu; +template struct ONEDAL_EXPORT train_parameters_gpu; + } // namespace oneapi::dal::logistic_regression::parameters diff --git a/cpp/oneapi/dal/algo/logistic_regression/test/fixture.hpp b/cpp/oneapi/dal/algo/logistic_regression/test/fixture.hpp index 83092265519..d5e41f4c31c 100644 --- 
a/cpp/oneapi/dal/algo/logistic_regression/test/fixture.hpp +++ b/cpp/oneapi/dal/algo/logistic_regression/test/fixture.hpp @@ -28,6 +28,7 @@ #include "oneapi/dal/test/engine/fixtures.hpp" #include "oneapi/dal/test/engine/math.hpp" +#include "oneapi/dal/test/engine/csr_table_builder.hpp" namespace oneapi::dal::logistic_regression::test { @@ -55,6 +56,10 @@ class log_reg_test : public te::crtp_algo_fixture { return static_cast(this); } + bool is_sparse() { + return std::is_same_v; + } + auto get_descriptor(double tol = 1e-4, std::int64_t maxiter = 100) const { result_option_id resopts = result_options::coefficients; @@ -67,7 +72,9 @@ class log_reg_test : public te::crtp_algo_fixture { .set_optimizer(optimizer_desc); } - virtual void gen_dimensions(std::int64_t n = -1, std::int64_t p = -1) { + virtual void gen_dimensions(std::int64_t n = -1, + std::int64_t p = -1, + double train_size_coef = 0.7) { if (n == -1 || p == -1) { this->n_ = GENERATE(100, 200, 1000, 10000, 50000); this->p_ = GENERATE(10, 20, 30); @@ -76,6 +83,8 @@ class log_reg_test : public te::crtp_algo_fixture { this->n_ = n; this->p_ = p; } + this->train_size_ = (this->n_ * train_size_coef); + this->test_size_ = this->n_ - this->train_size_; } float_t predict_proba(float_t* ptr, float_t* params_ptr, float_t intercept) { @@ -84,7 +93,7 @@ class log_reg_test : public te::crtp_algo_fixture { val += ptr[j] * params_ptr[j]; } val += intercept; - return float_t(1) / (1 + std::exp(-val)); + return float_t(1.0) / (1.0 + std::exp(-val)); } void gen_input(bool fit_intercept = true, double C = 1.0, std::int64_t seed = 2007) { @@ -93,31 +102,73 @@ class log_reg_test : public te::crtp_algo_fixture { std::int64_t dim = fit_intercept_ ? 
p_ + 1 : p_; - X_host_ = array::zeros(n_ * p_); - auto* x_ptr = X_host_.get_mutable_data(); + std::mt19937 rnd(seed + n_ + p_); - y_host_ = array::zeros(n_); - auto* y_ptr = y_host_.get_mutable_data(); + std::uniform_real_distribution<> dis_params(-3.0, 3.0); - params_host_ = array::zeros(dim); - auto* params_ptr = params_host_.get_mutable_data(); + if (this->is_sparse()) { + auto builder_train = + te::csr_table_builder(train_size_, p_, 0.5, sparse_indexing::zero_based); + this->X_train_ = builder_train.build_csr_table(this->get_policy()); - std::mt19937 rnd(seed + n_ + p_); - std::uniform_real_distribution<> dis_data(-10.0, 10.0); - std::uniform_real_distribution<> dis_params(-3.0, 3.0); + auto builder_test = + te::csr_table_builder(test_size_, p_, 0.5, sparse_indexing::zero_based); + this->X_test_ = builder_test.build_csr_table(this->get_policy()); + + table dense_train = builder_train.build_dense_table(); + table dense_test = builder_test.build_dense_table(); - for (std::int64_t i = 0; i < n_; ++i) { - for (std::int64_t j = 0; j < p_; ++j) { - *(x_ptr + i * p_ + j) = dis_data(rnd); + X_train_host_ = row_accessor(dense_train) + .pull(this->get_queue(), { 0, -1 }, sycl::usm::alloc::host); + X_test_host_ = row_accessor(dense_test) + .pull(this->get_queue(), { 0, -1 }, sycl::usm::alloc::host); + } + else { + std::uniform_real_distribution<> dis_data(-10.0, 10.0); + X_train_host_ = array::zeros(train_size_ * p_); + X_test_host_ = array::zeros(test_size_ * p_); + + auto* x_ptr = X_train_host_.get_mutable_data(); + for (std::int64_t i = 0; i < train_size_; ++i) { + for (std::int64_t j = 0; j < p_; ++j) { + *(x_ptr + i * p_ + j) = dis_data(rnd); + } + } + x_ptr = X_test_host_.get_mutable_data(); + for (std::int64_t i = 0; i < test_size_; ++i) { + for (std::int64_t j = 0; j < p_; ++j) { + *(x_ptr + i * p_ + j) = dis_data(rnd); + } } + X_train_ = + homogen_table::wrap(X_train_host_.get_mutable_data(), train_size_, p_); + X_test_ = 
homogen_table::wrap(X_test_host_.get_mutable_data(), test_size_, p_); } + auto* x_ptr = X_train_host_.get_mutable_data(); + y_train_host_ = array::zeros(train_size_); + auto* y_ptr = y_train_host_.get_mutable_data(); + + params_host_ = array::zeros(dim); + auto* params_ptr = params_host_.get_mutable_data(); + for (std::int64_t i = 0; i < dim; ++i) { *(params_ptr + i) = dis_params(rnd); } constexpr float_t half = 0.5; - for (std::int64_t i = 0; i < n_; ++i) { + for (std::int64_t i = 0; i < train_size_; ++i) { + float_t val = predict_proba(x_ptr + i * p_, + params_ptr + (std::int64_t)fit_intercept_, + fit_intercept_ ? *params_ptr : 0); + y_ptr[i] = bool(val < half); + } + + x_ptr = X_test_host_.get_mutable_data(); + y_test_host_ = array::zeros(test_size_); + y_ptr = y_test_host_.get_mutable_data(); + + for (std::int64_t i = 0; i < test_size_; ++i) { float_t val = predict_proba(x_ptr + i * p_, params_ptr + (std::int64_t)fit_intercept_, fit_intercept_ ? *params_ptr : 0); @@ -126,18 +177,12 @@ class log_reg_test : public te::crtp_algo_fixture { } void run_test(double tol = 1e-4, std::int64_t maxiter = 100) { - std::int64_t train_size = n_ * 0.7; - std::int64_t test_size = n_ - train_size; - - table X_train = homogen_table::wrap(X_host_.get_mutable_data(), train_size, p_); - table X_test = homogen_table::wrap(X_host_.get_mutable_data() + train_size * p_, - test_size, - p_); table y_train = - homogen_table::wrap(y_host_.get_mutable_data(), train_size, 1); - + homogen_table::wrap(y_train_host_.get_mutable_data(), train_size_, 1); + table y_table = + homogen_table::wrap(y_test_host_.get_mutable_data(), test_size_, 1); const auto desc = this->get_descriptor(tol, maxiter); - const auto train_res = this->train(desc, X_train, y_train); + const auto train_res = this->train(desc, X_train_, y_train); table intercept; array bias_host; if (fit_intercept_) { @@ -150,7 +195,7 @@ class log_reg_test : public te::crtp_algo_fixture { std::int64_t train_acc = 0; std::int64_t test_acc = 0; 
- const auto infer_res = this->infer(desc, X_test, train_res.get_model()); + const auto infer_res = this->infer(desc, X_test_, train_res.get_model()); table resp_table = infer_res.get_responses(); auto resp_host = row_accessor(resp_table).pull({ 0, -1 }); @@ -158,38 +203,43 @@ class log_reg_test : public te::crtp_algo_fixture { table prob_table = infer_res.get_probabilities(); auto prob_host = row_accessor(prob_table).pull({ 0, -1 }); - for (std::int64_t i = 0; i < n_; ++i) { - float_t val = predict_proba(X_host_.get_mutable_data() + i * p_, + for (std::int64_t i = 0; i < train_size_; ++i) { + float_t val = predict_proba(X_train_host_.get_mutable_data() + i * p_, coefs_host.get_mutable_data(), fit_intercept_ ? *bias_host.get_mutable_data() : 0); - std::int32_t resp = 0; - if (val >= 0.5) { - resp = 1; - } - if (resp == *(y_host_.get_mutable_data() + i)) { - bool is_train = i < train_size; - train_acc += std::int64_t(is_train); - test_acc += std::int64_t(!is_train); - } - if (i >= train_size) { - REQUIRE(abs(val - *(prob_host.get_mutable_data() + i - train_size)) < 1e-5); - REQUIRE(*(resp_host.get_mutable_data() + i - train_size) == resp); + std::int32_t resp = val >= 0.5 ? 1 : 0; + if (resp == *(y_train_host_.get_mutable_data() + i)) { + train_acc += 1; } } + std::int64_t acc_algo = 0; - for (std::int64_t i = 0; i < test_size; ++i) { - if (*(resp_host.get_mutable_data() + i) == - *(y_host_.get_mutable_data() + train_size + i)) { + for (std::int64_t i = 0; i < test_size_; ++i) { + float_t val = predict_proba(X_test_host_.get_mutable_data() + i * p_, + coefs_host.get_mutable_data(), + fit_intercept_ ? *bias_host.get_mutable_data() : 0); + std::int32_t resp = val >= 0.5 ? 
1 : 0; + if (resp == *(y_test_host_.get_mutable_data() + i)) { + test_acc += 1; + } + + REQUIRE(abs(val - *(prob_host.get_mutable_data() + i)) < 1e-5); + REQUIRE(*(resp_host.get_mutable_data() + i) == resp); + + if (*(resp_host.get_mutable_data() + i) == *(y_test_host_.get_mutable_data() + i)) { acc_algo++; } } float_t min_train_acc = 0.95; float_t min_test_acc = n_ < 500 ? 0.7 : 0.85; + if (this->is_sparse() && n_ < 500) { + min_test_acc = 0.65; + } - REQUIRE(train_size * min_train_acc < train_acc); - REQUIRE(test_size * min_test_acc < test_acc); - REQUIRE(test_size * min_test_acc < acc_algo); + REQUIRE(train_size_ * min_train_acc < train_acc); + REQUIRE(test_size_ * min_test_acc < test_acc); + REQUIRE(test_size_ * min_test_acc < acc_algo); REQUIRE(test_acc == acc_algo); } @@ -198,14 +248,23 @@ class log_reg_test : public te::crtp_algo_fixture { double C_ = 1.0; std::int64_t n_ = 0; std::int64_t p_ = 0; - array X_host_; + std::int64_t train_size_ = 0; + std::int64_t test_size_ = 0; + array X_train_host_; + array X_test_host_; array params_host_; - array y_host_; - array resp_; + array y_train_host_; + array y_test_host_; + + array resp_train_; + array resp_test_; + table X_train_; + table X_test_; }; using log_reg_types = COMBINE_TYPES((float, double), - (logistic_regression::method::dense_batch), + (logistic_regression::method::dense_batch, + logistic_regression::method::sparse), (logistic_regression::task::classification)); } // namespace oneapi::dal::logistic_regression::test diff --git a/cpp/oneapi/dal/algo/logistic_regression/test/spmd_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/test/spmd_dpc.cpp index 0e61b8056cb..c1ecf0b1adc 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/test/spmd_dpc.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/test/spmd_dpc.cpp @@ -21,7 +21,7 @@ namespace oneapi::dal::logistic_regression::test { TEMPLATE_LIST_TEST_M(log_reg_spmd_test, "LogReg common flow - fit intercept", "[lr][spmd]", - log_reg_types) { + 
log_reg_spmd_types) { SKIP_IF(this->get_policy().is_cpu()); SKIP_IF(this->not_float64_friendly()); @@ -35,7 +35,7 @@ TEMPLATE_LIST_TEST_M(log_reg_spmd_test, TEMPLATE_LIST_TEST_M(log_reg_spmd_test, "LogReg common flow - no fit intercept", "[lr][spmd]", - log_reg_types) { + log_reg_spmd_types) { SKIP_IF(this->get_policy().is_cpu()); SKIP_IF(this->not_float64_friendly()); diff --git a/cpp/oneapi/dal/algo/logistic_regression/test/spmd_fixture.hpp b/cpp/oneapi/dal/algo/logistic_regression/test/spmd_fixture.hpp index 120d4258750..89b7859b7cd 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/test/spmd_fixture.hpp +++ b/cpp/oneapi/dal/algo/logistic_regression/test/spmd_fixture.hpp @@ -37,7 +37,9 @@ class log_reg_spmd_test : public log_reg_testn_ = GENERATE(50, 99); this->p_ = GENERATE(3, 10); @@ -46,6 +48,8 @@ class log_reg_spmd_test : public log_reg_testn_ = n; this->p_ = p; } + this->train_size_ = (this->n_ * train_size_coef); + this->test_size_ = this->n_ - this->train_size_; } template @@ -82,4 +86,8 @@ class log_reg_spmd_test : public log_reg_test call_daal_kernel_finalize_train(const context_cpu& ctx &daal_parameter, &daal_hyperparameter)); - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_pca_cor_kernel_t().computeCorrelationEigenvalues( - *daal_cor_matrix, - *daal_eigenvectors, - *daal_eigenvalues); - }); - - interop::status_to_exception(status); + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_pca_cor_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + .computeCorrelationEigenvalues(*daal_cor_matrix, *daal_eigenvectors, *daal_eigenvalues); + })); + + if (desc.get_deterministic()) { + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_pca_cor_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + 
.signFlipEigenvectors(*daal_eigenvectors); + })); } - { - if (desc.get_deterministic()) { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_pca_cor_kernel_t().signFlipEigenvectors( - *daal_eigenvectors); - }); - - interop::status_to_exception(status); - } - } - - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_pca_cor_kernel_t().computeSingularValues( - *daal_eigenvalues, - *daal_singular_values, - row_count); - }); - - interop::status_to_exception(status); - } - - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_pca_cor_kernel_t().computeVariancesFromCov( - *daal_cor_matrix, - *daal_variances); - }); - interop::status_to_exception(status); - } - - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_pca_cor_kernel_t().computeExplainedVariancesRatio( - *daal_eigenvalues, - *daal_variances, - *daal_explained_variances_ratio); - }); - - interop::status_to_exception(status); - } + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_pca_cor_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + .computeSingularValues(*daal_eigenvalues, *daal_singular_values, row_count); + })); + + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_pca_cor_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + .computeVariancesFromCov(*daal_cor_matrix, *daal_variances); + })); + + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_pca_cor_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + 
.computeExplainedVariancesRatio(*daal_eigenvalues, + *daal_variances, + *daal_explained_variances_ratio); + })); if (desc.get_result_options().test(result_options::eigenvectors)) { result.set_eigenvectors(homogen_table::wrap(arr_eigvec, component_count, column_count)); diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp index 127d19ed4c4..3ff7d9b40e3 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp @@ -97,16 +97,14 @@ static train_result call_daal_kernel_finalize_train(const context_cpu& ctx result.set_singular_values(homogen_table::wrap(reshaped_eigval, 1, component_count)); if (desc.get_normalization_mode() == normalization::mean_center) { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_svd_kernel_t().computeEigenValues( - *daal_singular_values, - *daal_eigenvalues, - rows_count_global); - }); - - interop::status_to_exception(status); - result.set_eigenvalues(homogen_table::wrap(reshaped_eigval, 1, component_count)); + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_svd_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + .computeEigenValues(*daal_singular_values, + *daal_eigenvalues, + rows_count_global); + })); } else { result.set_eigenvalues(homogen_table::wrap(reshaped_eigval, 1, component_count)); diff --git a/cpp/oneapi/dal/backend/primitives/objective_function.hpp b/cpp/oneapi/dal/backend/primitives/objective_function.hpp index 6803d825954..013de5b0c98 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function.hpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function.hpp @@ -17,3 +17,4 @@ #pragma once #include "oneapi/dal/backend/primitives/objective_function/logloss.hpp" +#include 
"oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp" diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/BUILD b/cpp/oneapi/dal/backend/primitives/objective_function/BUILD index 91609568c7f..6d183debdcb 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/BUILD +++ b/cpp/oneapi/dal/backend/primitives/objective_function/BUILD @@ -11,6 +11,7 @@ dal_module( "@onedal//cpp/oneapi/dal/backend/primitives:common", "@onedal//cpp/oneapi/dal/backend/primitives:blas", "@onedal//cpp/oneapi/dal/backend/primitives/optimizers", + "@onedal//cpp/oneapi/dal/backend/primitives:sparse_blas", ], ) diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/logloss.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/logloss.hpp index 9a7513d4dde..28a7fa9212c 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/logloss.hpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function/logloss.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include "oneapi/dal/backend/primitives/optimizers/common.hpp" #include "oneapi/dal/table/common.hpp" #include "oneapi/dal/backend/communicator.hpp" +#include "oneapi/dal/backend/primitives/sparse_blas/handle.hpp" namespace oneapi::dal::backend::primitives { @@ -32,6 +33,14 @@ sycl::event compute_probabilities(sycl::queue& q, bool fit_intercept = true, const event_vector& deps = {}); +template +sycl::event compute_probabilities_sparse(sycl::queue& q, + const ndview& parameters, + sparse_matrix_handle& sp_handler, + ndview& probabilities, + bool fit_intercept = true, + const event_vector& deps = {}); + template sycl::event compute_logloss(sycl::queue& q, const ndview& labels, @@ -50,6 +59,16 @@ sycl::event compute_logloss_with_der(sycl::queue& q, bool fit_intercept = true, const event_vector& deps = {}); +template +sycl::event compute_logloss_with_der_sparse(sycl::queue& q, + sparse_matrix_handle& sp_handler, + const ndview& labels, + const ndview& probabilities, + ndview& out, + ndview& out_derivative, + bool fit_intercept = true, + const event_vector& deps = {}); + template sycl::event compute_derivative(sycl::queue& q, const ndview& data, @@ -104,88 +123,4 @@ sycl::event compute_raw_hessian(sycl::queue& q, ndview& out_hessian, const event_vector& deps = {}); -using comm_t = backend::communicator; - -template -class logloss_hessian_product : public base_matrix_operator { -public: - logloss_hessian_product(sycl::queue& q, - const table& data, - Float L2 = Float(0), - bool fit_intercept = true, - std::int64_t bsz = -1); - logloss_hessian_product(sycl::queue& q, - comm_t comm, - const table& data, - Float L2 = Float(0), - bool fit_intercept = true, - std::int64_t bsz = -1); - sycl::event operator()(const ndview& vec, - ndview& out, - const event_vector& deps) final; - ndview& get_raw_hessian(); - -private: - sycl::event compute_with_fit_intercept(const ndview& vec, - ndview& out, - const event_vector& deps); - sycl::event 
compute_without_fit_intercept(const ndview& vec, - ndview& out, - const event_vector& deps); - - sycl::queue q_; - comm_t comm_; - const table data_; - Float L2_; - bool fit_intercept_; - ndarray raw_hessian_; - ndarray buffer_; - ndarray tmp_gpu_; - const std::int64_t n_; - const std::int64_t p_; - const std::int64_t bsz_; -}; - -template -class logloss_function : public base_function { -public: - logloss_function(sycl::queue queue, - const table& data, - const ndview& labels, - Float L2 = 0.0, - bool fit_intercept = true, - std::int64_t bsz = -1); - logloss_function(sycl::queue queue, - comm_t comm, - const table& data, - const ndview& labels, - Float L2 = 0.0, - bool fit_intercept = true, - std::int64_t bsz = -1); - Float get_value() final; - ndview& get_gradient() final; - base_matrix_operator& get_hessian_product() final; - - event_vector update_x(const ndview& x, - bool need_hessp = false, - const event_vector& deps = {}) final; - -private: - sycl::queue q_; - comm_t comm_; - const table data_; - const ndview labels_; - const std::int64_t n_; - const std::int64_t p_; - Float L2_; - bool fit_intercept_; - const std::int64_t bsz_; - ndarray probabilities_; - ndarray gradient_; - ndarray buffer_; - logloss_hessian_product hessp_; - const std::int64_t dimension_; - Float value_; -}; - } // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/logloss_dpc.cpp b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_dpc.cpp index a288239f1d2..e3dce105dbc 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/logloss_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_dpc.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with 
the License. @@ -18,6 +18,7 @@ #include "oneapi/dal/backend/primitives/blas/gemv.hpp" #include "oneapi/dal/backend/primitives/element_wise.hpp" #include "oneapi/dal/detail/profiler.hpp" +#include "oneapi/dal/backend/primitives/sparse_blas.hpp" namespace oneapi::dal::backend::primitives { @@ -40,7 +41,7 @@ sycl::event compute_probabilities(sycl::queue& q, ONEDAL_ASSERT(parameters.get_dimension(0) == fit_intercept ? p + 1 : p); ONEDAL_ASSERT(probabilities.get_dimension(0) == n); - auto fill_event = fill(q, probabilities, Float(1), {}); + auto fill_event = fill(q, probabilities, Float(1), deps); using oneapi::dal::backend::operator+; Float w0 = fit_intercept ? parameters.get_slice(0, 1).at_device(q, 0l) : 0; // Poor perfomance @@ -72,6 +73,58 @@ sycl::event compute_probabilities(sycl::queue& q, }); } +template +sycl::event compute_probabilities_sparse(sycl::queue& q, + const ndview& parameters, + sparse_matrix_handle& sp_handler, + ndview& probabilities, + bool fit_intercept, + const event_vector& deps) { + ONEDAL_ASSERT(probabilities.has_mutable_data()); + ONEDAL_PROFILER_TASK(compute_probabilities_sparse, q); + + const std::int64_t n = probabilities.get_dimension(0); + const std::int64_t p = parameters.get_dimension(0) - (fit_intercept ? 1 : 0); + + auto fill_event = fill(q, probabilities, Float(1), deps); + Float w0 = fit_intercept ? parameters.get_slice(0, 1).at_device(q, 0l) : 0; // Poor perfomance + ndview param_suf = fit_intercept ? parameters.get_slice(1, p + 1) : parameters; + + sycl::event gemv_event; + { + gemv_event = gemv(q, + transpose::nontrans, + sp_handler, + param_suf, + probabilities, + Float(1), + w0, + { fill_event }); + // To ensure sparse blas kernel stability + gemv_event.wait_and_throw(); + } + + auto* const prob_ptr = probabilities.get_mutable_data(); + + const Float bottom = sizeof(Float) == 4 ? 
1e-7 : 1e-15; + const Float top = Float(1.0) - bottom; + // Log Loss is undefined for p = 0 and p = 1 so probabilities are clipped into [eps, 1 - eps] + + return q.submit([&](sycl::handler& cgh) { + cgh.depends_on(gemv_event); + const auto range = make_range_1d(n); + cgh.parallel_for(range, [=](sycl::id<1> idx) { + prob_ptr[idx] = 1 / (1 + sycl::exp(-prob_ptr[idx])); + if (prob_ptr[idx] < bottom) { + prob_ptr[idx] = bottom; + } + if (prob_ptr[idx] > top) { + prob_ptr[idx] = top; + } + }); + }); +} + template sycl::event compute_logloss(sycl::queue& q, const ndview& labels, @@ -108,6 +161,92 @@ sycl::event compute_logloss(sycl::queue& q, return loss_event; } +template +sycl::event compute_logloss_with_der_sparse(sycl::queue& q, + sparse_matrix_handle& sp_handler, + const ndview& labels, + const ndview& probabilities, + ndview& out, + ndview& out_derivative, + bool fit_intercept, + const event_vector& deps) { + ONEDAL_PROFILER_TASK(compute_logloss_with_grad_sparse, q); + + const std::int64_t n = labels.get_dimension(0); + const std::int64_t p = out_derivative.get_dimension(0) - (fit_intercept ? 
1 : 0); + + ONEDAL_ASSERT(labels.has_data()); + ONEDAL_ASSERT(probabilities.has_data()); + ONEDAL_ASSERT(out.has_mutable_data()); + ONEDAL_ASSERT(out_derivative.has_mutable_data()); + ONEDAL_ASSERT(out.get_dimension(0) == 1); + ONEDAL_ASSERT(probabilities.get_dimension(0) == n); + + auto derivative_object = ndarray::empty(q, { n }, sycl::usm::alloc::device); + + auto* const der_obj_ptr = derivative_object.get_mutable_data(); + const auto* const proba_ptr = probabilities.get_data(); + const auto* const labels_ptr = labels.get_data(); + auto* const out_ptr = out.get_mutable_data(); + auto* const out_derivative_ptr = out_derivative.get_mutable_data(); + + auto loss_event = q.submit([&](sycl::handler& cgh) { + using oneapi::dal::backend::operator+; + using sycl::reduction; + + cgh.depends_on(deps); + auto sum_reduction_logloss = reduction(out_ptr, sycl::plus<>()); + const auto wg_size = propose_wg_size(q); + const auto range = make_multiple_nd_range_1d(n, wg_size); + + cgh.parallel_for(range, sum_reduction_logloss, [=](sycl::nd_item<1> id, auto& sum_logloss) { + auto idx = id.get_group_linear_id() * wg_size + id.get_local_linear_id(); + if (idx >= std::size_t(n)) + return; + const Float prob = proba_ptr[idx]; + const float label = labels_ptr[idx]; + sum_logloss += -label * sycl::log(prob) - (1 - label) * sycl::log(1 - prob); + der_obj_ptr[idx] = prob - label; + }); + }); + sycl::event derw0_event = sycl::event{}; + if (fit_intercept) { + derw0_event = q.submit([&](sycl::handler& cgh) { + using oneapi::dal::backend::operator+; + using sycl::reduction; + + cgh.depends_on(deps + loss_event); + auto sum_reduction_derivative_w0 = reduction(out_derivative_ptr, sycl::plus<>()); + const auto wg_size = propose_wg_size(q); + const auto range = make_multiple_nd_range_1d(n, wg_size); + + cgh.parallel_for(range, + sum_reduction_derivative_w0, + [=](sycl::nd_item<1> id, auto& sum_dw0) { + auto idx = + id.get_group_linear_id() * wg_size + id.get_local_linear_id(); + if (idx >= 
std::size_t(n)) + return; + sum_dw0 += der_obj_ptr[idx]; + }); + }); + } + + auto out_der_suffix = fit_intercept ? out_derivative.get_slice(1, p + 1) : out_derivative; + sycl::event gemv_event; + { + gemv_event = gemv(q, + transpose::trans, + sp_handler, + derivative_object, + out_der_suffix, + Float(1), + Float(0), + { loss_event, derw0_event }); + } + return gemv_event; +} + template sycl::event compute_logloss_with_der(sycl::queue& q, const ndview& data, @@ -469,452 +608,83 @@ sycl::event compute_raw_hessian(sycl::queue& q, return element_wise(q, kernel, probabilities, Float(0), out_hessian, deps); } -std::int64_t get_block_size(std::int64_t n, std::int64_t p) { - constexpr std::int64_t max_alloc_size = 1 << 21; - return p > max_alloc_size ? 512 : max_alloc_size / p; -} - -template -logloss_hessian_product::logloss_hessian_product(sycl::queue& q, - const table& data, - Float L2, - bool fit_intercept, - std::int64_t bsz) - : q_(q), - data_(data), - L2_(L2), - fit_intercept_(fit_intercept), - n_(data.get_row_count()), - p_(data.get_column_count()), - bsz_(bsz == -1 ? get_block_size(n_, p_) : bsz) { - raw_hessian_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - buffer_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - tmp_gpu_ = ndarray::empty(q_, { p_ + 1 }, sycl::usm::alloc::device); -} - -template -logloss_hessian_product::logloss_hessian_product(sycl::queue& q, - comm_t comm, - const table& data, - Float L2, - bool fit_intercept, - std::int64_t bsz) - : q_(q), - comm_(comm), - data_(data), - L2_(L2), - fit_intercept_(fit_intercept), - n_(data.get_row_count()), - p_(data.get_column_count()), - bsz_(bsz == -1 ? 
get_block_size(n_, p_) : bsz) { - raw_hessian_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - buffer_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - tmp_gpu_ = ndarray::empty(q_, { p_ + 1 }, sycl::usm::alloc::device); -} - -template -ndview& logloss_hessian_product::get_raw_hessian() { - return raw_hessian_; -} - -template -sycl::event logloss_hessian_product::compute_with_fit_intercept(const ndview& vec, - ndview& out, - const event_vector& deps) { - ONEDAL_PROFILER_TASK(compute_hessp_with_fit_intercept, q_); - auto* const tmp_ptr = tmp_gpu_.get_mutable_data(); - ONEDAL_ASSERT(vec.get_dimension(0) == p_ + 1); - ONEDAL_ASSERT(out.get_dimension(0) == p_ + 1); - auto fill_buffer_event = fill(q_, buffer_, Float(1), deps); - auto out_suf = out.get_slice(1, p_ + 1); - auto tmp_suf = tmp_gpu_.slice(1, p_); - auto out_bias = out.get_slice(0, 1); - auto vec_suf = vec.get_slice(1, p_ + 1); - ndview tmp_ndview = tmp_gpu_; - - sycl::event fill_out_event = fill(q_, out, Float(0), deps); - - Float v0 = vec.at_device(q_, 0, deps); - - const uniform_blocking blocking(n_, bsz_); - - row_accessor data_accessor(data_); - event_vector last_iter_deps = { fill_buffer_event, fill_out_event }; - - for (std::int64_t b = 0; b < blocking.get_block_count(); ++b) { - const auto last = blocking.get_block_end_index(b); - const auto first = blocking.get_block_start_index(b); - const auto length = last - first; - auto x_rows = data_accessor.pull(q_, { first, last }, sycl::usm::alloc::device); - auto x_nd = pr::ndarray::wrap(x_rows, { length, p_ }); - auto buffer_batch = buffer_.slice(first, length); - sycl::event event_xv = gemv(q_, x_nd, vec_suf, buffer_batch, Float(1), v0, last_iter_deps); - event_xv.wait_and_throw(); // Without this line gemv does not work correctly - - auto* const buffer_ptr = buffer_batch.get_mutable_data(); - const auto* const hess_ptr = raw_hessian_.get_data() + first; - - auto fill_tmp_event = fill(q_, tmp_gpu_, Float(0), last_iter_deps); - - 
sycl::event event_dxv = q_.submit([&](sycl::handler& cgh) { - cgh.depends_on({ event_xv, fill_tmp_event }); - const auto range = make_range_1d(length); - auto sum_reduction = sycl::reduction(tmp_ptr, sycl::plus<>()); - cgh.parallel_for(range, sum_reduction, [=](sycl::id<1> idx, auto& sum_v0) { - buffer_ptr[idx] = buffer_ptr[idx] * hess_ptr[idx]; - sum_v0 += buffer_ptr[idx]; - }); - }); - - sycl::event event_xtdxv = - gemv(q_, x_nd.t(), buffer_batch, tmp_suf, Float(1), Float(0), { event_dxv }); - event_xtdxv.wait_and_throw(); // Without this line gemv does not work correctly - - sycl::event update_result_e = - element_wise(q_, sycl::plus<>(), out, tmp_ndview, out, { event_xtdxv }); - - last_iter_deps = { update_result_e }; - } - - if (comm_.get_rank_count() > 1) { - sycl::event::wait_and_throw(last_iter_deps); - { - ONEDAL_PROFILER_TASK(hessp_allreduce); - auto hessp_arr = dal::array::wrap(q_, out.get_mutable_data(), out.get_count()); - comm_.allreduce(hessp_arr).wait(); - } - } - - const Float regularization_factor = L2_; - - const auto kernel_regularization = [=](const Float a, const Float param) { - return a + param * regularization_factor; - }; - - auto add_regularization_event = - element_wise(q_, kernel_regularization, out_suf, vec_suf, out_suf, last_iter_deps); - return add_regularization_event; -} - -template -sycl::event logloss_hessian_product::compute_without_fit_intercept( - const ndview& vec, - ndview& out, - const event_vector& deps) { - ONEDAL_PROFILER_TASK(compute_hessp_without_fit_intercept, q_); - ONEDAL_ASSERT(vec.get_dimension(0) == p_); - ONEDAL_ASSERT(out.get_dimension(0) == p_); - - sycl::event fill_out_event = fill(q_, out, Float(0), deps); - - const uniform_blocking blocking(n_, bsz_); - - ndview tmp_ndview = tmp_gpu_.slice(0, p_); - - row_accessor data_accessor(data_); - event_vector last_iter_deps = { fill_out_event }; - - for (std::int64_t b = 0; b < blocking.get_block_count(); ++b) { - const auto last = blocking.get_block_end_index(b); - 
const auto first = blocking.get_block_start_index(b); - const auto length = last - first; - ONEDAL_ASSERT(0l < length); - auto x_rows = data_accessor.pull(q_, { first, last }, sycl::usm::alloc::device); - auto x_nd = pr::ndarray::wrap(x_rows, { length, p_ }); - ndview buffer_batch = buffer_.slice(first, length); - ndview hess_batch = raw_hessian_.slice(first, length); - - sycl::event event_xv = - gemv(q_, x_nd, vec, buffer_batch, Float(1), Float(0), last_iter_deps); - event_xv.wait_and_throw(); // Without this line gemv does not work correctly - - constexpr sycl::multiplies kernel_mul{}; - auto event_dxv = - element_wise(q_, kernel_mul, buffer_batch, hess_batch, buffer_batch, { event_xv }); - - auto fill_tmp_event = fill(q_, tmp_ndview, Float(0), last_iter_deps); - - sycl::event event_xtdxv = gemv(q_, - x_nd.t(), - buffer_batch, - tmp_ndview, - Float(1), - Float(0), - { event_dxv, fill_tmp_event }); - event_xtdxv.wait_and_throw(); // Without this line gemv does not work correctly - - sycl::event update_grad_e = - element_wise(q_, sycl::plus<>(), out, tmp_ndview, out, { event_xtdxv }); - last_iter_deps = { update_grad_e }; - } - - if (comm_.get_rank_count() > 1) { - { - ONEDAL_PROFILER_TASK(hessp_allreduce); - auto hessp_arr = dal::array::wrap(q_, - out.get_mutable_data(), - out.get_count(), - last_iter_deps); - comm_.allreduce(hessp_arr).wait(); - } - } - - const Float regularization_factor = L2_; - - const auto kernel_regularization = [=](const Float a, const Float param) { - return a + param * regularization_factor; - }; - - auto add_regularization_event = - element_wise(q_, kernel_regularization, out, vec, out, last_iter_deps); - - return add_regularization_event; -} - -template -sycl::event logloss_hessian_product::operator()(const ndview& vec, - ndview& out, - const event_vector& deps) { - if (fit_intercept_) { - return compute_with_fit_intercept(vec, out, deps); - } - else { - return compute_without_fit_intercept(vec, out, deps); - } -} - -template 
-logloss_function::logloss_function(sycl::queue q, - const table& data, - const ndview& labels, - Float L2, - bool fit_intercept, - std::int64_t bsz) - : q_(q), - data_(data), - labels_(labels), - n_(data.get_row_count()), - p_(data.get_column_count()), - L2_(L2), - fit_intercept_(fit_intercept), - bsz_(bsz == -1 ? get_block_size(n_, p_) : bsz), - hessp_(q, data, L2, fit_intercept, bsz_), - dimension_(fit_intercept ? p_ + 1 : p_) { - ONEDAL_ASSERT(labels.get_dimension(0) == n_); - probabilities_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - gradient_ = ndarray::empty(q_, { dimension_ }, sycl::usm::alloc::device); - buffer_ = ndarray::empty(q_, { p_ + 2 }, sycl::usm::alloc::device); -} - -template -logloss_function::logloss_function(sycl::queue q, - comm_t comm, - const table& data, - const ndview& labels, - Float L2, - bool fit_intercept, - std::int64_t bsz) - : q_(q), - comm_(comm), - data_(data), - labels_(labels), - n_(data.get_row_count()), - p_(data.get_column_count()), - L2_(L2), - fit_intercept_(fit_intercept), - bsz_(bsz == -1 ? get_block_size(n_, p_) : bsz), - hessp_(q, comm, data, L2, fit_intercept, bsz_), - dimension_(fit_intercept ? 
p_ + 1 : p_) { - ONEDAL_ASSERT(labels.get_dimension(0) == n_); - probabilities_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - gradient_ = ndarray::empty(q_, { dimension_ }, sycl::usm::alloc::device); - buffer_ = ndarray::empty(q_, { p_ + 2 }, sycl::usm::alloc::device); -} - -template -event_vector logloss_function::update_x(const ndview& x, - bool need_hessp, - const event_vector& deps) { - ONEDAL_PROFILER_TASK(logloss_function_update_weights, q_); - using dal::backend::operator+; - value_ = 0; - auto fill_event = fill(q_, gradient_, Float(0), deps); - const uniform_blocking blocking(n_, bsz_); - - event_vector last_iter_e = { fill_event }; - - ndview grad_ndview = gradient_; - ndview grad_batch = buffer_.slice(1, dimension_); - ndview loss_batch = buffer_.slice(0, 1); - - ndview raw_hessian = hessp_.get_raw_hessian(); - - for (std::int64_t b = 0; b < blocking.get_block_count(); ++b) { - const auto first = blocking.get_block_start_index(b); - const auto last = blocking.get_block_end_index(b); - const std::int64_t cursize = last - first; - ONEDAL_ASSERT(0l < cursize); - - const auto data_rows = - row_accessor(data_).pull(q_, { first, last }, sycl::usm::alloc::device); - const auto data_batch = ndarray::wrap(data_rows, { cursize, p_ }); - const auto labels_batch = labels_.get_slice(first, first + cursize); - auto prob_batch = probabilities_.slice(first, cursize); - sycl::event prob_e = - compute_probabilities(q_, x, data_batch, prob_batch, fit_intercept_, last_iter_e); - - constexpr Float zero(0); - - auto fill_buffer_e = fill(q_, buffer_, zero, last_iter_e); - - sycl::event compute_e = compute_logloss_with_der(q_, - data_batch, - labels_batch, - prob_batch, - loss_batch, - grad_batch, - fit_intercept_, - { fill_buffer_e, prob_e }); - - sycl::event update_grad_e = - element_wise(q_, sycl::plus<>(), grad_ndview, grad_batch, grad_ndview, { compute_e }); - - value_ += loss_batch.at_device(q_, 0, { compute_e }); - - last_iter_e = { update_grad_e }; - - if 
(need_hessp) { - auto raw_hessian_batch = raw_hessian.get_slice(first, first + cursize); - auto hess_e = compute_raw_hessian(q_, prob_batch, raw_hessian_batch, { prob_e }); - last_iter_e = last_iter_e + hess_e; - } - - // TODO: Delete this wait_and_throw - // ensure that while event is running in the background data is not overwritten - wait_or_pass(last_iter_e).wait_and_throw(); - } - - if (comm_.get_rank_count() > 1) { - { - ONEDAL_PROFILER_TASK(gradient_allreduce); - auto gradient_arr = dal::array::wrap(q_, - gradient_.get_mutable_data(), - gradient_.get_count(), - last_iter_e); - comm_.allreduce(gradient_arr).wait(); - } - { - ONEDAL_PROFILER_TASK(value_allreduce); - comm_.allreduce(value_).wait(); - } - } - - if (L2_ > 0) { - auto fill_loss_e = fill(q_, loss_batch, Float(0), { last_iter_e }); - auto loss_ptr = loss_batch.get_mutable_data(); - auto grad_ptr = gradient_.get_mutable_data(); - auto w_ptr = x.get_data(); - Float regularization_factor = L2_; - - auto regularization_e = q_.submit([&](sycl::handler& cgh) { - cgh.depends_on(last_iter_e + fill_loss_e); - const auto range = make_range_1d(p_); - const std::int64_t st_id = fit_intercept_; - auto sum_reduction = sycl::reduction(loss_ptr, sycl::plus<>()); - cgh.parallel_for(range, sum_reduction, [=](sycl::id<1> idx, auto& sum_v0) { - const Float param = w_ptr[st_id + idx]; - grad_ptr[st_id + idx] += regularization_factor * param; - sum_v0 += regularization_factor * param * param / 2; - }); - }); - - value_ += loss_batch.at_device(q_, 0, { regularization_e }); - - last_iter_e = { regularization_e }; - } - - return last_iter_e; -} - -template -Float logloss_function::get_value() { - return value_; -} -template -ndview& logloss_function::get_gradient() { - return gradient_; -} - -template -base_matrix_operator& logloss_function::get_hessian_product() { - return hessp_; -} - -#define INSTANTIATE(F) \ - template sycl::event compute_probabilities(sycl::queue&, \ - const ndview&, \ - const ndview&, \ - ndview&, \ - 
bool, \ - const event_vector&); \ - template sycl::event compute_logloss(sycl::queue&, \ - const ndview&, \ - const ndview&, \ - ndview&, \ - bool, \ - const event_vector&); \ - template sycl::event compute_logloss_with_der(sycl::queue&, \ - const ndview&, \ - const ndview&, \ - const ndview&, \ - ndview&, \ - ndview&, \ - bool, \ - const event_vector&); \ - template sycl::event compute_derivative(sycl::queue&, \ - const ndview&, \ - const ndview&, \ - const ndview&, \ - ndview&, \ - bool, \ - const event_vector&); \ - template sycl::event add_regularization_loss(sycl::queue&, \ - const ndview&, \ - ndview&, \ - F, \ - F, \ - bool, \ - const event_vector&); \ - template sycl::event add_regularization_gradient_loss(sycl::queue&, \ - const ndview&, \ - ndview&, \ - ndview&, \ - F, \ - F, \ - bool, \ - const event_vector&); \ - template sycl::event add_regularization_gradient(sycl::queue&, \ - const ndview&, \ - ndview&, \ - F, \ - F, \ - bool, \ - const event_vector&); \ - template sycl::event compute_hessian(sycl::queue&, \ - const ndview&, \ - const ndview&, \ - const ndview&, \ - ndview&, \ - const F, \ - const F, \ - bool, \ - const event_vector&); \ - template sycl::event compute_raw_hessian(sycl::queue&, \ - const ndview&, \ - ndview&, \ - const event_vector&); \ - template class logloss_hessian_product; \ - template class logloss_function; +#define INSTANTIATE(F) \ + template sycl::event compute_probabilities(sycl::queue&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + bool, \ + const event_vector&); \ + template sycl::event compute_probabilities_sparse(sycl::queue&, \ + const ndview&, \ + sparse_matrix_handle&, \ + ndview&, \ + bool, \ + const event_vector&); \ + template sycl::event compute_logloss(sycl::queue&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + bool, \ + const event_vector&); \ + template sycl::event compute_logloss_with_der(sycl::queue&, \ + const ndview&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + ndview&, \ + 
bool, \ + const event_vector&); \ + template sycl::event compute_logloss_with_der_sparse(sycl::queue&, \ + sparse_matrix_handle&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + ndview&, \ + bool, \ + const event_vector&); \ + template sycl::event compute_derivative(sycl::queue&, \ + const ndview&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + bool, \ + const event_vector&); \ + template sycl::event add_regularization_loss(sycl::queue&, \ + const ndview&, \ + ndview&, \ + F, \ + F, \ + bool, \ + const event_vector&); \ + template sycl::event add_regularization_gradient_loss(sycl::queue&, \ + const ndview&, \ + ndview&, \ + ndview&, \ + F, \ + F, \ + bool, \ + const event_vector&); \ + template sycl::event add_regularization_gradient(sycl::queue&, \ + const ndview&, \ + ndview&, \ + F, \ + F, \ + bool, \ + const event_vector&); \ + template sycl::event compute_hessian(sycl::queue&, \ + const ndview&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + const F, \ + const F, \ + bool, \ + const event_vector&); \ + template sycl::event compute_raw_hessian(sycl::queue&, \ + const ndview&, \ + ndview&, \ + const event_vector&); INSTANTIATE(float); INSTANTIATE(double); diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp new file mode 100644 index 00000000000..b00a788a324 --- /dev/null +++ b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp @@ -0,0 +1,121 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/backend/primitives/utils.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include "oneapi/dal/backend/primitives/optimizers/common.hpp" +#include "oneapi/dal/table/common.hpp" +#include "oneapi/dal/backend/communicator.hpp" +#include "oneapi/dal/backend/primitives/sparse_blas/handle.hpp" + +namespace oneapi::dal::backend::primitives { + +using comm_t = backend::communicator; + +template +class logloss_hessian_product : public base_matrix_operator { + friend dal::detail::pimpl_accessor; + +public: + logloss_hessian_product(sycl::queue& q, + const table& data, + Float L2 = Float(0), + bool fit_intercept = true, + std::int64_t bsz = -1); + logloss_hessian_product(sycl::queue& q, + comm_t comm, + const table& data, + Float L2 = Float(0), + bool fit_intercept = true, + std::int64_t bsz = -1); + sycl::event operator()(const ndview& vec, + ndview& out, + const event_vector& deps) final; + ndview& get_raw_hessian(); + +private: + void reserve_memory(); + + sycl::event compute_with_fit_intercept(const ndview& vec, + ndview& out, + const event_vector& deps); + sycl::event compute_without_fit_intercept(const ndview& vec, + ndview& out, + const event_vector& deps); + sycl::queue& q_; + comm_t comm_; + const table data_; + dal::detail::pimpl sp_handle_; + ndarray raw_hessian_; + ndarray buffer_; + ndarray tmp_gpu_; + const std::int64_t n_; + const std::int64_t p_; + Float L2_; + bool fit_intercept_; + const std::int64_t bsz_; +}; + 
+template +class logloss_function : public base_function { + friend dal::detail::pimpl_accessor; + +public: + logloss_function(sycl::queue& queue, + const table& data, + const ndview& labels, + Float L2 = 0.0, + bool fit_intercept = true, + std::int64_t bsz = -1); + logloss_function(sycl::queue& queue, + comm_t comm, + const table& data, + const ndview& labels, + Float L2 = 0.0, + bool fit_intercept = true, + std::int64_t bsz = -1); + Float get_value() final; + ndview& get_gradient() final; + base_matrix_operator& get_hessian_product() final; + + event_vector update_x(const ndview& x, + bool need_hessp = false, + const event_vector& deps = {}) final; + +private: + void reserve_memory(); + + sycl::queue& q_; + comm_t comm_; + const table data_; + dal::detail::pimpl sp_handle_; + const ndview labels_; + ndarray probabilities_; + ndarray gradient_; + ndarray buffer_; + const std::int64_t n_; + const std::int64_t p_; + Float L2_; + bool fit_intercept_; + const std::int64_t bsz_; + const std::int64_t dimension_; + Float value_; + logloss_hessian_product hessp_; +}; + +} // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors_dpc.cpp b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors_dpc.cpp new file mode 100644 index 00000000000..4fc3f16270f --- /dev/null +++ b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors_dpc.cpp @@ -0,0 +1,536 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/backend/primitives/objective_function/logloss.hpp" +#include "oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp" +#include "oneapi/dal/backend/primitives/blas/gemv.hpp" +#include "oneapi/dal/backend/primitives/element_wise.hpp" +#include "oneapi/dal/detail/profiler.hpp" +#include "oneapi/dal/backend/primitives/sparse_blas.hpp" + +namespace oneapi::dal::backend::primitives { + +namespace pr = dal::backend::primitives; + +std::int64_t get_block_size(std::int64_t n, std::int64_t p) { + constexpr std::int64_t max_alloc_size = 1 << 21; + return p > max_alloc_size ? 512 : max_alloc_size / p; +} + +template +void logloss_hessian_product::reserve_memory() { + raw_hessian_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); + buffer_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); + tmp_gpu_ = ndarray::empty(q_, { p_ + 1 }, sycl::usm::alloc::device); + if (data_.get_kind() == dal::csr_table::kind()) { + sp_handle_.reset(new sparse_matrix_handle(q_)); + set_csr_data(q_, *sp_handle_, static_cast(data_)); + } +} + +template +logloss_hessian_product::logloss_hessian_product(sycl::queue& q, + const table& data, + Float L2, + bool fit_intercept, + std::int64_t bsz) + : q_(q), + data_(data), + n_(data.get_row_count()), + p_(data.get_column_count()), + L2_(L2), + fit_intercept_(fit_intercept), + bsz_(bsz == -1 ? 
get_block_size(n_, p_) : bsz) { + this->reserve_memory(); +} + +template +logloss_hessian_product::logloss_hessian_product(sycl::queue& q, + comm_t comm, + const table& data, + Float L2, + bool fit_intercept, + std::int64_t bsz) + : q_(q), + comm_(comm), + data_(data), + n_(data.get_row_count()), + p_(data.get_column_count()), + L2_(L2), + fit_intercept_(fit_intercept), + bsz_(bsz == -1 ? get_block_size(n_, p_) : bsz) { + this->reserve_memory(); +} + +template +ndview& logloss_hessian_product::get_raw_hessian() { + return raw_hessian_; +} + +template +sycl::event logloss_hessian_product::compute_with_fit_intercept(const ndview& vec, + ndview& out, + const event_vector& deps) { + ONEDAL_PROFILER_TASK(compute_hessp_with_fit_intercept, q_); + auto* const tmp_ptr = tmp_gpu_.get_mutable_data(); + ONEDAL_ASSERT(vec.get_dimension(0) == p_ + 1); + ONEDAL_ASSERT(out.get_dimension(0) == p_ + 1); + auto fill_buffer_event = fill(q_, buffer_, Float(1), deps); + auto out_suf = out.get_slice(1, p_ + 1); + auto tmp_suf = tmp_gpu_.slice(1, p_); + auto out_bias = out.get_slice(0, 1); + auto vec_suf = vec.get_slice(1, p_ + 1); + ndview tmp_ndview = tmp_gpu_; + + sycl::event fill_out_event = fill(q_, out, Float(0), deps); + + const Float v0 = vec.at_device(q_, 0, deps); + event_vector last_iter_deps = { fill_buffer_event, fill_out_event }; + + if (data_.get_kind() == dal::csr_table::kind()) { + const auto* const hess_ptr = raw_hessian_.get_data(); + auto* const out_ptr = out.get_mutable_data(); + auto* const buffer_ptr = buffer_.get_mutable_data(); + sycl::event event_xv; + { + event_xv = gemv(q_, + transpose::nontrans, + *sp_handle_, + vec_suf, + buffer_, + Float(1), + v0, + last_iter_deps); + // to ensure sparse blas kernel stability + event_xv.wait_and_throw(); + } + + sycl::event event_dxv = q_.submit([&](sycl::handler& cgh) { + cgh.depends_on({ event_xv }); + const auto range = make_range_1d(n_); + auto sum_reduction = sycl::reduction(out_ptr, sycl::plus<>()); + 
cgh.parallel_for(range, sum_reduction, [=](sycl::id<1> idx, auto& sum_v0) { + buffer_ptr[idx] = buffer_ptr[idx] * hess_ptr[idx]; + sum_v0 += buffer_ptr[idx]; + }); + }); + sycl::event event_xtdxv; + { + event_xtdxv = gemv(q_, + transpose::trans, + *sp_handle_, + buffer_, + out_suf, + Float(1), + Float(0), + { event_dxv }); + // To ensure sparse blas kernel stability + event_xtdxv.wait_and_throw(); + } + last_iter_deps = { event_xtdxv }; + } + else { + const uniform_blocking blocking(n_, bsz_); + row_accessor data_accessor(data_); + + for (std::int64_t b = 0; b < blocking.get_block_count(); ++b) { + const auto last = blocking.get_block_end_index(b); + const auto first = blocking.get_block_start_index(b); + const auto length = last - first; + auto x_rows = data_accessor.pull(q_, { first, last }, sycl::usm::alloc::device); + auto x_nd = pr::ndarray::wrap(x_rows, { length, p_ }); + auto buffer_batch = buffer_.slice(first, length); + sycl::event event_xv = + gemv(q_, x_nd, vec_suf, buffer_batch, Float(1), v0, last_iter_deps); + event_xv.wait_and_throw(); // Without this line gemv does not work correctly + + auto* const buffer_ptr = buffer_batch.get_mutable_data(); + const auto* const hess_ptr = raw_hessian_.get_data() + first; + + auto fill_tmp_event = fill(q_, tmp_gpu_, Float(0), last_iter_deps); + + sycl::event event_dxv = q_.submit([&](sycl::handler& cgh) { + cgh.depends_on({ event_xv, fill_tmp_event }); + const auto range = make_range_1d(length); + auto sum_reduction = sycl::reduction(tmp_ptr, sycl::plus<>()); + cgh.parallel_for(range, sum_reduction, [=](sycl::id<1> idx, auto& sum_v0) { + buffer_ptr[idx] = buffer_ptr[idx] * hess_ptr[idx]; + sum_v0 += buffer_ptr[idx]; + }); + }); + + sycl::event event_xtdxv = + gemv(q_, x_nd.t(), buffer_batch, tmp_suf, Float(1), Float(0), { event_dxv }); + event_xtdxv.wait_and_throw(); // Without this line gemv does not work correctly + + sycl::event update_result_e = + element_wise(q_, sycl::plus<>(), out, tmp_ndview, out, { 
event_xtdxv }); + + last_iter_deps = { update_result_e }; + } + } + + if (comm_.get_rank_count() > 1) { + sycl::event::wait_and_throw(last_iter_deps); + { + ONEDAL_PROFILER_TASK(hessp_allreduce); + auto hessp_arr = dal::array::wrap(q_, out.get_mutable_data(), out.get_count()); + comm_.allreduce(hessp_arr).wait(); + } + } + + const Float regularization_factor = L2_; + + const auto kernel_regularization = [=](const Float a, const Float param) { + return a + param * regularization_factor; + }; + + auto add_regularization_event = + element_wise(q_, kernel_regularization, out_suf, vec_suf, out_suf, last_iter_deps); + return add_regularization_event; +} + +template +sycl::event logloss_hessian_product::compute_without_fit_intercept( + const ndview& vec, + ndview& out, + const event_vector& deps) { + ONEDAL_PROFILER_TASK(compute_hessp_without_fit_intercept, q_); + ONEDAL_ASSERT(vec.get_dimension(0) == p_); + ONEDAL_ASSERT(out.get_dimension(0) == p_); + + ndview buffer_view_ = buffer_; + ndview hess_view_ = raw_hessian_; + + sycl::event fill_out_event = fill(q_, out, Float(0), deps); + + event_vector last_iter_deps = { fill_out_event }; + + if (data_.get_kind() == dal::csr_table::kind()) { + sycl::event event_xv = gemv(q_, + transpose::nontrans, + *sp_handle_, + vec, + buffer_, + Float(1), + Float(0), + last_iter_deps); + event_xv.wait_and_throw(); // Without this line gemv does not work correctly + + constexpr sycl::multiplies kernel_mul{}; + auto event_dxv = + element_wise(q_, kernel_mul, buffer_view_, hess_view_, buffer_view_, { event_xv }); + + sycl::event event_xtdxv = gemv(q_, + transpose::trans, + *sp_handle_, + buffer_, + out, + Float(1), + Float(0), + { event_dxv }); + event_xtdxv.wait_and_throw(); // Without this line gemv does not work correctly + + last_iter_deps = { event_xtdxv }; + } + else { + const uniform_blocking blocking(n_, bsz_); + ndview tmp_ndview = tmp_gpu_.slice(0, p_); + row_accessor data_accessor(data_); + + for (std::int64_t b = 0; b < 
blocking.get_block_count(); ++b) { + const auto last = blocking.get_block_end_index(b); + const auto first = blocking.get_block_start_index(b); + const auto length = last - first; + ONEDAL_ASSERT(0l < length); + auto x_rows = data_accessor.pull(q_, { first, last }, sycl::usm::alloc::device); + auto x_nd = pr::ndarray::wrap(x_rows, { length, p_ }); + ndview buffer_batch = buffer_.slice(first, length); + ndview hess_batch = raw_hessian_.slice(first, length); + + sycl::event event_xv = + gemv(q_, x_nd, vec, buffer_batch, Float(1), Float(0), last_iter_deps); + event_xv.wait_and_throw(); // Without this line gemv does not work correctly + + constexpr sycl::multiplies kernel_mul{}; + auto event_dxv = + element_wise(q_, kernel_mul, buffer_batch, hess_batch, buffer_batch, { event_xv }); + + auto fill_tmp_event = fill(q_, tmp_ndview, Float(0), last_iter_deps); + + sycl::event event_xtdxv = gemv(q_, + x_nd.t(), + buffer_batch, + tmp_ndview, + Float(1), + Float(0), + { event_dxv, fill_tmp_event }); + event_xtdxv.wait_and_throw(); // Without this line gemv does not work correctly + + sycl::event update_grad_e = + element_wise(q_, sycl::plus<>(), out, tmp_ndview, out, { event_xtdxv }); + last_iter_deps = { update_grad_e }; + } + } + + if (comm_.get_rank_count() > 1) { + { + ONEDAL_PROFILER_TASK(hessp_allreduce); + auto hessp_arr = dal::array::wrap(q_, + out.get_mutable_data(), + out.get_count(), + last_iter_deps); + comm_.allreduce(hessp_arr).wait(); + } + } + + const Float regularization_factor = L2_; + + const auto kernel_regularization = [=](const Float a, const Float param) { + return a + param * regularization_factor; + }; + + auto add_regularization_event = + element_wise(q_, kernel_regularization, out, vec, out, last_iter_deps); + + return add_regularization_event; +} + +template +sycl::event logloss_hessian_product::operator()(const ndview& vec, + ndview& out, + const event_vector& deps) { + if (fit_intercept_) { + return compute_with_fit_intercept(vec, out, deps); + } 
+ else { + return compute_without_fit_intercept(vec, out, deps); + } +} + +template +void logloss_function::reserve_memory() { + probabilities_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); + gradient_ = ndarray::empty(q_, { dimension_ }, sycl::usm::alloc::device); + buffer_ = ndarray::empty(q_, { p_ + 2 }, sycl::usm::alloc::device); + if (data_.get_kind() == dal::csr_table::kind()) { + sp_handle_.reset(new sparse_matrix_handle(q_)); + set_csr_data(q_, *sp_handle_, static_cast(data_)); + } +} + +template +logloss_function::logloss_function(sycl::queue& q, + const table& data, + const ndview& labels, + Float L2, + bool fit_intercept, + std::int64_t bsz) + : q_(q), + data_(data), + labels_(labels), + n_(data.get_row_count()), + p_(data.get_column_count()), + L2_(L2), + fit_intercept_(fit_intercept), + bsz_(bsz == -1l ? get_block_size(n_, p_) : bsz), + dimension_(fit_intercept ? p_ + 1 : p_), + hessp_(q, data, L2, fit_intercept, bsz_) { + ONEDAL_ASSERT(labels.get_dimension(0) == n_); + this->reserve_memory(); +} + +template +logloss_function::logloss_function(sycl::queue& q, + comm_t comm, + const table& data, + const ndview& labels, + Float L2, + bool fit_intercept, + std::int64_t bsz) + : q_(q), + comm_(comm), + data_(data), + labels_(labels), + n_(data.get_row_count()), + p_(data.get_column_count()), + L2_(L2), + fit_intercept_(fit_intercept), + bsz_(bsz == -1 ? get_block_size(n_, p_) : bsz), + dimension_(fit_intercept ? 
p_ + 1 : p_), + hessp_(q, comm, data, L2, fit_intercept, bsz_) { + ONEDAL_ASSERT(labels.get_dimension(0) == n_); + this->reserve_memory(); +} + +template +event_vector logloss_function::update_x(const ndview& x, + bool need_hessp, + const event_vector& deps) { + ONEDAL_PROFILER_TASK(logloss_function_update_weights, q_); + using dal::backend::operator+; + value_ = 0; + auto fill_event = fill(q_, gradient_, Float(0), deps); + ndview grad_ndview = gradient_; + ndview raw_hessian = hessp_.get_raw_hessian(); + ndview loss_batch = buffer_.slice(0, 1); + event_vector last_iter_e = { fill_event }; + constexpr Float zero(0); + + if (data_.get_kind() == dal::csr_table::kind()) { + auto prob_e = compute_probabilities_sparse(q_, + x, + *sp_handle_, + probabilities_, + fit_intercept_, + { fill_event }); + + auto fill_loss_e = fill(q_, loss_batch, zero, deps); + + sycl::event compute_e = compute_logloss_with_der_sparse(q_, + *sp_handle_, + labels_, + probabilities_, + loss_batch, + grad_ndview, + fit_intercept_, + { fill_loss_e, prob_e }); + + value_ = loss_batch.at_device(q_, 0, { compute_e }); + + last_iter_e = { compute_e }; + + if (need_hessp) { + auto hess_e = compute_raw_hessian(q_, probabilities_, raw_hessian, { prob_e }); + last_iter_e = last_iter_e + hess_e; + } + } + else { + const uniform_blocking blocking(n_, bsz_); + ndview grad_batch = buffer_.slice(1, dimension_); + + for (std::int64_t b = 0; b < blocking.get_block_count(); ++b) { + const auto first = blocking.get_block_start_index(b); + const auto last = blocking.get_block_end_index(b); + const std::int64_t cursize = last - first; + ONEDAL_ASSERT(0l < cursize); + + const auto data_rows = row_accessor(data_).pull(q_, + { first, last }, + sycl::usm::alloc::device); + const auto data_batch = ndarray::wrap(data_rows, { cursize, p_ }); + const auto labels_batch = labels_.get_slice(first, first + cursize); + auto prob_batch = probabilities_.slice(first, cursize); + sycl::event prob_e = + compute_probabilities(q_, x, 
data_batch, prob_batch, fit_intercept_, last_iter_e); + + auto fill_buffer_e = fill(q_, buffer_, zero, last_iter_e); + + sycl::event compute_e = compute_logloss_with_der(q_, + data_batch, + labels_batch, + prob_batch, + loss_batch, + grad_batch, + fit_intercept_, + { fill_buffer_e, prob_e }); + + sycl::event update_grad_e = element_wise(q_, + sycl::plus<>(), + grad_ndview, + grad_batch, + grad_ndview, + { compute_e }); + + value_ += loss_batch.at_device(q_, 0, { compute_e }); + + last_iter_e = { update_grad_e }; + + if (need_hessp) { + auto raw_hessian_batch = raw_hessian.get_slice(first, first + cursize); + auto hess_e = compute_raw_hessian(q_, prob_batch, raw_hessian_batch, { prob_e }); + last_iter_e = last_iter_e + hess_e; + } + + // TODO: Delete this wait_and_throw + // ensure that while event is running in the background data is not overwritten + wait_or_pass(last_iter_e).wait_and_throw(); + } + } + if (comm_.get_rank_count() > 1) { + { + ONEDAL_PROFILER_TASK(gradient_allreduce); + auto gradient_arr = dal::array::wrap(q_, + gradient_.get_mutable_data(), + gradient_.get_count(), + last_iter_e); + comm_.allreduce(gradient_arr).wait(); + } + { + ONEDAL_PROFILER_TASK(value_allreduce); + comm_.allreduce(value_).wait(); + } + } + + if (L2_ > 0) { + auto fill_loss_e = fill(q_, loss_batch, Float(0), { last_iter_e }); + auto loss_ptr = loss_batch.get_mutable_data(); + auto grad_ptr = gradient_.get_mutable_data(); + auto w_ptr = x.get_data(); + Float regularization_factor = L2_; + + auto regularization_e = q_.submit([&](sycl::handler& cgh) { + cgh.depends_on(last_iter_e + fill_loss_e); + const auto range = make_range_1d(p_); + const std::int64_t st_id = fit_intercept_; + auto sum_reduction = sycl::reduction(loss_ptr, sycl::plus<>()); + cgh.parallel_for(range, sum_reduction, [=](sycl::id<1> idx, auto& sum_v0) { + const Float param = w_ptr[st_id + idx]; + grad_ptr[st_id + idx] += regularization_factor * param; + sum_v0 += regularization_factor * param * param / 2; + }); + 
}); + + value_ += loss_batch.at_device(q_, 0, { regularization_e }); + + last_iter_e = { regularization_e }; + } + + return last_iter_e; +} + +template +Float logloss_function::get_value() { + return value_; +} +template +ndview& logloss_function::get_gradient() { + return gradient_; +} + +template +base_matrix_operator& logloss_function::get_hessian_product() { + return hessp_; +} + +#define INSTANTIATE_FUNCTORS(F) \ + template class logloss_hessian_product; \ + template class logloss_function; + +INSTANTIATE_FUNCTORS(float) +INSTANTIATE_FUNCTORS(double) + +} // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp index 39cae7db796..fabe919b34e 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,39 +17,39 @@ #include #include "oneapi/dal/backend/primitives/objective_function/logloss.hpp" +#include "oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp" #include "oneapi/dal/test/engine/common.hpp" #include "oneapi/dal/test/engine/fixtures.hpp" +#include "oneapi/dal/test/engine/csr_table_builder.hpp" #include "oneapi/dal/table/row_accessor.hpp" +#include "oneapi/dal/table/csr_accessor.hpp" #include "oneapi/dal/detail/debug.hpp" #include "oneapi/dal/backend/primitives/rng/rng_engine.hpp" namespace oneapi::dal::backend::primitives::test { -using oneapi::dal::detail::operator<<; - namespace te = dal::test::engine; -template -struct order_tag { - static constexpr ndorder value = order; +template +struct fit_intercept_tag { + static constexpr bool value = fit_intercept; }; -using c_order = order_tag; -using f_order = order_tag; +using use_fit_intercept = fit_intercept_tag; +using no_fit_intercept = fit_intercept_tag; + +using logloss_types = COMBINE_TYPES((float, double), (use_fit_intercept, no_fit_intercept)); + +#define IS_CLOSE(ftype, real, expected, rtol, atol) \ + REQUIRE(abs(real - expected) < atol); \ + REQUIRE(abs(real - expected) / std::max(std::abs(expected), (ftype)1.0) < rtol); template -class logloss_test : public te::float_algo_fixture { +class logloss_test : public te::float_algo_fixture> { public: - using float_t = Param; - - void check_val(const float_t real, - const float_t expected, - const float_t rtol, - const float_t atol) { - REQUIRE(abs(real - expected) < atol); - REQUIRE(abs(real - expected) / std::max(std::abs(expected), (float_t)1.0) < rtol); - } + using float_t = std::tuple_element_t<0, Param>; + bool fit_intercept_ = std::tuple_element_t<1, Param>::value; void generate_input(std::int64_t n = -1, std::int64_t p = -1) { if (n == -1 || p == -1) { @@ -60,7 +60,6 @@ class logloss_test : public te::float_algo_fixture { this->n_ = n; this->p_ = p; } - const auto dataframe = GENERATE_DATAFRAME(te::dataframe_builder{ n_, 
p_ }.fill_uniform(-0.5, 0.5)); const auto parameters = @@ -77,6 +76,32 @@ class logloss_test : public te::float_algo_fixture { } } + void generate_sparse_input(std::int64_t n = -1, std::int64_t p = -1) { + if (n == -1 || p == -1) { + this->n_ = GENERATE(7, 827, 13, 216); + this->p_ = GENERATE(4, 17, 41, 256); + } + else { + this->n_ = n; + this->p_ = p; + } + + auto builder = te::csr_table_builder(n_, p_, 0.3, sparse_indexing::zero_based); + this->data_ = builder.build_csr_table(this->get_policy()); + this->dense_data_ = builder.build_dense_table(); + + const auto parameters = + GENERATE_DATAFRAME(te::dataframe_builder{ 1, p_ + 1 }.fill_uniform(-1, 1)); + this->params_ = parameters.get_table(this->get_homogen_table_id()); + this->labels_ = + ndarray::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host); + std::srand(2007 + n_); + auto* const ptr_lab = this->labels_.get_mutable_data(); + for (std::int64_t i = 0; i < n_; ++i) { + ptr_lab[i] = std::rand() % 2; + } + } + void run_test(const float_t L1 = 0, const float_t L2 = 0, bool fit_intercept = true, @@ -93,6 +118,57 @@ class logloss_test : public te::float_algo_fixture { SUCCEED(); } + void run_sparse_test(const float_t L2 = 0, bool fit_intercept = true) { + constexpr float_t rtol = sizeof(float_t) > 4 ? 1e-6 : 5e-4; + constexpr float_t atol = sizeof(float_t) > 4 ? 1e-6 : 1e-1; + + REQUIRE(this->data_.get_kind() == csr_table::kind()); + + auto data_array = row_accessor{ this->dense_data_ }.pull(this->get_queue()); + auto data_host = ndarray::wrap(data_array.get_data(), { n_, p_ }); + + std::int64_t dim = fit_intercept ? 
this->p_ + 1 : this->p_; + auto param_array = row_accessor{ this->params_ }.pull(this->get_queue()); + auto params_host = ndarray::wrap(param_array.get_data(), { dim }); + auto params_gpu = params_host.to_device(this->get_queue()); + auto labels_gpu = this->labels_.to_device(this->get_queue()); + + float_t gth_logloss = + naive_logloss(data_host, params_host, this->labels_, float_t(0), L2, fit_intercept); + + auto gth_probs = + ndarray::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host); + naive_probabilities(data_host, params_host, this->labels_, gth_probs, fit_intercept); + + auto gth_gradient = + ndarray::empty(this->get_queue(), { dim }, sycl::usm::alloc::host); + naive_derivative(data_host, + gth_probs, + params_host, + this->labels_, + gth_gradient, + float_t(0), + L2, + fit_intercept); + + auto gth_hessian = ndarray::empty(this->get_queue(), + { p_ + 1, p_ + 1 }, + sycl::usm::alloc::host); + naive_hessian(data_host, gth_probs, gth_hessian, L2, fit_intercept); + + test_functors(data_, + labels_gpu, + params_gpu, + gth_gradient, + gth_hessian, + gth_logloss, + L2, + fit_intercept, + false, + rtol, + atol); + } + void test_gold_input(bool fit_intercept = true) { constexpr std::int64_t n = 5; constexpr std::int64_t p = 3; @@ -181,7 +257,7 @@ class logloss_test : public te::float_algo_fixture { logloss_reg_event.wait_and_throw(); const float_t val_logloss1 = out_logloss.to_host(this->get_queue(), {}).at(0); - check_val(val_logloss1, logloss, rtol, atol); + IS_CLOSE(float_t, val_logloss1, logloss, rtol, atol); auto fill_event = fill(this->get_queue(), out_logloss, float_t(0), {}); auto [out_derivative, out_der_e] = @@ -206,7 +282,7 @@ class logloss_test : public te::float_algo_fixture { auto out_derivative_host = out_derivative.to_host(this->get_queue()); const float_t val_logloss2 = out_logloss.to_host(this->get_queue(), {}).at(0); - check_val(val_logloss2, logloss, rtol, atol); + IS_CLOSE(float_t, val_logloss2, logloss, rtol, atol); auto [out_derivative2, 
out_der_e2] = ndarray::zeros(this->get_queue(), { dim }, sycl::usm::alloc::device); @@ -265,29 +341,17 @@ class logloss_test : public te::float_algo_fixture { atol); if (L1 == 0) { - std::int64_t bsz = -1; - if (batch_test) { - bsz = GENERATE(4, 8, 16, 20, 37, 512); - } - // logloss_function has different regularization so we need to multiply it by 2 to allign with other implementations - auto functor = logloss_function(this->get_queue(), - data_, - labels_gpu, - L2 * 2, - fit_intercept, - bsz); - auto set_point_event = functor.update_x(params_gpu, true, {}); - wait_or_pass(set_point_event).wait_and_throw(); - - check_val(logloss, functor.get_value(), rtol, atol); - auto grad_func = functor.get_gradient(); - auto grad_func_host = grad_func.to_host(this->get_queue()); - std::int64_t dim = fit_intercept ? p + 1 : p; - for (std::int64_t i = 0; i < dim; ++i) { - check_val(out_derivative_host.at(i), grad_func_host.at(i), rtol, atol); - } - base_matrix_operator& hessp = functor.get_hessian_product(); - test_hessian_product(hessian_host, hessp, fit_intercept, L2, rtol, atol); + test_functors(data_, + labels_gpu, + params_gpu, + out_derivative_host, + hessian_host, + logloss, + L2, + fit_intercept, + batch_test, + rtol, + atol); } } @@ -339,15 +403,16 @@ class logloss_test : public te::float_algo_fixture { return logloss; } - double naive_logloss(const ndview& data_host, - const ndview& params_host, - const ndview& labels_host, - const float_t L1, - const float_t L2, - bool fit_intercept) { + float_t naive_logloss(const ndview& data_host, + const ndview& params_host, + const ndview& labels_host, + float_t L1, + float_t L2, + bool fit_intercept) { const std::int64_t n = data_host.get_dimension(0); const std::int64_t p = data_host.get_dimension(1); + // We use double for gth computation to achieve better precision double logloss = 0; std::int64_t st = fit_intercept; for (std::int64_t i = 0; i < n; ++i) { @@ -358,7 +423,10 @@ class logloss_test : public te::float_algo_fixture 
{ if (fit_intercept) { pred += (double)params_host.at(0); } - logloss += std::log(1 + std::exp(-(2 * labels_host.at(i) - 1) * pred)); + // We cast argument to float_t to ensure correct clipping + double prob = clip_prob(float_t(1.0) / (float_t)(1 + std::exp(-pred))); + logloss -= + labels_host.at(i) * std::log(prob) + (1 - labels_host.at(i)) * std::log(1 - prob); } for (std::int64_t i = 0; i < p; ++i) { logloss += L1 * abs(params_host.at(i + st)); @@ -376,14 +444,14 @@ class logloss_test : public te::float_algo_fixture { const std::int64_t p = data.get_dimension(1); std::int64_t st_ind = fit_intercept; for (std::int64_t i = 0; i < n; ++i) { - float_t pred = 0; + double pred = 0; for (std::int64_t j = 0; j < p; ++j) { pred += params.at(j + st_ind) * data.at(i, j); } if (fit_intercept) { pred += params.at(0); } - out_prob.at(i) = float_t(1) / (1 + std::exp(-pred)); + out_prob.at(i) = clip_prob((double)1 / (1 + std::exp(-pred))); } } @@ -470,7 +538,7 @@ class logloss_test : public te::float_algo_fixture { fit_intercept); for (std::int64_t i = 0; i < dim; ++i) { - check_val(out_derivative.at(i), derivative.at(i), rtol, atol); + IS_CLOSE(float_t, out_derivative.at(i), derivative.at(i), rtol, atol); } } @@ -489,7 +557,7 @@ class logloss_test : public te::float_algo_fixture { for (std::int64_t i = 0; i <= p; ++i) { for (std::int64_t j = 0; j <= p; ++j) { - check_val(out_hessian.at(i, j), hessian.at(i, j), rtol, atol); + IS_CLOSE(float_t, out_hessian.at(i, j), hessian.at(i, j), rtol, atol); } } } @@ -519,20 +587,62 @@ class logloss_test : public te::float_algo_fixture { auto out_vector_host = out_vector.to_host(this->get_queue()); const std::int64_t st = fit_intercept ? 
0 : 1; + // We use double for gth computations to achieve better precision for (std::int64_t i = st; i < p + 1; ++i) { - float_t correct = 0; + double correct = 0; for (std::int64_t j = st; j < p + 1; ++j) { - correct += vec_host.at(j - st) * hessian_host.at(i, j); + correct += static_cast(vec_host.at(j - st)) * + static_cast(hessian_host.at(i, j)); } - check_val(out_vector_host.at(i - st), correct, rtol, atol); + IS_CLOSE(float_t, out_vector_host.at(i - st), (float_t)correct, rtol, atol); } } } + void test_functors(table& data, + ndview& labels_gpu, + ndview& params_gpu, + ndview& gth_grad, + ndview& gth_hessian, + float_t gth_logloss, + const float_t L2 = 0, + bool fit_intercept = true, + bool batch_test = false, + const float_t rtol = 1e-3, + const float_t atol = 1e-3) { + const std::int64_t p = gth_hessian.get_dimension(0) - 1; + std::int64_t bsz = -1; + if (batch_test) { + bsz = GENERATE(4, 8, 16, 20, 37, 512); + } + // logloss_function has different regularization so we need to multiply it by 2 to align with other implementations + + auto functor = logloss_function(this->get_queue(), + data, + labels_gpu, + L2 * 2, + fit_intercept, + bsz); + auto set_point_event = functor.update_x(params_gpu, true, {}); + wait_or_pass(set_point_event).wait_and_throw(); + + IS_CLOSE(float_t, gth_logloss, functor.get_value(), rtol, atol); + auto grad_func = functor.get_gradient(); + auto grad_func_host = grad_func.to_host(this->get_queue()); + std::int64_t dim = fit_intercept ? 
p + 1 : p; + + for (std::int64_t i = 0; i < dim; ++i) { + IS_CLOSE(float_t, gth_grad.at(i), grad_func_host.at(i), rtol, atol); + } + base_matrix_operator& hessp = functor.get_hessian_product(); + test_hessian_product(gth_hessian, hessp, fit_intercept, L2, rtol, atol); + } + protected: std::int64_t n_; std::int64_t p_; table data_; + table dense_data_; table params_; ndarray labels_; }; diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_dpc.cpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_dpc.cpp index b0ba99ac85a..1bd51dfc14b 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_dpc.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,86 +18,45 @@ namespace oneapi::dal::backend::primitives::test { -TEMPLATE_TEST_M(logloss_test, "gold input test - double", "[logloss]", double) { +TEMPLATE_LIST_TEST_M(logloss_test, "gold input test", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); - this->test_gold_input(); + this->test_gold_input(this->fit_intercept_); } -TEMPLATE_TEST_M(logloss_test, "gold input test - double - no fit_intercept", "[logloss]", double) { - SKIP_IF(this->not_float64_friendly()); - SKIP_IF(this->get_policy().is_cpu()); - this->test_gold_input(false); -} - -TEMPLATE_TEST_M(logloss_test, "gold input test - float", "[logloss]", float) { - SKIP_IF(this->get_policy().is_cpu()); - this->test_gold_input(); -} - -TEMPLATE_TEST_M(logloss_test, "gold input test - float - no fit intercept", "[logloss]", float) { - SKIP_IF(this->get_policy().is_cpu()); - this->test_gold_input(false); -} - -TEMPLATE_TEST_M(logloss_test, "test random input - double without L1", "[logloss]", double) { +TEMPLATE_LIST_TEST_M(logloss_test, "test random input without L1", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); this->generate_input(); - this->run_test(0.0, 1.3); + this->run_test(0.0f, 1.3f, this->fit_intercept_); } -TEMPLATE_TEST_M(logloss_test, - "test random input - double without L1 - no fit intercept", - "[logloss]", - double) { +TEMPLATE_LIST_TEST_M(logloss_test, "batch test", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); this->generate_input(); - this->run_test(0.0, 1.3, false); + this->run_test(0.0f, 1.3f, this->fit_intercept_, true); } -TEMPLATE_TEST_M(logloss_test, "batch test - double", "[logloss]", double) { +TEMPLATE_LIST_TEST_M(logloss_test, "test random input with L1", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); this->generate_input(); - 
this->run_test(0.0, 1.3, true, true); + this->run_test(0.4f, 1.3f, this->fit_intercept_); } -TEMPLATE_TEST_M(logloss_test, "batch test - double - no fit intercept", "[logloss]", double) { +TEMPLATE_LIST_TEST_M(logloss_test, "sparse data test without L2", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); - this->generate_input(); - this->run_test(0.0, 1.3, false, true); + this->generate_sparse_input(); + this->run_sparse_test(0.0f, this->fit_intercept_); } -TEMPLATE_TEST_M(logloss_test, "test random input - double with L1", "[logloss]", double) { +TEMPLATE_LIST_TEST_M(logloss_test, "sparse data test", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); - this->generate_input(); - this->run_test(0.4, 1.3); -} - -TEMPLATE_TEST_M(logloss_test, - "test random input - double with L1 -- no fit intercept", - "[logloss]", - double) { - SKIP_IF(this->not_float64_friendly()); - SKIP_IF(this->get_policy().is_cpu()); - this->generate_input(); - this->run_test(0.4, 1.3, false); -} - -TEMPLATE_TEST_M(logloss_test, "test random input - float", "[logloss]", float) { - SKIP_IF(this->get_policy().is_cpu()); - this->generate_input(); - this->run_test(0.4, 1.3); -} - -TEMPLATE_TEST_M(logloss_test, "test random input - float - no fit intercept", "[logloss]", float) { - SKIP_IF(this->get_policy().is_cpu()); - this->generate_input(); - this->run_test(0.4, 1.3, false); + this->generate_sparse_input(); + this->run_sparse_test(1.3f, this->fit_intercept_); } } // namespace oneapi::dal::backend::primitives::test diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_perf_dpc.cpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_perf_dpc.cpp index cdb3b7ddd5c..6d76198ed78 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_perf_dpc.cpp +++ 
b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_perf_dpc.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_spmd_dpc.cpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_spmd_dpc.cpp index 203e406736f..d86d583e354 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_spmd_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_spmd_dpc.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,15 +18,12 @@ namespace oneapi::dal::backend::primitives::test { -TEMPLATE_TEST_M(logloss_spmd_test, "spmd test - double", "[logloss spmd]", double) { - SKIP_IF(this->not_float64_friendly()); - SKIP_IF(this->get_policy().is_cpu()); - this->generate_input(); - this->run_spmd(-1, 1.0, true); - this->run_spmd(-1, 1.0, false); -} +using logloss_spmd_types = COMBINE_TYPES((float, double), (use_fit_intercept)); -TEMPLATE_TEST_M(logloss_spmd_test, "spmd test - float", "[logloss spmd]", float) { +TEMPLATE_LIST_TEST_M(logloss_spmd_test, + "spmd test - double", + "[logloss spmd]", + logloss_spmd_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); this->generate_input(); diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp index a9369ac619e..e902dd452e1 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,7 +26,9 @@ namespace de = dal::detail; template class logloss_spmd_test : public logloss_test { public: - using float_t = Param; + using float_t = std::tuple_element_t<0, Param>; + bool fit_intercept_ = std::tuple_element_t<1, Param>::value; + // using float_t = Param; using comm_t = te::thread_communicator; std::vector>> @@ -54,7 +56,7 @@ class logloss_spmd_test : public logloss_test { return result; } - std::vector> get_functors(comm_t comm, + std::vector> get_functors(comm_t& comm, std::int64_t thr_cnt, table data, ndview& labels, @@ -138,12 +140,12 @@ class logloss_spmd_test : public logloss_test { this->naive_probabilities(data_host, params_host, this->labels_, probs_gth, fit_intercept); - double logloss_gth = this->naive_logloss(data_host, - params_host, - this->labels_, - float_t(0.0), - float_t(L2), - fit_intercept); + float_t logloss_gth = this->naive_logloss(data_host, + params_host, + this->labels_, + float_t(0.0), + float_t(L2), + fit_intercept); this->naive_derivative(data_host, probs_gth, params_host, @@ -153,13 +155,13 @@ class logloss_spmd_test : public logloss_test { float_t(L2), fit_intercept); for (std::int64_t k = 0; k < thr_cnt; ++k) { - this->check_val(std::get<0>(results[k]), logloss_gth, rtol, atol); + IS_CLOSE(float_t, std::get<0>(results[k]), logloss_gth, rtol, atol); } for (int k = 0; k < thr_cnt; ++k) { auto grad_host = std::get<1>(results[k]).to_host(this->get_queue()); for (int j = 0; j < dim; ++j) { - this->check_val(grad_host.at(j), grad_gth.at(j), rtol, atol); + IS_CLOSE(float_t, grad_host.at(j), grad_gth.at(j), rtol, atol); } } @@ -179,7 +181,7 @@ class logloss_spmd_test : public logloss_test { for (std::int64_t k = 0; k < thr_cnt; ++k) { auto hessp_host = std::get<2>(results[k])[ij].to_host(this->get_queue()); for (std::int64_t j = 0; j < dim; ++j) { - this->check_val(hessp_host.at(j), hessp_gth.at(j), rtol, atol); + IS_CLOSE(float_t, hessp_host.at(j), hessp_gth.at(j), rtol, atol); } } } diff --git 
a/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp b/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp index 914bda60f1f..62dd0140e28 100644 --- a/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp @@ -25,7 +25,7 @@ #include "oneapi/dal/backend/primitives/rng/rng_engine.hpp" #include -#include "oneapi/dal/backend/primitives/objective_function/logloss.hpp" +#include "oneapi/dal/backend/primitives/objective_function.hpp" namespace oneapi::dal::backend::primitives::test { diff --git a/cpp/oneapi/dal/backend/primitives/reduction/functors.hpp b/cpp/oneapi/dal/backend/primitives/reduction/functors.hpp index 0da7340d14c..bccb6ade072 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/functors.hpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/functors.hpp @@ -51,6 +51,30 @@ struct square { } }; +template +struct isinfornan { + using tag_t = reduce_unary_op_tag; + bool operator()(const T& arg) const { +#ifdef ONEDAL_DATA_PARALLEL + return static_cast(sycl::isinf(arg) || sycl::isnan(arg)); +#else + return static_cast(isinf(arg) || (arg != arg)); +#endif + } +}; + +template +struct isinf { + using tag_t = reduce_unary_op_tag; + bool operator()(const T& arg) const { +#ifdef ONEDAL_DATA_PARALLEL + return static_cast(sycl::isinf(arg)); +#else + return static_cast(isinf(arg)); +#endif + } +}; + struct reduce_binary_op_tag; template @@ -99,6 +123,21 @@ struct min { } }; +template +struct logical_or { + using tag_t = reduce_binary_op_tag; + constexpr static inline T init_value = false; +#ifdef ONEDAL_DATA_PARALLEL + constexpr static inline sycl::logical_or native{}; +#else + constexpr static inline std::logical_or native{}; +}; +#endif + T operator()(const T& a, const T& b) const { + return native(a, b); + } +}; + template constexpr bool is_typed_sum_op_v = std::is_same_v, BinaryOp>; diff --git 
a/cpp/oneapi/dal/backend/primitives/reduction/reduction_1d_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_1d_dpc.cpp index 152121afdcb..b4d6d73c629 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_1d_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_1d_dpc.cpp @@ -88,6 +88,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT #undef INSTANTIATE_LAYOUT diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_dpc.cpp index 7e1251cb915..da2ac3f13e2 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_dpc.cpp @@ -215,6 +215,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT #undef INSTANTIATE_LAYOUT diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_atomic_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_atomic_dpc.cpp index 05d19df5f45..a373b161911 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_atomic_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_atomic_dpc.cpp @@ -196,6 +196,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT #undef INSTANTIATE diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_blocking_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_blocking_dpc.cpp index 4c5b93598d7..75414e4d98d 100644 --- 
a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_blocking_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_blocking_dpc.cpp @@ -116,6 +116,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT #undef INSTANTIATE diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_dpc.cpp index 2d4420a9232..8449211779c 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_dpc.cpp @@ -157,6 +157,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT #undef INSTANTIATE diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_local_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_local_dpc.cpp index 2e9efed192a..20a31d62ea6 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_local_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_local_dpc.cpp @@ -196,6 +196,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT #undef INSTANTIATE diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_wrapper_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_wrapper_dpc.cpp index b6d25e479c7..71fb1d21b5e 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_wrapper_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_wrapper_dpc.cpp @@ 
-136,6 +136,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT #undef INSTANTIATE diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_blocking_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_blocking_dpc.cpp index 96cd9d05da9..25f2befe449 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_blocking_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_blocking_dpc.cpp @@ -117,6 +117,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT } // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_narrow_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_narrow_dpc.cpp index d99775b9ad4..8e39c040400 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_narrow_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_narrow_dpc.cpp @@ -159,6 +159,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT #undef INSTANTIATE diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wide_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wide_dpc.cpp index f6e35aaa896..39b106357a6 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wide_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wide_dpc.cpp @@ -156,6 +156,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) 
+INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT } // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wrapper_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wrapper_dpc.cpp index 057899731f5..e0e8c7f40d9 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wrapper_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wrapper_dpc.cpp @@ -132,6 +132,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT #undef INSTANTIATE diff --git a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_1d_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_1d_dpc.cpp index f2d645ce742..1e9b76e7548 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_1d_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_1d_dpc.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include "oneapi/dal/test/engine/common.hpp" @@ -34,6 +35,11 @@ namespace pr = oneapi::dal::backend::primitives; using reduction_types = std::tuple, square>, std::tuple, square>>; +using finiteness_types = std::tuple, identity>, + std::tuple, identity>, + std::tuple, isinfornan>, + std::tuple, isinfornan>>; + template class reduction_test_random_1d : public te::float_algo_fixture> { public: @@ -87,7 +93,7 @@ class reduction_test_random_1d : public te::float_algo_fixturetest_1d_reduce(); } +template +class infinite_sum_test_random_1d : public reduction_test_random_1d { +public: + using float_t = std::tuple_element_t<0, Param>; + using binary_t = std::tuple_element_t<1, Param>; + using unary_t = std::tuple_element_t<2, Param>; + + void generate(bool maxval) { + this->n_ = GENERATE(17, 999, 1, 5, 1001); + CAPTURE(this->n_, maxval); + generate_input(maxval); + } + + 
void generate_input(bool maxval) { + double mininp = 0.9 * (double)maxval * std::numeric_limits::max() - 1.0f; + double maxinp = (double)maxval * std::numeric_limits::max(); + const auto train_dataframe = + GENERATE_DATAFRAME(te::dataframe_builder{ 1, this->n_ }.fill_uniform(mininp, maxinp)); + this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id()); + } +}; + +TEMPLATE_LIST_TEST_M(infinite_sum_test_random_1d, + "Randomly filled array with infinite sum", + "[reduction][1d][small]", + finiteness_types) { + SKIP_IF(this->not_float64_friendly()); + + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + this->test_1d_reduce(); +} + +template +class single_infinite_test_random_1d : public reduction_test_random_1d { +public: + using float_t = std::tuple_element_t<0, Param>; + using binary_t = std::tuple_element_t<1, Param>; + using unary_t = std::tuple_element_t<2, Param>; + + void generate(bool infval) { + this->n_ = GENERATE(17, 999, 1, 5, 1001); + CAPTURE(this->n_, infval); + generate_input(infval); + } + + void generate_input(bool infval) { + const auto train_dataframe = + GENERATE_DATAFRAME(te::dataframe_builder{ 1, this->n_ }.fill_uniform(-0.2, 0.5)); + auto train_data = train_dataframe.get_array().get_mutable_data(); + // train_data is a float ndarray + train_data[5] = infval ? 
std::numeric_limits::infinity() + : std::numeric_limits::quiet_NaN(); + this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id()); + } +}; + +TEMPLATE_LIST_TEST_M(single_infinite_test_random_1d, + "Randomly filled array with a single inf or nan", + "[reduction][1d][small]", + finiteness_types) { + SKIP_IF(this->not_float64_friendly()); + + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + this->test_1d_reduce(); +} + } // namespace oneapi::dal::backend::primitives::test diff --git a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp index a233517ef78..cc0c26ceee0 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include "oneapi/dal/test/engine/common.hpp" @@ -34,6 +35,11 @@ namespace pr = oneapi::dal::backend::primitives; using reduction_types = std::tuple, square>, std::tuple, square>>; +using finiteness_types = std::tuple, identity>, + std::tuple, identity>, + std::tuple, isinfornan>, + std::tuple, isinfornan>>; + template class reduction_test_random : public te::float_algo_fixture> { public: @@ -262,7 +268,7 @@ class reduction_test_random : public te::float_algo_fixturetest_cm_rw_reduce(); } +template +class infinite_sum_test_random : public reduction_test_random { +public: + using float_t = std::tuple_element_t<0, Param>; + using binary_t = std::tuple_element_t<1, Param>; + using unary_t = std::tuple_element_t<2, Param>; + + void generate(bool maxval) { + this->height_ = GENERATE(17, 999, 1, 5, 1001); + this->width_ = GENERATE(7, 707, 1, 251, 5); + this->override_init_ = true; // poorly named variable + CAPTURE(this->override_init_, this->width_, this->height_, maxval); + generate_input(maxval); + this->generate_offset(); + } + + void generate_input(bool maxval) { + float mininp = 
0.9 * (float)maxval * std::numeric_limits::max() - 1.0f; + float maxinp = (float)maxval * std::numeric_limits::max(); + const auto train_dataframe = GENERATE_DATAFRAME( + te::dataframe_builder{ this->height_, this->width_ }.fill_uniform(mininp, maxinp)); + this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id()); + } +}; + +TEMPLATE_LIST_TEST_M(infinite_sum_test_random, + "Randomly filled reduction with infinte sum", + "[reduction][rm][small]", + finiteness_types) { + // Temporary workaround: skip tests on architectures that do not support native float64 + SKIP_IF(!this->get_policy().has_native_float64()); + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + SECTION("Reduce Row-Major by Rows") { + this->test_rm_rw_reduce(); + } + SECTION("Reduce Row-Major by Cols") { + this->test_rm_cw_reduce(); + } + SECTION("Reduce Col-Major by Rows") { + this->test_cm_cw_reduce(); + } + SECTION("Reduce Row-Major by Cols") { + this->test_cm_rw_reduce(); + } +} + +template +class single_infinite_test_random : public reduction_test_random { +public: + using float_t = std::tuple_element_t<0, Param>; + using binary_t = std::tuple_element_t<1, Param>; + using unary_t = std::tuple_element_t<2, Param>; + + void generate(bool infval) { + this->height_ = GENERATE(17, 999, 1, 5, 1001); + this->width_ = GENERATE(7, 707, 1, 251, 5); + this->override_init_ = true; // poorly named variable + CAPTURE(this->override_init_, this->width_, this->height_, infval); + generate_input(infval); + this->generate_offset(); + } + + void generate_input(bool infval) { + const auto train_dataframe = GENERATE_DATAFRAME( + te::dataframe_builder{ this->height_, this->width_ }.fill_uniform(-3.0, 4.0)); + auto train_data = train_dataframe.get_array().get_mutable_data(); + + // train_data is a float array + train_data[5] = infval ? 
std::numeric_limits::infinity() + : std::numeric_limits::quiet_NaN(); + this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id()); + } +}; + +TEMPLATE_LIST_TEST_M(single_infinite_test_random, + "Randomly filled reduction with single inf or nan", + "[reduction][rm][small]", + finiteness_types) { + // Temporary workaround: skip tests on architectures that do not support native float64 + SKIP_IF(!this->get_policy().has_native_float64()); + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + SECTION("Reduce Row-Major by Rows") { + this->test_rm_rw_reduce(); + } + SECTION("Reduce Row-Major by Cols") { + this->test_rm_cw_reduce(); + } + SECTION("Reduce Col-Major by Rows") { + this->test_cm_cw_reduce(); + } + SECTION("Reduce Col-Major by Cols") { + this->test_cm_rw_reduce(); + } +} + } // namespace oneapi::dal::backend::primitives::test diff --git a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp index 3d7ca3ce8f3..f6e719d8fdb 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include "oneapi/dal/test/engine/common.hpp" @@ -45,6 +46,13 @@ using reduction_types = std::tuple, identity std::tuple, identity>, std::tuple, identity>>; +using finiteness_types = std::tuple, identity>, + std::tuple, identity>, + std::tuple, isinfornan>, + std::tuple, isinf>, + std::tuple, isinfornan>, + std::tuple, isinf>>; + template class reduction_rm_test_random : public te::float_algo_fixture> { public: @@ -98,7 +106,8 @@ class reduction_rm_test_random : public te::float_algo_fixture::full(width_, binary_.init_value); auto* res_ptr = res.get_mutable_data(); for (std::int64_t j = 0; j < height_; ++j) { - const auto row_acc = row_accessor{ input_table_ }.pull({ j, j + 1 }); + 
//input_table_ is a float ndarray + const auto row_acc = row_accessor{ input_table_ }.pull({ j, j + 1 }); for (std::int64_t i = 0; i < width_; ++i) { const auto val = row_acc[i]; res_ptr[i] = binary_(res_ptr[i], unary_(val)); @@ -111,7 +120,7 @@ class reduction_rm_test_random : public te::float_algo_fixture::full(height_, binary_.init_value); auto* res_ptr = res.get_mutable_data(); for (std::int64_t j = 0; j < height_; ++j) { - const auto row_acc = row_accessor{ input_table_ }.pull({ j, j + 1 }); + const auto row_acc = row_accessor{ input_table_ }.pull({ j, j + 1 }); for (std::int64_t i = 0; i < width_; ++i) { const auto val = row_acc[i]; res_ptr[j] = binary_(res_ptr[j], unary_(val)); @@ -127,7 +136,7 @@ class reduction_rm_test_random : public te::float_algo_fixturetest_raw_cw_reduce_wrapper(); } +template +class infinite_sum_rm_test_random : public reduction_rm_test_random { +public: + using float_t = std::tuple_element_t<0, Param>; + using binary_t = std::tuple_element_t<1, Param>; + using unary_t = std::tuple_element_t<2, Param>; + + void generate(bool maxval) { + this->width_ = GENERATE(7, 707, 5); + this->stride_ = GENERATE(707, 812, 1024); + this->height_ = GENERATE(17, 999, 1, 1001); + CAPTURE(this->width_, this->stride_, this->height_, maxval); + generate_input(maxval); + } + + void generate_input(bool maxval) { + float mininp = 0.9 * (float)maxval * std::numeric_limits::max() - 1.0f; + float maxinp = (float)maxval * std::numeric_limits::max(); + const auto train_dataframe = GENERATE_DATAFRAME( + te::dataframe_builder{ this->height_, this->stride_ }.fill_uniform(mininp, maxinp)); + this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id()); + } +}; + +TEMPLATE_LIST_TEST_M(infinite_sum_rm_test_random, + "Randomly filled Row-Major Row-Wise reduction with infinte sum", + "[reduction][rm][small]", + finiteness_types) { + // Temporary workaround: skip tests on architectures that do not support native float64 + 
SKIP_IF(!this->get_policy().has_native_float64()); + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + SKIP_IF(this->should_be_skipped()); + this->test_raw_rw_reduce_wide(); + this->test_raw_rw_reduce_narrow(); + this->test_raw_rw_reduce_wrapper(); +} + +TEMPLATE_LIST_TEST_M(infinite_sum_rm_test_random, + "Randomly filled Row-Major Col-Wise reduction with infinte sum", + "[reduction][rm][small]", + finiteness_types) { + // Temporary workaround: skip tests on architectures that do not support native float64 + SKIP_IF(!this->get_policy().has_native_float64()); + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + SKIP_IF(this->should_be_skipped()); + this->test_raw_cw_reduce_naive(); + this->test_raw_cw_reduce_atomic(); + this->test_raw_cw_reduce_wrapper(); +} + +template +class single_infinite_rm_test_random : public reduction_rm_test_random { +public: + using float_t = std::tuple_element_t<0, Param>; + using binary_t = std::tuple_element_t<1, Param>; + using unary_t = std::tuple_element_t<2, Param>; + void generate(bool infval) { + this->width_ = GENERATE(7, 707, 5); + this->stride_ = GENERATE(707, 812, 1024); + this->height_ = GENERATE(17, 999, 1, 1001); + CAPTURE(this->width_, this->stride_, this->height_, infval); + generate_input(infval); + } + + void generate_input(bool infval) { + float infinp = infval ? 
std::numeric_limits::infinity() + : std::numeric_limits::quiet_NaN(); + const auto train_dataframe = GENERATE_DATAFRAME( + te::dataframe_builder{ this->height_, this->stride_ }.fill_uniform(-0.2, 0.5)); + + // train_data is a float ndarray + auto train_data = train_dataframe.get_array().get_mutable_data(); + train_data[0] = infinp; + this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id()); + // no inf added to see what will happen in testing + } +}; + +TEMPLATE_LIST_TEST_M(single_infinite_rm_test_random, + "Randomly filled Row-Major Row-Wise reduction with single inf or nan", + "[reduction][rm][small]", + finiteness_types) { + // Temporary workaround: skip tests on architectures that do not support native float64 + SKIP_IF(!this->get_policy().has_native_float64()); + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + SKIP_IF(this->should_be_skipped()); + this->test_raw_rw_reduce_wide(); + this->test_raw_rw_reduce_narrow(); + this->test_raw_rw_reduce_wrapper(); +} + +TEMPLATE_LIST_TEST_M(single_infinite_rm_test_random, + "Randomly filled Row-Major Col-Wise reduction with single inf or nan", + "[reduction][rm][small]", + finiteness_types) { + // Temporary workaround: skip tests on architectures that do not support native float64 + SKIP_IF(!this->get_policy().has_native_float64()); + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + SKIP_IF(this->should_be_skipped()); + SECTION("Reduce Naive") { + this->test_raw_cw_reduce_naive(); + } + // Investigation into atomic reduction discrepancies ongoing + //SECTION("Reduce Atomic") { + // this->test_raw_cw_reduce_atomic(); + //} + SECTION("Reduce Wrapper") { + this->test_raw_cw_reduce_wrapper(); + } +} + } // namespace oneapi::dal::backend::primitives::test diff --git a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_uniform_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_uniform_dpc.cpp index 69e1c7d2879..330fe1c46b4 100644 
--- a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_uniform_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_uniform_dpc.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include "oneapi/dal/test/engine/common.hpp" @@ -39,7 +40,11 @@ using reduction_types = std::tuple, identity std::tuple, abs>, std::tuple, identity>, std::tuple, square>, - std::tuple, abs>>; + std::tuple, abs>, + std::tuple, isinfornan>, + std::tuple, isinf>, + std::tuple, isinfornan>, + std::tuple, isinf>>; template class reduction_rm_test_uniform : public te::float_algo_fixture> { @@ -119,6 +124,14 @@ class reduction_rm_test_uniform : public te::float_algo_fixture, binary_t>) { + if (std::is_same_v, unary_t>) { + return static_cast(std::isinf(arg_)); + } + if (std::is_same_v, unary_t>) { + return static_cast(std::isinf(arg_) || std::isnan(arg_)); + } + } ONEDAL_ASSERT(false); return 0; } @@ -157,6 +170,14 @@ class reduction_rm_test_uniform : public te::float_algo_fixture, binary_t>) { + if (std::is_same_v, unary_t>) { + return static_cast(std::isinf(arg_)); + } + if (std::is_same_v, unary_t>) { + return static_cast(std::isinf(arg_) || std::isnan(arg_)); + } + } ONEDAL_ASSERT(false); return 0; } diff --git a/cpp/oneapi/dal/backend/primitives/sparse_blas/handle.hpp b/cpp/oneapi/dal/backend/primitives/sparse_blas/handle.hpp index 69ede40e9ff..86de2e8a4af 100644 --- a/cpp/oneapi/dal/backend/primitives/sparse_blas/handle.hpp +++ b/cpp/oneapi/dal/backend/primitives/sparse_blas/handle.hpp @@ -27,13 +27,13 @@ namespace oneapi::dal::backend::primitives { /// Handle that is used to store the information about the data in starse format class sparse_matrix_handle { - friend detail::pimpl_accessor; + friend dal::detail::pimpl_accessor; public: sparse_matrix_handle(sycl::queue& queue); private: - detail::pimpl impl_; + dal::detail::pimpl impl_; }; #endif // ONEDAL_DATA_PARALLEL diff --git a/cpp/oneapi/dal/backend/primitives/sparse_blas/test/handle_dpc.cpp 
b/cpp/oneapi/dal/backend/primitives/sparse_blas/test/handle_dpc.cpp new file mode 100644 index 00000000000..f9f32a54e89 --- /dev/null +++ b/cpp/oneapi/dal/backend/primitives/sparse_blas/test/handle_dpc.cpp @@ -0,0 +1,36 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/backend/primitives/sparse_blas.hpp" +#include "oneapi/dal/test/engine/common.hpp" + +namespace oneapi::dal::backend::primitives::test { + +TEST("can construct sparse matrix handle") { + DECLARE_TEST_POLICY(policy); + // DPC++ Sparse BLAS from micro MKL libs is not supported on CPU + SKIP_IF(policy.is_cpu()); + + try { + sparse_matrix_handle h(policy.get_queue()); + } + catch (...) 
{ + REQUIRE(false); + } + SUCCEED(); +} + +} // namespace oneapi::dal::backend::primitives::test diff --git a/cpp/oneapi/dal/detail/cpu.cpp b/cpp/oneapi/dal/detail/cpu.cpp index 8aa1b3ce531..1369da2d231 100644 --- a/cpp/oneapi/dal/detail/cpu.cpp +++ b/cpp/oneapi/dal/detail/cpu.cpp @@ -20,7 +20,8 @@ namespace oneapi::dal::detail { namespace v1 { -inline constexpr cpu_extension from_daal_cpu_type(daal::CpuType cpu) { +cpu_extension from_daal_cpu_type(int cpu_type) { + daal::CpuType cpu = static_cast(cpu_type); switch (cpu) { #if defined(TARGET_X86_64) case daal::sse2: return cpu_extension::sse2; @@ -35,6 +36,7 @@ inline constexpr cpu_extension from_daal_cpu_type(daal::CpuType cpu) { } return cpu_extension::none; } + cpu_extension detect_top_cpu_extension() { if (!__daal_serv_cpu_extensions_available()) { #if defined(TARGET_X86_64) @@ -45,7 +47,7 @@ cpu_extension detect_top_cpu_extension() { return detail::cpu_extension::rv64; #endif } - const auto daal_cpu = (daal::CpuType)__daal_serv_cpu_detect(0); + const auto daal_cpu = __daal_serv_cpu_detect(0); return from_daal_cpu_type(daal_cpu); } diff --git a/cpp/oneapi/dal/detail/cpu.hpp b/cpp/oneapi/dal/detail/cpu.hpp index e2bae4a3566..7b6e282e006 100644 --- a/cpp/oneapi/dal/detail/cpu.hpp +++ b/cpp/oneapi/dal/detail/cpu.hpp @@ -50,6 +50,7 @@ enum class cpu_extension : uint64_t { #endif }; +cpu_extension from_daal_cpu_type(int); cpu_extension detect_top_cpu_extension(); } // namespace v1 diff --git a/cpp/oneapi/dal/detail/cpu_info.cpp b/cpp/oneapi/dal/detail/cpu_info.cpp index 67ae32e9cac..78e879e1920 100644 --- a/cpp/oneapi/dal/detail/cpu_info.cpp +++ b/cpp/oneapi/dal/detail/cpu_info.cpp @@ -31,21 +31,21 @@ namespace v1 { cpu_info::cpu_info() { #if defined(TARGET_X86_64) - impl_ = detail::pimpl(new cpu_info_x86()); + impl_ = detail::pimpl(std::make_unique()); #elif defined(TARGET_ARM) - impl_ = detail::pimpl(new cpu_info_arm()); + impl_ = detail::pimpl(std::make_unique()); #elif defined(TARGET_RISCV64) - impl_ = 
detail::pimpl(new cpu_info_riscv64()); + impl_ = detail::pimpl(std::make_unique()); #endif } cpu_info::cpu_info(const cpu_extension cpu_extension_) { #if defined(TARGET_X86_64) - impl_ = detail::pimpl(new cpu_info_x86(cpu_extension_)); + impl_ = detail::pimpl(std::make_unique(cpu_extension_)); #elif defined(TARGET_ARM) - impl_ = detail::pimpl(new cpu_info_arm(cpu_extension_)); + impl_ = detail::pimpl(std::make_unique(cpu_extension_)); #elif defined(TARGET_RISCV64) - impl_ = detail::pimpl(new cpu_info_riscv64(cpu_extension_)); + impl_ = detail::pimpl(std::make_unique(cpu_extension_)); #endif } diff --git a/cpp/oneapi/dal/detail/cpu_info_impl.cpp b/cpp/oneapi/dal/detail/cpu_info_impl.cpp index 9d5194e2f3b..8af80578a0c 100644 --- a/cpp/oneapi/dal/detail/cpu_info_impl.cpp +++ b/cpp/oneapi/dal/detail/cpu_info_impl.cpp @@ -15,6 +15,7 @@ *******************************************************************************/ #include "oneapi/dal/detail/cpu_info_impl.hpp" +#include "oneapi/dal/detail/error_messages.hpp" #include @@ -52,30 +53,38 @@ std::string to_string(cpu_extension extension) { } cpu_vendor cpu_info_impl::get_cpu_vendor() const { - return std::any_cast(info_.find("vendor")->second); + const auto entry = info_.find("vendor"); + if (entry == info_.end()) { + throw invalid_argument{ error_messages::invalid_key() }; + } + return std::any_cast(entry->second); } cpu_extension cpu_info_impl::get_top_cpu_extension() const { - return std::any_cast(info_.find("top_cpu_extension")->second); + const auto entry = info_.find("top_cpu_extension"); + if (entry == info_.end()) { + throw invalid_argument{ error_messages::invalid_key() }; + } + return std::any_cast(entry->second); } std::string cpu_info_impl::dump() const { - std::stringstream ss; - for (auto it = info_.begin(); it != info_.end(); ++it) { - ss << it->first << " : "; - print_any(it->second, ss); + std::ostringstream ss; + for (auto const& [name, value] : info_) { + ss << name << " : "; + print_any(value, ss); ss << 
"; "; } return std::move(ss).str(); } template -void cpu_info_impl::print(const std::any& value, std::stringstream& ss) const { +void cpu_info_impl::print(const std::any& value, std::ostringstream& ss) const { T typed_value = std::any_cast(value); ss << to_string(typed_value); } -void cpu_info_impl::print_any(const std::any& value, std::stringstream& ss) const { +void cpu_info_impl::print_any(const std::any& value, std::ostringstream& ss) const { const std::type_info& ti = value.type(); if (ti == typeid(cpu_extension)) { print(value, ss); @@ -83,6 +92,9 @@ void cpu_info_impl::print_any(const std::any& value, std::stringstream& ss) cons else if (ti == typeid(cpu_vendor)) { print(value, ss); } + else { + throw unimplemented{ dal::detail::error_messages::unsupported_data_type() }; + } } } // namespace v1 diff --git a/cpp/oneapi/dal/detail/cpu_info_impl.hpp b/cpp/oneapi/dal/detail/cpu_info_impl.hpp index 2f5c7ea711f..76b395b316c 100644 --- a/cpp/oneapi/dal/detail/cpu_info_impl.hpp +++ b/cpp/oneapi/dal/detail/cpu_info_impl.hpp @@ -40,9 +40,9 @@ class cpu_info_impl : public cpu_info_iface { std::map info_; template - void print(const std::any& value, std::stringstream& ss) const; + void print(const std::any& value, std::ostringstream& ss) const; - void print_any(const std::any& value, std::stringstream& ss) const; + void print_any(const std::any& value, std::ostringstream& ss) const; }; } // namespace v1 diff --git a/cpp/oneapi/dal/detail/error_messages.cpp b/cpp/oneapi/dal/detail/error_messages.cpp index d05952a154a..a49ce87794e 100644 --- a/cpp/oneapi/dal/detail/error_messages.cpp +++ b/cpp/oneapi/dal/detail/error_messages.cpp @@ -310,6 +310,8 @@ MSG(l1_coef_neq_zero, "Currently L1 regularization is not supported, so l1_coef should be equal to zero") MSG(log_reg_dense_batch_method_is_not_implemented_for_cpu, "LogisticRegression is not implemented for CPU") +MSG(log_reg_sparse_method_is_not_implemented_for_cpu, + "LogisticRegression does not have sparsity support for 
CPU") MSG(unknown_optimizer, "Custom optimizers are not supported, use on of provided by the library") /* Decision Forest */ diff --git a/cpp/oneapi/dal/detail/error_messages.hpp b/cpp/oneapi/dal/detail/error_messages.hpp index 43cda2f7977..ec4452f65a9 100644 --- a/cpp/oneapi/dal/detail/error_messages.hpp +++ b/cpp/oneapi/dal/detail/error_messages.hpp @@ -247,6 +247,7 @@ class ONEDAL_EXPORT error_messages { MSG(inverse_regularization_leq_zero); MSG(l1_coef_neq_zero); MSG(log_reg_dense_batch_method_is_not_implemented_for_cpu); + MSG(log_reg_sparse_method_is_not_implemented_for_cpu); MSG(unknown_optimizer); /* Louvain */ diff --git a/cpp/oneapi/dal/detail/hash_map.hpp b/cpp/oneapi/dal/detail/hash_map.hpp index 2e5d50e4922..ab47ba52028 100644 --- a/cpp/oneapi/dal/detail/hash_map.hpp +++ b/cpp/oneapi/dal/detail/hash_map.hpp @@ -71,6 +71,14 @@ class hash_map { } ~hash_map() { + for (std::int64_t i = 0; i < capacity_; i++) { + entry_ptr current = entries_[i]; + while (current) { + entry_ptr next = current->get_next(); + delete current; + current = next; + } + } delete[] entries_; entries_ = nullptr; capacity_ = 0; diff --git a/cpp/oneapi/dal/detail/parameters/BUILD b/cpp/oneapi/dal/detail/parameters/BUILD new file mode 100644 index 00000000000..e0d6c7373ca --- /dev/null +++ b/cpp/oneapi/dal/detail/parameters/BUILD @@ -0,0 +1,15 @@ +load( + "@onedal//dev/bazel:dal.bzl", + "dal_module", + "dal_test_suite", +) + +package(default_visibility = ["//visibility:public"]) + +dal_module( + name = "parameters", + auto = True, + dal_deps = [ + "@onedal//cpp/oneapi/dal:common", + ] +) diff --git a/cpp/oneapi/dal/detail/parameters/system_parameters.cpp b/cpp/oneapi/dal/detail/parameters/system_parameters.cpp new file mode 100644 index 00000000000..8216564d939 --- /dev/null +++ b/cpp/oneapi/dal/detail/parameters/system_parameters.cpp @@ -0,0 +1,52 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* 
Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/detail/parameters/system_parameters.hpp" + +namespace oneapi::dal { + +namespace detail { + +system_parameters::system_parameters() + : impl_(detail::pimpl(std::make_unique())) { +} + +cpu_extension system_parameters::get_top_enabled_cpu_extension() const { + return impl_->get_top_enabled_cpu_extension(); +} + +std::uint32_t system_parameters::get_max_number_of_threads() const { + return impl_->get_max_number_of_threads(); +} + +std::string system_parameters::dump() const { + return impl_->dump(); +} + +#ifdef ONEDAL_DATA_PARALLEL + +std::uint32_t system_parameters::get_max_workgroup_size(sycl::queue& queue) const { + return impl_->get_max_workgroup_size(queue); +} + +std::string system_parameters::dump(sycl::queue& queue) const { + return impl_->dump(queue); +} + +#endif + +} // namespace detail +} // namespace oneapi::dal diff --git a/cpp/oneapi/dal/detail/parameters/system_parameters.hpp b/cpp/oneapi/dal/detail/parameters/system_parameters.hpp new file mode 100644 index 00000000000..b173edbfbf7 --- /dev/null +++ b/cpp/oneapi/dal/detail/parameters/system_parameters.hpp @@ -0,0 +1,70 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in 
compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include + +#include "oneapi/dal/detail/common.hpp" +#include "oneapi/dal/detail/parameters/system_parameters_impl.hpp" + +namespace oneapi::dal { + +namespace detail { + +/// Stores system-related parameters that affect the performance of the algorithms. +/// Those parameters can differ from the `get_global_context().get_cpu_info()`. +/// +/// `cpu_info` reports the parameters available in hardware, where `system_parameters` +/// are the software-enabled parameters that can differ from `cpu_info`. +class system_parameters : public base { +public: + /// Creates a new default `system_parameters` instance. + explicit system_parameters(); + + /// Host related parameters. + + /// Top enabled CPU instruction set. + cpu_extension get_top_enabled_cpu_extension() const; + + /// Maximal number of threads available to the algorithm. + std::uint32_t get_max_number_of_threads() const; + +#ifdef ONEDAL_DATA_PARALLEL + /// Device related parameters. + + /// Maximal SYCL workgroup size on the device. + /// + /// @param queue The SYCL* queue object + std::uint32_t get_max_workgroup_size(sycl::queue& queue) const; +#endif + + /// Logs host parameters in the format: name_1: value_1; ... ; name_N: value_N. + std::string dump() const; + +#ifdef ONEDAL_DATA_PARALLEL + /// Logs host and device parameters in the format: name_1: value_1; ... ; name_N: value_N. 
+ /// + /// @param queue The SYCL* queue object + std::string dump(sycl::queue& queue) const; +#endif + +private: + detail::pimpl impl_; +}; + +} // namespace detail +} // namespace oneapi::dal diff --git a/cpp/oneapi/dal/detail/parameters/system_parameters_impl.cpp b/cpp/oneapi/dal/detail/parameters/system_parameters_impl.cpp new file mode 100644 index 00000000000..14219a45226 --- /dev/null +++ b/cpp/oneapi/dal/detail/parameters/system_parameters_impl.cpp @@ -0,0 +1,93 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/backend/common.hpp" +#include "oneapi/dal/detail/cpu_info_impl.hpp" +#include "oneapi/dal/detail/error_messages.hpp" +#include "oneapi/dal/detail/parameters/system_parameters_impl.hpp" +#include +#include + +#include + +namespace oneapi::dal::detail { +namespace v1 { + +system_parameters_impl::system_parameters_impl() { + using daal::services::Environment; + Environment* env = Environment::getInstance(); + sys_info_["top_enabled_cpu_extension"] = + from_daal_cpu_type(DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID); + sys_info_["max_number_of_threads"] = static_cast(env->getNumberOfThreads()); +} + +cpu_extension system_parameters_impl::get_top_enabled_cpu_extension() const { + const auto entry = sys_info_.find("top_enabled_cpu_extension"); + if (entry == sys_info_.end()) { + throw invalid_argument{ error_messages::invalid_key() }; + } + return std::any_cast(entry->second); +} + +std::uint32_t system_parameters_impl::get_max_number_of_threads() const { + const auto entry = sys_info_.find("max_number_of_threads"); + if (entry == sys_info_.end()) { + throw invalid_argument{ error_messages::invalid_key() }; + } + return std::any_cast(entry->second); +} + +void system_parameters_impl::print_any(const std::any& value, std::ostringstream& ss) const { + const std::type_info& ti = value.type(); + if (ti == typeid(cpu_extension)) { + ss << to_string(std::any_cast(value)); + } + else if (ti == typeid(std::uint32_t)) { + ss << std::any_cast(value); + } + else { + throw unimplemented{ dal::detail::error_messages::unsupported_data_type() }; + } +} + +std::string system_parameters_impl::dump() const { + std::ostringstream ss; + for (auto const& [name, value] : sys_info_) { + ss << name << " : "; + print_any(value, ss); + ss << "; "; + } + return std::move(ss).str(); +} + +#ifdef ONEDAL_DATA_PARALLEL + +std::uint32_t system_parameters_impl::get_max_workgroup_size(sycl::queue& queue) 
const { + return dal::backend::device_max_wg_size(queue); +} + +std::string system_parameters_impl::dump(sycl::queue& queue) const { + std::ostringstream ss; + ss << "max_workgroup_size" + << " : " << get_max_workgroup_size(queue) << "; "; + ss << dump(); + return std::move(ss).str(); +} + +#endif + +} // namespace v1 +} // namespace oneapi::dal::detail diff --git a/cpp/oneapi/dal/detail/parameters/system_parameters_impl.hpp b/cpp/oneapi/dal/detail/parameters/system_parameters_impl.hpp new file mode 100644 index 00000000000..57da24d7bc6 --- /dev/null +++ b/cpp/oneapi/dal/detail/parameters/system_parameters_impl.hpp @@ -0,0 +1,56 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/detail/cpu.hpp" + +#ifdef ONEDAL_DATA_PARALLEL +#include +#endif + +#include +#include + +namespace oneapi::dal::detail { +namespace v1 { + +class system_parameters_impl { +public: + explicit system_parameters_impl(); + + cpu_extension get_top_enabled_cpu_extension() const; + std::uint32_t get_max_number_of_threads() const; + +#ifdef ONEDAL_DATA_PARALLEL + std::uint32_t get_max_workgroup_size(sycl::queue& queue) const; +#endif + + std::string dump() const; + +#ifdef ONEDAL_DATA_PARALLEL + std::string dump(sycl::queue& queue) const; +#endif + +private: + std::map sys_info_; + + void print_any(const std::any& value, std::ostringstream& ss) const; +}; + +} // namespace v1 +using v1::system_parameters_impl; +} // namespace oneapi::dal::detail diff --git a/cpp/oneapi/dal/table/backend/csr_kernels.cpp b/cpp/oneapi/dal/table/backend/csr_kernels.cpp index 58a73b74549..7bf510318bd 100644 --- a/cpp/oneapi/dal/table/backend/csr_kernels.cpp +++ b/cpp/oneapi/dal/table/backend/csr_kernels.cpp @@ -14,6 +14,7 @@ * limitations under the License. *******************************************************************************/ +#include "oneapi/dal/backend/common.hpp" #include "oneapi/dal/table/backend/csr_kernels.hpp" #include "oneapi/dal/table/backend/convert.hpp" @@ -408,8 +409,15 @@ bool is_sorted(sycl::queue& queue, // number of pairs of the subsequent elements in the data array that are sorted in desccending order, // i.e. for which data[i] > data[i + 1] is true. 
std::int64_t count_descending_pairs = 0L; + sycl::buffer count_buf(&count_descending_pairs, sycl::range<1>(1)); + const auto count_m1 = count - 1LL; + const auto wg_size = dal::backend::device_max_wg_size(queue); + const size_t count_m1_unsigned = static_cast(count_m1); + + const size_t wg_count = (count_m1 + wg_size - 1) / wg_size; + // count the number of pairs of the subsequent elements in the data array that are sorted // in desccending order using sycl::reduction queue @@ -418,11 +426,11 @@ bool is_sorted(sycl::queue& queue, auto count_descending_reduction = sycl::reduction(count_buf, cgh, sycl::ext::oneapi::plus()); - cgh.parallel_for(sycl::nd_range<1>{ count - 1, 1 }, + cgh.parallel_for(sycl::nd_range<1>{ wg_count * wg_size, wg_size }, count_descending_reduction, [=](sycl::nd_item<1> idx, auto& count_descending) { const auto i = idx.get_global_id(0); - if (data[i] > data[i + 1]) + if (i < count_m1_unsigned && data[i + 1] < data[i]) count_descending.combine(1); }); }) @@ -485,39 +493,29 @@ out_of_bound_type check_bounds(const array& arr, sycl::buffer count_lt_buf(&count_lt_min, sycl::range<1>(1)); sycl::buffer count_gt_buf(&count_gt_max, sycl::range<1>(1)); - // count the number of elements which are less than min_vaule using sycl::reduction - auto event_count_lt_min = queue.submit([&](sycl::handler& cgh) { - cgh.depends_on(dependencies); - auto count_lt_reduction = - sycl::reduction(count_lt_buf, cgh, sycl::ext::oneapi::plus()); - - cgh.parallel_for(sycl::nd_range<1>{ count, 1 }, - count_lt_reduction, - [=](sycl::nd_item<1> idx, auto& count_lt) { - const auto i = idx.get_global_id(0); - if (data[i] < min_value) { - count_lt.combine(1); - } - }); - }); - - // count the number of elements which are greater than max_vaule using sycl::reduction - auto event_count_gt_max = queue.submit([&](sycl::handler& cgh) { - cgh.depends_on(dependencies); - auto count_gt_reduction = - sycl::reduction(count_gt_buf, cgh, sycl::ext::oneapi::plus()); - - 
cgh.parallel_for(sycl::nd_range<1>{ count, 1 }, - count_gt_reduction, - [=](sycl::nd_item<1> idx, auto& count_gt) { - const auto i = idx.get_global_id(0); - if (data[i] > max_value) { - count_gt.combine(1); - } - }); - }); - - sycl::event::wait_and_throw({ event_count_lt_min, event_count_gt_max }); + // count the number of elements which are less than min_vaule and + // the the number of elements which are greater than max_value using sycl::reduction + queue + .submit([&](sycl::handler& cgh) { + cgh.depends_on(dependencies); + auto count_lt_reduction = + sycl::reduction(count_lt_buf, cgh, sycl::ext::oneapi::plus()); + auto count_gt_reduction = + sycl::reduction(count_gt_buf, cgh, sycl::ext::oneapi::plus()); + + cgh.parallel_for(sycl::range<1>{ dal::detail::integral_cast(count) }, + count_lt_reduction, + count_gt_reduction, + [=](sycl::id<1> i, auto& count_lt, auto& count_gt) { + if (data[i] < min_value) { + count_lt.combine(1); + } + if (data[i] > max_value) { + count_gt.combine(1); + } + }); + }) + .wait_and_throw(); out_of_bound_type result{ out_of_bound_type::within_bounds }; if (count_lt_min > 0) diff --git a/cpp/oneapi/dal/test/engine/csr_table_builder.hpp b/cpp/oneapi/dal/test/engine/csr_table_builder.hpp index 2e4656f388c..6c542fb106b 100644 --- a/cpp/oneapi/dal/test/engine/csr_table_builder.hpp +++ b/cpp/oneapi/dal/test/engine/csr_table_builder.hpp @@ -19,7 +19,8 @@ namespace oneapi::dal::test::engine { -csr_table copy_data_to_csr(const dal::array& data, +template +csr_table copy_data_to_csr(const dal::array& data, const dal::array& column_indices, const dal::array& row_offsets, const sparse_indexing indexing, @@ -29,7 +30,7 @@ csr_table copy_data_to_csr(const dal::array& data, auto data_ptr = data.get_data(); auto col_indices_ptr = column_indices.get_data(); auto nnz_count = row_offs_ptr[row_count] - row_offs_ptr[0]; - const auto copied_data = dal::array::empty(nnz_count); + const auto copied_data = dal::array::empty(nnz_count); const auto 
copied_col_indices = dal::array::empty(nnz_count); const auto copied_row_offsets = dal::array::empty(row_count + 1); @@ -51,8 +52,9 @@ csr_table copy_data_to_csr(const dal::array& data, } #ifdef ONEDAL_DATA_PARALLEL +template csr_table copy_data_to_csr(sycl::queue& queue, - const dal::array& data, + const dal::array& data, const dal::array& column_indices, const dal::array& row_offsets, const sparse_indexing indexing, @@ -60,12 +62,12 @@ csr_table copy_data_to_csr(sycl::queue& queue, const std::int64_t row_count) { auto row_offs_ptr = row_offsets.get_data(); auto nnz_count = row_offs_ptr[row_count] - row_offs_ptr[0]; - const auto copied_data = dal::array::empty(queue, nnz_count, sycl::usm::alloc::device); + const auto copied_data = dal::array::empty(queue, nnz_count, sycl::usm::alloc::device); const auto copied_col_indices = dal::array::empty(queue, nnz_count, sycl::usm::alloc::device); const auto copied_row_offsets = dal::array::empty(queue, row_count + 1, sycl::usm::alloc::device); - auto data_event = queue.copy(data.get_data(), copied_data.get_mutable_data(), nnz_count); + auto data_event = queue.copy(data.get_data(), copied_data.get_mutable_data(), nnz_count); auto col_indices_event = queue.copy(column_indices.get_data(), copied_col_indices.get_mutable_data(), nnz_count); @@ -84,8 +86,8 @@ csr_table copy_data_to_csr(sycl::queue& queue, /** * Generates random CSR table based on inputs */ +template struct csr_table_builder { - using Float = float; std::int64_t row_count_, column_count_; float nonzero_fraction_; sparse_indexing indexing_; diff --git a/deploy/local/dal b/deploy/local/dal index 6ddff7e537b..dd64884cf86 100644 --- a/deploy/local/dal +++ b/deploy/local/dal @@ -1,3 +1,4 @@ + #%Module1.0################################################################### #=============================================================================== # Copyright 2020 Intel Corporation @@ -33,9 +34,9 @@ if { $tcl_version < $min_tcl_ver } { set scriptpath 
"${ModulesCurrentModulefile}" set scriptpath "[file dirname [file normalize "$scriptpath/___"]]" -# define componentroot, modulefileroot, modulefilename and modulefilever +# define componentroot, modulefilepath, modulefilename and modulefilever set modulefilename "[file tail [file dirname "${scriptpath}"]]" -set modulefilever "[file tail "$scriptpath"]" +set modulefilever "[file tail "${scriptpath}"]" set modulefilepath "${scriptpath}" set componentroot "[file dirname [file dirname [file dirname [file dirname "${scriptpath}"]]]]" @@ -51,8 +52,7 @@ set moduleinfoname [file dirname [module-info name]] proc ModulesHelp { } { global moduleinfoname - global modulefilever - module whatis "${modulefilename}/${modulefilever}" + puts "module whatis ${moduleinfoname}" } ############################################################################## @@ -61,14 +61,8 @@ proc ModulesHelp { } { # Set intermediate variables set dalroot "$componentroot" -set daalroot "$componentroot/$modulefilever" -if {[string equal [info machine] "aarch64"]} { - set daal_target_arch "arm" -} else { - set daal_target_arch "intel64" -} -module-whatis "oneAPI Data Analytics Library for $daal_target_arch." 
+set daal_target_arch "intel64" # Setup environment variables setenv DAL_MAJOR_BINARY 1 diff --git a/deploy/pkg-config/pkg-config.tpl b/deploy/pkg-config/pkg-config.tpl index 520bde0a9bf..0145dc1cc19 100755 --- a/deploy/pkg-config/pkg-config.tpl +++ b/deploy/pkg-config/pkg-config.tpl @@ -22,7 +22,7 @@ includedir=${{prefix}}/include #info Name: oneDAL Description: Intel(R) oneAPI Data Analytics Library -Version: 2024.4 +Version: 2024.6 URL: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onedal.html #Link line Libs: {libs} diff --git a/dev/bazel/config/config.bzl b/dev/bazel/config/config.bzl index 658e9c0704a..b1bf70b2c10 100644 --- a/dev/bazel/config/config.bzl +++ b/dev/bazel/config/config.bzl @@ -210,7 +210,7 @@ def _declare_onedal_config_impl(repo_ctx): substitutions = { "%{auto_cpu}": auto_cpu, "%{version_major}": "2024", - "%{version_minor}": "4", + "%{version_minor}": "6", "%{version_update}": "0", "%{version_build}": utils.datestamp(repo_ctx), "%{version_buildrev}": "work", diff --git a/dev/download_micromkl.bat b/dev/download_micromkl.bat index 6a30a2a44e3..a38515735a5 100755 --- a/dev/download_micromkl.bat +++ b/dev/download_micromkl.bat @@ -20,7 +20,7 @@ powershell.exe -command "if ($PSVersionTable.PSVersion.Major -ge 3) {exit 1} els set MKLURLROOT=https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/ set MKLVERSION=20230413 -set MKLGPUVERSION="2024-02-20" +set MKLGPUVERSION=20240605 set MKLPACKAGE=mklfpk_win_%MKLVERSION% set MKLGPUPACKAGE=mklgpufpk_win_%MKLGPUVERSION% diff --git a/dev/download_micromkl.sh b/dev/download_micromkl.sh index 0aaef938e9a..6eb52ddca76 100755 --- a/dev/download_micromkl.sh +++ b/dev/download_micromkl.sh @@ -18,7 +18,7 @@ MKLFPK_URL_ROOT="https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/" MKLFPK_VERSION="20230413" MKLFPK_VERSION_MAC="20210426" -MKLGPUFPK_VERSION="2024-02-20" +MKLGPUFPK_VERSION="20240605" WITH_GPU=true while true ; do diff --git a/dev/make/deps.mk 
b/dev/make/deps.mk index 07c5d37971c..eb50d8006b7 100644 --- a/dev/make/deps.mk +++ b/dev/make/deps.mk @@ -79,9 +79,9 @@ $1 = $$(if $$(or $$(.sources-changed),$$(and $$(.mkfiles-changed),$$(call .trigg dep-gen-enhanced-common = $(call $(SELF),$1 $(.copt-gen-deps)) && $(.keep-raw-deps) sed -n $(sed.-i) $(sed.fix-deps) $(sed.rm-abs-paths) -e '/./{ p; $(sed.mk-phony-targets)}' $(.dep-file-tmp) dep-gen-enhanced.icc = $(dep-gen-enhanced-common) dep-gen-enhanced.icl = $(dep-gen-enhanced-common) -dep-gen-enhanced.icx = $(dep-gen-enhanced-common) +dep-gen-enhanced.icx = $(if $(COMPILER_is_vc),,$(dep-gen-enhanced-common)) dep-gen-enhanced.g++ = $(dep-gen-enhanced-common) -dep-gen-enhanced.dpcpp = $(if $(OS_is_win),,$(dep-gen-enhanced-common)) +dep-gen-enhanced.icpx = $(dep-gen-enhanced-common) cmd-enhanced-with-dep-gen = $(or $(dep-gen-enhanced.$(call get-command-name,$($(SELF)))),$($(SELF))) $(call .inject.dep.gen, C.COMPILE, $$(cmd-enhanced-with-dep-gen)) diff --git a/docs/doxygen/doxygen_conf_cpp.txt b/docs/doxygen/doxygen_conf_cpp.txt index 9b71b554edc..1ac2861b4bc 100644 --- a/docs/doxygen/doxygen_conf_cpp.txt +++ b/docs/doxygen/doxygen_conf_cpp.txt @@ -38,7 +38,7 @@ PROJECT_NAME = "C++ API Reference for Intel(R) oneAPI Data Analytics L # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "2024.4" +PROJECT_NUMBER = "2024.6" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/docs/doxygen/doxygen_conf_cpp_examples.txt b/docs/doxygen/doxygen_conf_cpp_examples.txt index 5eb3161865b..52ac8ef8627 100644 --- a/docs/doxygen/doxygen_conf_cpp_examples.txt +++ b/docs/doxygen/doxygen_conf_cpp_examples.txt @@ -38,7 +38,7 @@ PROJECT_NAME = "C++ API Reference for Intel(R) oneAPI Data Analytics L # could be handy for archiving the generated documentation or if some version # control system is used. 
-PROJECT_NUMBER = "2024.4" +PROJECT_NUMBER = "2024.6" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/docs/doxygen/doxygen_conf_cpp_web.txt b/docs/doxygen/doxygen_conf_cpp_web.txt index 7e06b3c224f..28e11e19666 100644 --- a/docs/doxygen/doxygen_conf_cpp_web.txt +++ b/docs/doxygen/doxygen_conf_cpp_web.txt @@ -38,7 +38,7 @@ PROJECT_NAME = "C++ API Reference for Intel(R) oneAPI Data Analytics L # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "2024.4" +PROJECT_NUMBER = "2024.6" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/docs/requirements.txt b/docs/requirements.txt index 741d74a508f..6f23e800c49 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,16 +1,16 @@ alabaster==0.7.13 Babel==2.13.1 beautifulsoup4==4.12.2 -certifi==2024.2.2 +certifi==2024.7.4 chardet==5.2.0 click==8.1.7 colorama==0.4.6 docutils~=0.18.0 idna==3.7 imagesize==1.4.1 -importlib-metadata==7.0.0 +importlib-metadata==8.0.0 importlib-resources==6.1.1 -Jinja2==3.1.3 +Jinja2==3.1.4 lxml==5.1.0 MarkupSafe==2.1.3 packaging==24.0 @@ -19,7 +19,7 @@ Pygments==2.16.1 pyparsing==3.1.1 pytz==2024.1 PyYAML==6.0.1 -requests==2.31.0 +requests==2.32.0 six==1.16.0 snowballstemmer==2.2.0 soupsieve==2.5 @@ -36,5 +36,5 @@ sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.6 sphinxcontrib-serializinghtml==1.1.9 typing-extensions==4.8.0 -urllib3==2.1.0 -zipp==3.17.0 +urllib3==2.2.2 +zipp==3.19.1 diff --git a/docs/source/daal/algorithms/association_rules/association-rules.rst b/docs/source/daal/algorithms/association_rules/association-rules.rst index 3e1761aba00..26714d092fc 100644 --- a/docs/source/daal/algorithms/association_rules/association-rules.rst +++ 
b/docs/source/daal/algorithms/association_rules/association-rules.rst @@ -235,7 +235,7 @@ Examples Batch Processing: - - :daal4py_example:`association_rules_batch.py` + - :daal4py_example:`association_rules.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/boosting/adaboost.rst b/docs/source/daal/algorithms/boosting/adaboost.rst index 9e2212060d0..0a20f221006 100644 --- a/docs/source/daal/algorithms/boosting/adaboost.rst +++ b/docs/source/daal/algorithms/boosting/adaboost.rst @@ -141,4 +141,4 @@ Examples .. tab:: Python* - - :daal4py_example:`adaboost_batch.py` + - :daal4py_example:`adaboost.py` diff --git a/docs/source/daal/algorithms/boosting/brownboost.rst b/docs/source/daal/algorithms/boosting/brownboost.rst index 27f127e3445..75d4b6da0cb 100644 --- a/docs/source/daal/algorithms/boosting/brownboost.rst +++ b/docs/source/daal/algorithms/boosting/brownboost.rst @@ -192,4 +192,4 @@ Examples Batch Processing: - - :daal4py_example:`brownboost_batch.py` + - :daal4py_example:`brownboost.py` diff --git a/docs/source/daal/algorithms/boosting/logitboost.rst b/docs/source/daal/algorithms/boosting/logitboost.rst index 537b7d4389d..06dec03a048 100644 --- a/docs/source/daal/algorithms/boosting/logitboost.rst +++ b/docs/source/daal/algorithms/boosting/logitboost.rst @@ -193,4 +193,4 @@ Examples Batch Processing: - - :daal4py_example:`logitboost_batch.py` + - :daal4py_example:`logitboost.py` diff --git a/docs/source/daal/algorithms/cholesky/cholesky.rst b/docs/source/daal/algorithms/cholesky/cholesky.rst index 3bef41b6f3e..2cff2d92bbc 100644 --- a/docs/source/daal/algorithms/cholesky/cholesky.rst +++ b/docs/source/daal/algorithms/cholesky/cholesky.rst @@ -115,7 +115,7 @@ Examples Batch Processing: - - :daal4py_example:`cholesky_batch.py` + - :daal4py_example:`cholesky.py` Performance Considerations diff --git a/docs/source/daal/algorithms/covariance/correlation-and-variance-covariance-matrices.rst 
b/docs/source/daal/algorithms/covariance/correlation-and-variance-covariance-matrices.rst index b4b4cc0b6af..759e3e9829e 100644 --- a/docs/source/daal/algorithms/covariance/correlation-and-variance-covariance-matrices.rst +++ b/docs/source/daal/algorithms/covariance/correlation-and-variance-covariance-matrices.rst @@ -88,7 +88,7 @@ Examples Batch Processing: - - :daal4py_example:`covariance_batch.py` + - :daal4py_example:`covariance.py` Online Processing: diff --git a/docs/source/daal/algorithms/dbscan/index.rst b/docs/source/daal/algorithms/dbscan/index.rst index 50f69b3ba84..82a3ff22dd5 100644 --- a/docs/source/daal/algorithms/dbscan/index.rst +++ b/docs/source/daal/algorithms/dbscan/index.rst @@ -92,7 +92,7 @@ Examples Batch Processing: - - :daal4py_example:`dbscan_batch.py` + - :daal4py_example:`dbscan.py` Distributed Processing: diff --git a/docs/source/daal/algorithms/decision_forest/decision-forest-classification.rst b/docs/source/daal/algorithms/decision_forest/decision-forest-classification.rst index 28e6a7634b4..42d5fc06177 100644 --- a/docs/source/daal/algorithms/decision_forest/decision-forest-classification.rst +++ b/docs/source/daal/algorithms/decision_forest/decision-forest-classification.rst @@ -269,7 +269,7 @@ Examples Batch Processing: - - :daal4py_example:`decision_forest_classification_default_dense_batch.py` - - :daal4py_example:`decision_forest_classification_hist_batch.py` - - :daal4py_example:`decision_forest_classification_traverse_batch.py` + - :daal4py_example:`decision_forest_classification_default_dense.py` + - :daal4py_example:`decision_forest_classification_hist.py` + - :daal4py_example:`decision_forest_classification_traverse.py` diff --git a/docs/source/daal/algorithms/decision_forest/decision-forest-regression.rst b/docs/source/daal/algorithms/decision_forest/decision-forest-regression.rst index 552829735ed..db3077f023b 100644 --- a/docs/source/daal/algorithms/decision_forest/decision-forest-regression.rst +++ 
b/docs/source/daal/algorithms/decision_forest/decision-forest-regression.rst @@ -174,6 +174,6 @@ Examples Batch Processing: - - :daal4py_example:`decision_forest_regression_default_dense_batch.py` - - :daal4py_example:`decision_forest_regression_hist_batch.py` - - :daal4py_example:`decision_forest_regression_traverse_batch.py` + - :daal4py_example:`decision_forest_regression_default_dense.py` + - :daal4py_example:`decision_forest_regression_hist.py` + - :daal4py_example:`decision_forest_regression_traverse.py` diff --git a/docs/source/daal/algorithms/decision_tree/decision-tree-classification.rst b/docs/source/daal/algorithms/decision_tree/decision-tree-classification.rst index 6d72c037a59..4087dd407df 100644 --- a/docs/source/daal/algorithms/decision_tree/decision-tree-classification.rst +++ b/docs/source/daal/algorithms/decision_tree/decision-tree-classification.rst @@ -217,5 +217,5 @@ Examples Batch Processing: - - :daal4py_example:`decision_tree_classification_batch.py` - - :daal4py_example:`decision_tree_classification_traverse_batch.py` + - :daal4py_example:`decision_tree_classification.py` + - :daal4py_example:`decision_tree_classification_traverse.py` diff --git a/docs/source/daal/algorithms/decision_tree/decision-tree-regression.rst b/docs/source/daal/algorithms/decision_tree/decision-tree-regression.rst index 03fed253842..bbf1d36d63d 100644 --- a/docs/source/daal/algorithms/decision_tree/decision-tree-regression.rst +++ b/docs/source/daal/algorithms/decision_tree/decision-tree-regression.rst @@ -167,5 +167,5 @@ Examples Batch Processing: - - :daal4py_example:`decision_tree_regression_batch.py` - - :daal4py_example:`decision_tree_regression_traverse_batch.py` + - :daal4py_example:`decision_tree_regression.py` + - :daal4py_example:`decision_tree_regression_traverse.py` diff --git a/docs/source/daal/algorithms/distance/correlation.rst b/docs/source/daal/algorithms/distance/correlation.rst index 7e232bfb060..469e02e7f8b 100644 --- 
a/docs/source/daal/algorithms/distance/correlation.rst +++ b/docs/source/daal/algorithms/distance/correlation.rst @@ -124,7 +124,7 @@ Examples Batch Processing: - - :daal4py_example:`correlation_distance_batch.py` + - :daal4py_example:`correlation_distance.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/distance/cosine.rst b/docs/source/daal/algorithms/distance/cosine.rst index e40525b367c..d3ea497e95b 100644 --- a/docs/source/daal/algorithms/distance/cosine.rst +++ b/docs/source/daal/algorithms/distance/cosine.rst @@ -118,7 +118,7 @@ Examples Batch Processing: - - :daal4py_example:`cosine_distance_batch.py` + - :daal4py_example:`cosine_distance.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/distributions/bernoulli.rst b/docs/source/daal/algorithms/distributions/bernoulli.rst index c7d719f60bd..b21332635ec 100644 --- a/docs/source/daal/algorithms/distributions/bernoulli.rst +++ b/docs/source/daal/algorithms/distributions/bernoulli.rst @@ -83,7 +83,7 @@ Examples Batch Processing: - - :daal4py_example:`distributions_bernoulli_batch.py` + - :daal4py_example:`distributions_bernoulli.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/distributions/normal.rst b/docs/source/daal/algorithms/distributions/normal.rst index 30e66ee7902..e0da45b632d 100644 --- a/docs/source/daal/algorithms/distributions/normal.rst +++ b/docs/source/daal/algorithms/distributions/normal.rst @@ -73,4 +73,4 @@ Examples Batch Processing: - - :daal4py_example:`distributions_normal_batch.py` + - :daal4py_example:`distributions_normal.py` diff --git a/docs/source/daal/algorithms/distributions/uniform.rst b/docs/source/daal/algorithms/distributions/uniform.rst index 15c30a2dc50..c795ee56d68 100644 --- a/docs/source/daal/algorithms/distributions/uniform.rst +++ b/docs/source/daal/algorithms/distributions/uniform.rst @@ -83,4 +83,4 @@ Examples Batch 
Processing: - - :daal4py_example:`distributions_uniform_batch.py` + - :daal4py_example:`distributions_uniform.py` diff --git a/docs/source/daal/algorithms/em/expectation-maximization.rst b/docs/source/daal/algorithms/em/expectation-maximization.rst index dc27069b430..f20a5a207d2 100644 --- a/docs/source/daal/algorithms/em/expectation-maximization.rst +++ b/docs/source/daal/algorithms/em/expectation-maximization.rst @@ -456,7 +456,7 @@ Examples Batch Processing: - - :daal4py_example:`em_gmm_batch.py` + - :daal4py_example:`em_gmm.py` Performance Considerations ========================== diff --git a/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-classification.rst b/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-classification.rst index 49f67a4d5c2..356e6906511 100644 --- a/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-classification.rst +++ b/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-classification.rst @@ -196,5 +196,5 @@ Examples Batch Processing: - - :daal4py_example:`gradient_boosted_classification_batch.py` - - :daal4py_example:`gradient_boosted_classification_traverse_batch.py` + - :daal4py_example:`gradient_boosted_classification.py` + - :daal4py_example:`gradient_boosted_classification_traverse.py` diff --git a/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-regression.rst b/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-regression.rst index 863e2ae462d..d096cd14f01 100644 --- a/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-regression.rst +++ b/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-regression.rst @@ -172,5 +172,5 @@ Examples Batch Processing: - - :daal4py_example:`gradient_boosted_regression_batch.py` - - :daal4py_example:`gradient_boosted_regression_traverse_batch.py` + - :daal4py_example:`gradient_boosted_regression.py` + - 
:daal4py_example:`gradient_boosted_regression_traverse.py` diff --git a/docs/source/daal/algorithms/implicit_als/implicit-alternating-least-squares.rst b/docs/source/daal/algorithms/implicit_als/implicit-alternating-least-squares.rst index 682e580f556..7d4d109ed25 100644 --- a/docs/source/daal/algorithms/implicit_als/implicit-alternating-least-squares.rst +++ b/docs/source/daal/algorithms/implicit_als/implicit-alternating-least-squares.rst @@ -130,7 +130,7 @@ Examples Batch Processing: - - :daal4py_example:`implicit_als_batch.py` + - :daal4py_example:`implicit_als.py` Performance Considerations diff --git a/docs/source/daal/algorithms/k_nearest_neighbors/k-nearest-neighbors-knn-classifier.rst b/docs/source/daal/algorithms/k_nearest_neighbors/k-nearest-neighbors-knn-classifier.rst index 192b8b8a849..a871d0a6e3d 100644 --- a/docs/source/daal/algorithms/k_nearest_neighbors/k-nearest-neighbors-knn-classifier.rst +++ b/docs/source/daal/algorithms/k_nearest_neighbors/k-nearest-neighbors-knn-classifier.rst @@ -323,5 +323,5 @@ Examples Batch Processing: - - :daal4py_example:`kdtree_knn_classification_batch.py` - - :daal4py_example:`bf_knn_classification_batch.py` + - :daal4py_example:`kdtree_knn_classification.py` + - :daal4py_example:`bf_knn_classification.py` diff --git a/docs/source/daal/algorithms/kmeans/k-means-clustering.rst b/docs/source/daal/algorithms/kmeans/k-means-clustering.rst index 3ce239c9109..ba0df9b4f1e 100644 --- a/docs/source/daal/algorithms/kmeans/k-means-clustering.rst +++ b/docs/source/daal/algorithms/kmeans/k-means-clustering.rst @@ -289,7 +289,7 @@ Examples Batch Processing: - - :daal4py_example:`kmeans_batch.py` + - :daal4py_example:`kmeans.py` Distributed Processing diff --git a/docs/source/daal/algorithms/linear_ridge_regression/linear-regression.rst b/docs/source/daal/algorithms/linear_ridge_regression/linear-regression.rst index f34b149615f..90a93d65beb 100644 --- a/docs/source/daal/algorithms/linear_ridge_regression/linear-regression.rst +++ 
b/docs/source/daal/algorithms/linear_ridge_regression/linear-regression.rst @@ -108,4 +108,4 @@ Examples .. tab:: Python* - - :daal4py_example:`lin_reg_model_builder.py` + - :daal4py_example:`lin_reg_model.py` diff --git a/docs/source/daal/algorithms/linear_ridge_regression/linear-ridge-regression-computation.rst b/docs/source/daal/algorithms/linear_ridge_regression/linear-ridge-regression-computation.rst index db56a69da18..1a9351afc4b 100644 --- a/docs/source/daal/algorithms/linear_ridge_regression/linear-ridge-regression-computation.rst +++ b/docs/source/daal/algorithms/linear_ridge_regression/linear-ridge-regression-computation.rst @@ -455,8 +455,8 @@ Examples Batch Processing: - - :daal4py_example:`linear_regression_batch.py` - - :daal4py_example:`ridge_regression_batch.py` + - :daal4py_example:`linear_regression.py` + - :daal4py_example:`ridge_regression.py` Online Processing: diff --git a/docs/source/daal/algorithms/logistic_regression/logistic-regression.rst b/docs/source/daal/algorithms/logistic_regression/logistic-regression.rst index 3e0a5abb76f..04dc52c14e2 100644 --- a/docs/source/daal/algorithms/logistic_regression/logistic-regression.rst +++ b/docs/source/daal/algorithms/logistic_regression/logistic-regression.rst @@ -274,5 +274,5 @@ Examples Batch Processing: - - :daal4py_example:`log_reg_dense_batch.py` - - :daal4py_example:`log_reg_binary_dense_batch.py` + - :daal4py_example:`log_reg_dense.py` + - :daal4py_example:`log_reg_binary_dense.py` diff --git a/docs/source/daal/algorithms/moments/moments-of-low-order.rst b/docs/source/daal/algorithms/moments/moments-of-low-order.rst index fb341fdd13b..70ac92e0b20 100644 --- a/docs/source/daal/algorithms/moments/moments-of-low-order.rst +++ b/docs/source/daal/algorithms/moments/moments-of-low-order.rst @@ -108,7 +108,7 @@ Examples Batch Processing: - - :daal4py_example:`low_order_moms_dense_batch.py` + - :daal4py_example:`low_order_moms_dense.py` Online Processing: diff --git 
a/docs/source/daal/algorithms/naive_bayes/naive-bayes-classifier.rst b/docs/source/daal/algorithms/naive_bayes/naive-bayes-classifier.rst index 5740a82957f..3bc7a604307 100644 --- a/docs/source/daal/algorithms/naive_bayes/naive-bayes-classifier.rst +++ b/docs/source/daal/algorithms/naive_bayes/naive-bayes-classifier.rst @@ -104,7 +104,7 @@ Examples Batch Processing: - - :daal4py_example:`naive_bayes_batch.py` + - :daal4py_example:`naive_bayes.py` Online Processing: diff --git a/docs/source/daal/algorithms/normalization/min-max.rst b/docs/source/daal/algorithms/normalization/min-max.rst index 7d86c24b9f0..68058fb48ee 100644 --- a/docs/source/daal/algorithms/normalization/min-max.rst +++ b/docs/source/daal/algorithms/normalization/min-max.rst @@ -137,4 +137,4 @@ Examples Batch Processing: - - :daal4py_example:`normalization_minmax_batch.py` + - :daal4py_example:`normalization_minmax.py` diff --git a/docs/source/daal/algorithms/normalization/z-score.rst b/docs/source/daal/algorithms/normalization/z-score.rst index 61800faf5a7..cfa8324aebf 100644 --- a/docs/source/daal/algorithms/normalization/z-score.rst +++ b/docs/source/daal/algorithms/normalization/z-score.rst @@ -191,4 +191,4 @@ Examples Batch Processing: - - :daal4py_example:`normalization_zscore_batch.py` + - :daal4py_example:`normalization_zscore.py` diff --git a/docs/source/daal/algorithms/optimization-solvers/objective-functions/cross-entropy.rst b/docs/source/daal/algorithms/optimization-solvers/objective-functions/cross-entropy.rst index 58d37049dc6..06e398be40e 100644 --- a/docs/source/daal/algorithms/optimization-solvers/objective-functions/cross-entropy.rst +++ b/docs/source/daal/algorithms/optimization-solvers/objective-functions/cross-entropy.rst @@ -251,4 +251,4 @@ Examples .. 
tab:: Python* - - :daal4py_example:`lbfgs_cr_entr_loss_batch.py` + - :daal4py_example:`lbfgs_cr_entr_loss.py` diff --git a/docs/source/daal/algorithms/optimization-solvers/solvers/adaptive-subgradient-method.rst b/docs/source/daal/algorithms/optimization-solvers/solvers/adaptive-subgradient-method.rst index a78b19dd517..ae7f0946a9e 100644 --- a/docs/source/daal/algorithms/optimization-solvers/solvers/adaptive-subgradient-method.rst +++ b/docs/source/daal/algorithms/optimization-solvers/solvers/adaptive-subgradient-method.rst @@ -150,4 +150,4 @@ Examples .. tab:: Python* - - :daal4py_example:`adagrad_mse_batch.py` + - :daal4py_example:`adagrad_mse.py` diff --git a/docs/source/daal/algorithms/optimization-solvers/solvers/lbfgs.rst b/docs/source/daal/algorithms/optimization-solvers/solvers/lbfgs.rst index 9b7e64a4027..aad70393c8b 100644 --- a/docs/source/daal/algorithms/optimization-solvers/solvers/lbfgs.rst +++ b/docs/source/daal/algorithms/optimization-solvers/solvers/lbfgs.rst @@ -306,5 +306,5 @@ Examples Batch Processing: - - :daal4py_example:`lbfgs_cr_entr_loss_batch.py` - - :daal4py_example:`lbfgs_mse_batch.py` + - :daal4py_example:`lbfgs_cr_entr_loss.py` + - :daal4py_example:`lbfgs_mse.py` diff --git a/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-average-gradient-accelerated-method.rst b/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-average-gradient-accelerated-method.rst index d38f3e2ac7a..8cadad8915c 100644 --- a/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-average-gradient-accelerated-method.rst +++ b/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-average-gradient-accelerated-method.rst @@ -185,4 +185,4 @@ Examples Batch Processing: - - :daal4py_example:`saga_batch.py` + - :daal4py_example:`saga.py` diff --git a/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-gradient-descent-algorithm.rst 
b/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-gradient-descent-algorithm.rst index 57ca4cf518f..41fcb76b236 100644 --- a/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-gradient-descent-algorithm.rst +++ b/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-gradient-descent-algorithm.rst @@ -226,8 +226,8 @@ Examples Batch Processing: - - :daal4py_example:`sgd_logistic_loss_batch.py` - - :daal4py_example:`sgd_mse_batch.py` + - :daal4py_example:`sgd_logistic_loss.py` + - :daal4py_example:`sgd_mse.py` .. Python*: diff --git a/docs/source/daal/algorithms/outlier_detection/multivariate-bacon.rst b/docs/source/daal/algorithms/outlier_detection/multivariate-bacon.rst index 6a1412e3171..d611040849b 100644 --- a/docs/source/daal/algorithms/outlier_detection/multivariate-bacon.rst +++ b/docs/source/daal/algorithms/outlier_detection/multivariate-bacon.rst @@ -156,4 +156,4 @@ Examples Batch Processing: - - :daal4py_example:`bacon_outlier_batch.py` + - :daal4py_example:`bacon_outlier.py` diff --git a/docs/source/daal/algorithms/outlier_detection/multivariate.rst b/docs/source/daal/algorithms/outlier_detection/multivariate.rst index deef0b23466..73f090e69c7 100644 --- a/docs/source/daal/algorithms/outlier_detection/multivariate.rst +++ b/docs/source/daal/algorithms/outlier_detection/multivariate.rst @@ -151,7 +151,7 @@ Examples Batch Processing: - - :daal4py_example:`multivariate_outlier_batch.py` + - :daal4py_example:`multivariate_outlier.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/outlier_detection/univariate.rst b/docs/source/daal/algorithms/outlier_detection/univariate.rst index df79ff7314e..b0335e24656 100644 --- a/docs/source/daal/algorithms/outlier_detection/univariate.rst +++ b/docs/source/daal/algorithms/outlier_detection/univariate.rst @@ -157,4 +157,4 @@ Examples Batch Processing: - - :daal4py_example:`univariate_outlier_batch.py` + - 
:daal4py_example:`univariate_outlier.py` diff --git a/docs/source/daal/algorithms/pca/principal-component-analysis.rst b/docs/source/daal/algorithms/pca/principal-component-analysis.rst index 00bd6e8d184..c03ddb97061 100644 --- a/docs/source/daal/algorithms/pca/principal-component-analysis.rst +++ b/docs/source/daal/algorithms/pca/principal-component-analysis.rst @@ -138,7 +138,7 @@ Examples Batch Processing: - - :daal4py_example:`pca_batch.py` + - :daal4py_example:`pca.py` Distributed Processing: diff --git a/docs/source/daal/algorithms/pca/transform.rst b/docs/source/daal/algorithms/pca/transform.rst index 4d141434c99..d680773ec19 100644 --- a/docs/source/daal/algorithms/pca/transform.rst +++ b/docs/source/daal/algorithms/pca/transform.rst @@ -137,4 +137,4 @@ Examples Batch Processing: - - :daal4py_example:`pca_transform_batch.py` + - :daal4py_example:`pca_transform.py` diff --git a/docs/source/daal/algorithms/qr/qr-pivoted.rst b/docs/source/daal/algorithms/qr/qr-pivoted.rst index d729968b52a..35aef56d166 100644 --- a/docs/source/daal/algorithms/qr/qr-pivoted.rst +++ b/docs/source/daal/algorithms/qr/qr-pivoted.rst @@ -148,4 +148,4 @@ Examples Batch Processing: - - :daal4py_example:`pivoted_qr_batch.py` + - :daal4py_example:`pivoted_qr.py` diff --git a/docs/source/daal/algorithms/qr/qr-without-pivoting.rst b/docs/source/daal/algorithms/qr/qr-without-pivoting.rst index f8f2a7bf55e..bec3b9405a3 100644 --- a/docs/source/daal/algorithms/qr/qr-without-pivoting.rst +++ b/docs/source/daal/algorithms/qr/qr-without-pivoting.rst @@ -66,7 +66,7 @@ Examples Batch Processing: - - :daal4py_example:`qr_batch.py` + - :daal4py_example:`qr.py` Online Processing: diff --git a/docs/source/daal/algorithms/quality_metrics/default/for-multi-class-classification.rst b/docs/source/daal/algorithms/quality_metrics/default/for-multi-class-classification.rst index 6b3177557a7..58d5157d3c4 100644 --- a/docs/source/daal/algorithms/quality_metrics/default/for-multi-class-classification.rst +++ 
b/docs/source/daal/algorithms/quality_metrics/default/for-multi-class-classification.rst @@ -47,7 +47,7 @@ Further definitions use the following notations: * - :math:`\text{fp}_i` - false positive - the number of observations that were incorrectly assigned to the class :math:`C_1` - * - :math:`\text{fn_i}` + * - :math:`\text{fn}_i` - false negative - the number of observations that were not recognized as belonging to the class :math:`C_1` diff --git a/docs/source/daal/algorithms/quantiles/index.rst b/docs/source/daal/algorithms/quantiles/index.rst index c45636e3b7d..487d017f417 100644 --- a/docs/source/daal/algorithms/quantiles/index.rst +++ b/docs/source/daal/algorithms/quantiles/index.rst @@ -123,4 +123,4 @@ Examples Batch Processing: - - :daal4py_example:`quantiles_batch.py` + - :daal4py_example:`quantiles.py` diff --git a/docs/source/daal/algorithms/sorting/index.rst b/docs/source/daal/algorithms/sorting/index.rst index b6806d9339b..d7abb8d781d 100644 --- a/docs/source/daal/algorithms/sorting/index.rst +++ b/docs/source/daal/algorithms/sorting/index.rst @@ -111,4 +111,4 @@ Examples Batch Processing: - - :daal4py_example:`sorting_batch.py` + - :daal4py_example:`sorting.py` diff --git a/docs/source/daal/algorithms/stump/classification.rst b/docs/source/daal/algorithms/stump/classification.rst index b575bf90cf7..610a9a61e27 100644 --- a/docs/source/daal/algorithms/stump/classification.rst +++ b/docs/source/daal/algorithms/stump/classification.rst @@ -120,4 +120,4 @@ Examples Batch Processing: - - :daal4py_example:`stump_classification_batch.py` + - :daal4py_example:`stump_classification.py` diff --git a/docs/source/daal/algorithms/stump/regression.rst b/docs/source/daal/algorithms/stump/regression.rst index cb5e5099582..bb1b6ba691d 100644 --- a/docs/source/daal/algorithms/stump/regression.rst +++ b/docs/source/daal/algorithms/stump/regression.rst @@ -94,4 +94,4 @@ Examples Batch Processing: - - :daal4py_example:`stump_regression_batch.py` + - 
:daal4py_example:`stump_regression.py` diff --git a/docs/source/daal/algorithms/svd/singular-value-decomposition.rst b/docs/source/daal/algorithms/svd/singular-value-decomposition.rst index 80a77dc3051..ee9e24554c7 100644 --- a/docs/source/daal/algorithms/svd/singular-value-decomposition.rst +++ b/docs/source/daal/algorithms/svd/singular-value-decomposition.rst @@ -77,7 +77,7 @@ Examples Batch Processing: - - :daal4py_example:`svd_batch.py` + - :daal4py_example:`svd.py` Online Processing: diff --git a/docs/source/daal/algorithms/svm/support-vector-machine-classifier.rst b/docs/source/daal/algorithms/svm/support-vector-machine-classifier.rst index b5aaf81f25f..f04871e8fc7 100644 --- a/docs/source/daal/algorithms/svm/support-vector-machine-classifier.rst +++ b/docs/source/daal/algorithms/svm/support-vector-machine-classifier.rst @@ -264,7 +264,7 @@ Examples Batch Processing: - - :daal4py_example:`svm_batch.py` + - :daal4py_example:`svm.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/svm_multi_class/multi-class-classifier.rst b/docs/source/daal/algorithms/svm_multi_class/multi-class-classifier.rst index 666f0fefd4f..6c5405fe720 100644 --- a/docs/source/daal/algorithms/svm_multi_class/multi-class-classifier.rst +++ b/docs/source/daal/algorithms/svm_multi_class/multi-class-classifier.rst @@ -255,4 +255,4 @@ Examples Batch Processing: - - :daal4py_example:`svm_multiclass_batch.py` + - :daal4py_example:`svm_multiclass.py` diff --git a/docs/source/data-analytics-pipeline.rst b/docs/source/data-analytics-pipeline.rst index 107c2ff1ff0..4cd9f32845e 100644 --- a/docs/source/data-analytics-pipeline.rst +++ b/docs/source/data-analytics-pipeline.rst @@ -19,9 +19,6 @@ Introduction .. 
_onedal_data_analytics_pipeline: -Data Analytics Pipeline -*********************** - |full_name| (|short_name|) is a library that provides building blocks covering all stages of data analytics: data acquisition from a data source, preprocessing, transformation, data mining, modeling, diff --git a/docs/source/onedal/algorithms/logistic-regression/index.rst b/docs/source/onedal/algorithms/logistic-regression/index.rst index fced9085d39..7c6dff57a3d 100644 --- a/docs/source/onedal/algorithms/logistic-regression/index.rst +++ b/docs/source/onedal/algorithms/logistic-regression/index.rst @@ -60,6 +60,14 @@ During training, the data is divided into batches, and the gradients from each b Refer to :ref:`Mathematical formulation: Newton-CG `. +.. _logreg_t_math_sparse: + +Training Method: *sparse* +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using this method, you can train a Logistic Regression model on sparse data. All you need to do is provide the matrix of feature vectors as a +sparse table. For more information about sparse tables, see :ref:`Compressed Sparse Rows (CSR) Table: `. + .. _logreg_i_math: Inference diff --git a/docs/source/onedal/build_app/build-application.rst b/docs/source/onedal/build_app/build-application.rst index 7574d8b9cbf..5e7249b29bb 100644 --- a/docs/source/onedal/build_app/build-application.rst +++ b/docs/source/onedal/build_app/build-application.rst @@ -29,7 +29,7 @@ Applications on Linux* OS #. Install |short_name|. -#. Set environment variables by calling ``/setvars.sh``. +#. Set environment variables by calling ``/vars.sh``. #.
Build the application using ``icpx`` (Linux* OS) and ``icx-cl`` (Windows* OS) commands: @@ -117,13 +117,11 @@ Applications on Windows* OS * - Static linking - - | onedal_core.lib, | onedal_core.lib, | onedal_thread.lib * - Dynamic linking - | onedal_core_dll.lib - | onedal_core_dll.lib You may also add debug versions of the libraries based on the threading mode and linking method: @@ -137,10 +135,6 @@ Applications on Windows* OS * - Static linking - - | onedal_cored.lib, - | onedald.lib, - | onedal_dpcd.lib, - | onedal_sycld.lib, | onedal_cored.lib, | onedald.lib, | onedal_dpcd.lib, @@ -148,12 +142,6 @@ Applications on Windows* OS | onedal_threadd.lib * - Dynamic linking - - | onedal_cored_dll.lib (onedal_cored_dll.2.lib), - | onedald_dll.lib (onedald_dll.2.lib), - | onedal_dpcd_dll.lib (onedal_dpcd_dll.2.lib), - | onedald.2.dll, - | onedal_cored.2.dll, - | onedal_dpcd.2.dll, | onedal_cored_dll.lib (onedal_cored_dll.2.lib), | onedald_dll.lib (onedald_dll.2.lib), | onedal_dpcd_dll.lib (onedal_dpcd_dll.2.lib), diff --git a/examples/daal/cpp/CMakeLists.txt b/examples/daal/cpp/CMakeLists.txt index fc7fdfd6f06..ed41306e810 100644 --- a/examples/daal/cpp/CMakeLists.txt +++ b/examples/daal/cpp/CMakeLists.txt @@ -1,5 +1,6 @@ #=============================================================================== # Copyright 2021 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -43,11 +44,6 @@ if(REF_BACKEND) set(EXCLUDE_LIST ${EXCLUDE_LIST} "source/boosting/brownboost_dense_batch.cpp" - "source/em/em_gmm_dense_batch.cpp" - "source/linear_regression/lin_reg_qr_dense_distr.cpp" - "source/pca/pca_cor*" - "source/qr/qr_dense*" - "source/svd/svd_dense_batch.cpp" ) endif() diff --git a/examples/daal/cpp/target_excludes.cmake b/examples/daal/cpp/target_excludes.cmake index 2b338146cee..99d1cb9331b 100644 --- a/examples/daal/cpp/target_excludes.cmake +++ b/examples/daal/cpp/target_excludes.cmake @@ -42,30 +42,7 @@ elseif((CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") AND (CMAKE_C_COMPILER MATCHES "gcc")) set(EXCLUDE_LIST ${EXCLUDE_LIST} - "assoc_rules_apriori_batch" - "cd_dense_batch" - "cholesky_dense_batch" - "cor_csr_distr" - "cor_csr_online" - "cor_dense_distr" - "cor_dense_online" - "cov_csr_distr" - "cov_csr_online" - "cov_dense_distr" - "cov_dense_online" - "elastic_net_dense_batch" "enable_thread_pinning" - "lasso_reg_dense_batch" - "lin_reg_metrics_dense_batch" - "lin_reg_qr_dense_batch" - "lin_reg_qr_dense_online" - "low_order_moms_csr_distr" - "low_order_moms_csr_online" - "low_order_moms_dense_distr" - "low_order_moms_dense_online" - "out_detect_mult_dense_batch" - "pca_metrics_dense_batch" - "pivoted_qr_dense_batch" ) elseif((CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64") AND (CMAKE_C_COMPILER MATCHES "clang")) diff --git a/makefile b/makefile index 1ca3cd701f7..96081ea0354 100644 --- a/makefile +++ b/makefile @@ -489,7 +489,6 @@ $(CORE.objs_a): COPT += -D__TBB_NO_IMPLICIT_LINKAGE -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 -D_ENABLE_ATOMIC_ALIGNMENT_FIX \ $(if $(CHECK_DLL_SIG),-DDAAL_CHECK_DLL_SIG) $(CORE.objs_a): COPT += @$(CORE.tmpdir_a)/inc_a_folders.txt -$(filter %threading.$o, $(CORE.objs_a)): COPT += -D__DO_TBB_LAYER__ $(eval $(call append_uarch_copt,$(CORE.objs_a))) @@ -500,7 +499,6 @@ $(CORE.objs_y): COPT += -D__DAAL_IMPLEMENTATION \ -DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 
-D_ENABLE_ATOMIC_ALIGNMENT_FIX \ $(if $(CHECK_DLL_SIG),-DDAAL_CHECK_DLL_SIG) $(CORE.objs_y): COPT += @$(CORE.tmpdir_y)/inc_y_folders.txt -$(filter %threading.$o, $(CORE.objs_y)): COPT += -D__DO_TBB_LAYER__ $(eval $(call append_uarch_copt,$(CORE.objs_y))) @@ -563,14 +561,16 @@ ONEAPI.srcdirs.base := $(ONEAPI.srcdir) \ $(addprefix $(ONEAPI.srcdir)/io/, $(ONEAPI.IO)) ONEAPI.srcdirs.detail := $(foreach x,$(ONEAPI.srcdirs.base),$(shell find $x -maxdepth 1 -type d -name detail)) ONEAPI.srcdirs.backend := $(foreach x,$(ONEAPI.srcdirs.base),$(shell find $x -maxdepth 1 -type d -name backend)) -ONEAPI.srcdirs.parameters := $(foreach x,$(ONEAPI.srcdirs.base),$(shell find $x -maxdepth 1 -type d -name parameters)) +ONEAPI.srcdirs.parameters := $(ONEAPI.srcdir)/detail/parameters \ + $(foreach x,$(ONEAPI.srcdirs.base),$(shell find $x -maxdepth 1 -type d -name parameters)) ONEAPI.srcdirs := $(ONEAPI.srcdirs.base) $(ONEAPI.srcdirs.detail) $(ONEAPI.srcdirs.backend) $(ONEAPI.srcdirs.parameters) -ONEAPI.srcs.all.exclude := ! -path "*_test.*" ! -path "*/test/*" +ONEAPI.srcs.all.exclude := ! -path "*_test.*" ! -path "*/test/*" ! -path "*/detail/parameters/*" +ONEAPI.srcs.parameters.exclude := ! -path "*_test.*" ! 
-path "*/test/*" ONEAPI.srcs.all := $(foreach x,$(ONEAPI.srcdirs.base),$(shell find $x -maxdepth 1 -type f -name "*.cpp" $(ONEAPI.srcs.all.exclude))) \ $(foreach x,$(ONEAPI.srcdirs.detail),$(shell find $x -type f -name "*.cpp" $(ONEAPI.srcs.all.exclude))) \ $(foreach x,$(ONEAPI.srcdirs.backend),$(shell find $x -type f -name "*.cpp" $(ONEAPI.srcs.all.exclude))) \ - $(foreach x,$(ONEAPI.srcdirs.parameters),$(shell find $x -type f -name "*.cpp" $(ONEAPI.srcs.all.exclude))) + $(foreach x,$(ONEAPI.srcdirs.parameters),$(shell find $x -type f -name "*.cpp" $(ONEAPI.srcs.parameters.exclude))) ONEAPI.srcs.all := $(ONEAPI.srcs.all:./%=%) ONEAPI.srcs.dpc := $(filter %_dpc.cpp,$(ONEAPI.srcs.all)) ONEAPI.srcs := $(filter-out %_dpc.cpp,$(ONEAPI.srcs.all)) @@ -705,14 +705,14 @@ $(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-DEBC) $(-EHsc) $(pedantic.op $(eval $(call update_copt_from_dispatcher_tag,$(ONEAPI.objs_y.dpc),.dpcpp)) # Filtering parameter files -PARAMETERS.objs_a.filtered := $(filter %parameters.$(o),$(ONEAPI.objs_a)) -ONEAPI.objs_a.filtered := $(filter-out %parameters.$(o),$(ONEAPI.objs_a)) -PARAMETERS.objs_y.filtered := $(filter %parameters.$(o),$(ONEAPI.objs_y)) -ONEAPI.objs_y.filtered := $(filter-out %parameters.$(o),$(ONEAPI.objs_y)) -PARAMETERS.objs_a.dpc.filtered := $(filter %parameters.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_a.dpc)) -ONEAPI.objs_a.dpc.filtered := $(filter-out %parameters.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_a.dpc)) -PARAMETERS.objs_y.dpc.filtered := $(filter %parameters.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_y.dpc)) -ONEAPI.objs_y.dpc.filtered := $(filter-out %parameters.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_y.dpc)) +PARAMETERS.objs_a.filtered := $(filter %parameters.$(o) %parameters_impl.$(o),$(ONEAPI.objs_a)) +ONEAPI.objs_a.filtered := $(filter-out %parameters.$(o) %parameters_impl.$(o),$(ONEAPI.objs_a)) +PARAMETERS.objs_y.filtered := $(filter %parameters.$(o) %parameters_impl.$(o),$(ONEAPI.objs_y)) +ONEAPI.objs_y.filtered := 
$(filter-out %parameters.$(o) %parameters_impl.$(o),$(ONEAPI.objs_y)) +PARAMETERS.objs_a.dpc.filtered := $(filter %parameters.$(o) %parameters_impl.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_a.dpc)) +ONEAPI.objs_a.dpc.filtered := $(filter-out %parameters.$(o) %parameters_impl.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_a.dpc)) +PARAMETERS.objs_y.dpc.filtered := $(filter %parameters.$(o) %parameters_impl.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_y.dpc)) +ONEAPI.objs_y.dpc.filtered := $(filter-out %parameters.$(o) %parameters_impl.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_y.dpc)) # Actual compilation $(foreach x,$(ONEAPI.objs_a.filtered),$(eval $(call .ONEAPI.compile,$x,$(ONEAPI.tmpdir_a),C))) @@ -842,7 +842,6 @@ THR_TBB.objs := $(THR_TBB.objs_a) $(THR_TBB.objs_y) THR.objs := $(THR.objs_a) $(THR.objs_y) $(THR.objs): COPT += $(-fPIC) $(-cxx11) $(-Zl) $(-DEBC) -DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 -D_ENABLE_ATOMIC_ALIGNMENT_FIX -$(THR_TBB.objs): COPT += -D__DO_TBB_LAYER__ $(THR.objs_a): $(THR.tmpdir_a)/thr_inc_a_folders.txt $(THR.objs_a): COPT += @$(THR.tmpdir_a)/thr_inc_a_folders.txt diff --git a/makefile.lst b/makefile.lst index de7afb1090c..92dc52ff521 100755 --- a/makefile.lst +++ b/makefile.lst @@ -228,6 +228,7 @@ ONEAPI.ALGOS := \ dbscan \ decision_forest \ decision_tree \ + finiteness_checker \ kmeans \ kmeans_init \ knn \ diff --git a/makefile.ver b/makefile.ver index 45b346d284f..b00ff4ae913 100644 --- a/makefile.ver +++ b/makefile.ver @@ -15,7 +15,7 @@ #=============================================================================== MAJOR = 2024 -MINOR = 4 +MINOR = 6 UPDATE = 0 BUILD = $(shell date +'%Y%m%d') STATUS = P diff --git a/samples/cmake/setup_samples.cmake b/samples/cmake/setup_samples.cmake index afaf9042417..dd7d3cc758b 100644 --- a/samples/cmake/setup_samples.cmake +++ b/samples/cmake/setup_samples.cmake @@ -111,7 +111,7 @@ function(add_samples samples_paths) set_target_properties(${sample} PROPERTIES RUNTIME_OUTPUT_DIRECTORY 
"${PROJECT_SOURCE_DIR}/_cmake_results/${CPU_ARCHITECTURE}_${LINK_TYPE}") add_custom_target(run_${sample} - COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} \\ + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} -ppn ${MPIEXEC_NUMPROCS_PER_NODE} $ DEPENDS ${sample} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/samples/oneapi/dpc/ccl/sources/pca_online_distr_ccl.cpp b/samples/oneapi/dpc/ccl/sources/pca_online_distr_ccl.cpp index c33c899581b..68046122b8d 100644 --- a/samples/oneapi/dpc/ccl/sources/pca_online_distr_ccl.cpp +++ b/samples/oneapi/dpc/ccl/sources/pca_online_distr_ccl.cpp @@ -33,7 +33,7 @@ namespace dal = oneapi::dal; void run(sycl::queue& queue) { const auto data_file_name = get_data_path("data/pca_normalized.csv"); - + const std::int64_t nBlocks = 10; const auto data = dal::read(queue, dal::csv::data_source{ data_file_name }); const auto pca_desc = dal::pca::descriptor{}; @@ -45,12 +45,12 @@ void run(sycl::queue& queue) { auto input_vec = split_table_by_rows(queue, data, rank_count); auto input_blocks = split_table_by_rows(queue, input_vec[rank_id], nBlocks); - dal::covariance::partial_train_result<> partial_result; + dal::pca::partial_train_result<> partial_result; for (std::int64_t i = 0; i < nBlocks; i++) { - partial_result = dal::partial_train(queue, cov_desc, partial_result, input_blocks[i]); + partial_result = dal::partial_train(queue, pca_desc, partial_result, input_blocks[i]); } - const auto result = dal::preview::finalize_train(comm, cov_desc, partial_result); + const auto result = dal::preview::finalize_train(comm, pca_desc, partial_result); if (comm.get_rank() == 0) { std::cout << "Eigenvectors:\n" << result.get_eigenvectors() << std::endl; diff --git a/samples/oneapi/dpc/mpi/sources/pca_online_distr_mpi.cpp b/samples/oneapi/dpc/mpi/sources/pca_online_distr_mpi.cpp index 12d37b0a69c..2b94c5f9d97 100644 --- a/samples/oneapi/dpc/mpi/sources/pca_online_distr_mpi.cpp +++ 
b/samples/oneapi/dpc/mpi/sources/pca_online_distr_mpi.cpp @@ -33,7 +33,7 @@ namespace dal = oneapi::dal; void run(sycl::queue& queue) { const auto data_file_name = get_data_path("data/pca_normalized.csv"); - + const std::int64_t nBlocks = 10; const auto data = dal::read(queue, dal::csv::data_source{ data_file_name }); const auto pca_desc = dal::pca::descriptor{}; @@ -45,12 +45,12 @@ void run(sycl::queue& queue) { auto input_vec = split_table_by_rows(queue, data, rank_count); auto input_blocks = split_table_by_rows(queue, input_vec[rank_id], nBlocks); - dal::covariance::partial_train_result<> partial_result; + dal::pca::partial_train_result<> partial_result; for (std::int64_t i = 0; i < nBlocks; i++) { - partial_result = dal::partial_train(queue, cov_desc, partial_result, input_blocks[i]); + partial_result = dal::partial_train(queue, pca_desc, partial_result, input_blocks[i]); } - const auto result = dal::preview::finalize_train(comm, cov_desc, partial_result); + const auto result = dal::preview::finalize_train(comm, pca_desc, partial_result); if (comm.get_rank() == 0) { std::cout << "Eigenvectors:\n" << result.get_eigenvectors() << std::endl;