From cc1a83e1651fc9ce75cac7f69a594c763f67f967 Mon Sep 17 00:00:00 2001 From: Jing Xu Date: Sat, 30 Mar 2024 08:15:13 +0800 Subject: [PATCH] update dependency version (#3895) * add torch-ccl into compile bundle * fix dead link in doc * update footer link * update deepspeed dependency version, remove cpu related md files from build_doc.sh * add xpu perf * version to 2.1.20 * fix example import * update torch ccl version * add mpi path in the scripts * update dependency version * move known issue to tutorial repo * update known issue link * add note for not contain cpu features * update log version * update feature and example doc * update model zoo version * add paper to publications * remove cheetsheet --------- Co-authored-by: Zheng, Zhaoqiong Co-authored-by: Ye Ting --- dependency_version.yml | 14 +- docs/_static/custom.css | 3 + docs/_templates/footer.html | 2 +- docs/index.rst | 9 +- docs/tutorials/api_doc.rst | 5 +- docs/tutorials/blogs_publications.md | 1 + docs/tutorials/cheat_sheet.md | 23 --- docs/tutorials/examples.md | 36 ++--- docs/tutorials/features.rst | 25 ++-- docs/tutorials/features/amp_cpu.md | 102 -------------- docs/tutorials/features/int4.md | 42 ------ docs/tutorials/installation.rst | 2 +- .../{performance_tuning => }/known_issues.md | 0 docs/tutorials/llm.rst | 16 +-- .../llm/llm_optimize_transformers.md | 18 --- docs/tutorials/performance_tuning.rst | 11 -- docs/tutorials/releases.md | 10 +- examples/gpu/inference/python/llm/Dockerfile | 131 +++++++++--------- examples/gpu/inference/python/llm/README.md | 9 +- .../python/llm/tools/env_activate.sh | 2 - .../inference/python/llm/tools/env_setup.sh | 24 +++- scripts/build_doc.sh | 6 - scripts/compile_bundle.sh | 27 ++-- tools/basekit_driver_install_helper.sh | 123 ++++++++-------- 24 files changed, 222 insertions(+), 419 deletions(-) delete mode 100644 docs/tutorials/cheat_sheet.md delete mode 100644 docs/tutorials/features/amp_cpu.md delete mode 100644 docs/tutorials/features/int4.md rename docs/tutorials/{performance_tuning => }/known_issues.md (100%) delete mode 100644 docs/tutorials/performance_tuning.rst diff --git a/dependency_version.yml b/dependency_version.yml index b3435783b..b9df37a0f 100644 --- a/dependency_version.yml +++ b/dependency_version.yml @@ -4,21 +4,21 @@ gcc: llvm: version: 16.0.6 pytorch: - version: 2.1.0a0 + version: 2.1.0.post0+cxx11.abi commit: v2.1.0 torchaudio: - version: 2.1.0a0 + version: 2.1.0.post0+cxx11.abi commit: v2.1.0 torchvision: - version: 0.16.0a0 + version: 0.16.0.post0+cxx11.abi commit: v0.16.0 torch-ccl: repo: https://github.com/intel/torch-ccl.git - commit: 5f20135ccf8f828738cb3bc5a5ae7816df8100ae - version: 2.1.100+xpu + commit: 5ee65b42c42a0d91c4cf459d9be40020274003b6 + version: 2.1.200+xpu deepspeed: repo: https://github.com/microsoft/DeepSpeed.git - version: + version: v0.11.2 commit: 4fc181b01077521ba42379013ce91a1c294e5d8e intel-extension-for-deepspeed: repo: https://github.com/intel/intel-extension-for-deepspeed.git @@ -28,7 +28,7 @@ transformers: commit: v4.31.0 protobuf: version: 3.20.3 -llm_eval: +lm_eval: version: 0.3.0 basekit: dpcpp-cpp-rt: diff --git a/docs/_static/custom.css b/docs/_static/custom.css index ec82c8204..a8a04605f 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -15,6 +15,9 @@ a#wap_dns { display: none; } +a#wap_nac { + display: none; +} /* replace the copyright to eliminate the copyright symbol enforced by the ReadTheDocs theme */ diff --git a/docs/_templates/footer.html b/docs/_templates/footer.html index 
2ba1962f1..a48d262fc 100644 --- a/docs/_templates/footer.html +++ b/docs/_templates/footer.html @@ -1,3 +1,3 @@ {% extends '!footer.html' %} {% block extrafooter %} {{super}} -

Cookies | Privacy | Do Not Share My Personal Information

© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document, with the sole exception that code included in this document is licensed subject to the Zero-Clause BSD open source license (OBSD), http://opensource.org/licenses/0BSD.
+

Cookies | Privacy Your Privacy Choices Notice at Collection

© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document, with the sole exception that code included in this document is licensed subject to the Zero-Clause BSD open source license (OBSD), http://opensource.org/licenses/0BSD.
{% endblock %} diff --git a/docs/index.rst b/docs/index.rst index d1a1f04d0..d82e70052 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -15,7 +15,7 @@ Large Language Models (LLMs) are introduced in the Intel® Extension for PyTorch The extension can be loaded as a Python module for Python programs or linked as a C++ library for C++ programs. In Python scripts, users can enable it dynamically by importing ``intel_extension_for_pytorch``. .. note:: - + - CPU features are not included in GPU-only packages. - GPU features are not included in CPU-only packages. - Optimizations for CPU-only may have a newer code base due to different development schedules. @@ -26,8 +26,8 @@ Intel® Extension for PyTorch* has been released as an open–source project at You can find more information about the product at: -- `Features `_ -- `Performance <./tutorials/performance.html>`_ +- `Features `_ +- `Performance `_ Architecture ------------ @@ -62,7 +62,7 @@ The team tracks bugs and enhancement requests using `GitHub issues -f https://developer.intel.com/ipex-whl-stable-xpu`
`pip install intel_extension_for_pytorch== -f https://developer.intel.com/ipex-whl-stable-xpu`| -| Import Intel® Extension for PyTorch\* | `import intel_extension_for_pytorch as ipex`| -| Capture a Verbose Log (Command Prompt) | `export ONEDNN_VERBOSE=1` | -| Optimization During Training | `model = ...`
`optimizer = ...`
`model.train()`
`model, optimizer = ipex.optimize(model, optimizer=optimizer)`| -| Optimization During Inference | `model = ...`
`model.eval()`
`model = ipex.optimize(model)` | -| Optimization Using the Low-Precision Data Type bfloat16
During Training (Default FP32) | `model = ...`
`optimizer = ...`
`model.train()`

`model, optimizer = ipex.optimize(model, optimizer=optimizer, dtype=torch.bfloat16)`

`with torch.no_grad():`
` with torch.cpu.amp.autocast():`
` model(data)` | -| Optimization Using the Low-Precision Data Type bfloat16
During Inference (Default FP32) | `model = ...`
`model.eval()`

`model = ipex.optimize(model, dtype=torch.bfloat16)`

`with torch.cpu.amp.autocast():`
` model(data)` -| [Prototype] Fast BERT Optimization | `from transformers import BertModel`
`model = BertModel.from_pretrained("bert-base-uncased")`
`model.eval()`

`model = ipex.fast_bert(model, dtype=torch.bfloat16)`| -| Run CPU Launch Script (Command Prompt):
Automate Configuration Settings for Performance | `ipexrun [knobs] [args]`| -| [Prototype] Run HyperTune to perform hyperparameter/execution configuration search | `python -m intel_extension_for_pytorch.cpu.hypertune --conf-file [args]`| -| [Prototype] Enable Graph capture | `model = …`
`model.eval()`
`model = ipex.optimize(model, graph_mode=True)`| -| Post-Training INT8 Quantization (Static) | `model = …`
`model.eval()`
`data = …`

`qconfig = ipex.quantization.default_static_qconfig`

`prepared_model = ipex.quantization.prepare(model, qconfig, example_inputs=data, anyplace=False)`

`for d in calibration_data_loader():`
` prepared_model(d)`

`converted_model = ipex.quantization.convert(prepared_model)`| -| Post-Training INT8 Quantization (Dynamic) | `model = …`
`model.eval()`
`data = …`

`qconfig = ipex.quantization.default_dynamic_qconfig`

`prepared_model = ipex.quantization.prepare(model, qconfig, example_inputs=data)`

`converted_model = ipex.quantization.convert(prepared_model)` | -| [Prototype] Post-Training INT8 Quantization (Tuning Recipe): | `model = …`
`model.eval()`
`data = …`

`qconfig = ipex.quantization.default_static_qconfig`

`prepared_model = ipex.quantization.prepare(model, qconfig, example_inputs=data, inplace=False)`

`tuned_model = ipex.quantization.autotune(prepared_model, calibration_data_loader, eval_function, sampling_sizes=[100],`
` accuracy_criterion={'relative': .01}, tuning_time=0)`

`convert_model = ipex.quantization.convert(tuned_model)`| - diff --git a/docs/tutorials/examples.md b/docs/tutorials/examples.md index b929766e1..221251125 100644 --- a/docs/tutorials/examples.md +++ b/docs/tutorials/examples.md @@ -4,8 +4,6 @@ Examples These examples will help you get started using Intel® Extension for PyTorch\* with Intel GPUs. -For examples on Intel CPUs, check the [CPU examples](../../../cpu/latest/tutorials/examples.html). - **Prerequisites**: Before running these examples, install the `torchvision` and `transformers` Python packages. @@ -27,7 +25,7 @@ Before running these examples, install the `torchvision` and `transformers` Pyth To use Intel® Extension for PyTorch\* on training, you need to make the following changes in your code: 1. Import `intel_extension_for_pytorch` as `ipex`. -2. Use the `ipex.optimize` function, which applies optimizations against the model object, as well as an optimizer object. +2. Use the `ipex.optimize` function for additional performance boost, which applies optimizations against the model object, as well as an optimizer object. 3. Use Auto Mixed Precision (AMP) with BFloat16 data type. 4. Convert input tensors, loss criterion and model to XPU, as shown below: @@ -219,18 +217,20 @@ The is the absolute path of libtorch we install at the first s If *Found IPEX* is shown as dynamic library paths, the extension was linked into the binary. This can be verified with the Linux command *ldd*. +The value of x, y, z in the following log will change depending on the version you choose. + ```bash $ CC=icx CXX=icpx cmake -DCMAKE_PREFIX_PATH=/workspace/libtorch .. --- The C compiler identification is IntelLLVM 2024.0.0 --- The CXX compiler identification is IntelLLVM 2024.0.0 +-- The C compiler identification is IntelLLVM 202x.y.z +-- The CXX compiler identification is IntelLLVM 202x.y.z -- Detecting C compiler ABI info -- Detecting C compiler ABI info - done --- Check for working C compiler: /workspace/intel/oneapi/compiler/2024.0.0/linux/bin/icx - skipped +-- Check for working C compiler: /workspace/intel/oneapi/compiler/202x.y.z/linux/bin/icx - skipped -- Detecting C compile features -- Detecting C compile features - done -- Detecting CXX compiler ABI info -- Detecting CXX compiler ABI info - done --- Check for working CXX compiler: /workspace/intel/oneapi/compiler/2024.0.0/linux/bin/icpx - skipped +-- Check for working CXX compiler: /workspace/intel/oneapi/compiler/202x.y.z/linux/bin/icpx - skipped -- Detecting CXX compile features -- Detecting CXX compile features - done -- Looking for pthread.h @@ -252,16 +252,16 @@ $ ldd example-app libintel-ext-pt-cpu.so => /workspace/libtorch/lib/libintel-ext-pt-cpu.so (0x00007fd5a1a1b000) libintel-ext-pt-gpu.so => /workspace/libtorch/lib/libintel-ext-pt-gpu.so (0x00007fd5862b0000) ... 
- libmkl_intel_lp64.so.2 => /workspace/intel/oneapi/mkl/2024.0.0/lib/intel64/libmkl_intel_lp64.so.2 (0x00007fd584ab0000) - libmkl_core.so.2 => /workspace/intel/oneapi/mkl/2024.0.0/lib/intel64/libmkl_core.so.2 (0x00007fd5806cc000) - libmkl_gnu_thread.so.2 => /workspace/intel/oneapi/mkl/2024.0.0/lib/intel64/libmkl_gnu_thread.so.2 (0x00007fd57eb1d000) - libmkl_sycl.so.3 => /workspace/intel/oneapi/mkl/2024.0.0/lib/intel64/libmkl_sycl.so.3 (0x00007fd55512c000) - libOpenCL.so.1 => /workspace/intel/oneapi/compiler/2024.0.0/linux/lib/libOpenCL.so.1 (0x00007fd55511d000) - libsvml.so => /workspace/intel/oneapi/compiler/2024.0.0/linux/compiler/lib/intel64_lin/libsvml.so (0x00007fd553b11000) - libirng.so => /workspace/intel/oneapi/compiler/2024.0.0/linux/compiler/lib/intel64_lin/libirng.so (0x00007fd553600000) - libimf.so => /workspace/intel/oneapi/compiler/2024.0.0/linux/compiler/lib/intel64_lin/libimf.so (0x00007fd55321b000) - libintlc.so.5 => /workspace/intel/oneapi/compiler/2024.0.0/linux/compiler/lib/intel64_lin/libintlc.so.5 (0x00007fd553a9c000) - libsycl.so.6 => /workspace/intel/oneapi/compiler/2024.0.0/linux/lib/libsycl.so.6 (0x00007fd552f36000) + libmkl_intel_lp64.so.2 => /workspace/intel/oneapi/mkl/202x.y.z/lib/intel64/libmkl_intel_lp64.so.2 (0x00007fd584ab0000) + libmkl_core.so.2 => /workspace/intel/oneapi/mkl/202x.y.z/lib/intel64/libmkl_core.so.2 (0x00007fd5806cc000) + libmkl_gnu_thread.so.2 => /workspace/intel/oneapi/mkl/202x.y.z/lib/intel64/libmkl_gnu_thread.so.2 (0x00007fd57eb1d000) + libmkl_sycl.so.3 => /workspace/intel/oneapi/mkl/202x.y.z/lib/intel64/libmkl_sycl.so.3 (0x00007fd55512c000) + libOpenCL.so.1 => /workspace/intel/oneapi/compiler/202x.y.z/linux/lib/libOpenCL.so.1 (0x00007fd55511d000) + libsvml.so => /workspace/intel/oneapi/compiler/202x.y.z/linux/compiler/lib/intel64_lin/libsvml.so (0x00007fd553b11000) + libirng.so => /workspace/intel/oneapi/compiler/202x.y.z/linux/compiler/lib/intel64_lin/libirng.so (0x00007fd553600000) + libimf.so => /workspace/intel/oneapi/compiler/202x.y.z/linux/compiler/lib/intel64_lin/libimf.so (0x00007fd55321b000) + libintlc.so.5 => /workspace/intel/oneapi/compiler/202x.y.z/linux/compiler/lib/intel64_lin/libintlc.so.5 (0x00007fd553a9c000) + libsycl.so.6 => /workspace/intel/oneapi/compiler/202x.y.z/linux/lib/libsycl.so.6 (0x00007fd552f36000) ... ``` @@ -286,4 +286,4 @@ Intel® Extension for PyTorch\* provides its C++ dynamic library to allow users ## Intel® AI Reference Models -Use cases that have already been optimized by Intel engineers are available at [Intel® AI Reference Models](https://github.com/IntelAI/models/tree/v2.12.0) (former Model Zoo). A number of PyTorch use cases for benchmarking are also available in the [Use Cases](https://github.com/IntelAI/models/tree/v2.12.0#use-cases) section. Models verified on Intel GPUs are marked in the `Model Documentation` column. You can get performance benefits out-of-the-box by simply running scripts in the Intel® AI Reference Models. +Use cases that have already been optimized by Intel engineers are available at [Intel® AI Reference Models](https://github.com/IntelAI/models/tree/v3.1.1) (former Model Zoo). A number of PyTorch use cases for benchmarking are also available in the [Use Cases](https://github.com/IntelAI/models/tree/v3.1.1?tab=readme-ov-file#use-cases) section. Models verified on Intel GPUs are marked in the `Model Documentation` column. You can get performance benefits out-of-the-box by simply running scripts in the Intel® AI Reference Models. 
diff --git a/docs/tutorials/features.rst b/docs/tutorials/features.rst index b907d5443..fb22239c2 100644 --- a/docs/tutorials/features.rst +++ b/docs/tutorials/features.rst @@ -1,8 +1,8 @@ Features ======== -Device-Agnostic -*************** +GPU-Specific +************ Easy-to-use Python API ---------------------- @@ -46,16 +46,15 @@ Quantization Intel® Extension for PyTorch* currently supports imperative mode and TorchScript mode for post-training static quantization on GPU. This section illustrates the quantization workflow on Intel GPUs. -Check more detailed information for `INT8 Quantization [XPU] `_. +Check more detailed information for `INT8 Quantization `_. -On Intel® GPUs, Intel® Extension for PyTorch* also provides INT4 and FP8 Quantization. Check more detailed information for `FP8 Quantization <./features/float8.md>`_ and `INT4 Quantization <./features/int4.md>`_ +On Intel® GPUs, Intel® Extension for PyTorch* also provides FP8 Quantization. Check more detailed information for `FP8 Quantization <./features/float8.md>`_. .. toctree:: :hidden: :maxdepth: 1 features/int8_overview_xpu - features/int4 features/float8 @@ -74,9 +73,6 @@ For more detailed information, check `DDP `_ and `Horovod (Prot features/horovod -GPU-Specific -************ - DLPack Solution --------------- @@ -131,11 +127,12 @@ For more detailed information, check `FSDP `_. features/FSDP -Inductor --------- +torch.compile for GPU (Beta) +---------------------------- + Intel® Extension for PyTorch\* now empowers users to seamlessly harness graph compilation capabilities for optimal PyTorch model performance on Intel GPU via the flagship `torch.compile `_ API through the default "inductor" backend (`TorchInductor `_ ). -For more detailed information, check `Inductor `_. +For more detailed information, check `torch.compile for GPU `_. .. toctree:: :hidden: @@ -144,7 +141,7 @@ For more detailed information, check `Inductor `_ features/torch_compile_gpu Legacy Profiler Tool (Prototype) ------------------------------------ +-------------------------------- The legacy profiler tool is an extension of PyTorch* legacy profiler for profiling operators' overhead on XPU devices. With this tool, you can get the information in many fields of the run models or code scripts. Build Intel® Extension for PyTorch* with profiler support as default and enable this tool by adding a `with` statement before the code segment. @@ -157,7 +154,7 @@ For more detailed information, check `Legacy Profiler Tool `_. +Select your preferences and follow the installation instructions provided on the `Installation page <../../../index.html#installation?platform=gpu&version=v2.1.20%2Bxpu>`_. After successful installation, refer to the `Quick Start `_ and `Examples `_ sections to start using the extension in your code. diff --git a/docs/tutorials/performance_tuning/known_issues.md b/docs/tutorials/known_issues.md similarity index 100% rename from docs/tutorials/performance_tuning/known_issues.md rename to docs/tutorials/known_issues.md diff --git a/docs/tutorials/llm.rst b/docs/tutorials/llm.rst index 10b470224..e33397ccf 100644 --- a/docs/tutorials/llm.rst +++ b/docs/tutorials/llm.rst @@ -3,7 +3,7 @@ Large Language Models (LLM) Optimizations Overview In the current technological landscape, Generative AI (GenAI) workloads and models have gained widespread attention and popularity. LLMs have emerged as the dominant models driving these GenAI applications. Most of LLMs are GPT-like architectures that consist of multiple Decoder layers. 
The MultiHeadAttention and FeedForward layer are two key components of every Decoder layer. The generation task is memory bound because iterative decode and kv_cache require special management to reduce memory overheads. Intel® Extension for PyTorch* provides a lot of specific optimizations for these LLMs. -On the operator level, the extension provides highly efficient GEMM kernel to speed up Linear layer and customized operators to reduce the memory footprint. To better trade-off the performance and accuracy, different low-precision solutions e.g., smoothQuant and weight-only-quantization are also enabled. Besides, tensor parallel can also adopt to get lower latency for LLMs. +On the operator level, the extension provides highly efficient GEMM kernel to speed up Linear layer and customized operators to reduce the memory footprint. To better trade-off the performance and accuracy, different low-precision solutions e.g., smoothQuant is enabled. Besides, tensor parallel can also adopt to get lower latency for LLMs. These LLM-specific optimizations can be automatically applied with a single frontend API function in Python interface, `ipex.optimize_transformers()`. Check `optimize_transformers <./llm/llm_optimize_transformers.md>`_ for more details. @@ -35,16 +35,10 @@ Optimized Models - ✅ - ✅ - ✅ - * - Weight only quantzation INT4 - - ❎ - - ✅ - - ❎ - - ❎ - *Note*: The above verified models (including other models in the same model family, like "codellama/CodeLlama-7b-hf" from LLAMA family) are well supported with all optimizations like indirect access KV cache, fused ROPE, and prepacked TPP Linear (fp16). For other LLMs families, we are working in progress to cover those optimizations, which will expand the model list above. -Check `LLM best known practice `_ for instructions to install/setup environment and example scripts.. +Check `LLM best known practice `_ for instructions to install/setup environment and example scripts.. Optimization Methodologies -------------------------- @@ -111,9 +105,3 @@ heavier computations and places higher requirements to the underlying hardware. Given that, quantization becomes a more important methodology for inference workloads. -Intel® Extension for PyTorch\* also delivers INT4 optimizations via -4-bit weight-only quantization (WOQ). As the name indicates, WOQ -quantizes only weights to 4-bit integers to further improve the -computation efficiency via saved memory bandwidth utilization. This -technique reduces text generation latency especially from the second -token. diff --git a/docs/tutorials/llm/llm_optimize_transformers.md b/docs/tutorials/llm/llm_optimize_transformers.md index 4682b8ac6..82c8a2cfd 100644 --- a/docs/tutorials/llm/llm_optimize_transformers.md +++ b/docs/tutorials/llm/llm_optimize_transformers.md @@ -113,24 +113,6 @@ print(modelJit.graph_for(inference_dta)) ``` -### Weight Only Quantization (WOQ) - -Supports INT4. - -``` python -from transformers import GPTJForCausalLM - -model_path = ... -dataset = ... -model = GPTJForCausalLM.from_pretrained(model_path) -model.eval() - -ipex.quantization._gptq(model, dataset, 'quantized_weight.pt', wbits=4) - -# inference with model.generate() -... -``` - ### Distributed Inference with DeepSpeed Distributed inference can be performed with `DeepSpeed`. Based on original Intel® Extension for PyTorch\* scripts, the following code changes are required. 
diff --git a/docs/tutorials/performance_tuning.rst b/docs/tutorials/performance_tuning.rst deleted file mode 100644 index 2e4ebd747..000000000 --- a/docs/tutorials/performance_tuning.rst +++ /dev/null @@ -1,11 +0,0 @@ -Performance Tuning Guide -======================== - -Intel® Extension for PyTorch\* should yield a satisfying performance with its default configuration for general use cases. This page shows solutions for some known issues. - -- `Known Issues `_ - -.. toctree:: - :hidden: - - performance_tuning/known_issues diff --git a/docs/tutorials/releases.md b/docs/tutorials/releases.md index bfb3f10c0..b2a9c0f02 100644 --- a/docs/tutorials/releases.md +++ b/docs/tutorials/releases.md @@ -24,7 +24,7 @@ Intel® Extension for PyTorch\* v2.1.20+xpu is a minor release which supports In ### Known Issues -Please refer to [Known Issues webpage](./performance_tuning/known_issues.md). +Please refer to [Known Issues webpage](./known_issues.md). ## 2.1.10+xpu @@ -59,7 +59,7 @@ This release provides the following features: ### Known Issues -Please refer to [Known Issues webpage](./performance_tuning/known_issues.md). +Please refer to [Known Issues webpage](./known_issues.md). ## 2.0.110+xpu @@ -84,7 +84,7 @@ This release adds the following fusion patterns in PyTorch\* JIT mode for Intel ### Known Issues -Please refer to [Known Issues webpage](./performance_tuning/known_issues.md). +Please refer to [Known Issues webpage](./known_issues.md). ## 1.13.120+xpu @@ -112,7 +112,7 @@ This release adds the following fusion patterns in PyTorch\* JIT mode for Intel ### Known Issues -Please refer to [Known Issues webpage](./performance_tuning/known_issues.md). +Please refer to [Known Issues webpage](./known_issues.md). ## 1.13.10+xpu @@ -137,7 +137,7 @@ This release adds the following fusion patterns in PyTorch\* JIT mode for Intel ### Known Issues -Please refer to [Known Issues webpage](./performance_tuning/known_issues.md). +Please refer to [Known Issues webpage](./known_issues.md). ## 1.10.200+gpu diff --git a/examples/gpu/inference/python/llm/Dockerfile b/examples/gpu/inference/python/llm/Dockerfile index 1cc4d7720..59dac10c0 100644 --- a/examples/gpu/inference/python/llm/Dockerfile +++ b/examples/gpu/inference/python/llm/Dockerfile @@ -1,66 +1,65 @@ - -ARG BASE_IMAGE=ubuntu:22.04 -FROM ${BASE_IMAGE} AS base -SHELL ["/bin/bash", "-c"] -RUN if [ -f /etc/apt/apt.conf.d/proxy.conf ]; then rm /etc/apt/apt.conf.d/proxy.conf; fi && \ - if [ ! -z ${HTTP_PROXY} ]; then echo "Acquire::http::Proxy \"${HTTP_PROXY}\";" >> /etc/apt/apt.conf.d/proxy.conf; fi && \ - if [ ! -z ${HTTPS_PROXY} ]; then echo "Acquire::https::Proxy \"${HTTPS_PROXY}\";" >> /etc/apt/apt.conf.d/proxy.conf; fi -RUN apt update && \ - apt full-upgrade -y && \ - DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends -y \ - sudo \ - git \ - wget \ - curl \ - vim \ - patch \ - gcc \ - g++ \ - make \ - pkg-config \ - software-properties-common \ - gnupg \ - gpg-agent -COPY ./tools/basekit_driver_install_helper.sh . 
-RUN bash ./basekit_driver_install_helper.sh driver - -ARG GID_RENDER=109 -RUN useradd -m -s /bin/bash ubuntu && \ - echo 'ubuntu ALL=(ALL) NOPASSWD: ALL' >> /etc/sudoers && \ - groupadd -g $GID_RENDER render && \ - usermod -a -G video,render ubuntu -USER ubuntu -WORKDIR /home/ubuntu - -RUN curl -fsSL -v -o miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ - bash miniconda.sh -b -p ./miniconda3 && \ - rm miniconda.sh && \ - echo "source ~/miniconda3/bin/activate" >> ./.bashrc - -FROM base AS dev -# --build-arg COMPILE=ON to compile from source -ARG COMPILE -RUN bash /basekit_driver_install_helper.sh dev -COPY --chown=ubuntu:ubuntu . ./intel-extension-for-pytorch/ -RUN . ./miniconda3/bin/activate && \ - conda create -y -n compile_py310 python=3.10 && conda activate compile_py310 && \ - cd intel-extension-for-pytorch/examples/gpu/inference/python/llm && \ - if [ -z ${COMPILE} ]; then MODE=6; else MODE=2; fi && \ - bash tools/env_setup.sh ${MODE} /opt/intel/oneapi/compiler/latest /opt/intel/oneapi/mkl/latest /opt/intel/oneapi/ccl/latest pvc,ats-m150,acm-g11 - -FROM base AS deploy -RUN bash /basekit_driver_install_helper.sh runtime && \ - sudo apt clean && \ - sudo rm -rf /var/lib/apt/lists/* && \ - if [ -f /etc/apt/apt.conf.d/proxy.conf ]; then sudo rm /etc/apt/apt.conf.d/proxy.conf; fi && \ - sudo rm /basekit_driver_install_helper.sh -COPY --from=dev --chown=ubuntu:ubuntu /home/ubuntu/intel-extension-for-pytorch/examples/gpu/inference/python/llm ./llm -COPY --from=dev --chown=ubuntu:ubuntu /home/ubuntu/intel-extension-for-pytorch/tools/get_libstdcpp_lib.sh . -RUN . ./miniconda3/bin/activate && \ - conda create -y -n py310 python=3.10 && conda activate py310 && \ - echo "conda activate py310" >> ./.bashrc && \ - ldpreload=$(bash get_libstdcpp_lib.sh) && echo "export LD_PRELOAD=${ldpreload}" >> ./.bashrc && rm get_libstdcpp_lib.sh && \ - cd ./llm && \ - bash tools/env_setup.sh 1 && \ - python -m pip cache purge && \ - conda clean -a -y +ARG BASE_IMAGE=ubuntu:22.04 +FROM ${BASE_IMAGE} AS base +SHELL ["/bin/bash", "-c"] +RUN if [ -f /etc/apt/apt.conf.d/proxy.conf ]; then rm /etc/apt/apt.conf.d/proxy.conf; fi && \ + if [ ! -z ${HTTP_PROXY} ]; then echo "Acquire::http::Proxy \"${HTTP_PROXY}\";" >> /etc/apt/apt.conf.d/proxy.conf; fi && \ + if [ ! -z ${HTTPS_PROXY} ]; then echo "Acquire::https::Proxy \"${HTTPS_PROXY}\";" >> /etc/apt/apt.conf.d/proxy.conf; fi +RUN apt update && \ + apt full-upgrade -y && \ + DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends -y \ + sudo \ + git \ + wget \ + curl \ + vim \ + patch \ + gcc \ + g++ \ + make \ + pkg-config \ + software-properties-common \ + gnupg \ + gpg-agent +COPY ./tools/basekit_driver_install_helper.sh . +RUN bash ./basekit_driver_install_helper.sh driver + +ARG GID_RENDER=109 +RUN useradd -m -s /bin/bash ubuntu && \ + echo 'ubuntu ALL=(ALL) NOPASSWD: ALL' >> /etc/sudoers && \ + groupadd -g $GID_RENDER render && \ + usermod -a -G video,render ubuntu +USER ubuntu +WORKDIR /home/ubuntu + +RUN curl -fsSL -v -o miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + bash miniconda.sh -b -p ./miniconda3 && \ + rm miniconda.sh && \ + echo "source ~/miniconda3/bin/activate" >> ./.bashrc + +FROM base AS dev +# --build-arg COMPILE=ON to compile from source +ARG COMPILE +RUN bash /basekit_driver_install_helper.sh dev +COPY --chown=ubuntu:ubuntu . ./intel-extension-for-pytorch/ +RUN . 
./miniconda3/bin/activate && \ + conda create -y -n compile_py310 python=3.10 && conda activate compile_py310 && \ + cd intel-extension-for-pytorch/examples/gpu/inference/python/llm && \ + if [ -z ${COMPILE} ]; then MODE=6; else MODE=2; fi && \ + bash tools/env_setup.sh ${MODE} /opt/intel/oneapi/compiler/latest /opt/intel/oneapi/mkl/latest /opt/intel/oneapi/ccl/latest /opt/intel/oneapi/mpi/latest pvc,ats-m150,acm-g11 + +FROM base AS deploy +RUN bash /basekit_driver_install_helper.sh runtime && \ + sudo apt clean && \ + sudo rm -rf /var/lib/apt/lists/* && \ + if [ -f /etc/apt/apt.conf.d/proxy.conf ]; then sudo rm /etc/apt/apt.conf.d/proxy.conf; fi && \ + sudo rm /basekit_driver_install_helper.sh +COPY --from=dev --chown=ubuntu:ubuntu /home/ubuntu/intel-extension-for-pytorch/examples/gpu/inference/python/llm ./llm +COPY --from=dev --chown=ubuntu:ubuntu /home/ubuntu/intel-extension-for-pytorch/tools/get_libstdcpp_lib.sh . +RUN . ./miniconda3/bin/activate && \ + conda create -y -n py310 python=3.10 && conda activate py310 && \ + echo "conda activate py310" >> ./.bashrc && \ + ldpreload=$(bash get_libstdcpp_lib.sh) && echo "export LD_PRELOAD=${ldpreload}" >> ./.bashrc && rm get_libstdcpp_lib.sh && \ + cd ./llm && \ + bash tools/env_setup.sh 1 && \ + python -m pip cache purge && \ + conda clean -a -y \ No newline at end of file diff --git a/examples/gpu/inference/python/llm/README.md b/examples/gpu/inference/python/llm/README.md index 6ca138c5c..eebf4a892 100644 --- a/examples/gpu/inference/python/llm/README.md +++ b/examples/gpu/inference/python/llm/README.md @@ -49,10 +49,7 @@ docker build -f examples/gpu/inference/python/llm/Dockerfile --build-arg GID_REN # Run the container with command below -docker run --privileged -it --rm --device /dev/dri:/dev/dri -v /dev/dri/by-path:/dev/dri/by-path \ ---ipc=host --net=host --cap-add=ALL -v /lib/modules:/lib/modules --workdir /workspace \ ---volume `pwd`/examples/gpu/inference/python/llm/:/workspace/llm ipex-llm:2.1.20 /bin/bash - +docker run --privileged -it --rm --device /dev/dri:/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --ipc=host --net=host --cap-add=ALL -v /lib/modules:/lib/modules --workdir /workspace --volume `pwd`/examples/gpu/inference/python/llm/:/workspace/llm ipex-llm:2.1.20 /bin/bash # When the command prompt shows inside the docker container, enter llm examples directory cd llm @@ -83,9 +80,9 @@ conda activate llm conda install pkg-config # Setup the environment with the provided script cd examples/gpu/inference/python/llm -# If you want to install Intel® Extension for PyTorch\* from prebuilt wheel files, use the command below: +# If you want to install Intel® Extension for PyTorch* from prebuilt wheel files, use the command below: bash ./tools/env_setup.sh 7 -# If you want to install Intel® Extension for PyTorch\* from source, use the commands below: +# If you want to install Intel® Extension for PyTorch* from source, use the commands below: bash ./tools/env_setup.sh 3 export LD_PRELOAD=$(bash ../../../../../tools/get_libstdcpp_lib.sh) export LD_LIBRARY_PATH=${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH} diff --git a/examples/gpu/inference/python/llm/tools/env_activate.sh b/examples/gpu/inference/python/llm/tools/env_activate.sh index aa6100114..39ad46712 100644 --- a/examples/gpu/inference/python/llm/tools/env_activate.sh +++ b/examples/gpu/inference/python/llm/tools/env_activate.sh @@ -13,5 +13,3 @@ fi export TORCH_LLM_ALLREDUCE=1 - - diff --git a/examples/gpu/inference/python/llm/tools/env_setup.sh 
b/examples/gpu/inference/python/llm/tools/env_setup.sh index 7c90f4bcd..e6275d1bd 100644 --- a/examples/gpu/inference/python/llm/tools/env_setup.sh +++ b/examples/gpu/inference/python/llm/tools/env_setup.sh @@ -15,9 +15,10 @@ MODE=0x03 DPCPP_ROOT= ONEMKL_ROOT= ONECCL_ROOT= +MPI_ROOT= AOT= if [[ $# -eq 0 ]]; then - echo "Usage: bash $0 [DPCPPROOT] [MKLROOT] [CCLROOT] [AOT]" + echo "Usage: bash $0 [DPCPPROOT] [MKLROOT] [CCLROOT] [MPIROOT] [AOT]" echo "Set MODE to 7 to install from wheel files. Set it to 3 to compile from source. When compiling from source, you need to set arguments below." echo "DPCPPROOT, MKLROOT and CCLROOT should be absolute or relative path to the root directory of DPC++ compiler, oneMKL and oneCCL in oneAPI Base Toolkit respectively." echo "AOT should be set to the text string for environment variable USE_AOT_DEVLIST. Setting it to \"none\" to disable AOT." @@ -42,6 +43,10 @@ if [[ $# -gt 0 ]]; then ONECCL_ROOT=$1 shift fi +if [[ $# -gt 0 ]]; then + MPI_ROOT=$1 + shift +fi if [[ $# -gt 0 ]]; then AOT=$1 shift @@ -64,9 +69,10 @@ if [ $((${MODE} & 0x06)) -eq 2 ] && ([ -z ${DPCPP_ROOT} ] || [ -z ${ONEMKL_ROOT} ] || [ -z ${ONECCL_ROOT} ] || + [ -z ${MPI_ROOT} ] || [ -z ${AOT} ]); then - echo "Source code compilation is needed. Please set arguments DPCPP_ROOT, ONEMKL_ROOT, ONECCL_ROOT and AOT." - echo "DPCPPROOT, MKLROOT and CCLROOT should be absolute or relative path to the root directory of DPC++ compiler, oneMKL and oneCCL in oneAPI Base Toolkit respectively." + echo "Source code compilation is needed. Please set arguments DPCPP_ROOT, ONEMKL_ROOT, ONECCL_ROOT, MPI_ROOT and AOT." + echo "DPCPPROOT, MKLROOT, CCLROOT and MPIROOT should be absolute or relative path to the root directory of DPC++ compiler, oneMKL, oneCCL and MPI in oneAPI Base Toolkit respectively." echo "AOT should be set to the text string for environment variable USE_AOT_DEVLIST. Setting it to \"none\" to disable AOT." exit 2 fi @@ -98,7 +104,7 @@ if [ $((${MODE} & 0x02)) -ne 0 ]; then VER_TORCH=$(python tools/yaml_utils.py -f dependency_version.yml -d pytorch -k version) TRANSFORMERS_COMMIT=$(python tools/yaml_utils.py -f dependency_version.yml -d transformers -k commit) VER_PROTOBUF=$(python tools/yaml_utils.py -f dependency_version.yml -d protobuf -k version) - VER_LLM_EVAL=$(python tools/yaml_utils.py -f dependency_version.yml -d llm_eval -k version) + VER_LM_EVAL=$(python tools/yaml_utils.py -f dependency_version.yml -d lm_eval -k version) VER_IPEX_MAJOR=$(grep "VERSION_MAJOR" version.txt | cut -d " " -f 2) VER_IPEX_MINOR=$(grep "VERSION_MINOR" version.txt | cut -d " " -f 2) VER_IPEX_PATCH=$(grep "VERSION_PATCH" version.txt | cut -d " " -f 2) @@ -131,10 +137,15 @@ if [ $((${MODE} & 0x02)) -ne 0 ]; then exit 6 fi + if [ ! -f ${MPI_ROOT}/env/vars.sh ]; then + echo "MPI environment ${MPI_ROOT} doesn't seem to exist." + exit 6 + fi + # Install PyTorch and Intel® Extension for PyTorch* cp intel-extension-for-pytorch/scripts/compile_bundle.sh . 
sed -i "s/VER_IPEX=.*/VER_IPEX=/" compile_bundle.sh - bash compile_bundle.sh ${DPCPP_ROOT} ${ONEMKL_ROOT} ${ONECCL_ROOT} ${AOT} 1 + bash compile_bundle.sh ${DPCPP_ROOT} ${ONEMKL_ROOT} ${ONECCL_ROOT} ${MPI_ROOT} ${AOT} 1 cp pytorch/dist/*.whl ${WHEELFOLDER} cp intel-extension-for-pytorch/dist/*.whl ${WHEELFOLDER} cp torch-ccl/dist/*.whl ${WHEELFOLDER} @@ -144,8 +155,7 @@ if [ $((${MODE} & 0x02)) -ne 0 ]; then echo "python -m pip install impi-devel" >> ${AUX_INSTALL_SCRIPT} echo "python -m pip install cpuid accelerate datasets sentencepiece diffusers protobuf==${VER_PROTOBUF} huggingface_hub mpi4py mkl" >> ${AUX_INSTALL_SCRIPT} - echo "python -m pip install lm_eval==${VER_LLM_EVAL}" >> ${AUX_INSTALL_SCRIPT} - + echo "python -m pip install lm_eval==${VER_LM_EVAL}" >> ${AUX_INSTALL_SCRIPT} # Install Transformers if [ -d transformers ]; then diff --git a/scripts/build_doc.sh b/scripts/build_doc.sh index f644cd762..4d0cf5f46 100644 --- a/scripts/build_doc.sh +++ b/scripts/build_doc.sh @@ -240,16 +240,10 @@ elif [[ ${DEVICE} == "gpu" ]]; then rm -rf ../csrc/include/xpu mv ../csrc/include/xpu_bk ../csrc/include/xpu fi -cp tutorials/features/graph_capture.md tutorials/features/graph_capture.md.bk -parse_example "../examples/cpu/features/graph_capture.py" tutorials/features/graph_capture.md "(marker_feature_graph_capture)" "python" -cp tutorials/features/int8_recipe_tuning_api.md tutorials/features/int8_recipe_tuning_api.md.bk -parse_example "../examples/cpu/features/int8_recipe_tuning/int8_autotune.py" tutorials/features/int8_recipe_tuning_api.md "(marker_feature_int8_autotune)" "python" make clean make html -mv tutorials/features/graph_capture.md.bk tutorials/features/graph_capture.md -mv tutorials/features/int8_recipe_tuning_api.md.bk tutorials/features/int8_recipe_tuning_api.md mv tutorials/examples.md.bk tutorials/examples.md if [[ ${DEVICE} == "cpu" ]]; then mv tutorials/features/fast_bert.md.bk tutorials/features/fast_bert.md diff --git a/scripts/compile_bundle.sh b/scripts/compile_bundle.sh index 55ae404d0..803beb54b 100644 --- a/scripts/compile_bundle.sh +++ b/scripts/compile_bundle.sh @@ -8,7 +8,7 @@ set -eo pipefail VER_IPEX=v2.1.20+xpu if [[ $# -lt 3 ]]; then - echo "Usage: bash $0 " + echo "Usage: bash $0 " echo "DPCPPROOT, MKLROOT and CCLROOT are mandatory, should be absolute or relative path to the root directory of DPC++ compiler, oneMKL and oneCCL respectively." echo "AOT should be set to the text string for environment variable USE_AOT_DEVLIST. Setting it to \"none\" to disable AOT." exit 1 @@ -16,7 +16,8 @@ fi DPCPP_ROOT=$1 ONEMKL_ROOT=$2 ONECCL_ROOT=$3 -AOT=$4 +MPI_ROOT=$4 +AOT=$5 if [[ ${AOT} == "none" ]]; then AOT="" fi @@ -33,10 +34,10 @@ fi # └--------------- Undefined MODE=0x07 if [ $# -gt 4 ]; then - if [[ ! $5 =~ ^[0-9]+$ ]] && [[ ! $5 =~ ^0x[0-9a-fA-F]+$ ]]; then + if [[ ! $6 =~ ^[0-9]+$ ]] && [[ ! $6 =~ ^0x[0-9a-fA-F]+$ ]]; then echo "Warning: Unexpected argument. Using default value." else - MODE=$5 + MODE=$6 fi fi @@ -53,10 +54,17 @@ if [ ! -f ${ONEMKL_ENV} ]; then exit 3 fi +CCL_ENV=${ONECCL_ROOT}/env/vars.sh if [ ! -f ${ONECCL_ROOT}/env/vars.sh ]; then echo "oneCCL environment ${ONECCL_ROOT} doesn't seem to exist." exit 6 fi + +MPI_ENV=${MPI_ROOT}/env/vars.sh +if [ ! -f ${MPI_ROOT}/env/vars.sh ]; then + echo "oneCCL environment ${MPI_ROOT} doesn't seem to exist." + exit 6 +fi ONEAPIROOT=${ONEMKL_ROOT}/../.. 
# Check existance of required Linux commands @@ -264,8 +272,8 @@ cd pytorch git apply ../intel-extension-for-pytorch/torch_patches/*.patch python -m pip install -r requirements.txt conda install --force-reinstall intel::mkl-static intel::mkl-include -y -mv version.txt version.txt.bk -echo "${COMMIT_TORCH:1}a0" > version.txt +export PYTORCH_BUILD_VERSION="${COMMIT_TORCH:1}.post0+cxx11.abi" +export PYTORCH_BUILD_NUMBER=0 # Ensure cmake can find python packages when using conda or virtualenv if [ -n "${CONDA_PREFIX-}" ]; then export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(command -v conda))/../"} @@ -286,7 +294,8 @@ unset USE_NUMA unset _GLIBCXX_USE_CXX11_ABI unset USE_STATIC_MKL unset CMAKE_PREFIX_PATH -mv version.txt.bk version.txt +unset PYTORCH_BUILD_NUMBER +unset PYTORCH_BUILD_VERSION conda remove mkl-static mkl-include -y python -m pip install dist/*.whl cd .. @@ -304,6 +313,8 @@ fi # don't fail on external scripts source ${DPCPP_ENV} source ${ONEMKL_ENV} +source ${CCL_ENV} +source ${MPI_ENV} # TorchAudio if [ $((${MODE} & 0x02)) -ne 0 ]; then cd audio @@ -413,4 +424,4 @@ CMD="${CMD} import intel_extension_for_pytorch as ipex; print(f'ipex_version: if [ $((${MODE} & 0x01)) -ne 0 ]; then CMD="${CMD} import oneccl_bindings_for_pytorch as torch_ccl; print(f'torchccl_version: {torch_ccl.__version__}');" fi -python -c "${CMD}" +python -c "${CMD}" \ No newline at end of file diff --git a/tools/basekit_driver_install_helper.sh b/tools/basekit_driver_install_helper.sh index d9b94f96e..13e164b59 100644 --- a/tools/basekit_driver_install_helper.sh +++ b/tools/basekit_driver_install_helper.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -e if [ $# -eq 0 ]; then echo "Usage: bash $0 " @@ -23,19 +24,31 @@ if [ $UID -ne 0 ]; then SUDO="sudo" fi -OS_ID="" +source /etc/os-release +OS_ID=${ID} OS_VERSION="" -while read line -do - KEY=$(echo ${line} | cut -d '=' -f 1) - VAL=$(echo ${line} | cut -d '=' -f 2) - if [ "${KEY}" = "ID" ]; then - OS_ID=${VAL} - fi - if [ "${KEY}" = "VERSION_ID" ]; then - OS_VERSION=${VAL} - fi -done < <(cat /etc/os-release) +if [ "${OS_ID}" = "ubuntu" ]; then + OS_VERSION=${VERSION_CODENAME} + if [[ ! " jammy " =~ " ${OS_VERSION} " ]]; then + echo "Ubuntu version ${OS_VERSION} not supported" + exit 3 + fi +elif [ "${OS_ID}" = "rhel" ] || [ "${OS_ID}" = "centos" ]; then + OS_VERSION=${VERSION_ID} + if [ "${OS_VERSION}" = "8" ]; then + OS_VERSION="8.6" + fi + if [ "${OS_VERSION}" = "9" ]; then + OS_VERSION="9.0" + fi + if [[ ! " 8.6 8.8 8.9 9.0 9.2 9.3 " =~ " ${OS_VERSION} " ]]; then + echo "RHEL version ${OS_VERSION} not supported" + exit 3 + fi +else + echo "${OS_ID} not supported." 
+ exit 3 +fi function add-repo-driver() { SUDO=$1 @@ -47,26 +60,12 @@ function add-repo-driver() { if [ "${OS_ID}" = "ubuntu" ]; then wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | ${SUDO} gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg - echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy unified" | ${SUDO} tee /etc/apt/sources.list.d/intel-gpu-jammy.list + echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu ${OS_VERSION}/lts/2350 unified" | ${SUDO} tee /etc/apt/sources.list.d/intel-gpu-${OS_VERSION}.list ${SUDO} apt update fi - if [ "${OS_ID}" = "\"rhel\"" ] || [ "${OS_ID}" = "\"centos\"" ]; then - if [ "${OS_VERSION}" = "\"8\"" ] || [ "${OS_VERSION}" = "\"8.6\"" ]; then - ${SUDO} dnf install -y 'dnf-command(config-manager)' - ${SUDO} dnf config-manager --add-repo https://repositories.intel.com/gpu/rhel/8.6/unified/intel-gpu-8.6.repo - fi - if [ "${OS_VERSION}" = "\"8.8\"" ]; then - ${SUDO} dnf install -y 'dnf-command(config-manager)' - ${SUDO} dnf config-manager --add-repo https://repositories.intel.com/gpu/rhel/8.8/unified/intel-gpu-8.8.repo - fi - if [ "${OS_VERSION}" = "\"9\"" ] || [ "${OS_VERSION}" = "\"9.0\"" ]; then - ${SUDO} dnf install -y 'dnf-command(config-manager)' - ${SUDO} dnf config-manager --add-repo https://repositories.intel.com/gpu/rhel/9.0/unified/intel-gpu-9.0.repo - fi - if [ "${OS_VERSION}" = "\"9.2\"" ]; then - ${SUDO} dnf install -y 'dnf-command(config-manager)' - ${SUDO} dnf config-manager --add-repo https://repositories.intel.com/gpu/rhel/9.2/unified/intel-gpu-9.2.repo - fi + if [ "${OS_ID}" = "rhel" ] || [ "${OS_ID}" = "centos" ]; then + ${SUDO} dnf install -y 'dnf-command(config-manager)' + ${SUDO} dnf config-manager --add-repo https://repositories.intel.com/gpu/rhel/${OS_VERSION}/lts/2350/unified/intel-gpu-${OS_VERSION}.repo fi } @@ -83,7 +82,7 @@ function add-repo-basekit() { echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | ${SUDO} tee /etc/apt/sources.list.d/oneAPI.list ${SUDO} apt update fi - if [ "${OS_ID}" = "\"rhel\"" ] || [ "${OS_ID}" = "\"centos\"" ]; then + if [ "${OS_ID}" = "rhel" ] || [ "${OS_ID}" = "centos" ]; then tee > /tmp/oneAPI.repo << EOF [oneAPI] name=Intel® oneAPI repository @@ -108,19 +107,19 @@ function install-driver() { if [ "${OS_ID}" = "ubuntu" ]; then ${SUDO} apt update - ${SUDO} apt install -y intel-opencl-icd=23.30.26918.50-736~22.04 \ - level-zero=1.13.1-719~22.04 \ - level-zero-dev=1.13.1-719~22.04 \ - intel-level-zero-gpu=1.3.26918.50-736~22.04 \ - xpu-smi=1.2.22-31~22.04 - fi - if [ "${OS_ID}" = "\"rhel\"" ] || [ "${OS_ID}" = "\"centos\"" ]; then - ${SUDO} dnf install -y intel-opencl-23.30.26918.50 \ - level-zero-1.13.1 \ - level-zero-devel-1.13.1 \ - intel-level-zero-gpu-1.3.26918.50 \ - intel-ocloc-23.30.26918.50 \ - xpu-smi-1.2.22 + ${SUDO} apt install -y intel-opencl-icd=23.43.27642.40-803~22.04 \ + level-zero=1.14.0-744~22.04 \ + level-zero-dev=1.14.0-744~22.04 \ + intel-level-zero-gpu=1.3.27642.40-803~22.04 \ + xpu-smi=1.2.26-37~22.04 + fi + if [ "${OS_ID}" = "rhel" ] || [ "${OS_ID}" = "centos" ]; then + ${SUDO} dnf install -y intel-opencl-23.43.27642.40 \ + level-zero-1.14.0 \ + level-zero-devel-1.14.0 \ + intel-level-zero-gpu-1.3.27642.40 \ + intel-ocloc-23.43.27642.40 \ + xpu-smi-1.2.26 fi } @@ -135,16 +134,16 @@ function install-dev() { if [ "${OS_ID}" = "ubuntu" ]; then ${SUDO} apt update 
- ${SUDO} apt install -y intel-level-zero-gpu-dev=1.3.26918.50-736~22.04 \ - intel-oneapi-dpcpp-cpp-2024.0 \ - intel-oneapi-mkl-devel=2024.0.0-49656 \ - intel-oneapi-ccl-devel=2021.11.1-6 + ${SUDO} apt install -y intel-level-zero-gpu-dev=1.3.27642.40-803~22.04 \ + intel-oneapi-dpcpp-cpp-2024.1=2024.1.0-963 \ + intel-oneapi-mkl-devel=2024.1.0-691 \ + intel-oneapi-ccl-devel=2021.12.0-309 fi - if [ "${OS_ID}" = "\"rhel\"" ] || [ "${OS_ID}" = "\"centos\"" ]; then - ${SUDO} dnf install -y intel-level-zero-gpu-devel-1.3.26918.50 \ - intel-oneapi-dpcpp-cpp-2024.0 \ - intel-oneapi-mkl-devel-2024.0.0-49656 \ - intel-oneapi-ccl-devel-2021.11.1-6 + if [ "${OS_ID}" = "rhel" ] || [ "${OS_ID}" = "centos" ]; then + ${SUDO} dnf install -y intel-level-zero-gpu-devel-1.3.27642.40 \ + intel-oneapi-dpcpp-cpp-2024.1-2024.1.0-963 \ + intel-oneapi-mkl-devel-2024.1.0-691 \ + intel-oneapi-ccl-devel-2021.12.0-309 fi } @@ -159,17 +158,21 @@ function install-runtime() { if [ "${OS_ID}" = "ubuntu" ]; then ${SUDO} apt update - ${SUDO} apt install -y intel-oneapi-runtime-dpcpp-cpp=2024.0.0-49819 \ - intel-oneapi-runtime-mkl=2024.0.0-49656 \ - intel-oneapi-runtime-ccl=2021.11.1-6 + ${SUDO} apt install -y intel-oneapi-runtime-dpcpp-cpp=2024.1.0-963 \ + intel-oneapi-runtime-mkl=2024.1.0-691 \ + intel-oneapi-runtime-ccl=2021.12.0-309 fi - if [ "${OS_ID}" = "\"rhel\"" ] || [ "${OS_ID}" = "\"centos\"" ]; then - ${SUDO} dnf install -y intel-oneapi-runtime-dpcpp-cpp-2024.0.0-49819 \ - intel-oneapi-runtime-mkl-2024.0.0-49656 \ - intel-oneapi-runtime-ccl-2021.11.1-6 + if [ "${OS_ID}" = "rhel" ] || [ "${OS_ID}" = "centos" ]; then + ${SUDO} dnf install -y intel-oneapi-runtime-dpcpp-cpp-2024.1.0-963 \ + intel-oneapi-runtime-mkl-2024.1.0-691 \ + intel-oneapi-runtime-ccl-2021.12.0-309 fi } +for CMD in wget gpg; do + command -v ${CMD} > /dev/null || (echo "Error: Command \"${CMD}\" not found." ; exit 1) +done + if [ "${MODE}" = "driver" ]; then install-driver ${SUDO} ${OS_ID} ${OS_VERSION} fi
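After applying this patch and installing the rebuilt wheels, the bumped versions can be sanity-checked with the same probe that scripts/compile_bundle.sh runs at the end of its build. A minimal sketch, assuming the wheels were installed into the active Python environment; the expected values follow dependency_version.yml and the VER_IPEX tag set in this patch:

```python
# Post-install version probe, mirroring the final check in scripts/compile_bundle.sh.
# Expected values are taken from dependency_version.yml in this patch; adjust them if
# you build a different tag.
import torch
import intel_extension_for_pytorch as ipex
import oneccl_bindings_for_pytorch as torch_ccl

print(f"torch_version: {torch.__version__}")         # expected: 2.1.0.post0+cxx11.abi
print(f"ipex_version: {ipex.__version__}")           # expected: 2.1.20+xpu
print(f"torchccl_version: {torch_ccl.__version__}")  # expected: 2.1.200+xpu
```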