From cd296feac3e4e0bb9424a92c3753f12081b82416 Mon Sep 17 00:00:00 2001 From: Chen Xi Date: Wed, 17 Jul 2024 06:38:36 +0000 Subject: [PATCH 01/11] fix multi-gpu issue on sycl Signed-off-by: Chen Xi --- ggml/src/ggml-sycl/dpct/helper.hpp | 152 +++++++++++++++++++++-------- src/llama.cpp | 4 +- 2 files changed, 113 insertions(+), 43 deletions(-) diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index 31df1cb9e2cf4..fef5e2d8d8df2 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -255,7 +255,7 @@ namespace dpct void set_pitch(size_t pitch) { _pitch = pitch; } size_t get_x() { return _x; } - void set_x(size_t x) { _x = x; } + void set_x(size_t x) { _x = x; }; size_t get_y() { return _y; } void set_y(size_t y) { _y = y; } @@ -588,7 +588,7 @@ namespace dpct out = prop; } - /// dpct device extension + /// dpct device extension class device_ext : public sycl::device { typedef std::mutex mutex_type; @@ -687,119 +687,128 @@ namespace dpct init_queues(); } - sycl::queue &in_order_queue() { return _q_in_order; } + sycl::queue &in_order_queue() { return *_q_in_order; } - sycl::queue &out_of_order_queue() { return _q_out_of_order; } + sycl::queue &out_of_order_queue() { return *_q_out_of_order; } sycl::queue &default_queue() { return in_order_queue(); } void queues_wait_and_throw() { std::unique_lock lock(m_mutex); lock.unlock(); - for (auto &q : _queues) { - q.wait_and_throw(); + for (const auto &q : _queues) { + q->wait_and_throw(); } // Guard the destruct of current_queues to make sure the ref count is // safe. lock.lock(); } - sycl::queue create_queue(bool enable_exception_handler = false) { + sycl::queue *create_queue(bool enable_exception_handler = false) { return create_in_order_queue(enable_exception_handler); } - sycl::queue create_queue(sycl::device device, + sycl::queue *create_queue(sycl::device device, bool enable_exception_handler = false) { return create_in_order_queue(device, enable_exception_handler); } - sycl::queue create_in_order_queue(bool enable_exception_handler = false) { + sycl::queue *create_in_order_queue(bool enable_exception_handler = false) { std::lock_guard lock(m_mutex); return create_queue_impl(enable_exception_handler, sycl::property::queue::in_order()); } - sycl::queue create_in_order_queue(sycl::device device, + sycl::queue *create_in_order_queue(sycl::device device, bool enable_exception_handler = false) { std::lock_guard lock(m_mutex); return create_queue_impl(device, enable_exception_handler, sycl::property::queue::in_order()); } - sycl::queue create_out_of_order_queue( + sycl::queue *create_out_of_order_queue( bool enable_exception_handler = false) { std::lock_guard lock(m_mutex); return create_queue_impl(enable_exception_handler); } - void destroy_queue(sycl::queue queue) { + void destroy_queue(sycl::queue *&queue) { std::lock_guard lock(m_mutex); - _queues.clear(); + _queues.erase(std::remove_if(_queues.begin(), _queues.end(), + [=](const std::shared_ptr &q) -> bool + { + return q.get() == queue; + }), + _queues.end()); + queue = nullptr; } - void set_saved_queue(sycl::queue q) { + void set_saved_queue(sycl::queue *q) { std::lock_guard lock(m_mutex); _saved_queue = q; } - sycl::queue get_saved_queue() const { + sycl::queue *get_saved_queue() const { std::lock_guard lock(m_mutex); return _saved_queue; } private: - void clear_queues() { _queues.clear(); } + void clear_queues() { + _queues.clear(); + _q_in_order = _q_out_of_order = _saved_queue = nullptr; + } void init_queues() { _q_in_order = create_queue_impl(true, sycl::property::queue::in_order()); _q_out_of_order = create_queue_impl(true); - _saved_queue = default_queue(); + _saved_queue = &default_queue(); } /// Caller should acquire resource \p m_mutex before calling this /// function. template - sycl::queue create_queue_impl(bool enable_exception_handler, + sycl::queue *create_queue_impl(bool enable_exception_handler, Properties... properties) { sycl::async_handler eh = {}; if (enable_exception_handler) { eh = exception_handler; } - auto q = sycl::queue(*this, eh, - sycl::property_list( + _queues.push_back(std::make_shared( + *this, eh, + sycl::property_list( #ifdef DPCT_PROFILING_ENABLED - sycl::property::queue::enable_profiling(), + sycl::property::queue::enable_profiling(), #endif - properties...)); - _queues.push_back(q); + properties...))); - return _queues.back(); + return _queues.back().get(); } template - sycl::queue create_queue_impl(sycl::device device, + sycl::queue *create_queue_impl(sycl::device device, bool enable_exception_handler, Properties... properties) { sycl::async_handler eh = {}; if (enable_exception_handler) { eh = exception_handler; } - _queues.push_back( - sycl::queue(device, eh, + _queues.push_back(std::make_shared( + device, eh, sycl::property_list( #ifdef DPCT_PROFILING_ENABLED sycl::property::queue::enable_profiling(), #endif properties...))); - return _queues.back(); + return _queues.back().get(); } void get_version(int &major, int &minor) const { detail::get_version(*this, major, minor); } - sycl::queue _q_in_order, _q_out_of_order; - sycl::queue _saved_queue; - std::vector _queues; + sycl::queue *_q_in_order, *_q_out_of_order; + sycl::queue *_saved_queue; + std::vector> _queues; mutable mutex_type m_mutex; }; @@ -855,15 +864,69 @@ namespace dpct unsigned int get_device_id(const sycl::device &dev) { unsigned int id = 0; - for (auto dev_item : _devs) + for (auto &dev_item : _devs) { if (*dev_item == dev) { - break; + return id; } id++; } - return id; + return -1; + } + + inline std::string get_preferred_gpu_platform_name() { + std::string result; + + std::string filter = "level-zero"; + char* env = getenv("ONEAPI_DEVICE_SELECTOR"); + if (env) { + if (std::strstr(env, "level_zero")) { + filter = "level-zero"; + } + else if (std::strstr(env, "opencl")) { + filter = "opencl"; + } + else { + throw std::runtime_error("invalid device filter: " + std::string(env)); + } + } + + auto plaform_list = sycl::platform::get_platforms(); + + for (const auto& platform : plaform_list) { + auto devices = platform.get_devices(); + auto gpu_dev = std::find_if(devices.begin(), devices.end(), [](const sycl::device& d) { + return d.is_gpu(); + }); + + if (gpu_dev == devices.end()) { + // cout << "platform [" << platform_name + // << "] does not contain GPU devices, skipping\n"; + continue; + } + + auto platform_name = platform.get_info(); + std::string platform_name_low_case; + platform_name_low_case.resize(platform_name.size()); + + std::transform( + platform_name.begin(), platform_name.end(), platform_name_low_case.begin(), ::tolower); + + if (platform_name_low_case.find(filter) == std::string::npos) { + // cout << "platform [" << platform_name + // << "] does not match with requested " + // << filter << ", skipping\n"; + continue; + } + + result = platform_name; + } + + if (result.empty()) + throw std::runtime_error("can not find preferred GPU platform"); + + return result; } template @@ -915,6 +978,7 @@ namespace dpct static bool compare_backend(std::string &backend1, std::string &backend2) { return convert_backend_index(backend1) < convert_backend_index(backend2); } + dev_mgr() { sycl::device default_device = @@ -928,12 +992,17 @@ namespace dpct auto Platforms = sycl::platform::get_platforms(); // Keep track of the number of devices per backend - std::map DeviceNums; + std::map DeviceNums; std::map> backend_devices; + auto preferred_platform_name = get_preferred_gpu_platform_name(); while (!Platforms.empty()) { auto Platform = Platforms.back(); Platforms.pop_back(); + auto platform_name = Platform.get_info(); + if (platform_name.compare(preferred_platform_name) != 0) { + continue; + } auto devices = Platform.get_devices(); std::string backend_type = get_device_backend_and_type(devices[0]); for (const auto &device : devices) { @@ -945,6 +1014,7 @@ namespace dpct for(auto it = backend_devices.begin(); it != backend_devices.end(); ++it) { keys.push_back(it->first); } + std::sort(keys.begin(), keys.end(), compare_backend); for (auto &key : keys) { @@ -967,7 +1037,9 @@ namespace dpct _cpu_device = _devs.size() - 1; } } - } + + } + void check_id(unsigned int id) const { if (id >= _devs.size()) @@ -1056,7 +1128,7 @@ namespace dpct #error "Only support Windows and Linux." #endif next_free = mapped_address_space; - } + }; public: using buffer_id_t = int; @@ -1077,7 +1149,7 @@ namespace dpct #else #error "Only support Windows and Linux." #endif - } + }; mem_mgr(const mem_mgr &) = delete; mem_mgr &operator=(const mem_mgr &) = delete; @@ -2426,7 +2498,6 @@ namespace dpct b, ldb, beta, c, ldc, batch_size); break; } -#endif case detail::get_type_combination_id( library_data_t::real_int8, library_data_t::real_int8, library_data_t::real_int32, library_data_t::real_int32): @@ -2459,6 +2530,7 @@ namespace dpct batch_size); break; } +#endif case detail::get_type_combination_id( library_data_t::real_half, library_data_t::real_half, library_data_t::real_half, library_data_t::real_float): @@ -2595,7 +2667,6 @@ namespace dpct stride_c, batch_size); break; } -#endif case detail::get_type_combination_id( library_data_t::real_int8, library_data_t::real_int8, library_data_t::real_int32, library_data_t::real_int32): @@ -2624,6 +2695,7 @@ namespace dpct beta, c, ldc, stride_c, batch_size); break; } +#endif case detail::get_type_combination_id( library_data_t::real_half, library_data_t::real_half, library_data_t::real_half, library_data_t::real_float): diff --git a/src/llama.cpp b/src/llama.cpp index 400a4232beeb0..55ef90572e58b 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -19154,9 +19154,7 @@ struct llama_context * llama_new_context_with_model( for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) { ggml_backend_t backend = ggml_backend_sycl_init(i); if (backend == nullptr) { - int id_list[GGML_SYCL_MAX_DEVICES]; - ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES); - LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, id_list[i], i); + LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d for No.%d backend\n", __func__, i, i); llama_free(ctx); return nullptr; } From 6b4f7b2ac1ff4f77985430618c41e6e24ba929dd Mon Sep 17 00:00:00 2001 From: Chen Xi Date: Thu, 18 Jul 2024 07:13:01 +0000 Subject: [PATCH 02/11] fix some typo Signed-off-by: Chen Xi --- ggml/src/ggml-sycl/dpct/helper.hpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index fef5e2d8d8df2..55f003a351d6b 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -255,7 +255,7 @@ namespace dpct void set_pitch(size_t pitch) { _pitch = pitch; } size_t get_x() { return _x; } - void set_x(size_t x) { _x = x; }; + void set_x(size_t x) { _x = x; } size_t get_y() { return _y; } void set_y(size_t y) { _y = y; } @@ -978,7 +978,6 @@ namespace dpct static bool compare_backend(std::string &backend1, std::string &backend2) { return convert_backend_index(backend1) < convert_backend_index(backend2); } - dev_mgr() { sycl::device default_device = @@ -992,17 +991,17 @@ namespace dpct auto Platforms = sycl::platform::get_platforms(); // Keep track of the number of devices per backend - std::map DeviceNums; + std::map DeviceNums; std::map> backend_devices; auto preferred_platform_name = get_preferred_gpu_platform_name(); while (!Platforms.empty()) { auto Platform = Platforms.back(); Platforms.pop_back(); - auto platform_name = Platform.get_info(); - if (platform_name.compare(preferred_platform_name) != 0) { - continue; - } + auto platform_name = Platform.get_info(); + if (platform_name.compare(preferred_platform_name) != 0) { + continue; + } auto devices = Platform.get_devices(); std::string backend_type = get_device_backend_and_type(devices[0]); for (const auto &device : devices) { @@ -1016,7 +1015,6 @@ namespace dpct } std::sort(keys.begin(), keys.end(), compare_backend); - for (auto &key : keys) { std::vector devs = backend_devices[key]; std::sort(devs.begin(), devs.end(), compare_dev); From d096de2e90b22c002b5d4296bc64750687eceb5e Mon Sep 17 00:00:00 2001 From: Chen Xi Date: Thu, 18 Jul 2024 07:16:17 +0000 Subject: [PATCH 03/11] remove unnecessary whitespace Signed-off-by: Chen Xi --- ggml/src/ggml-sycl/dpct/helper.hpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index 55f003a351d6b..09c0ef8d0ca9d 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -1013,8 +1013,8 @@ namespace dpct for(auto it = backend_devices.begin(); it != backend_devices.end(); ++it) { keys.push_back(it->first); } - std::sort(keys.begin(), keys.end(), compare_backend); + for (auto &key : keys) { std::vector devs = backend_devices[key]; std::sort(devs.begin(), devs.end(), compare_dev); @@ -1035,9 +1035,7 @@ namespace dpct _cpu_device = _devs.size() - 1; } } - } - void check_id(unsigned int id) const { if (id >= _devs.size()) @@ -1126,7 +1124,7 @@ namespace dpct #error "Only support Windows and Linux." #endif next_free = mapped_address_space; - }; + } public: using buffer_id_t = int; @@ -1147,7 +1145,7 @@ namespace dpct #else #error "Only support Windows and Linux." #endif - }; + } mem_mgr(const mem_mgr &) = delete; mem_mgr &operator=(const mem_mgr &) = delete; From bd71cdac0f53357d6f6562fb0a54be81afb91afd Mon Sep 17 00:00:00 2001 From: Chen Xi Date: Thu, 18 Jul 2024 07:19:24 +0000 Subject: [PATCH 04/11] file format issue Signed-off-by: Chen Xi --- ggml/src/ggml-sycl/dpct/helper.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index 09c0ef8d0ca9d..40ac8efc32a1d 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -1035,7 +1035,7 @@ namespace dpct _cpu_device = _devs.size() - 1; } } - } + } void check_id(unsigned int id) const { if (id >= _devs.size()) From 6160a76efbf5b0c2ddb30a5a237a976bac5a1c21 Mon Sep 17 00:00:00 2001 From: Chen Xi Date: Thu, 18 Jul 2024 08:07:05 +0000 Subject: [PATCH 05/11] sycl::queue can directly use as shared_ptr Signed-off-by: Chen Xi --- ggml/src/ggml-sycl/dpct/helper.hpp | 54 ++++++++++++++---------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index 40ac8efc32a1d..0f18cff3268a6 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -687,93 +687,89 @@ namespace dpct init_queues(); } - sycl::queue &in_order_queue() { return *_q_in_order; } + sycl::queue &in_order_queue() { return _q_in_order; } - sycl::queue &out_of_order_queue() { return *_q_out_of_order; } + sycl::queue &out_of_order_queue() { return _q_out_of_order; } sycl::queue &default_queue() { return in_order_queue(); } void queues_wait_and_throw() { std::unique_lock lock(m_mutex); lock.unlock(); - for (const auto &q : _queues) { - q->wait_and_throw(); + for (auto &q : _queues) { + q.wait_and_throw(); } // Guard the destruct of current_queues to make sure the ref count is // safe. lock.lock(); } - sycl::queue *create_queue(bool enable_exception_handler = false) { + sycl::queue create_queue(bool enable_exception_handler = false) { return create_in_order_queue(enable_exception_handler); } - sycl::queue *create_queue(sycl::device device, + sycl::queue create_queue(sycl::device device, bool enable_exception_handler = false) { return create_in_order_queue(device, enable_exception_handler); } - sycl::queue *create_in_order_queue(bool enable_exception_handler = false) { + sycl::queue create_in_order_queue(bool enable_exception_handler = false) { std::lock_guard lock(m_mutex); return create_queue_impl(enable_exception_handler, sycl::property::queue::in_order()); } - sycl::queue *create_in_order_queue(sycl::device device, + sycl::queue create_in_order_queue(sycl::device device, bool enable_exception_handler = false) { std::lock_guard lock(m_mutex); return create_queue_impl(device, enable_exception_handler, sycl::property::queue::in_order()); } - sycl::queue *create_out_of_order_queue( + sycl::queue create_out_of_order_queue( bool enable_exception_handler = false) { std::lock_guard lock(m_mutex); return create_queue_impl(enable_exception_handler); } - void destroy_queue(sycl::queue *&queue) { + void destroy_queue(sycl::queue queue) { std::lock_guard lock(m_mutex); _queues.erase(std::remove_if(_queues.begin(), _queues.end(), - [=](const std::shared_ptr &q) -> bool + [=](const sycl::queue &q) -> bool { - return q.get() == queue; + return q == queue; }), _queues.end()); - queue = nullptr; } - void set_saved_queue(sycl::queue *q) { + void set_saved_queue(sycl::queue q) { std::lock_guard lock(m_mutex); _saved_queue = q; } - sycl::queue *get_saved_queue() const { + sycl::queue get_saved_queue() const { std::lock_guard lock(m_mutex); return _saved_queue; } private: - void clear_queues() { - _queues.clear(); - _q_in_order = _q_out_of_order = _saved_queue = nullptr; - } + void clear_queues() { _queues.clear(); } void init_queues() { _q_in_order = create_queue_impl(true, sycl::property::queue::in_order()); _q_out_of_order = create_queue_impl(true); - _saved_queue = &default_queue(); + _saved_queue = default_queue(); } /// Caller should acquire resource \p m_mutex before calling this /// function. template - sycl::queue *create_queue_impl(bool enable_exception_handler, + sycl::queue create_queue_impl(bool enable_exception_handler, Properties... properties) { sycl::async_handler eh = {}; if (enable_exception_handler) { eh = exception_handler; } - _queues.push_back(std::make_shared( + _queues.push_back(sycl::queue( *this, eh, sycl::property_list( #ifdef DPCT_PROFILING_ENABLED @@ -781,18 +777,18 @@ namespace dpct #endif properties...))); - return _queues.back().get(); + return _queues.back(); } template - sycl::queue *create_queue_impl(sycl::device device, + sycl::queue create_queue_impl(sycl::device device, bool enable_exception_handler, Properties... properties) { sycl::async_handler eh = {}; if (enable_exception_handler) { eh = exception_handler; } - _queues.push_back(std::make_shared( + _queues.push_back(sycl::queue( device, eh, sycl::property_list( #ifdef DPCT_PROFILING_ENABLED @@ -800,15 +796,15 @@ namespace dpct #endif properties...))); - return _queues.back().get(); + return _queues.back(); } void get_version(int &major, int &minor) const { detail::get_version(*this, major, minor); } - sycl::queue *_q_in_order, *_q_out_of_order; - sycl::queue *_saved_queue; - std::vector> _queues; + sycl::queue _q_in_order, _q_out_of_order; + sycl::queue _saved_queue; + std::vector _queues; mutable mutex_type m_mutex; }; From e4b86a12952bc5acec54a4d7a8194191f9384ef9 Mon Sep 17 00:00:00 2001 From: Chen Xi Date: Tue, 23 Jul 2024 06:50:13 +0000 Subject: [PATCH 06/11] fix the perf issue of multi-device Signed-off-by: Chen Xi --- ggml/src/ggml-sycl/common.hpp | 2 +- ggml/src/ggml-sycl/dpct/helper.hpp | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index 68d41411b5ece..397bd98dd22a9 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -267,7 +267,7 @@ struct ggml_backend_sycl_context { queue_ptr stream(int device, int stream) { if (qptrs[device][stream] == nullptr) { - qptrs[device][stream] = &(dpct::get_current_device().default_queue()); + qptrs[device][stream] = &(dpct::get_device(device).default_queue()); } return qptrs[device][stream]; } diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index 0f18cff3268a6..a313ca6e5d7b1 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -883,6 +883,12 @@ namespace dpct else if (std::strstr(env, "opencl")) { filter = "opencl"; } + else if (std::strstr(env, "cuda")) { + filter = "cuda"; + } + else if (std::strstr(env, "hip")) { + filter = "hip"; + } else { throw std::runtime_error("invalid device filter: " + std::string(env)); } @@ -2053,6 +2059,11 @@ namespace dpct return dev_mgr::instance().current_device(); } + static inline device_ext &get_device(unsigned int id) + { + return dev_mgr::instance().get_device(id); + } + static inline sycl::queue &get_in_order_queue() { return dev_mgr::instance().current_device().in_order_queue(); @@ -2490,6 +2501,7 @@ namespace dpct b, ldb, beta, c, ldc, batch_size); break; } +#endif case detail::get_type_combination_id( library_data_t::real_int8, library_data_t::real_int8, library_data_t::real_int32, library_data_t::real_int32): @@ -2522,7 +2534,6 @@ namespace dpct batch_size); break; } -#endif case detail::get_type_combination_id( library_data_t::real_half, library_data_t::real_half, library_data_t::real_half, library_data_t::real_float): @@ -2669,6 +2680,7 @@ namespace dpct beta, c, ldc, stride_c, batch_size); break; } +#endif case detail::get_type_combination_id( library_data_t::real_int8, library_data_t::real_int8, library_data_t::real_float, library_data_t::real_float): @@ -2687,7 +2699,6 @@ namespace dpct beta, c, ldc, stride_c, batch_size); break; } -#endif case detail::get_type_combination_id( library_data_t::real_half, library_data_t::real_half, library_data_t::real_half, library_data_t::real_float): From 22c72c5a0f5d80dbf10640adf94f2c6edba0773e Mon Sep 17 00:00:00 2001 From: Chen Xi Date: Wed, 24 Jul 2024 04:05:08 +0000 Subject: [PATCH 07/11] fix intel mkl Signed-off-by: Chen Xi --- ggml/src/ggml-sycl/dpct/helper.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index a313ca6e5d7b1..8774eff006251 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -2670,6 +2670,7 @@ namespace dpct stride_c, batch_size); break; } +#endif case detail::get_type_combination_id( library_data_t::real_int8, library_data_t::real_int8, library_data_t::real_int32, library_data_t::real_int32): @@ -2680,7 +2681,6 @@ namespace dpct beta, c, ldc, stride_c, batch_size); break; } -#endif case detail::get_type_combination_id( library_data_t::real_int8, library_data_t::real_int8, library_data_t::real_float, library_data_t::real_float): From f3db6d7553f5a34e8f83490cda5003e757d66c1d Mon Sep 17 00:00:00 2001 From: "Meng, Hengyu" Date: Wed, 24 Jul 2024 05:38:30 +0000 Subject: [PATCH 08/11] fix format --- ggml/src/ggml-sycl/dpct/helper.hpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index 8774eff006251..4aaa76bfbbc73 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -873,7 +873,7 @@ namespace dpct inline std::string get_preferred_gpu_platform_name() { std::string result; - + std::string filter = "level-zero"; char* env = getenv("ONEAPI_DEVICE_SELECTOR"); if (env) { @@ -893,41 +893,41 @@ namespace dpct throw std::runtime_error("invalid device filter: " + std::string(env)); } } - + auto plaform_list = sycl::platform::get_platforms(); - + for (const auto& platform : plaform_list) { auto devices = platform.get_devices(); auto gpu_dev = std::find_if(devices.begin(), devices.end(), [](const sycl::device& d) { return d.is_gpu(); }); - + if (gpu_dev == devices.end()) { // cout << "platform [" << platform_name // << "] does not contain GPU devices, skipping\n"; continue; } - + auto platform_name = platform.get_info(); std::string platform_name_low_case; platform_name_low_case.resize(platform_name.size()); - + std::transform( platform_name.begin(), platform_name.end(), platform_name_low_case.begin(), ::tolower); - + if (platform_name_low_case.find(filter) == std::string::npos) { // cout << "platform [" << platform_name // << "] does not match with requested " // << filter << ", skipping\n"; continue; } - + result = platform_name; } - + if (result.empty()) throw std::runtime_error("can not find preferred GPU platform"); - + return result; } From fcce8732a1c65cf1b31f0a7a5afe194f1f96a4e2 Mon Sep 17 00:00:00 2001 From: Chen Xi Date: Thu, 25 Jul 2024 08:29:06 +0000 Subject: [PATCH 09/11] add doc for sycl multi-card Signed-off-by: Chen Xi --- docs/backend/SYCL.md | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index 885983e92277e..87d1bf8324fc5 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -471,10 +471,10 @@ On the oneAPI command line window, run the following and step into the llama.cpp Similar to the native `sycl-ls`, available SYCL devices can be queried as follow: ``` -build\bin\ls-sycl-device.exe +./build/bin/llama-ls-sycl-device ``` -The output of this command in a system with 1 *intel CPU* and 1 *intel GPU* would look like the following: +This command will only display the selected backend that is supported by SYCL. The default backend is level_zero. For example, in a system with 2 *intel GPU* it would look like the following: ``` found 6 SYCL devices: | | | |Compute |Max compute|Max work|Max sub| | @@ -482,25 +482,18 @@ found 6 SYCL devices: |--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------| | 0|[level_zero:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 1.3| 512| 1024| 32| 16225243136| | 1|[level_zero:gpu:1]| Intel(R) UHD Graphics 770| 1.3| 32| 512| 32| 53651849216| -| 2| [opencl:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 3.0| 512| 1024| 32| 16225243136| -| 3| [opencl:gpu:1]| Intel(R) UHD Graphics 770| 3.0| 32| 512| 32| 53651849216| -| 4| [opencl:cpu:0]| 13th Gen Intel(R) Core(TM) i7-13700K| 3.0| 24| 8192| 64| 67064815616| -| 5| [opencl:acc:0]| Intel(R) FPGA Emulation Device| 1.2| 24|67108864| 64| 67064815616| ``` -| Attribute | Note | -|------------------------|-----------------------------------------------------------| -| compute capability 1.3 | Level-zero running time, recommended | -| compute capability 3.0 | OpenCL running time, slower than level-zero in most cases | - 4. Launch inference There are two device selection modes: -- Single device: Use one device assigned by user. -- Multiple devices: Automatically choose the devices with the same biggest Max compute units. +- Single device: Use one device assigned by user. Default device id is 0. +- Multiple devices: Automatically choose the devices with the same backend. + +In two device selection modes, the default SYCL backend is level_zero, you can choose other backend supported by SYCL by setting environment variable ONEAPI_DEVICE_SELECTOR. | Device selection | Parameter | |------------------|----------------------------------------| From 8fe8086363e867ddced21ffc4e2cc60e254951d1 Mon Sep 17 00:00:00 2001 From: Chen Xi Date: Thu, 25 Jul 2024 08:42:03 +0000 Subject: [PATCH 10/11] add linux part change on doc Signed-off-by: Chen Xi --- docs/backend/SYCL.md | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index 87d1bf8324fc5..99890a4cce338 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -293,31 +293,26 @@ Similar to the native `sycl-ls`, available SYCL devices can be queried as follow ```sh ./build/bin/llama-ls-sycl-device ``` -A example of such log in a system with 1 *intel CPU* and 1 *intel GPU* can look like the following: +This command will only display the selected backend that is supported by SYCL. The default backend is level_zero. For example, in a system with 2 *intel GPU* it would look like the following: ``` -found 6 SYCL devices: +found 2 SYCL devices: + | | | |Compute |Max compute|Max work|Max sub| | |ID| Device Type| Name|capability|units |group |group |Global mem size| |--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------| | 0|[level_zero:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 1.3| 512| 1024| 32| 16225243136| | 1|[level_zero:gpu:1]| Intel(R) UHD Graphics 770| 1.3| 32| 512| 32| 53651849216| -| 2| [opencl:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 3.0| 512| 1024| 32| 16225243136| -| 3| [opencl:gpu:1]| Intel(R) UHD Graphics 770| 3.0| 32| 512| 32| 53651849216| -| 4| [opencl:cpu:0]| 13th Gen Intel(R) Core(TM) i7-13700K| 3.0| 24| 8192| 64| 67064815616| -| 5| [opencl:acc:0]| Intel(R) FPGA Emulation Device| 1.2| 24|67108864| 64| 67064815616| ``` -| Attribute | Note | -|------------------------|-------------------------------------------------------------| -| compute capability 1.3 | Level-zero driver/runtime, recommended | -| compute capability 3.0 | OpenCL driver/runtime, slower than level-zero in most cases | 4. Launch inference There are two device selection modes: - Single device: Use one device target specified by the user. -- Multiple devices: Automatically select the devices with the same largest Max compute-units. +- Multiple devices: Automatically choose the devices with the same backend. + +In two device selection modes, the default SYCL backend is level_zero, you can choose other backend supported by SYCL by setting environment variable ONEAPI_DEVICE_SELECTOR. | Device selection | Parameter | |------------------|----------------------------------------| @@ -471,12 +466,12 @@ On the oneAPI command line window, run the following and step into the llama.cpp Similar to the native `sycl-ls`, available SYCL devices can be queried as follow: ``` -./build/bin/llama-ls-sycl-device +build\bin\ls-sycl-device.exe ``` This command will only display the selected backend that is supported by SYCL. The default backend is level_zero. For example, in a system with 2 *intel GPU* it would look like the following: ``` -found 6 SYCL devices: +found 2 SYCL devices: | | | |Compute |Max compute|Max work|Max sub| | |ID| Device Type| Name|capability|units |group |group |Global mem size| |--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------| From fc766843258c6bc01e76de9ef178d1c16de07a7d Mon Sep 17 00:00:00 2001 From: Chen Xi Date: Thu, 25 Jul 2024 08:45:00 +0000 Subject: [PATCH 11/11] fix typo Signed-off-by: Chen Xi --- docs/backend/SYCL.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index 99890a4cce338..d36ac0a158dd4 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -293,7 +293,7 @@ Similar to the native `sycl-ls`, available SYCL devices can be queried as follow ```sh ./build/bin/llama-ls-sycl-device ``` -This command will only display the selected backend that is supported by SYCL. The default backend is level_zero. For example, in a system with 2 *intel GPU* it would look like the following: +This command will only display the selected backend that is supported by SYCL. The default backend is level_zero. For example, in a system with 2 *intel GPU* it would look like the following: ``` found 2 SYCL devices: @@ -469,7 +469,7 @@ Similar to the native `sycl-ls`, available SYCL devices can be queried as follow build\bin\ls-sycl-device.exe ``` -This command will only display the selected backend that is supported by SYCL. The default backend is level_zero. For example, in a system with 2 *intel GPU* it would look like the following: +This command will only display the selected backend that is supported by SYCL. The default backend is level_zero. For example, in a system with 2 *intel GPU* it would look like the following: ``` found 2 SYCL devices: | | | |Compute |Max compute|Max work|Max sub| |