diff --git a/cmake/developer_package/IEDevScriptsConfig.cmake b/cmake/developer_package/IEDevScriptsConfig.cmake index 78bd04c0394aa6..66d784b2d334e3 100644 --- a/cmake/developer_package/IEDevScriptsConfig.cmake +++ b/cmake/developer_package/IEDevScriptsConfig.cmake @@ -170,8 +170,6 @@ endif() include(packaging/packaging) -set(CMAKE_SKIP_INSTALL_RPATH ON) - if(APPLE) set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON) diff --git a/cmake/developer_package/packaging/common-libraries.cmake b/cmake/developer_package/packaging/common-libraries.cmake index c8cde9746ea1dc..159dce21bb14de 100644 --- a/cmake/developer_package/packaging/common-libraries.cmake +++ b/cmake/developer_package/packaging/common-libraries.cmake @@ -53,10 +53,6 @@ macro(ov_common_libraries_cpack_set_dirs) set(IE_CPACK_LIBRARY_PATH ${OV_CPACK_LIBRARYDIR}) set(IE_CPACK_RUNTIME_PATH ${OV_CPACK_RUNTIMEDIR}) set(IE_CPACK_ARCHIVE_PATH ${OV_CPACK_ARCHIVEDIR}) - - if(CPACK_GENERATOR STREQUAL "BREW") - set(CMAKE_SKIP_INSTALL_RPATH OFF) - endif() endmacro() ov_common_libraries_cpack_set_dirs() @@ -123,3 +119,8 @@ macro(ov_define_component_include_rules) endmacro() ov_define_component_include_rules() + +if(CPACK_GENERATOR STREQUAL "BREW") + # brew relies on RPATH + set(CMAKE_SKIP_INSTALL_RPATH OFF) +endif() diff --git a/cmake/developer_package/packaging/debian/debian.cmake b/cmake/developer_package/packaging/debian/debian.cmake index 9adcc83919fde8..31d21a7d8f1ad0 100644 --- a/cmake/developer_package/packaging/debian/debian.cmake +++ b/cmake/developer_package/packaging/debian/debian.cmake @@ -179,6 +179,9 @@ macro(ov_debian_specific_settings) set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE i386) endif() endif() + + # we don't need RPATHs, because libraries are searched in standard paths + set(CMAKE_SKIP_INSTALL_RPATH ON) endmacro() ov_debian_specific_settings() diff --git a/cmake/developer_package/packaging/packaging.cmake b/cmake/developer_package/packaging/packaging.cmake index 79beb1cd99aa09..c20f5fff79a04d 100644 ---
a/cmake/developer_package/packaging/packaging.cmake +++ b/cmake/developer_package/packaging/packaging.cmake @@ -4,6 +4,9 @@ include(CPackComponent) +# we don't need RPATHs, because setupvars.sh is used +set(CMAKE_SKIP_INSTALL_RPATH ON) + # # ov_install_static_lib( ) # diff --git a/cmake/developer_package/packaging/rpm/rpm.cmake b/cmake/developer_package/packaging/rpm/rpm.cmake index 00dc0b76f46d5c..4c5f745fc1025e 100644 --- a/cmake/developer_package/packaging/rpm/rpm.cmake +++ b/cmake/developer_package/packaging/rpm/rpm.cmake @@ -159,6 +159,9 @@ macro(ov_rpm_specific_settings) set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE i386) endif() endif() + + # we don't need RPATHs, because libraries are searched in standard paths + set(CMAKE_SKIP_INSTALL_RPATH ON) endmacro() ov_rpm_specific_settings() diff --git a/cmake/templates/OpenVINOConfig.cmake.in b/cmake/templates/OpenVINOConfig.cmake.in index 29af7dd50e5b15..8f400480bcd7e2 100644 --- a/cmake/templates/OpenVINOConfig.cmake.in +++ b/cmake/templates/OpenVINOConfig.cmake.in @@ -390,9 +390,11 @@ endmacro() macro(_ov_find_protobuf_frontend_dependency) set(_OV_ENABLE_SYSTEM_PROTOBUF "@ENABLE_SYSTEM_PROTOBUF@") + set(_OV_PROTOBUF_PACKAGE_CONFIG "@protobuf_config@") if(_OV_ENABLE_SYSTEM_PROTOBUF) - _ov_find_dependency(Protobuf @Protobuf_VERSION@ EXACT NAMES Protobuf protobuf) + _ov_find_dependency(Protobuf @Protobuf_VERSION@ EXACT ${_OV_PROTOBUF_PACKAGE_CONFIG}) endif() + unset(_OV_PROTOBUF_PACKAGE_CONFIG) unset(_OV_ENABLE_SYSTEM_PROTOBUF) endmacro() diff --git a/docs/OV_Runtime_UG/supported_plugins/GNA.md b/docs/OV_Runtime_UG/supported_plugins/GNA.md index ab543b29872556..5731462615087a 100644 --- a/docs/OV_Runtime_UG/supported_plugins/GNA.md +++ b/docs/OV_Runtime_UG/supported_plugins/GNA.md @@ -47,7 +47,7 @@ exported for GNA 2.0 runs on GNA 3.0 or vice versa. In most cases, a network compiled for GNA 2.0 runs as expected on GNA 3.0. However, performance may be worse compared to when a network is compiled specifically for the latter.
The exception is a network with convolutions - with the number of filters greater than 8192 (see the :ref:`Model and Operation Limitations <#model-and-operation-limitations>` section). + with the number of filters greater than 8192 (see the `Model and Operation Limitations <#model-and-operation-limitations>`__ section). For optimal work with POT quantized models, which include 2D convolutions on GNA 3.0 hardware, the following requirements should be satisfied: @@ -136,7 +136,7 @@ quantization hints based on statistics for the provided dataset. * Performance (i8 weights) For POT quantized models, the ``ov::hint::inference_precision`` property has no effect except in cases described in the -:ref:`Model and Operation Limitations section <#model-and-operation-limitations>`. +`Model and Operation Limitations section <#model-and-operation-limitations>`__. Supported Features diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css index 392d7aff4975fe..8a6e30ed4eb9cc 100644 --- a/docs/_static/css/custom.css +++ b/docs/_static/css/custom.css @@ -100,23 +100,27 @@ ul#navbar-main-elements > li:hover { transform: rotate(0deg); } - -/* Footer links */ +/* Footer */ /* =================================================== */ -footer div.container div.footer-item p a { - float: left; - margin-right: 30px; +footer iframe { + background-color: white; + width: 100%; + border: none; + height: 60px; } -footer div.container div.footer-item p a:nth-child(1) { - margin-right: 50px; +@media screen and (min-width: 860px) { + footer iframe { + height: 45px; + } } -footer div.container div.footer-item p:nth-child(2) { - clear: both; +@media screen and (max-width: 520px) { + footer iframe { + height: 85px; + } } - /* Doc version dropdown formatting override */ /* =================================================== */ [aria-labelledby="version-selector"] { diff --git a/docs/install_guides/installing-openvino-overview.md b/docs/install_guides/installing-openvino-overview.md index 
7bf0e880b04652..c617371ce5af54 100644 --- a/docs/install_guides/installing-openvino-overview.md +++ b/docs/install_guides/installing-openvino-overview.md @@ -44,11 +44,11 @@ The best way to get started with OpenVINO is to install OpenVINO Development Too **Python** -For developers working in Python, OpenVINO Development Tools can easily be installed using PyPI. See the :ref:`For Python Developers ` section of the Install OpenVINO Development Tools page for instructions. +For developers working in Python, OpenVINO Development Tools can easily be installed using PyPI. See the :ref:`For Python Developers ` section of the Install OpenVINO Development Tools page for instructions. **C++** -For developers working in C++, the core OpenVINO Runtime libraries must be installed separately. Then, OpenVINO Development Tools can be installed using requirements files or PyPI. See the :ref:`For C++ Developers ` section of the Install OpenVINO Development Tools page for instructions. +For developers working in C++, the core OpenVINO Runtime libraries must be installed separately. Then, OpenVINO Development Tools can be installed using requirements files or PyPI. See the :ref:`For C++ Developers ` section of the Install OpenVINO Development Tools page for instructions. Install OpenVINO Runtime only +++++++++++++++++++++++++++++++++++++++ diff --git a/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/footer.html b/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/footer.html new file mode 100644 index 00000000000000..3b342007179080 --- /dev/null +++ b/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/footer.html @@ -0,0 +1,3 @@ +
+ +
\ No newline at end of file diff --git a/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/footer_info.html b/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/footer_info.html deleted file mode 100644 index 7cd486bab7194d..00000000000000 --- a/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/footer_info.html +++ /dev/null @@ -1,8 +0,0 @@ -

- ©2023 Intel Corporation - Terms of Use - Cookies - Privacy - Do Not Share My Personal Information -

-

Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others.

\ No newline at end of file diff --git a/src/bindings/c/include/c_api/ie_c_api.h b/src/bindings/c/include/c_api/ie_c_api.h index 348da044321038..5683f583c4e7fb 100644 --- a/src/bindings/c/include/c_api/ie_c_api.h +++ b/src/bindings/c/include/c_api/ie_c_api.h @@ -33,8 +33,23 @@ # define INFERENCE_ENGINE_C_API_EXTERN #endif +#define IE_1_0_DEPRECATED \ + OPENVINO_DEPRECATED("The Inference Engine API is deprecated and will be removed in the 2024.0 release. " \ + "For instructions on transitioning to the new API, please refer to " \ + "https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html") + +#if !defined(IN_OV_COMPONENT) && !defined(C_API_LEGACY_HEADER_INCLUDED) +# define C_API_LEGACY_HEADER_INCLUDED +# ifdef _MSC_VER +# pragma message( + "The legacy C API is deprecated and will be removed in the 2024.0 release. For instructions on transitioning to the new API, please refer to https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html") +# else +# warning("The legacy C API is deprecated and will be removed in the 2024.0 release. For instructions on transitioning to the new API, please refer to https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html") +# endif +#endif + #if defined(OPENVINO_STATIC_LIBRARY) || defined(__GNUC__) && (__GNUC__ < 4) -# define INFERENCE_ENGINE_C_API(...) INFERENCE_ENGINE_C_API_EXTERN __VA_ARGS__ +# define INFERENCE_ENGINE_C_API(...) INFERENCE_ENGINE_C_API_EXTERN __VA_ARGS__ IE_1_0_DEPRECATED # define IE_NODISCARD #else # if defined(_WIN32) || defined(__CYGWIN__) @@ -42,12 +57,13 @@ # ifdef openvino_c_EXPORTS # define INFERENCE_ENGINE_C_API(...) INFERENCE_ENGINE_C_API_EXTERN __declspec(dllexport) __VA_ARGS__ __cdecl # else -# define INFERENCE_ENGINE_C_API(...) INFERENCE_ENGINE_C_API_EXTERN __declspec(dllimport) __VA_ARGS__ __cdecl +# define INFERENCE_ENGINE_C_API(...) 
\ + INFERENCE_ENGINE_C_API_EXTERN __declspec(dllimport) __VA_ARGS__ IE_1_0_DEPRECATED __cdecl # endif # define IE_NODISCARD # else # define INFERENCE_ENGINE_C_API(...) \ - INFERENCE_ENGINE_C_API_EXTERN __attribute__((visibility("default"))) __VA_ARGS__ + INFERENCE_ENGINE_C_API_EXTERN __attribute__((visibility("default"))) __VA_ARGS__ IE_1_0_DEPRECATED # define IE_NODISCARD __attribute__((warn_unused_result)) # endif #endif diff --git a/src/bindings/c/src/CMakeLists.txt b/src/bindings/c/src/CMakeLists.txt index c28c676ae8314a..98fa6e376972c5 100644 --- a/src/bindings/c/src/CMakeLists.txt +++ b/src/bindings/c/src/CMakeLists.txt @@ -6,6 +6,7 @@ set(TARGET_NAME openvino_c) # Suppress warnings due to catch macro with legacy exception types ov_deprecated_no_errors() +add_definitions(-DIN_OV_COMPONENT) file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) file(GLOB_RECURSE HEADERS ${OpenVINO_C_API_SOURCE_DIR}/include/*.h) diff --git a/src/bindings/c/src/ie_c_api.cpp b/src/bindings/c/src/ie_c_api.cpp index 6c0fde0b823423..7fab00614e816a 100644 --- a/src/bindings/c/src/ie_c_api.cpp +++ b/src/bindings/c/src/ie_c_api.cpp @@ -1706,4 +1706,4 @@ void ie_blob_free(ie_blob_t** blob) { void ie_shutdown() { InferenceEngine::shutdown(); -} +} \ No newline at end of file diff --git a/src/bindings/c/tests/ie_c_api_test.cpp b/src/bindings/c/tests/ie_c_api_test.cpp index 81e3c3c4aa72b3..feb30b1917ec47 100644 --- a/src/bindings/c/tests/ie_c_api_test.cpp +++ b/src/bindings/c/tests/ie_c_api_test.cpp @@ -5,17 +5,22 @@ #include #include #include + #include +#include +#include "inference_engine.hpp" #include -#include -#include + #include "test_model_repo.hpp" -#include #define IE_EXPECT_OK(...) EXPECT_EQ(IEStatusCode::OK, __VA_ARGS__) #define IE_ASSERT_OK(...) ASSERT_EQ(IEStatusCode::OK, __VA_ARGS__) #define IE_EXPECT_NOT_OK(...) 
EXPECT_NE(IEStatusCode::OK, __VA_ARGS__) +OPENVINO_SUPPRESS_DEPRECATED_START + +#include + static std::mutex m; static bool ready = false; static std::condition_variable condVar; @@ -1503,3 +1508,5 @@ TEST_P(ie_c_api_test, ie_infer_set_completion_callback) { ie_network_free(&network); ie_core_free(&core); } + +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/core/include/openvino/core/node_output.hpp b/src/core/include/openvino/core/node_output.hpp index c9746e1649c5c8..1287150dafd619 100644 --- a/src/core/include/openvino/core/node_output.hpp +++ b/src/core/include/openvino/core/node_output.hpp @@ -64,6 +64,8 @@ class OPENVINO_API Output { descriptor::Tensor& get_tensor() const; /// \return A shared point to the tensor ptr for this output. std::shared_ptr get_tensor_ptr() const; + /// \brief Sets a new tensor descriptor shared pointer for this output + void set_tensor_ptr(std::shared_ptr tensor_ptr); /// \return The element type of the output referred to by this output handle. const element::Type& get_element_type() const; /// \return The shape of the output referred to by this output handle. diff --git a/src/core/shape_inference/include/element_visitor.hpp b/src/core/shape_inference/include/element_visitor.hpp new file mode 100644 index 00000000000000..096832d4f62017 --- /dev/null +++ b/src/core/shape_inference/include/element_visitor.hpp @@ -0,0 +1,122 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "openvino/core/except.hpp" +#include "openvino/core/type/element_type.hpp" + +namespace ov { +namespace element { + +/** + * @brief Primary template defines supported element types. + * + * The list of element types is used to check if runtime value of element type is one in the list. + * Based on this check the Visitor::visit function is called for specific element type. + * + * @tparam List of supported ov::element types.
+ */ +template +struct IfTypeOf; + +/** + * @brief Applies visitor action for not supported ov::element type. + */ +template <> +struct IfTypeOf<> { + /** + * @brief Applies visitor default action if input element type is not supported by IfTypeOf. + * + * Uses Visitor::visit non-template function. + * + * @tparam Visitor Visitor class implementing visit function. + * @tparam Args Types of visit parameters. + * + * @param et Input element type. + * @param args Visitor arguments. + * @return Value of result type returned by Visitor. + */ + template + static auto apply(Type_t et, Args&&... args) -> typename Visitor::result_type { + return Visitor::visit(); + } +}; + +/** + * @brief Applies visitor action for supported element type defined by template parameters. + * + * @tparam ET Current ov::element type used for check with input. + * @tparam Others Other supported ov::element types. + */ +template +struct IfTypeOf { + /** + * @brief Applies visitor action if input element type is same as ET. + * + * Uses Visitor::visit function if `et == ET`, otherwise check input element type against Others. + * + * @tparam Visitor Visitor class implementing visit function. + * @tparam Args Types of visit parameters. + * + * @param et Input element type. + * @param args Visitor arguments. + * @return Value of result type returned by Visitor. + */ + template + static auto apply(Type_t et, Args&&... args) -> typename Visitor::result_type { + return (et == ET) ? Visitor::template visit(std::forward(args)...) + : IfTypeOf::template apply(et, std::forward(args)...); + } +}; + +/** + * @brief Helper visitor which defines no action for not supported type. + * + * @tparam R Type of return value. + * @tparam value Default value returned.
+ */ +template +struct NoAction { + static_assert(sizeof...(value) < 2, "There should be no more than one result value."); + + using result_type = R; + + static constexpr result_type visit() { + return {value...}; + } +}; + +/** + * @brief Helper visitor which defines no action for not supported type if result is void type. + */ +template <> +struct NoAction { + using result_type = void; + + static result_type visit() {} +}; + +/** + * @brief Helper visitor which throws ov::Exception for not supported element type. + * + * @tparam R Type of return value. + */ +template +struct NotSupported { + using result_type = R; + + [[noreturn]] static result_type visit() { + throw_not_supported(); + } + +private: + [[noreturn]] static void throw_not_supported() { + OPENVINO_THROW("Element not supported"); + } +}; +} // namespace element +} // namespace ov diff --git a/src/core/shape_inference/include/utils.hpp b/src/core/shape_inference/include/utils.hpp index 66858cfd1e544c..d79f1aee460834 100644 --- a/src/core/shape_inference/include/utils.hpp +++ b/src/core/shape_inference/include/utils.hpp @@ -8,6 +8,7 @@ #include #include +#include "element_visitor.hpp" #include "openvino/core/bound_evaluation_util.hpp" #include "shape_infer_type_utils.hpp" #include "tensor_data_accessor.hpp" @@ -50,6 +51,20 @@ void eltwise_shape_infer(const OpType* op, const std::vector& input_shapes, s } namespace ov { + +struct TensorTransform : element::NotSupported { + using element::NotSupported::visit; + + template + static result_type visit(const void* const ptr, const size_t size, Iterator out_it, UnaryOperation&& func) { + using T = fundamental_type_for; + std::transform(static_cast(ptr), + static_cast(ptr) + size, + out_it, + std::forward(func)); + } +}; + /** * \brief Get the raw data as TResult object. 
* @@ -71,94 +86,13 @@ TResult get_raw_data_as(const element::Type_t et, const void* const ptr, const s TResult out; auto out_it = std::inserter(out, out.end()); - switch (et) { - case element::Type_t::i4: { - using dtype = fundamental_type_for; - std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - case element::Type_t::i8: { - using dtype = fundamental_type_for; - std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - case element::Type_t::i16: { - using dtype = fundamental_type_for; - std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - case element::Type_t::i32: { - using dtype = fundamental_type_for; - std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - case element::Type_t::i64: { - using dtype = fundamental_type_for; - std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - case element::Type_t::u4: { - using dtype = fundamental_type_for; - std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - case element::Type_t::u8: { - using dtype = fundamental_type_for; - std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - case element::Type_t::u16: { - using dtype = fundamental_type_for; - std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - case element::Type_t::u32: { - using dtype = fundamental_type_for; - std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - case element::Type_t::u64: { - using dtype = fundamental_type_for; - std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - case element::Type_t::f16: { - using dtype = fundamental_type_for; - 
std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - case element::Type_t::f32: { - using dtype = fundamental_type_for; - std::transform(static_cast(ptr), - static_cast(ptr) + size, - out_it, - std::forward(func)); - } break; - default: - OPENVINO_ASSERT(false, "Get raw data from tensor is not supported for element type: ", et); - }; + using namespace ov::element; + IfTypeOf::apply( + et, + ptr, + size, + out_it, + std::forward(func)); return out; } diff --git a/src/core/src/node_output.cpp b/src/core/src/node_output.cpp index 545546a3f0ac6f..abf5aa8599c7b3 100644 --- a/src/core/src/node_output.cpp +++ b/src/core/src/node_output.cpp @@ -43,6 +43,9 @@ std::shared_ptr Output::get_tensor_ptr() const { const element::Type& Output::get_element_type() const { return m_node->get_output_element_type(m_index); } +void Output::set_tensor_ptr(std::shared_ptr tensor_ptr) { + return m_node->m_outputs.at(m_index).set_tensor_ptr(tensor_ptr); +} const Shape& Output::get_shape() const { return m_node->get_output_shape(m_index); } diff --git a/src/core/src/op/round.cpp b/src/core/src/op/round.cpp index 6923294432d5af..47ef04283de5e7 100644 --- a/src/core/src/op/round.cpp +++ b/src/core/src/op/round.cpp @@ -4,6 +4,7 @@ #include "ngraph/op/round.hpp" +#include "element_visitor.hpp" #include "itt.hpp" #include "ngraph/attribute_visitor.hpp" #include "ngraph/op/util/eval_copy.hpp" @@ -15,50 +16,48 @@ using namespace std; using namespace ngraph; namespace roundop { -namespace { -// function used by TYPE_CASE -template -inline bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& out, - const size_t count, - const op::v5::Round::RoundMode mode) { - using T = typename element_type_traits::value_type; - runtime::reference::round(arg0->get_data_ptr(), out->get_data_ptr(), count, mode); - return true; -} -// function used by COPY_TENSOR -template -inline bool copy_tensor(const HostTensorPtr& arg0, const HostTensorPtr& out, const 
size_t count) { - runtime::reference::copy(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; -} +class Evaluate : public ov::element::NoAction { + template + static constexpr bool is_floating() { + return (ET == element::f16) || (ET == element::f32) || (ET == element::bf16); + } +public: + using ov::element::NoAction::visit; + template ()>::type* = nullptr> + static result_type visit(const HostTensorPtr& arg0, + const HostTensorPtr& out, + const size_t count, + const op::v5::Round::RoundMode) { + memcpy(out->get_data_ptr(), arg0->get_data_ptr(), out->get_size_in_bytes()); + return true; + } + + template ()>::type* = nullptr> + static result_type visit(const HostTensorPtr& arg0, + const HostTensorPtr& out, + const size_t count, + const op::v5::Round::RoundMode mode) { + ngraph::runtime::reference::round(arg0->get_data_ptr(), out->get_data_ptr(), count, mode); + return true; + } +}; + +namespace { bool evaluate_round(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count, const op::v5::Round::RoundMode mode) { - bool rc = true; out->set_unary(arg0); - switch (arg0->get_element_type()) { - NGRAPH_COPY_TENSOR(evaluate_round, boolean, arg0, out, count); - NGRAPH_COPY_TENSOR(evaluate_round, i8, arg0, out, count); - NGRAPH_COPY_TENSOR(evaluate_round, i16, arg0, out, count); - NGRAPH_COPY_TENSOR(evaluate_round, i32, arg0, out, count); - NGRAPH_COPY_TENSOR(evaluate_round, i64, arg0, out, count); - NGRAPH_COPY_TENSOR(evaluate_round, u8, arg0, out, count); - NGRAPH_COPY_TENSOR(evaluate_round, u16, arg0, out, count); - NGRAPH_COPY_TENSOR(evaluate_round, u32, arg0, out, count); - NGRAPH_COPY_TENSOR(evaluate_round, u64, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_round, f16, arg0, out, count, mode); - NGRAPH_TYPE_CASE(evaluate_round, f32, arg0, out, count, mode); - NGRAPH_TYPE_CASE(evaluate_round, bf16, arg0, out, count, mode); - default: - rc = false; - break; - } - return rc; + using namespace ov::element; + return IfTypeOf::apply( + 
arg0->get_element_type(), + arg0, + out, + count, + mode); } } // namespace } // namespace roundop @@ -108,9 +107,8 @@ bool op::v5::Round::has_evaluate() const { case ngraph::element::bf16: return true; default: - break; + return false; } - return false; } std::ostream& ov::operator<<(std::ostream& s, const op::v5::Round::RoundMode& type) { diff --git a/src/core/src/op/tan.cpp b/src/core/src/op/tan.cpp index b3c1995b2be90d..e8acb8b83407b7 100644 --- a/src/core/src/op/tan.cpp +++ b/src/core/src/op/tan.cpp @@ -4,6 +4,7 @@ #include "ngraph/op/tan.hpp" +#include "element_visitor.hpp" #include "itt.hpp" #include "ngraph/op/cos.hpp" #include "ngraph/op/divide.hpp" @@ -31,29 +32,21 @@ shared_ptr op::Tan::clone_with_new_inputs(const OutputVector& new_args) co namespace tanop { namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - runtime::reference::tan(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; -} +struct Evaluate : ov::element::NoAction { + using ov::element::NoAction::visit; + + template + static result_type visit(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { + ngraph::runtime::reference::tan(arg0->get_data_ptr(), out->get_data_ptr(), count); + return true; + } +}; bool evaluate_tan(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - bool rc = true; out->set_unary(arg0); - switch (arg0->get_element_type()) { - NGRAPH_TYPE_CASE(evaluate_tan, i32, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_tan, i64, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_tan, u32, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_tan, u64, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_tan, f16, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_tan, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; + using namespace ov::element; + return IfTypeOf::apply(arg0->get_element_type(), 
arg0, out, count); } } // namespace } // namespace tanop diff --git a/src/core/src/op/tanh.cpp b/src/core/src/op/tanh.cpp index 5671a8289a6d6b..e16e4561cea937 100644 --- a/src/core/src/op/tanh.cpp +++ b/src/core/src/op/tanh.cpp @@ -4,6 +4,7 @@ #include "ngraph/op/tanh.hpp" +#include "element_visitor.hpp" #include "itt.hpp" #include "ngraph/op/multiply.hpp" #include "ngraph/op/subtract.hpp" @@ -30,29 +31,20 @@ shared_ptr op::Tanh::clone_with_new_inputs(const OutputVector& new_args) c namespace tanhop { namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - runtime::reference::tanh(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; -} +struct Evaluate : ov::element::NoAction { + using ov::element::NoAction::visit; + + template + static result_type visit(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { + ngraph::runtime::reference::tanh(arg0->get_data_ptr(), out->get_data_ptr(), count); + return true; + } +}; bool evaluate_tanh(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - bool rc = true; out->set_unary(arg0); - - switch (arg0->get_element_type()) { - NGRAPH_TYPE_CASE(evaluate_tanh, i32, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_tanh, i64, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_tanh, u32, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_tanh, u64, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_tanh, f16, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_tanh, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; + using namespace ov::element; + return IfTypeOf::apply(arg0->get_element_type(), arg0, out, count); } } // namespace } // namespace tanhop diff --git a/src/core/src/op/unique.cpp b/src/core/src/op/unique.cpp index 0ddff3c854c399..4e91b7da1d1773 100644 --- a/src/core/src/op/unique.cpp +++ b/src/core/src/op/unique.cpp @@ -2,11 +2,12 @@ // 
SPDX-License-Identifier: Apache-2.0 // -#include "openvino/op/unique.hpp" +#include "ngraph/runtime/reference/unique.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/runtime/reference/unique.hpp" #include "ngraph/validation_util.hpp" +#include "openvino/op/unique.hpp" #include "openvino/op/util/op_types.hpp" namespace ov { @@ -16,20 +17,20 @@ int64_t extract_axis(const std::shared_ptr& axis_constant) { return axis_vec.at(0); } -template -ngraph::runtime::reference::UniqueElements call_unique(const Tensor& input, - std::unique_ptr axis, - const bool sorted) { - return ngraph::runtime::reference::find_unique_elements(input.data(), - input.get_shape(), - std::move(axis), - sorted); -} +struct Evaluate : element::NotSupported> { + using NotSupported>::visit; -std::tuple calculate_static_output_shapes(const Tensor& input_data, const op::v10::Unique& op) { - using Index_t = int32_t; - using Counts_t = int32_t; + template + static result_type visit(const Tensor& input, std::unique_ptr axis, const bool sorted) { + using T = fundamental_type_for; + return ngraph::runtime::reference::find_unique_elements(input.data(), + input.get_shape(), + std::move(axis), + sorted); + } +}; +std::tuple calculate_static_output_shapes(const Tensor& input_data, const op::v10::Unique& op) { const auto maybe_extract_axis = [&op]() { std::unique_ptr axis; if (op.get_input_size() == 2 && ov::op::util::is_constant(op.input_value(1).get_node())) { @@ -40,52 +41,15 @@ std::tuple calculate_static_output_shapes(const Tensor& inp return axis; }; - ngraph::runtime::reference::UniqueElements unique_elements; std::unique_ptr axis = maybe_extract_axis(); - switch (op.get_input_element_type(0)) { - case element::boolean: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - case element::i8: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - case element::i16: - unique_elements = call_unique(input_data, 
std::move(axis), op.get_sorted()); - break; - case element::i32: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - case element::i64: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - case element::u8: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - case element::u16: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - case element::u32: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - case element::u64: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - case element::bf16: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - case element::f16: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - case element::f32: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - case element::f64: - unique_elements = call_unique(input_data, std::move(axis), op.get_sorted()); - break; - default: - OPENVINO_THROW("Operator `Unique-10` doesn't support element type: ", op.get_input_element_type(0)); - } + const auto et = op.get_input_element_type(0); + using namespace ov::element; + auto unique_elements = + IfTypeOf::apply(et, + input_data, + std::move(axis), + op.get_sorted()); return ngraph::runtime::reference::make_tensor_shapes(unique_elements, input_data.get_shape(), diff --git a/src/core/src/op/unsqueeze.cpp b/src/core/src/op/unsqueeze.cpp index fb8a14994b1412..f55664b0700023 100644 --- a/src/core/src/op/unsqueeze.cpp +++ b/src/core/src/op/unsqueeze.cpp @@ -9,6 +9,7 @@ #include #include "bound_evaluate.hpp" +#include "element_visitor.hpp" #include "itt.hpp" #include "ngraph/runtime/reference/copy.hpp" #include "unsqueeze_shape_inference.hpp" @@ -46,17 +47,23 @@ shared_ptr op::v0::Unsqueeze::clone_with_new_inputs(const 
OutputVector& ne return make_shared(new_args.at(0), new_args.at(1)); } +namespace ov { +namespace op { namespace unsqueeze { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out) { - runtime::reference::copy(arg0->get_data_ptr(), out->get_data_ptr(), shape_size(out->get_shape())); - return true; -} +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template + static result_type visit(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { + ngraph::runtime::reference::copy(arg0->get_data_ptr(), out->get_data_ptr(), count); + return true; + } +}; // The evaluate cannot use shape_infer for output shape calculation as shape inference accepts // repeated axis and evaluate not. When shape inference will changed to be compatible with `numpy` then // evaluate and inference can use same function to calculate output shape. TODO for next version for this operator. +namespace { bool evaluate_unsqueeze(const Node* node, const HostTensorPtr& arg0, const HostTensorPtr& arg1, @@ -86,24 +93,16 @@ bool evaluate_unsqueeze(const Node* node, } out->set_shape(out_shape); - bool rc = true; - switch (element_type) { - NGRAPH_TYPE_CASE(evaluate_unsqueeze, i32, arg0, out); - NGRAPH_TYPE_CASE(evaluate_unsqueeze, i64, arg0, out); - NGRAPH_TYPE_CASE(evaluate_unsqueeze, u32, arg0, out); - NGRAPH_TYPE_CASE(evaluate_unsqueeze, u64, arg0, out); - NGRAPH_TYPE_CASE(evaluate_unsqueeze, f16, arg0, out); - NGRAPH_TYPE_CASE(evaluate_unsqueeze, f32, arg0, out); - NGRAPH_TYPE_CASE(evaluate_unsqueeze, f64, arg0, out); - NGRAPH_TYPE_CASE(evaluate_unsqueeze, bf16, arg0, out); - default: - rc = false; - break; - } - return rc; + using namespace ov::element; + return IfTypeOf::apply(element_type, + arg0, + out, + shape_size(out_shape)); } } // namespace } // namespace unsqueeze +} // namespace op +} // namespace ov bool op::v0::Unsqueeze::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { 
OV_OP_SCOPE(v0_Unsqueeze_evaluate); @@ -127,9 +126,8 @@ bool op::v0::Unsqueeze::has_evaluate() const { case ngraph::element::bf16: return true; default: - break; + return false; } - return false; } bool op::v0::Unsqueeze::evaluate_lower(ov::TensorVector& output_values) const { diff --git a/src/core/tests/element_visitor_test.cpp b/src/core/tests/element_visitor_test.cpp new file mode 100644 index 00000000000000..86316aab1d2b64 --- /dev/null +++ b/src/core/tests/element_visitor_test.cpp @@ -0,0 +1,105 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "element_visitor.hpp" + +#include + +#include "common_test_utils/test_assertions.hpp" + +using namespace testing; +using namespace ov::element; + +struct TestVisitor : public ov::element::NotSupported { + using ov::element::NotSupported::visit; + + template + static result_type visit(int x) { + return true; + } +}; + +template <> +TestVisitor::result_type TestVisitor::visit(int x) { + return false; +} + +struct TestVisitorNoArgs : public ov::element::NoAction { + using ov::element::NoAction::visit; + + template + static result_type visit() { + return 10; + } +}; + +struct TestVisitorVoidReturn : public ov::element::NoAction { + using ov::element::NoAction::visit; + + template + static result_type visit(int x, int y) { + test_value = x + y; + } + + static int test_value; +}; + +int TestVisitorVoidReturn::test_value; + +class IfTypeOfTest : public Test { +protected: + void SetUp() override { + TestVisitorVoidReturn::test_value = 0; + } +}; + +TEST_F(IfTypeOfTest, throw_if_not_supported_type) { + OV_EXPECT_THROW((ov::element::IfTypeOf::apply(u8, 10)), + ov::Exception, + HasSubstr("Element not supported")); + + OV_EXPECT_THROW((ov::element::IfTypeOf::apply(u8, 10)), + ov::Exception, + HasSubstr("Element not supported")); +} + +TEST_F(IfTypeOfTest, action_for_single_supported_type) { + const auto result = ov::element::IfTypeOf::apply(ov::element::f32, 10); + 
EXPECT_TRUE(result); +} + +TEST_F(IfTypeOfTest, action_for_if_multiple_supported_types) { + const auto result = ov::element::IfTypeOf::apply(f32, 2); + EXPECT_TRUE(result); +} + +TEST_F(IfTypeOfTest, special_action_if_single_supported_type) { + const auto result = ov::element::IfTypeOf::apply(i16, 20); + EXPECT_FALSE(result); +} + +TEST_F(IfTypeOfTest, special_action_if_multiple_supported_types) { + const auto result = ov::element::IfTypeOf::apply(i16, 10); + EXPECT_FALSE(result); +} + +TEST_F(IfTypeOfTest, default_action_for_unsupported_type) { + const auto result = ov::element::IfTypeOf::apply(f16); + EXPECT_EQ(result, -1); +} + +TEST_F(IfTypeOfTest, apply_action_for_visitor_with_no_args) { + const auto result = ov::element::IfTypeOf::apply(u32); + EXPECT_EQ(result, 10); +} + +TEST_F(IfTypeOfTest, apply_action_for_void_return_visitor) { + ov::element::IfTypeOf::apply(u32, 2, 7); + EXPECT_EQ(TestVisitorVoidReturn::test_value, 9); +} + +TEST_F(IfTypeOfTest, apply_default_action_for_void_return_visitor) { + ov::element::IfTypeOf::apply(i32, 2, 7); + EXPECT_EQ(TestVisitorVoidReturn::test_value, 0); +} diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 05a0b81fc9138c..d6bc13fa7b9208 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -34,6 +34,7 @@ #include "transforms/prim_list_construct_pad.hpp" #include "transforms/prim_list_tuple_construct_replacer.hpp" #include "transforms/prim_list_unpack_replacer.hpp" +#include "transforms/prim_tuple_unpack_parameter_replacer.hpp" #include "transforms/rfftn_complex_replacer.hpp" #include "transforms/string_equality_replacer.hpp" #include "transforms/tuple_unpack_replacer.hpp" @@ -174,6 +175,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); 
diff --git a/src/frontends/pytorch/src/transforms/prim_list_tuple_construct_replacer.cpp b/src/frontends/pytorch/src/transforms/prim_list_tuple_construct_replacer.cpp index 097cdfd5f6479c..29f0d6962f4d90 100644 --- a/src/frontends/pytorch/src/transforms/prim_list_tuple_construct_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/prim_list_tuple_construct_replacer.cpp @@ -3,7 +3,7 @@ // #include "prim_list_tuple_construct_replacer.hpp" -#include +#include #include "openvino/frontend/pytorch/decoder.hpp" #include "openvino/op/result.hpp" @@ -17,21 +17,24 @@ namespace pass { bool DecomposeListTupleResults::run_on_model(const std::shared_ptr& model) { bool at_least_one_decomposed = false; - std::queue> results; - for (auto res : model->get_results()) { - results.push(res); - } + const auto& orig_results = model->get_results(); + std::deque> results(orig_results.begin(), orig_results.end()); + ov::ResultVector updated_results; // will hold final fully unpacked results list + while (!results.empty()) { auto result = results.front(); - results.pop(); + results.pop_front(); auto input_node = result->get_input_node_shared_ptr(0); auto tuple_construct = cast_fw_node(input_node, "prim::TupleConstruct"); auto list_construct = cast_fw_node(input_node, "prim::ListConstruct"); if (!tuple_construct && !list_construct) { + updated_results.push_back(result); continue; } - for (const auto& input : input_node->inputs()) { - const auto& out = input.get_source_output(); + const auto& inputs = input_node->inputs(); + // enumerating inputs in reverse order because of results.push_front below + for (auto pinput = inputs.rbegin(); pinput != inputs.rend(); ++pinput) { + const auto& out = pinput->get_source_output(); if (const auto& fw_node = cast_fw_node(out.get_node_shared_ptr(), "prim::Constant")) { const auto& attrs = fw_node->get_attrs(); if (attrs.find("none_value") != attrs.end()) { @@ -42,13 +45,19 @@ bool DecomposeListTupleResults::run_on_model(const std::shared_ptr& model } } 
auto new_result = std::make_shared(out); - model->add_results({new_result}); - results.push(new_result); - model->remove_result(result); + results.push_front(new_result); at_least_one_decomposed = true; } } + if (at_least_one_decomposed) { + // remove all results + while (!model->get_results().empty()) + model->remove_result(model->get_results()[0]); + // and replace them all by updated list of results + model->add_results(updated_results); + } + return at_least_one_decomposed; }; } // namespace pass diff --git a/src/frontends/pytorch/src/transforms/prim_tuple_unpack_parameter_replacer.cpp b/src/frontends/pytorch/src/transforms/prim_tuple_unpack_parameter_replacer.cpp new file mode 100644 index 00000000000000..12577daa6f2456 --- /dev/null +++ b/src/frontends/pytorch/src/transforms/prim_tuple_unpack_parameter_replacer.cpp @@ -0,0 +1,122 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include "prim_tuple_unpack_parameter_replacer.hpp" + +#include +#include + +#include "openvino/frontend/pytorch/decoder.hpp" +#include "openvino/op/result.hpp" +#include "openvino/op/util/framework_node.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +bool DecomposeTupleParameters::run_on_model(const std::shared_ptr& model) { + bool at_least_one_decomposed = false; + const auto& orig_parameters = model->get_parameters(); + std::deque> parameters(orig_parameters.begin(), orig_parameters.end()); + ov::ParameterVector updated_parameters; // will hold final fully unpacked parameters list + + while (!parameters.empty()) { + auto parameter = parameters.front(); + parameters.pop_front(); + auto consumers = parameter->get_output_target_inputs(0); + size_t num_outputs = 0; // number of outputs in each unpack consumer should match + bool all_unpacks = true; + + // collects all outputs per each consumer operation for this tuple Parameter + std::vector consumer_outputs; + + // The following 
vector track consumer nodes having prim::TupleUnpack type to form a detailed + // error message in case when parameter replacement is required but not possible. + std::vector> consumer_unpacks; + + for (const auto& consumer : consumers) { + auto node = consumer.get_node()->shared_from_this(); + auto tuple_unpack = cast_fw_node(node, "prim::TupleUnpack"); + if (!tuple_unpack) { + all_unpacks = false; + continue; // need to look at all consumers to form good diagnostics + } + consumer_unpacks.push_back(node); + if (num_outputs == 0) { + num_outputs = node->get_output_size(); + } else if (num_outputs != node->get_output_size()) { + std::stringstream message; + message << "Unpack node " << node + << " as one of the consumers of a tuple, which is introduced by parameter " + << parameter->output(0) << ", has number of outputs " << node->get_output_size() + << " not matching number of outputs " << num_outputs << " for other consumer(s) found earlier."; + add_exception_to_fw_node(node, message.str()); + all_unpacks = false; + break; + } + consumer_outputs.push_back(node->outputs()); + } + + if (!all_unpacks || consumer_outputs.empty()) { + // if at least one consumer is not an unpack-like op or there are not matching number of unpacked objects, + // we cannot replace other unpacks even if they exist, leaving Unpack-op(s) in the graph for this Parameter + + updated_parameters.push_back(parameter); + // In case if at least one Unpack exists there is an opportinity to attach diagnostics + for (const auto& consumer : consumer_unpacks) { + std::stringstream message; + message << "Not prim::TupleUnpack operations exist except this one: " << consumer + << " found as one of the consumers of a tuple, which is introduced by parameter " + << parameter->output(0) << "."; + add_exception_to_fw_node(consumer, message.str()); + } + continue; + } + + // enumerating outputs in reverse order because of parameters.push_front below + for (size_t i = num_outputs; i--;) { + // Merged partial 
shape and element type among all the consumers of i-th result of unpack ops + PartialShape ps = PartialShape::dynamic(); + element::Type et = element::dynamic; + std::set> inputs; + + for (const auto& outputs : consumer_outputs) { + auto output = outputs[i]; + OPENVINO_ASSERT(PartialShape::merge_into(ps, output.get_partial_shape()), + "Consumers for unpack op have incompatible shape"); + OPENVINO_ASSERT(element::Type::merge(et, et, output.get_element_type()), + "Consumers for unpack op have incompatible types"); + auto target_inputs = output.get_target_inputs(); + inputs.insert(target_inputs.begin(), target_inputs.end()); + } + + auto new_parameter = std::make_shared(et, ps); + + for (auto input : inputs) { + auto names = input.get_tensor().get_names(); + input.replace_source_output(new_parameter->output(0)); + new_parameter->output(0).add_names(names); + } + + // TODO: Assign correct names + parameters.push_front(new_parameter); + at_least_one_decomposed = true; + } + } + + if (at_least_one_decomposed) { + // remove all parameters + while (!model->get_parameters().empty()) + model->remove_parameter(model->get_parameters()[0]); + // and replace them by updated list of parameters + model->add_parameters(updated_parameters); + } + + return at_least_one_decomposed; +}; +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/transforms/prim_tuple_unpack_parameter_replacer.hpp b/src/frontends/pytorch/src/transforms/prim_tuple_unpack_parameter_replacer.hpp new file mode 100644 index 00000000000000..46007a5c12a775 --- /dev/null +++ b/src/frontends/pytorch/src/transforms/prim_tuple_unpack_parameter_replacer.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +// This 
transformation replaces all prim::TupleUnpack operations coming after Parameters with +// more Parameters -- one new parameter for each prim::TupleUnpack output. The original Parameter +// is replaced with these new Parameters preserving the order relative to other Parameters in a model. +// Order of new parameters is the same as the order of prim::TupleUnpack outputs. +// If prim::TupleUnpack has a consumer that is also prim::TupleUnpack, the transformation applies +// the replacement recursively until all prim::TupleUnpacks that take a Parameter output are eliminated. +// +// For example, if a model has the following signature: a, (b, (c, d)), e, where a, b, c, d, and e are +// tensors, and (x1, x2) means tuple consisting two elements x1 and x2, then the resulting model +// after the transformation will have a, b, c, d, e as inputs (without tuples, flattened). +// Note, that there is no special 'tuple' type of an input, tuple structure is restored by +// following prim::TupleUnpack operations in the graph only assuming that they can be applied on +// tuples only and the most nested objects in those tuples are tensors. +class DecomposeTupleParameters : public ov::pass::ModelPass { +public: + OPENVINO_RTTI("ov::frontend::pytorch::pass::DecomposeTupleParameters"); + bool run_on_model(const std::shared_ptr& model) override; +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/inference/dev_api/threading/ie_cpu_streams_info.hpp b/src/inference/dev_api/threading/ie_cpu_streams_info.hpp index 119bc89a5cf263..587669456759d2 100644 --- a/src/inference/dev_api/threading/ie_cpu_streams_info.hpp +++ b/src/inference/dev_api/threading/ie_cpu_streams_info.hpp @@ -12,29 +12,37 @@ namespace InferenceEngine { /** - * @enum column_of_cpu_streams_info_table + * @enum ColumnOfCpuStreamsInfoTable * @brief This enum contains definition of each columns in cpu streams information table. 
* * The following are two example of processor type table. * 1. 8 streams on hybrid platform which has 4 threads per stream (TPS). + * 1.1 2 streams (4 TPS) on physical core of Intel Performance-cores + * 1.2 4 streams (4 TPS) on Intel Efficient-cores + * 1.3 2 streams (4 TPS) on logic core of Intel Performance-cores * - * NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM - * 2 1 4 // 2 streams (4 TPS) on physical core of Intel Performance-cores - * 4 2 4 // 4 streams (4 TPS) on Intel Efficient-cores - * 2 3 4 // 2 streams (4 TPS) on logic core of Intel Performance-cores + * NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM | STREAM_NUMA_NODE_ID | STREAM_SOCKET_ID + * 2 1 4 0 0 + * 4 2 4 0 0 + * 2 3 4 0 0 * * 2. 1 stream (10 TPS) on hybrid platform which has 2 threads on physical core and 8 threads on Ecore. + * 2.1 1 streams (10 TPS) on multiple types of processors + * 2.2 2 threads on physical core of Intel Performance-cores + * 2.3 8 threads on Intel Efficient-cores * - * NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM - * 1 0 10 // 1 streams (10 TPS) on multiple types of processors - * 0 1 2 // 2 threads on physical core of Intel Performance-cores - * 0 2 8 // 8 threads on Intel Efficient-cores + * NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM | STREAM_NUMA_NODE_ID | STREAM_SOCKET_ID + * 1 0 10 0 0 + * 0 1 2 0 0 + * 0 2 8 0 0 */ -typedef enum { +enum ColumnOfCpuStreamsInfoTable { NUMBER_OF_STREAMS = 0, //!< Number of streams on specific CPU core tpye PROC_TYPE = 1, //!< Core type of current streams THREADS_PER_STREAM = 2, //!< Number of threads per stream of current streams - CPU_STREAMS_TABLE_SIZE = 3 //!< Size of streams info table -} column_of_cpu_streams_info_table; + STREAM_NUMA_NODE_ID = 3, //!< Numa node id of processors in this row + STREAM_SOCKET_ID = 4, //!< Socket id of processors in this row + CPU_STREAMS_TABLE_SIZE = 5 //!< Size of streams info table +}; } // namespace InferenceEngine \ No newline at end of file diff --git 
a/src/inference/src/dev/icompiled_model.cpp b/src/inference/src/dev/icompiled_model.cpp index 4dded3812be38a..01db907cc6dd4e 100644 --- a/src/inference/src/dev/icompiled_model.cpp +++ b/src/inference/src/dev/icompiled_model.cpp @@ -48,6 +48,7 @@ ov::ICompiledModel::ICompiledModel(const std::shared_ptr& model } } + std::unordered_map, std::shared_ptr> tensor_map; for (const auto& param : model->get_parameters()) { const auto& param_name = param->get_friendly_name(); auto new_param = ov::as_type_ptr(param->copy_with_new_inputs({})); @@ -64,6 +65,12 @@ ov::ICompiledModel::ICompiledModel(const std::shared_ptr& model new_param->set_element_type(param->get_element_type()); new_param->set_layout(param->get_layout()); new_param->output(0).get_rt_info() = param->output(0).get_rt_info(); + auto old_tensor = param->output(0).get_tensor_ptr(); + if (tensor_map.count(old_tensor)) { + new_param->output(0).set_tensor_ptr(tensor_map[old_tensor]); + } else { + tensor_map[old_tensor] = new_param->output(0).get_tensor_ptr(); + } new_param->validate_and_infer_types(); m_inputs.emplace_back(new_param->output(0)); } @@ -88,6 +95,12 @@ ov::ICompiledModel::ICompiledModel(const std::shared_ptr& model auto r = std::dynamic_pointer_cast(new_result); r->set_layout(result->get_layout()); new_result->output(0).get_rt_info() = result->output(0).get_rt_info(); + auto old_tensor = result->output(0).get_tensor_ptr(); + if (tensor_map.count(old_tensor)) { + new_result->output(0).set_tensor_ptr(tensor_map[old_tensor]); + } else { + tensor_map[old_tensor] = new_result->output(0).get_tensor_ptr(); + } m_outputs.emplace_back(new_result->output(0)); } } diff --git a/src/plugins/auto/src/infer_request.cpp b/src/plugins/auto/src/infer_request.cpp index a364ae303d72bd..9766187cb6e7b0 100644 --- a/src/plugins/auto/src/infer_request.cpp +++ b/src/plugins/auto/src/infer_request.cpp @@ -105,7 +105,7 @@ std::vector ov::auto_plugin::InferRequest::get_profiling_info if (m_shared_request) return 
m_shared_request->get_profiling_info(); if (m_scheduled_request) - return m_shared_request->get_profiling_info(); + return m_scheduled_request->get_profiling_info(); OPENVINO_NOT_IMPLEMENTED; } diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 217c00cb01660c..4be16563c8991c 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -64,7 +64,7 @@ struct Config { ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; bool enableHyperThreading = true; bool changedHyperThreading = false; - Config::LatencyThreadingMode scopeOflatencyCandidate = Config::LatencyThreadingMode::PER_SOCKET; + Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET; #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) LPTransformsMode lpTransformsMode = LPTransformsMode::On; #else diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index 55dcea7781b428..952ddd8a2462db 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "cpu_map_scheduling.hpp" #include "graph.h" @@ -19,6 +20,8 @@ using namespace InferenceEngine; using namespace ov; +#define INIT_VAL -100 + namespace ov { namespace intel_cpu { @@ -28,12 +31,16 @@ std::vector> get_streams_info_table(const int input_streams, const int input_infer_requests, const int model_prefer_threads, const std::string input_perf_hint, - const Config::LatencyThreadingMode scopeOflatencyCandidate, + const Config::LatencyThreadingMode latencyThreadingMode, const std::vector> proc_type_table) { - std::vector stream_info(CPU_STREAMS_TABLE_SIZE); + std::vector stream_info(CPU_STREAMS_TABLE_SIZE, INIT_VAL); std::vector> streams_info_table; + std::vector> proc_socket_table; + + int n_streams = 0; + int 
n_threads_per_stream = 0; - auto UpdateMixStreamInfo = [&]() { + auto update_mix_stream_info = [&]() { stream_info[NUMBER_OF_STREAMS] = 0; int n_threads = stream_info[THREADS_PER_STREAM]; for (int n = MAIN_CORE_PROC; n <= HYPER_THREADING_PROC; n++) { @@ -52,8 +59,61 @@ std::vector> get_streams_info_table(const int input_streams, } }; + auto update_ids_method = [&](const std::vector& one_proc_info) { + stream_info[STREAM_NUMA_NODE_ID] = one_proc_info[PROC_NUMA_NODE_ID]; + stream_info[STREAM_SOCKET_ID] = one_proc_info[PROC_SOCKET_ID]; + }; + + auto update_streams_per_node = [&](const int& proc_type, const std::vector& one_proc_info) { + if (0 != one_proc_info[proc_type]) { + if (n_threads_per_stream == -1) { + stream_info[THREADS_PER_STREAM] = (proc_type == EFFICIENT_CORE_PROC) ? 2 : 1; + } + stream_info[PROC_TYPE] = proc_type; + update_ids_method(one_proc_info); + stream_info[NUMBER_OF_STREAMS] = + static_cast(one_proc_info[proc_type] / stream_info[THREADS_PER_STREAM]); + if ((stream_info[NUMBER_OF_STREAMS] == 0) && (proc_type == MAIN_CORE_PROC)) { + stream_info[NUMBER_OF_STREAMS] = + static_cast((one_proc_info[MAIN_CORE_PROC] + one_proc_info[HYPER_THREADING_PROC]) / + stream_info[THREADS_PER_STREAM]); + } + if (n_streams < stream_info[NUMBER_OF_STREAMS]) { + stream_info[NUMBER_OF_STREAMS] = n_streams; + } + streams_info_table.push_back(stream_info); + + n_streams -= stream_info[NUMBER_OF_STREAMS]; + proc_socket_table[one_proc_info[PROC_SOCKET_ID]][proc_type] -= + stream_info[NUMBER_OF_STREAMS] * stream_info[THREADS_PER_STREAM]; + } + }; + + if (proc_type_table.size() == 1) { + proc_socket_table.push_back(proc_type_table[0]); + } else { + std::unordered_set socket_id_list(proc_type_table.size()); + for (size_t i = 1; i < proc_type_table.size(); i++) { + if (!socket_id_list.count(proc_type_table[i][PROC_SOCKET_ID])) { + proc_socket_table.push_back(proc_type_table[i]); + socket_id_list.insert(proc_type_table[i][PROC_SOCKET_ID]); + } else { + for (auto& row : 
proc_socket_table) { + if (row[PROC_SOCKET_ID] == proc_type_table[i][PROC_SOCKET_ID]) { + for (int n = 0; n <= HYPER_THREADING_PROC; n++) { + row[n] += proc_type_table[i][n]; + } + if (row[PROC_NUMA_NODE_ID] != proc_type_table[i][PROC_NUMA_NODE_ID]) { + row[PROC_NUMA_NODE_ID] = -1; + } + } + } + } + } + } + if (((input_streams_changed == false) && (input_perf_hint == CONFIG_VALUE(LATENCY)) && - ((scopeOflatencyCandidate == Config::LatencyThreadingMode::PER_PLATFORM) || (proc_type_table.size() == 1))) || + ((latencyThreadingMode == Config::LatencyThreadingMode::PER_PLATFORM) || (proc_type_table.size() == 1))) || ((input_streams_changed == true) && (input_streams == 1))) { stream_info[NUMBER_OF_STREAMS] = 1; if (input_threads > 0) { @@ -61,14 +121,40 @@ std::vector> get_streams_info_table(const int input_streams, if ((stream_info[THREADS_PER_STREAM] > proc_type_table[0][MAIN_CORE_PROC]) && (proc_type_table[0][MAIN_CORE_PROC] > 0) && (proc_type_table[0][EFFICIENT_CORE_PROC] > 0)) { stream_info[PROC_TYPE] = ALL_PROC; + update_ids_method(proc_type_table[0]); streams_info_table.push_back(stream_info); - UpdateMixStreamInfo(); - } else if ((stream_info[THREADS_PER_STREAM] <= proc_type_table[0][MAIN_CORE_PROC]) || - (proc_type_table[0][EFFICIENT_CORE_PROC] == 0)) { - stream_info[PROC_TYPE] = MAIN_CORE_PROC; - streams_info_table.push_back(stream_info); + update_mix_stream_info(); } else { - stream_info[PROC_TYPE] = EFFICIENT_CORE_PROC; + if ((stream_info[THREADS_PER_STREAM] <= proc_type_table[0][MAIN_CORE_PROC]) || + (proc_type_table[0][EFFICIENT_CORE_PROC] == 0)) { + stream_info[PROC_TYPE] = MAIN_CORE_PROC; + } else { + stream_info[PROC_TYPE] = EFFICIENT_CORE_PROC; + } + if (proc_type_table.size() == 1) { + update_ids_method(proc_type_table[0]); + } else { + size_t i = 0; + for (i = 1; i < proc_type_table.size(); i++) { + if (proc_type_table[i][stream_info[PROC_TYPE]] >= stream_info[THREADS_PER_STREAM]) { + update_ids_method(proc_type_table[i]); + i = 
proc_type_table.size() + 1; + break; + } + } + if (i <= proc_type_table.size()) { + for (i = 0; i < proc_socket_table.size(); i++) { + if (proc_socket_table[i][stream_info[PROC_TYPE]] >= stream_info[THREADS_PER_STREAM]) { + update_ids_method(proc_socket_table[i]); + i = proc_socket_table.size() + 1; + break; + } + } + if (i <= proc_socket_table.size()) { + update_ids_method(proc_type_table[0]); + } + } + } streams_info_table.push_back(stream_info); } } else { @@ -78,6 +164,7 @@ std::vector> get_streams_info_table(const int input_streams, (model_prefer_threads == 0) ? proc_type_table[0][EFFICIENT_CORE_PROC] : std::min(proc_type_table[0][EFFICIENT_CORE_PROC], model_prefer_threads); + update_ids_method(proc_type_table[0]); streams_info_table.push_back(stream_info); } else if ((proc_type_table[0][EFFICIENT_CORE_PROC] > 0) && ((model_prefer_threads == 0) || (model_prefer_threads > proc_type_table[0][MAIN_CORE_PROC]))) { @@ -86,54 +173,61 @@ std::vector> get_streams_info_table(const int input_streams, (model_prefer_threads == 0 || model_prefer_threads > proc_type_table[0][MAIN_CORE_PROC]) ? 
proc_type_table[0][ALL_PROC] : proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC]; + update_ids_method(proc_type_table[0]); streams_info_table.push_back(stream_info); - UpdateMixStreamInfo(); + update_mix_stream_info(); } else { stream_info[PROC_TYPE] = MAIN_CORE_PROC; stream_info[THREADS_PER_STREAM] = proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC]; + update_ids_method(proc_type_table[0]); streams_info_table.push_back(stream_info); } } return streams_info_table; } else if ((input_streams_changed == false) && (input_perf_hint == CONFIG_VALUE(LATENCY))) { - stream_info[PROC_TYPE] = MAIN_CORE_PROC; - int max_per_numa_node = 0; - int numa_node_cnt = 0; - std::vector proc_per_socket; - proc_per_socket.resize(proc_type_table.size(), 0); - for (long unsigned int i = 1; i < proc_type_table.size(); i++) { - if (max_per_numa_node < proc_type_table[i][ALL_PROC]) { - max_per_numa_node = proc_type_table[i][ALL_PROC]; - numa_node_cnt = 1; - } else if (max_per_numa_node == proc_type_table[i][ALL_PROC]) { - numa_node_cnt++; + if (latencyThreadingMode == Config::LatencyThreadingMode::PER_NUMA_NODE) { + if (proc_type_table.size() == 1) { + stream_info[NUMBER_OF_STREAMS] = 1; + stream_info[PROC_TYPE] = MAIN_CORE_PROC; + stream_info[THREADS_PER_STREAM] = proc_type_table[0][ALL_PROC]; + update_ids_method(proc_type_table[0]); + streams_info_table.push_back(stream_info); + } else { + for (size_t i = 1; i < proc_type_table.size(); i++) { + if (i != 1) { + if (proc_type_table[i][ALL_PROC] < streams_info_table[0][THREADS_PER_STREAM]) { + continue; + } else if (proc_type_table[i][ALL_PROC] < streams_info_table[0][THREADS_PER_STREAM]) { + streams_info_table.clear(); + } + } + stream_info[NUMBER_OF_STREAMS] = 1; + stream_info[PROC_TYPE] = MAIN_CORE_PROC; + stream_info[THREADS_PER_STREAM] = proc_type_table[i][ALL_PROC]; + update_ids_method(proc_type_table[i]); + streams_info_table.push_back(stream_info); + } } - 
proc_per_socket[proc_type_table[i][PROC_SOCKET_ID]] += proc_type_table[i][ALL_PROC]; - } - if (scopeOflatencyCandidate == Config::LatencyThreadingMode::PER_NUMA_NODE) { - stream_info[NUMBER_OF_STREAMS] = numa_node_cnt; - stream_info[THREADS_PER_STREAM] = max_per_numa_node; } else { - int max_per_socket = 0; - int socket_cnt = 0; - for (long unsigned int i = 0; i < proc_per_socket.size(); i++) { - if (max_per_socket < proc_per_socket[i]) { - max_per_socket = proc_per_socket[i]; - socket_cnt = 1; - } else if (max_per_socket == proc_per_socket[i]) { - socket_cnt++; + for (size_t i = 0; i < proc_socket_table.size(); i++) { + if (streams_info_table.size() != 0) { + if (streams_info_table[0][THREADS_PER_STREAM] > proc_socket_table[i][ALL_PROC]) { + continue; + } else if (streams_info_table[0][THREADS_PER_STREAM] < proc_socket_table[i][ALL_PROC]) { + streams_info_table.clear(); + } } + stream_info[NUMBER_OF_STREAMS] = 1; + stream_info[PROC_TYPE] = MAIN_CORE_PROC; + stream_info[THREADS_PER_STREAM] = proc_socket_table[i][ALL_PROC]; + update_ids_method(proc_socket_table[i]); + streams_info_table.push_back(stream_info); } - stream_info[NUMBER_OF_STREAMS] = socket_cnt; - stream_info[THREADS_PER_STREAM] = max_per_socket; } - streams_info_table.push_back(stream_info); return streams_info_table; } else { - int n_streams = 0; int n_threads = 0; - int n_threads_per_stream = 0; int base_type = MAIN_CORE_PROC; n_threads = @@ -205,40 +299,56 @@ std::vector> get_streams_info_table(const int input_streams, stream_info[THREADS_PER_STREAM] = n_threads_per_stream; - if (proc_type_table.size() == 1) { - while (1) { - for (int n = MAIN_CORE_PROC; n < PROC_TYPE_TABLE_SIZE; n++) { - if (0 != proc_type_table[0][n]) { - if (n_threads_per_stream == -1) { - stream_info[THREADS_PER_STREAM] = (n == EFFICIENT_CORE_PROC) ? 
2 : 1; - } - stream_info[PROC_TYPE] = n; + for (int n_type = MAIN_CORE_PROC; (n_type <= HYPER_THREADING_PROC) && (n_streams > 0); n_type++) { + if (proc_type_table[0][n_type] > 0) { + if (proc_type_table.size() == 1) { + update_streams_per_node(n_type, proc_type_table[0]); + } else { + for (size_t n_node = 1; (n_node < proc_type_table.size()) && (n_streams > 0); n_node++) { + update_streams_per_node(n_type, proc_type_table[n_node]); + } + } + } + } + + if (n_streams > 0) { + for (int n_type = MAIN_CORE_PROC; n_type <= HYPER_THREADING_PROC; n_type++) { + int proc_sum = 0; + for (size_t n_socket = 0; n_socket < proc_socket_table.size(); n_socket++) { + if (proc_socket_table[n_socket][n_type] >= stream_info[THREADS_PER_STREAM]) { + stream_info[PROC_TYPE] = n_type; stream_info[NUMBER_OF_STREAMS] = - static_cast(proc_type_table[0][n] / stream_info[THREADS_PER_STREAM]); - if (n_streams <= stream_info[NUMBER_OF_STREAMS]) { - stream_info[NUMBER_OF_STREAMS] = n_streams; - streams_info_table.push_back(stream_info); - return streams_info_table; - } else { - streams_info_table.push_back(stream_info); - n_streams -= stream_info[NUMBER_OF_STREAMS]; + static_cast(proc_socket_table[n_socket][n_type] / stream_info[THREADS_PER_STREAM]); + stream_info[STREAM_NUMA_NODE_ID] = -1; + stream_info[STREAM_SOCKET_ID] = n_socket; + streams_info_table.push_back(stream_info); + n_streams -= stream_info[NUMBER_OF_STREAMS]; + proc_socket_table[n_socket][n_type] -= + stream_info[THREADS_PER_STREAM] * stream_info[NUMBER_OF_STREAMS]; + if (n_streams <= 0) { + break; } } + proc_sum += proc_socket_table[n_socket][n_type]; } - if (1 == stream_info[THREADS_PER_STREAM]) { - return streams_info_table; - } else { - stream_info[THREADS_PER_STREAM] -= 1; - std::vector>().swap(streams_info_table); + if (n_streams <= 0) { + break; + } + if (proc_sum >= stream_info[THREADS_PER_STREAM]) { + stream_info[PROC_TYPE] = n_type; + stream_info[NUMBER_OF_STREAMS] = static_cast(proc_sum / 
stream_info[THREADS_PER_STREAM]); + stream_info[STREAM_NUMA_NODE_ID] = -1; + stream_info[STREAM_SOCKET_ID] = -1; + streams_info_table.push_back(stream_info); + n_streams -= stream_info[NUMBER_OF_STREAMS]; + if (n_streams <= 0) { + break; + } } } - } else { - stream_info[NUMBER_OF_STREAMS] = n_streams; - stream_info[PROC_TYPE] = MAIN_CORE_PROC; - stream_info[THREADS_PER_STREAM] = n_threads_per_stream; - streams_info_table.push_back(stream_info); - return streams_info_table; } + + return streams_info_table; } } @@ -337,7 +447,7 @@ void generate_stream_info(const int streams, config.perfHintsConfig.ovPerfHintNumRequests, model_prefer_threads, config.perfHintsConfig.ovPerfHint, - config.scopeOflatencyCandidate, + config.latencyThreadingMode, proc_type_table); } diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp index eacb606697d31b..16821971b1e61b 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp @@ -36,7 +36,7 @@ namespace intel_cpu { * - input "0" indicates that the function generates the optimal number of threads per stream based on * processors type information. * @param[in] input_perf_hint is performance hint set by user via ov::hint::performance_mode or the default value. - * @param[in] scopeOflatencyCandidate is the scope of candidate processors per stream for latency hint + * @param[in] latencyThreadingMode is the scope of candidate processors per stream for latency hint * - user can select all processors per numa node, per socket, or per platform. * @param[in] proc_type_table is currently available candidate processors. 
* - candidate processors have benn updated based on user input hints like ov::hint::scheduling_core_type @@ -49,7 +49,7 @@ std::vector> get_streams_info_table(const int input_streams, const int input_infer_requests, const int model_prefer_threads, const std::string input_perf_hint, - const Config::LatencyThreadingMode scopeOflatencyCandidate, + const Config::LatencyThreadingMode latencyThreadingMode, const std::vector> proc_type_table); /** * @brief Get model_prefer_threads diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.hpp index 478da5357c867d..38e3fc0b9d7b38 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.hpp @@ -57,7 +57,9 @@ class AclMVNExecutorBuilder : public MVNExecutorBuilder { if (!mvnAttrs.normalizeVariance_) { return false; } - if (!mvnAttrs.initAcrossChannels_ && getAclDataLayoutByMemoryDesc(srcDescs[0]) == arm_compute::DataLayout::NHWC) { + // "initAcrossChannels = false" is not supported by ACL for NHWC layout + if (!mvnAttrs.initAcrossChannels_ && + getAclDataLayoutByMemoryDesc(srcDescs[0]) == arm_compute::DataLayout::NHWC) { return false; } diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.hpp index c2184890c73302..43a7911670f865 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.hpp @@ -74,7 +74,7 @@ class AclReduceExecutorBuilder : public ReduceExecutorBuilder { for (size_t i = 0; i < reduceAttrs.axes.size(); ++i) { auto axe = axisCast(reduceAttrs.axes[i], srcDescs[0]->getShape().getRank()); if (axe > 3) { - DEBUG_LOG("ACL supports 3 or less axis for Reduce op"); + DEBUG_LOG("ACL supports tensor rank up to 4 for ReduceMean operation. 
Tensor rank: ", axe); return false; } } @@ -84,6 +84,7 @@ class AclReduceExecutorBuilder : public ReduceExecutorBuilder { reduceAttrs.operation == Algorithm::ReduceMin || reduceAttrs.operation == Algorithm::ReduceProd) && reduceAttrs.axes.size() != 1) { + DEBUG_LOG("ACL supports single axes reduce only. Number of axes: ", reduceAttrs.axes.size()); return false; } diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.cpp index 27104457ec097b..09fc9fc0f276ef 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.cpp @@ -12,44 +12,45 @@ template ngraph::matcher_pass_callback ov::intel_cpu::ConvertReduceMultiAxisBase::convert_reduce() { return [&](ngraph::pattern::Matcher& m) { - auto reduce = m.get_match_root(); - if (!std::dynamic_pointer_cast(reduce)) { + auto reduce = std::dynamic_pointer_cast(m.get_match_root()); + if (!reduce) { return false; } const auto& input0 = reduce->input_value(0); const auto& input1 = reduce->input_value(1); const auto& data_shape0 = input0.get_partial_shape(); - const auto& data_shape1 = input1.get_partial_shape(); - if (data_shape0.is_dynamic() || - data_shape1.is_dynamic()) { + auto reduction_axes = std::dynamic_pointer_cast(input1.get_node_shared_ptr()); + if (!reduction_axes) { return false; } - if (ngraph::shape_size(input1.get_shape()) <= 1) { return false; } - auto reduction_axes = std::dynamic_pointer_cast(input1.get_node_shared_ptr()); - if (!reduction_axes) { - return false; + + auto axes = reduction_axes->template cast_vector(); + for (auto axis : axes) { + if (data_shape0[axis].is_dynamic()) { + return false; + } } - auto axes = reduction_axes->cast_vector(); + ngraph::NodeVector new_ops; std::shared_ptr node = input0.get_node_shared_ptr(); + bool 
keepDims = reduce->get_keep_dims(); + //axes should be sorted in descending order if keepDims is false to be keep axis within data shape + if (!keepDims) { + sort(axes.begin(), axes.end(), std::greater()); + } for (auto axis : axes) { auto reduction_axis = ov::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {axis}); - node = std::make_shared(node, reduction_axis, true); + node = std::make_shared(node, reduction_axis, keepDims); new_ops.push_back(node); } - auto out_shape = reduce->get_output_shape(0); - auto dst_shape = std::make_shared(ngraph::element::i64, ngraph::Shape{out_shape.size()}, - std::vector(out_shape.begin(), out_shape.end())); - auto reshape = std::make_shared(node, dst_shape, true); - - reshape->set_friendly_name(reduce->get_friendly_name()); + node->set_friendly_name(reduce->get_friendly_name()); ngraph::copy_runtime_info(reduce, new_ops); - ngraph::replace_node(reduce, reshape); + ngraph::replace_node(reduce, node); return true; }; } diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp index 486c85eb62c351..e4c39020bf65da 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp @@ -41,10 +41,6 @@ * +-------+---------+ * | * +-------v---------+ - * | Reshape | - * +-------+---------+ - * | - * +-------v---------+ * | Result | * +-----------------+ * diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/mvn.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/mvn.cpp new file mode 100644 index 00000000000000..6155eec881d154 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/mvn.cpp @@ -0,0 +1,337 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: 
Apache-2.0 +// + +#include "mvn.hpp" +#include "gtest/gtest.h" +#include "test_utils/cpu_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ngraph::helpers; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { + +std::string MvnLayerCPUTest::getTestCaseName(testing::TestParamInfo obj) { + basicCpuMvnParams basicParamsSet; + CPUSpecificParams cpuParams; + fusingSpecificParams fusingParams; + ElementType inputPrecision, outputPrecision; + std::tie(basicParamsSet, cpuParams, fusingParams, inputPrecision, outputPrecision) = obj.param; + + InputShape inputShapes; + ElementType netPrecision; + ngraph::AxisSet axes; + bool acrossChanels, normalizeVariance; + double eps; + std::tie(inputShapes, netPrecision, axes, acrossChanels, normalizeVariance, eps) = basicParamsSet; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::partialShape2str({inputShapes.first}) << "_"; + result << "TS="; + for (const auto& shape : inputShapes.second) { + result << "(" << CommonTestUtils::vec2str(shape) << ")_"; + } + result << "Precision=" << netPrecision << "_"; + if (!axes.empty()) { + result << "ReductionAxes=" << CommonTestUtils::vec2str(axes.to_vector()) << "_"; + } else { + result << "AcrossChannels=" << (acrossChanels ? "TRUE" : "FALSE") << "_"; + } + result << "NormalizeVariance=" << (normalizeVariance ? 
"TRUE" : "FALSE") << "_"; + result << "Epsilon=" << eps; + result << "_" + << "CNNInpPrc=" << inputPrecision; + result << "_" + << "CNNOutPrc=" << outputPrecision; + + result << CPUTestsBase::getTestCaseName(cpuParams); + + result << CpuTestWithFusing::getTestCaseName(fusingParams); + + return result.str(); +} + +bool MvnLayerCPUTest::isSupportedTestCase() { +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) + // "initAcrossChannels = false" is not supported by ACL for NHWC layout + if (!inFmts.empty() && (inFmts.front() == nwc || + inFmts.front() == nhwc || + inFmts.front() == ndhwc) && + !acrossChanels) return false; +#endif + return true; +} + +void MvnLayerCPUTest::SetUp() { + targetDevice = CommonTestUtils::DEVICE_CPU; + + basicCpuMvnParams basicParamsSet; + CPUSpecificParams cpuParams; + fusingSpecificParams fusingParams; + ElementType inPrc; + ElementType outPrc; + std::tie(basicParamsSet, cpuParams, fusingParams, inPrc, outPrc) = this->GetParam(); + + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(postOpMgrPtr, fusedOps) = fusingParams; + + InputShape inputShapes; + ElementType netPrecision; + ngraph::AxisSet axes; + bool normalizeVariance; + double eps; + std::tie(inputShapes, netPrecision, axes, acrossChanels, normalizeVariance, eps) = basicParamsSet; + + if (!isSupportedTestCase()) { + GTEST_SKIP() << "Skip MVN test since such combination of parameters is not supported." 
<< std::endl; + } + + init_input_shapes({inputShapes}); + + auto param = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes); + auto paramOuts = + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(param)); + auto mvn = ngraph::builder::makeMVN(paramOuts[0], acrossChanels, normalizeVariance, eps); + if (!axes.empty()) { + mvn = ngraph::builder::makeMVN(paramOuts[0], axes, normalizeVariance, eps); + } + + selectedType = getPrimitiveType(); + selectedType = makeSelectedTypeStr(selectedType, netPrecision); + + rel_threshold = 0.015f; + function = makeNgraphFunction(netPrecision, param, mvn, "mvn"); +} + +TEST_P(MvnLayerCPUTest, CompareWithRefs) { + run(); + CheckPluginRelatedResults(compiledModel, "MVN"); +} + +namespace MVN { +const std::vector& inputShapes_1D() { + static const std::vector inputShapes_1D = { + { {}, {{5}}}, + { {}, {{16}}}, + { + // dynamic + {-1}, + // target + { + {2}, + {16}, + {1}, + {2} + } + }, + { + // dynamic + {{1, 20}}, + // target + { + {1}, + {16}, + {4}, + {16} + } + } + }; + return inputShapes_1D; +} + +const std::vector& inputShapes_2D() { + static const std::vector inputShapes_2D = { + { {}, {{1, 32}}}, + { {}, {{16, 64}}}, + + { + // dynamic + {-1, -1}, + // target + { + {2, 16}, + {4, 16}, + {1, 16}, + {4, 16} + } + }, + { + // dynamic + {{1, 5}, {1, 20}}, + // target + { + {1, 1}, + {2, 16}, + {4, 16}, + {2, 16} + } + } + }; + return inputShapes_2D; +} + +const std::vector& inputShapes_3D() { + static const std::vector inputShapes_3D = { + { {}, {{1, 32, 17}}}, + { {}, {{1, 37, 9}}}, + { {}, {{1, 16, 4}}}, + { + // dynamic + {-1, -1, -1}, + // target + { + {2, 16, 6}, + {4, 16, 2}, + {2, 16, 6}, + {4, 16, 2} + } + }, + { + // dynamic + {{1, 5}, {1, 20}, {1, 7}}, + // target + { + {1, 1, 1}, + {2, 16, 6}, + {4, 16, 2}, + {2, 16, 6} + } + } + }; + return inputShapes_3D; +} + +const std::vector& inputShapes_4D() { + static const std::vector inputShapes_4D = { + { {}, {{1, 16, 5, 8}}}, + { {}, {{2, 
19, 5, 10}}}, + { {}, {{7, 32, 2, 8}}}, + { {}, {{5, 8, 3, 5}}}, + { {}, {{1, 2, 7, 5}}}, + { {}, {{1, 4, 5, 5}}}, + { {}, {{1, 7, 3, 5}}}, + { {}, {{1, 15, 9, 5}}}, + { {}, {{4, 41, 6, 9}}}, + { + // dynamic + {-1, -1, -1, -1}, + // target + { + {2, 16, 10, 6}, + {4, 16, 2, 2}, + {2, 16, 10, 6}, + {4, 16, 2, 2} + } + }, + { + // dynamic + {{1, 5}, {1, 20}, {1, 10}, {1, 7}}, + // target + { + {1, 1, 1, 1}, + {2, 16, 10, 6}, + {4, 16, 2, 2}, + {2, 16, 10, 6} + } + } + }; + return inputShapes_4D; +} + +const std::vector& inputShapes_5D() { + static const std::vector inputShapes_5D = { + { {}, {{1, 32, 8, 1, 6}}}, + { {}, {{1, 9, 1, 15, 9}}}, + { {}, {{6, 64, 6, 1, 18}}}, + { {}, {{2, 31, 2, 9, 1}}}, + { {}, {{10, 16, 5, 10, 6}}}, + { + // dynamic + {-1, -1, -1, -1, -1}, + // target + { + {2, 16, 5, 10, 6}, + {4, 16, 7, 2, 2}, + {2, 16, 5, 10, 6}, + {4, 16, 7, 2, 2} + } + }, + { + // dynamic + {{1, 5}, {1, 20}, {1, 7}, {1, 10}, {1, 7}}, + // target + { + {1, 1, 1, 1, 1}, + {2, 16, 5, 10, 6}, + {4, 16, 7, 2, 2}, + {2, 16, 5, 10, 6} + } + } + }; + return inputShapes_5D; +} + +const std::vector& inputShapesStatic_2D() { + static const std::vector inputShapesStatic_2D = { + {1}, + {16}, + {4} + }; + return inputShapesStatic_2D; +} + +const std::vector& inputShapesStatic_3D() { + static const std::vector inputShapesStatic_3D = { + {2, 16, 6}, + {4, 16, 2}, + {1, 16, 4} + }; + return inputShapesStatic_3D; +} + +const std::vector& inputShapesStatic_4D() { + static const std::vector inputShapesStatic_4D = { + {1, 7, 3, 5}, + {1, 15, 9, 5}, + {4, 41, 6, 9}, + // cover channel case 4*16*2+16+3=147 + {1, 147, 2, 2} + }; + return inputShapesStatic_4D; +} + +const std::vector& inputShapesStatic_5D() { + static const std::vector inputShapesStatic_5D = { + {1, 32, 8, 1, 6}, + {1, 9, 1, 15, 9}, + {6, 64, 6, 1, 18}, + // cover channel case 4*16*2+16+9=153 + {6, 153, 2, 2, 2} + }; + return inputShapesStatic_5D; +} + +const std::vector& emptyReductionAxes() { + static const std::vector 
emptyReductionAxes = {{}}; + return emptyReductionAxes; +} + +const std::vector& acrossChannels() { + static const std::vector acrossChannels = { + true, + false + }; + return acrossChannels; +} + +const std::vector& epsilon() { + static const std::vector epsilon = { + 0.000000001 + }; + return epsilon; +} + +} // namespace MVN +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/mvn.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/mvn.hpp new file mode 100644 index 00000000000000..d10ad230e37a02 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/mvn.hpp @@ -0,0 +1,65 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_layer/mvn.hpp" +#include "ngraph_functions/builders.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include +#include "test_utils/fusing_test_utils.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "gtest/gtest.h" + + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { + +using basicCpuMvnParams = std::tuple< + InputShape, // Input shapes + ElementType, // Input precision + ngraph::AxisSet, // Reduction axes + bool, // Across channels + bool, // Normalize variance + double>; // Epsilon + +using MvnLayerCPUTestParamSet = std::tuple< + basicCpuMvnParams, + CPUSpecificParams, + fusingSpecificParams, + ElementType, // CNNNetwork input precision + ElementType>; // CNNNetwork output precision + +class MvnLayerCPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest, public CpuTestWithFusing { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + bool isSupportedTestCase(); +protected: + void SetUp() override; +private: + bool acrossChanels; +}; + +namespace MVN { + const 
std::vector& inputShapes_1D(); + const std::vector& inputShapes_2D(); + const std::vector& inputShapes_3D(); + const std::vector& inputShapes_4D(); + const std::vector& inputShapes_5D(); + + const std::vector& inputShapesStatic_2D(); + const std::vector& inputShapesStatic_3D(); + const std::vector& inputShapesStatic_4D(); + const std::vector& inputShapesStatic_5D(); + + const std::vector& emptyReductionAxes(); + const std::vector& acrossChannels(); + const std::vector& epsilon(); + +} // namespace MVN +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.cpp new file mode 100644 index 00000000000000..fbd3f9bb3e485f --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.cpp @@ -0,0 +1,240 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reduce.hpp" + +#include "gtest/gtest.h" +#include "test_utils/cpu_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ngraph::helpers; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { + +std::string ReduceCPULayerTest::getTestCaseName(testing::TestParamInfo obj) { + basicReduceParams basicParams; + CPUSpecificParams cpuParams; + fusingSpecificParams fusingParams; + std::tie(basicParams, cpuParams, fusingParams) = obj.param; + + std::vector axes; + CommonTestUtils::OpType opType; + bool keepDims; + ngraph::helpers::ReductionType reductionType; + ElementType netPrecision, inPrc, outPrc; + std::vector inputShapes; + + std::tie(axes, opType, keepDims, reductionType, netPrecision, inPrc, outPrc, inputShapes) = basicParams; + + std::ostringstream result; + result << "IS=("; + for (const auto& shape : inputShapes) { + result << CommonTestUtils::partialShape2str({shape.first}) << "_"; + } + result << ")_TS=("; + 
for (const auto& shape : inputShapes) { + for (const auto& item : shape.second) { + result << CommonTestUtils::vec2str(item) << "_"; + } + } + result << ")_axes=" << CommonTestUtils::vec2str(axes) << "_"; + result << "opType=" << opType << "_"; + result << "type=" << reductionType << "_"; + if (keepDims) + result << "KeepDims=true_"; + else + result << "KeepDims=false_"; + result << "netPRC=" << netPrecision << "_"; + result << "inPRC=" << inPrc << "_"; + result << "outPRC=" << outPrc << "_"; + + result << CPUTestsBase::getTestCaseName(cpuParams); + result << CpuTestWithFusing::getTestCaseName(fusingParams); + + return result.str(); +} + +void ReduceCPULayerTest::SetUp() { + targetDevice = CommonTestUtils::DEVICE_CPU; + + basicReduceParams basicParams; + CPUSpecificParams cpuParams; + fusingSpecificParams fusingParams; + std::tie(basicParams, cpuParams, fusingParams) = this->GetParam(); + + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(postOpMgrPtr, fusedOps) = fusingParams; + + std::vector axes; + CommonTestUtils::OpType opType; + bool keepDims; + ElementType inPrc, outPrc; + std::vector inputShapes; + + std::tie(axes, opType, keepDims, reductionType, netPrecision, inPrc, outPrc, inputShapes) = basicParams; + inPrc = outPrc = netPrecision; + + init_input_shapes(inputShapes); + + auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes); + auto paramOuts = + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + + std::vector shapeAxes; + switch (opType) { + case CommonTestUtils::OpType::SCALAR: + if (axes.size() > 1) + FAIL() << "In reduce op if op type is scalar, 'axis' input's must contain 1 element"; + break; + case CommonTestUtils::OpType::VECTOR: + shapeAxes.push_back(axes.size()); + break; + default: + FAIL() << "Reduce op doesn't support operation type: " << opType; + } + auto reductionAxesNode = std::dynamic_pointer_cast( + std::make_shared(ngraph::element::Type_t::i64, 
ngraph::Shape(shapeAxes), axes)); + + const auto reduce = ngraph::builder::makeReduce(paramOuts[0], reductionAxesNode, keepDims, reductionType); + + selectedType = getPrimitiveType() + "_" + + (inPrc == ElementType::boolean ? "I8" : InferenceEngine::details::convertPrecision(inPrc).name()); + + // hybrid layouts + if (inFmts.size() != 0 && outFmts.size() == 0) { + size_t outShapeSize = inputDynamicShapes[0].size() - axes.size(); + switch (outShapeSize) { + case 0: + case 1: + outFmts.push_back(x); + break; + case 2: + outFmts.push_back(nc); + break; + case 3: + outFmts.push_back(tnc); + break; + case 4: + outFmts.push_back(nchw); + break; + default: + FAIL() << "Invaid outShapeSize: " << outShapeSize; + } + } + + function = makeNgraphFunction(netPrecision, params, reduce, "Reduce"); +} + +void ReduceCPULayerTest::generate_inputs(const std::vector& targetInputStaticShapes) { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (size_t i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + if (reductionType == ngraph::helpers::ReductionType::Prod) { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), + targetInputStaticShapes[i], + 10, + 5); + if (netPrecision == ElementType::f32) { + auto* rawBlobDataPtr = static_cast(tensor.data()); + for (size_t i = 0; i < tensor.get_size(); ++i) { + rawBlobDataPtr[i] /= 10.f; + } + } else if (netPrecision == ElementType::bf16) { + auto* rawBlobDataPtr = static_cast(tensor.data()); + for (size_t i = 0; i < tensor.get_size(); ++i) { + rawBlobDataPtr[i] /= 10.f; + } + } + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + } + + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } +} + +TEST_P(ReduceCPULayerTest, CompareWithRefs) { + run(); + CheckPluginRelatedResults(compiledModel, "Reduce"); +} + +namespace Reduce { + +const std::vector& keepDims() { + static 
const std::vector keepDims = { + true, + false, + }; + return keepDims; +} + +const std::vector>& axes() { + static const std::vector> axes = { + {0}, + {1}, + {2}, + {3} + }; + return axes; +} + +const std::vector>& axesND() { + static const std::vector> axesND = { + {0, 1}, + {0, 2}, + {0, 3}, + {1, 2}, + {1, 3}, + {2, 3}, + {0, 1, 2}, + {0, 1, 3}, + {0, 2, 3}, + {1, 2, 3}, + {0, 1, 2, 3} + }; + return axesND; +} + +const std::vector& opTypes() { + static const std::vector opTypes = { + CommonTestUtils::OpType::SCALAR, + CommonTestUtils::OpType::VECTOR, + }; + return opTypes; +} + +const std::vector& reductionTypes() { + static const std::vector reductionTypes = { + ngraph::helpers::ReductionType::Mean, + ngraph::helpers::ReductionType::Max, + ngraph::helpers::ReductionType::Sum, + ngraph::helpers::ReductionType::Min, + ngraph::helpers::ReductionType::Prod, + ngraph::helpers::ReductionType::L1, + ngraph::helpers::ReductionType::L2, + }; + return reductionTypes; +} + +const std::vector& inpOutPrc() { + static const std::vector inpOutPrc = {ElementType::bf16, ElementType::f32}; + return inpOutPrc; +} + +const std::vector& reductionTypesInt32() { + static const std::vector reductionTypesInt32 = { + ngraph::helpers::ReductionType::Sum, + ngraph::helpers::ReductionType::Min, + ngraph::helpers::ReductionType::Max, + ngraph::helpers::ReductionType::L1, + }; + return reductionTypesInt32; +} + +} // namespace Reduce +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.hpp new file mode 100644 index 00000000000000..5325093c222313 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.hpp @@ -0,0 +1,58 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" 
+#include "ngraph_functions/builders.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include +#include "test_utils/fusing_test_utils.hpp" + +using namespace CPUTestUtils; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { + +typedef std::tuple< + std::vector, // Axis to reduce order + CommonTestUtils::OpType, // Scalar or vector type axis + bool, // Keep dims + ngraph::helpers::ReductionType, // Reduce operation type + ElementType, // Net precision + ElementType, // Input precision + ElementType, // Output precision + std::vector // Input shapes +> basicReduceParams; + +typedef std::tuple< + basicReduceParams, + CPUSpecificParams, + fusingSpecificParams> ReduceLayerCPUTestParamSet; + +class ReduceCPULayerTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest, public CpuTestWithFusing { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); +protected: + void SetUp() override; + void generate_inputs(const std::vector& targetInputStaticShapes) override; + +private: + ngraph::helpers::ReductionType reductionType; + ElementType netPrecision; +}; + +namespace Reduce { + +const std::vector& keepDims(); +const std::vector>& axes(); +const std::vector>& axesND(); +const std::vector& opTypes(); +const std::vector& reductionTypes(); +const std::vector& inpOutPrc(); +const std::vector& reductionTypesInt32(); + +} // namespace Reduce +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/mvn.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/mvn.cpp new file mode 100644 index 00000000000000..a66cb779b3b419 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/mvn.cpp @@ -0,0 +1,204 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/mvn.hpp" +#include 
"shared_test_classes/single_layer/mvn.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/fusing_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ngraph::helpers; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { +namespace MVN { + +const std::vector normalizeVariance = { + true +}; + +std::vector inpPrc = { + ElementType::i8, + ElementType::f32, +}; +std::vector outPrc = { + ElementType::f32, +}; + +std::vector cpuParams_4D = { + CPUSpecificParams({nchw}, {nchw}, {}, {}), + CPUSpecificParams({nhwc}, {nhwc}, {}, {}), +}; + +std::vector cpuParams_5D = { + CPUSpecificParams({ncdhw}, {ncdhw}, {}, {}), + CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}), +}; + +std::vector fusingParamsSet { + emptyFusingSpec, +}; + +std::vector fusingParamsSetStaticShape { + emptyFusingSpec, +}; + +const auto Mvn3D = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapes_3D()), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::ValuesIn(acrossChannels()), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn3D, MvnLayerCPUTest, Mvn3D, MvnLayerCPUTest::getTestCaseName); + +const auto Mvn4D = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapes_4D()), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::ValuesIn(acrossChannels()), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn4D, MvnLayerCPUTest, Mvn4D, 
MvnLayerCPUTest::getTestCaseName); + +const auto Mvn5D = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapes_5D()), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::ValuesIn(acrossChannels()), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn5D, MvnLayerCPUTest, Mvn5D, MvnLayerCPUTest::getTestCaseName); + +// 1D 2D case +std::vector fusingUnaryEltwiseParamsSet { + emptyFusingSpec, +}; + +const auto Mvn1D = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapes_1D()), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::ValuesIn(acrossChannels()), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingUnaryEltwiseParamsSet), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn1D, MvnLayerCPUTest, Mvn1D, MvnLayerCPUTest::getTestCaseName); + +// 2D no transformed +const auto Mvn2D = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapes_2D()), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::Values(false), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn2D, MvnLayerCPUTest, Mvn2D, MvnLayerCPUTest::getTestCaseName); + +// 2d transformed +const auto Mvn2DTrans = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapes_2D()), + 
::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::Values(true), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingUnaryEltwiseParamsSet), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn2DTrans, MvnLayerCPUTest, Mvn2DTrans, MvnLayerCPUTest::getTestCaseName); + +const auto Mvn2DStatic = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapesStatic_2D()), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::Values(false), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingParamsSetStaticShape), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +const auto Mvn3DStatic = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic_3D())), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::ValuesIn(acrossChannels()), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingParamsSetStaticShape), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn3D_Static, MvnLayerCPUTest, Mvn3DStatic, MvnLayerCPUTest::getTestCaseName); + +const auto Mvn4DStatic = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic_4D())), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::Values(true), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)), + 
::testing::ValuesIn(fusingParamsSetStaticShape), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn4D_Static, MvnLayerCPUTest, Mvn4DStatic, MvnLayerCPUTest::getTestCaseName); + +const auto Mvn5DStatic = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic_5D())), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::Values(true), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)), + ::testing::ValuesIn(fusingParamsSetStaticShape), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn5D_Static, MvnLayerCPUTest, Mvn5DStatic, MvnLayerCPUTest::getTestCaseName); + +} // namespace MVN +} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/reduce.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/reduce.cpp new file mode 100644 index 00000000000000..386417bcf0c258 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/reduce.cpp @@ -0,0 +1,160 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/reduce.hpp" +#include "shared_test_classes/single_layer/reduce_ops.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/fusing_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ngraph::helpers; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { +namespace Reduce { + +std::vector> inputShapes = { + {{{}, {{2, 19, 2, 9}}}}, +}; + +std::vector> inputShapes_dynamic_3dims = { + {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 9}}}}, +}; + +std::vector> 
inputShapes_dynamic_2dims = { + {{{2, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 9}}}}, +}; + +std::vector> inputShapes_5D = { + {{{}, {{2, 19, 2, 2, 9}}}}, +}; + +std::vector> inputShapes_6D = { + {{{}, {{2, 19, 2, 2, 2, 2}}}}, +}; + +std::vector> inputShapes_Int32 = { + {{{}, {{2, 19, 2, 3}}}}, +}; + +std::vector> inputShapes_SmallChannel = { + {{{}, {{2, 3, 2, 9}}}}, +}; + +std::vector> inputShapes_SingleBatch = { + {{{}, {{1, 19, 2, 9}}}}, +}; + +std::vector cpuParams_4D = { + CPUSpecificParams({nchw}, {nchw}, {}, {}), +//NHWC layout is disabled on ARM due to accuracy issue: https://github.com/ARM-software/ComputeLibrary/issues/1044 +#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) + CPUSpecificParams({nhwc}, {nhwc}, {}, {}), +#endif +}; + +/* ================================ 1.1 No fusion - Arithmetic ================================ */ +const auto params_OneAxis = testing::Combine( + testing::Combine( + testing::ValuesIn(axes()), + testing::ValuesIn(opTypes()), + testing::ValuesIn(keepDims()), + testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), + testing::Values(ElementType::undefined), + testing::Values(ElementType::undefined), + testing::ValuesIn(inputShapes)), + testing::Values(emptyCPUSpec), + testing::Values(emptyFusingSpec)); + +const auto params_OneAxis_dynamic = testing::Combine( + testing::Combine( + testing::Values(1), // ACL supports reduce against static dims only + testing::ValuesIn(opTypes()), + testing::ValuesIn(keepDims()), + testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), + testing::Values(ElementType::undefined), + testing::Values(ElementType::undefined), + testing::ValuesIn(inputShapes_dynamic_3dims)), + testing::Values(emptyCPUSpec), + testing::Values(emptyFusingSpec)); + +const auto params_MultiAxis_4D = testing::Combine( + testing::Combine( + testing::ValuesIn(axesND()), + testing::Values(CommonTestUtils::OpType::VECTOR), + testing::Values(true), + 
testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), + testing::Values(ElementType::undefined), + testing::Values(ElementType::undefined), + testing::ValuesIn(inputShapes)), + testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)), + testing::Values(emptyFusingSpec)); + +const auto params_MultiAxis_4D_dynamic = testing::Combine( + testing::Combine( + testing::Values(std::vector{0, 1}), // ACL supports reduce against static dims only + testing::Values(CommonTestUtils::OpType::VECTOR), + testing::Values(true), + testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), + testing::Values(ElementType::undefined), + testing::Values(ElementType::undefined), + testing::ValuesIn(inputShapes_dynamic_2dims)), + testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)), + testing::Values(emptyFusingSpec)); + +const auto params_Int32 = testing::Combine( + testing::Combine( + testing::ValuesIn(axes()), + testing::Values(CommonTestUtils::OpType::VECTOR), + testing::ValuesIn(keepDims()), + testing::ValuesIn(reductionTypesInt32()), + testing::Values(ElementType::i32), + testing::Values(ElementType::undefined), + testing::Values(ElementType::undefined), + testing::ValuesIn(inputShapes_Int32)), + testing::Values(emptyCPUSpec), + testing::Values(emptyFusingSpec)); + +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_OneAxis_CPU, + ReduceCPULayerTest, + params_OneAxis, + ReduceCPULayerTest::getTestCaseName +); + +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_OneAxis_dynamic_CPU, + ReduceCPULayerTest, + params_OneAxis_dynamic, + ReduceCPULayerTest::getTestCaseName +); + +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_MultiAxis_4D_CPU, + ReduceCPULayerTest, + params_MultiAxis_4D, + ReduceCPULayerTest::getTestCaseName +); + +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_MultiAxis_4D_dynamic_CPU, + ReduceCPULayerTest, + params_MultiAxis_4D_dynamic, + ReduceCPULayerTest::getTestCaseName +); + +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_Int32_CPU, + ReduceCPULayerTest, + params_Int32, 
+ ReduceCPULayerTest::getTestCaseName +); + +} // namespace Reduce +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/mvn.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/mvn.cpp new file mode 100644 index 00000000000000..b79acf38b751df --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/mvn.cpp @@ -0,0 +1,215 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/mvn.hpp" +#include "shared_test_classes/single_layer/mvn.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/fusing_test_utils.hpp" +#include +#include + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ngraph::helpers; +using namespace ov::test; + + +namespace CPULayerTestsDefinitions { +namespace MVN { +namespace { + +const std::vector normalizeVariance = { + false +}; + +std::vector inpPrc = { + ElementType::bf16 +}; +std::vector outPrc = { + ElementType::bf16 +}; + +std::vector cpuParams_4D = { + CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}) +}; + +std::vector cpuParams_5D = { + CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {}) +}; + +std::vector fusingParamsSet { + /* activations */ + fusingRelu, + fusingElu, + fusingTanh, + fusingSwish, + /* FQ */ + fusingFakeQuantizePerTensorRelu, + /* another patterns */ + fusingAddPerTensor +}; + +std::vector fusingParamsSetStaticShape { + /* FQ */ + fusingFakeQuantizePerChannel, + fusingFakeQuantizePerChannelRelu, + /* another patterns */ + fusingScaleShift, +}; + +const auto Mvn3D = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapes_3D()), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::ValuesIn(acrossChannels()), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + 
::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn3D, MvnLayerCPUTest, Mvn3D, MvnLayerCPUTest::getTestCaseName); + +// 1D 2D case +std::vector fusingUnaryEltwiseParamsSet { + /* activations */ + fusingRelu, + fusingElu, + fusingTanh, + fusingSwish, +}; + +const auto Mvn1D = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapes_1D()), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::ValuesIn(acrossChannels()), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingUnaryEltwiseParamsSet), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn1D, MvnLayerCPUTest, Mvn1D, MvnLayerCPUTest::getTestCaseName); + +// 2D no transformed +const auto Mvn2D = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapes_2D()), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::Values(false), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn2D, MvnLayerCPUTest, Mvn2D, MvnLayerCPUTest::getTestCaseName); + +// 2d transformed +const auto Mvn2DTrans = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapes_2D()), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::Values(true), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingUnaryEltwiseParamsSet), + ::testing::ValuesIn(inpPrc), + 
::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn2DTrans, MvnLayerCPUTest, Mvn2DTrans, MvnLayerCPUTest::getTestCaseName); + +const auto Mvn2DStatic = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapesStatic_2D()), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::Values(false), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingParamsSetStaticShape), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +const auto Mvn3DStatic = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic_3D())), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::ValuesIn(acrossChannels()), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::ValuesIn(fusingParamsSetStaticShape), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn3D_Static, MvnLayerCPUTest, Mvn3DStatic, MvnLayerCPUTest::getTestCaseName); + +const auto Mvn4DStatic = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic_4D())), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::Values(false), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)), + ::testing::ValuesIn(fusingParamsSetStaticShape), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn4D_Static, MvnLayerCPUTest, Mvn4DStatic, MvnLayerCPUTest::getTestCaseName); + +const auto Mvn5DStatic = ::testing::Combine( + ::testing::Combine( + 
::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic_5D())), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::Values(false), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon())), + ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)), + ::testing::ValuesIn(fusingParamsSetStaticShape), + ::testing::ValuesIn(inpPrc), + ::testing::ValuesIn(outPrc)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn5D_Static, MvnLayerCPUTest, Mvn5DStatic, MvnLayerCPUTest::getTestCaseName); + +// no transformed with small spatial dim and i8 data and no fusion to cover model use case +const std::vector inputShapesSmallSpatial = { + { {}, {{4, 1}}}, + { {}, {{2, 2}}}, + { {}, {{1, 2, 1}}}, + { {}, {{3, 1, 1, 1}}}, +}; + +const auto MvnSmallSpatial = ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inputShapesSmallSpatial), + ::testing::Values(ElementType::i8), + ::testing::ValuesIn(emptyReductionAxes()), + ::testing::Values(false), + ::testing::Values(false), + ::testing::ValuesIn(epsilon())), + ::testing::Values(emptyCPUSpec), + ::testing::Values(emptyFusingSpec), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::f32)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_MvnSmallSpatial, MvnLayerCPUTest, MvnSmallSpatial, MvnLayerCPUTest::getTestCaseName); + +} // namespace +} // namespace MVN +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/reduce_ops.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/reduce.cpp similarity index 56% rename from src/plugins/intel_cpu/tests/functional/single_layer_tests/reduce_ops.cpp rename to src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/reduce.cpp index 93a45ff898e231..3e2384c5bfa420 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/reduce_ops.cpp +++ 
b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/reduce.cpp @@ -1,215 +1,48 @@ -// Copyright (C) 2018-2023 Intel Corporation +// Copyright (C) 2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "shared_test_classes/base/ov_subgraph.hpp" -#include "ngraph_functions/builders.hpp" +#include "single_layer_tests/classes/reduce.hpp" +#include "shared_test_classes/single_layer/reduce_ops.hpp" #include "test_utils/cpu_test_utils.hpp" -#include #include "test_utils/fusing_test_utils.hpp" +using namespace InferenceEngine; using namespace CPUTestUtils; +using namespace ngraph::helpers; using namespace ov::test; + namespace CPULayerTestsDefinitions { +namespace Reduce { +namespace { -typedef std::tuple< - std::vector, // Axis to reduce order - CommonTestUtils::OpType, // Scalar or vector type axis - bool, // Keep dims - ngraph::helpers::ReductionType, // Reduce operation type - ElementType, // Net precision - ElementType, // Input precision - ElementType, // Output precision - std::vector // Input shapes -> basicReduceParams; - -typedef std::tuple< - basicReduceParams, - CPUSpecificParams, - fusingSpecificParams> ReduceLayerCPUTestParamSet; - -class ReduceCPULayerTest : public testing::WithParamInterface, - virtual public SubgraphBaseTest, public CpuTestWithFusing { -public: - static std::string getTestCaseName(testing::TestParamInfo obj) { - basicReduceParams basicParams; - CPUSpecificParams cpuParams; - fusingSpecificParams fusingParams; - std::tie(basicParams, cpuParams, fusingParams) = obj.param; - - std::vector axes; - CommonTestUtils::OpType opType; - bool keepDims; - ngraph::helpers::ReductionType reductionType; - ElementType netPrecision, inPrc, outPrc; - std::vector inputShapes; - - std::tie(axes, opType, keepDims, reductionType, netPrecision, inPrc, outPrc, inputShapes) = basicParams; - - std::ostringstream result; - result << "IS=("; - for (const auto& shape : inputShapes) { - result << 
CommonTestUtils::partialShape2str({shape.first}) << "_"; - } - result << ")_TS=("; - for (const auto& shape : inputShapes) { - for (const auto& item : shape.second) { - result << CommonTestUtils::vec2str(item) << "_"; - } - } - result << ")_axes=" << CommonTestUtils::vec2str(axes) << "_"; - result << "opType=" << opType << "_"; - result << "type=" << reductionType << "_"; - if (keepDims) - result << "KeepDims=true_"; - else - result << "KeepDims=false_"; - result << "netPRC=" << netPrecision << "_"; - result << "inPRC=" << inPrc << "_"; - result << "outPRC=" << outPrc << "_"; - - result << CPUTestsBase::getTestCaseName(cpuParams); - result << CpuTestWithFusing::getTestCaseName(fusingParams); - - return result.str(); - } -protected: - void SetUp() override { - targetDevice = CommonTestUtils::DEVICE_CPU; - - basicReduceParams basicParams; - CPUSpecificParams cpuParams; - fusingSpecificParams fusingParams; - std::tie(basicParams, cpuParams, fusingParams) = this->GetParam(); - - std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; - std::tie(postOpMgrPtr, fusedOps) = fusingParams; - - std::vector axes; - CommonTestUtils::OpType opType; - bool keepDims; - ElementType inPrc, outPrc; - std::vector inputShapes; - - std::tie(axes, opType, keepDims, reductionType, netPrecision, inPrc, outPrc, inputShapes) = basicParams; - inPrc = outPrc = netPrecision; - - init_input_shapes(inputShapes); - - auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes); - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - - std::vector shapeAxes; - switch (opType) { - case CommonTestUtils::OpType::SCALAR: - if (axes.size() > 1) - FAIL() << "In reduce op if op type is scalar, 'axis' input's must contain 1 element"; - break; - case CommonTestUtils::OpType::VECTOR: - shapeAxes.push_back(axes.size()); - break; - default: - FAIL() << "Reduce op doesn't support operation type: " << opType; - } - auto reductionAxesNode 
= std::dynamic_pointer_cast( - std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes)); - - const auto reduce = ngraph::builder::makeReduce(paramOuts[0], reductionAxesNode, keepDims, reductionType); - - selectedType = getPrimitiveType() + "_" + - (inPrc == ElementType::boolean ? "I8" : InferenceEngine::details::convertPrecision(inPrc).name()); - - // hybrid layouts - if (inFmts.size() != 0 && outFmts.size() == 0) { - size_t outShapeSize = inputDynamicShapes[0].size() - axes.size(); - switch (outShapeSize) { - case 0: - case 1: - outFmts.push_back(x); - break; - case 2: - outFmts.push_back(nc); - break; - case 3: - outFmts.push_back(tnc); - break; - case 4: - outFmts.push_back(nchw); - break; - default: - FAIL() << "Invaid outShapeSize: " << outShapeSize; - } - } - - function = makeNgraphFunction(netPrecision, params, reduce, "Reduce"); - } - - void generate_inputs(const std::vector& targetInputStaticShapes) override { - inputs.clear(); - const auto& funcInputs = function->inputs(); - for (size_t i = 0; i < funcInputs.size(); ++i) { - const auto& funcInput = funcInputs[i]; - ov::Tensor tensor; - if (reductionType == ngraph::helpers::ReductionType::Prod) { - tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 10, 5); - if (netPrecision == ElementType::f32) { - auto *rawBlobDataPtr = static_cast(tensor.data()); - for (size_t i = 0; i < tensor.get_size(); ++i) { - rawBlobDataPtr[i] /= 10.f; - } - } else if (netPrecision == ElementType::bf16) { - auto *rawBlobDataPtr = static_cast(tensor.data()); - for (size_t i = 0; i < tensor.get_size(); ++i) { - rawBlobDataPtr[i] /= 10.f; - } - } - } else { - tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); - } - - inputs.insert({funcInput.get_node_shared_ptr(), tensor}); - } - } - -private: - ngraph::helpers::ReductionType reductionType; - ElementType netPrecision; +std::vector> inputShapes_dyn = 
{ + {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 9}}}}, }; -TEST_P(ReduceCPULayerTest, CompareWithRefs) { - run(); +std::vector> inputShapes_5D_dyn = { + {{{{1, 5}, 19, {1, 5}, {1, 5}, {1, 5}}, {{2, 19, 2, 2, 2}, {2, 19, 3, 2, 2}}}}, +}; - CheckPluginRelatedResults(compiledModel, "Reduce"); -} -namespace { -const std::vector inpOutPrc = {ElementType::bf16, ElementType::f32}; +std::vector> inputShapes_6D_dyn = { + {{{{1, 5}, 19, {1, 5}, {1, 5}, {1, 5}, {1, 5}}, {{2, 19, 2, 2, 2, 2}, {2, 19, 2, 2, 3, 2}}}}, +}; -const std::vector keepDims = { - true, - false, +std::vector> inputShapes_Int32_dyn = { + {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 3}}}}, }; -const std::vector> axes = { - {0}, - {1}, - {2}, - {3} +std::vector> inputShapes_SmallChannel_dyn = { + {{{{1, 5}, 3, {1, 5}, {1, 10}}, {{2, 3, 2, 2}, {2, 3, 2, 9}}}}, }; -const std::vector> axesND = { - {0, 1}, - {0, 2}, - {0, 3}, - {1, 2}, - {1, 3}, - {2, 3}, - {0, 1, 2}, - {0, 1, 3}, - {0, 2, 3}, - {1, 2, 3}, - {0, 1, 2, 3} +std::vector> inputShapes_SingleBatch_dyn = { + {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{1, 19, 2, 2}, {1, 19, 2, 9}}}}, +}; + +std::vector cpuParams_4D = { + CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}), }; const std::vector> axes5D = { @@ -246,82 +79,9 @@ const std::vector> axesHW = { {2, 3} }; -std::vector opTypes = { - CommonTestUtils::OpType::SCALAR, - CommonTestUtils::OpType::VECTOR, -}; - -const std::vector reductionTypes = { - ngraph::helpers::ReductionType::Mean, - ngraph::helpers::ReductionType::Max, - ngraph::helpers::ReductionType::Sum, - ngraph::helpers::ReductionType::Min, - ngraph::helpers::ReductionType::Prod, - ngraph::helpers::ReductionType::L1, - ngraph::helpers::ReductionType::L2, -}; - -const std::vector reductionTypesInt32 = { - ngraph::helpers::ReductionType::Sum, - ngraph::helpers::ReductionType::Min, - ngraph::helpers::ReductionType::Max, - ngraph::helpers::ReductionType::L1, -}; - -const std::vector reductionTypesFusing = { - 
ngraph::helpers::ReductionType::Mean, - ngraph::helpers::ReductionType::Max, - ngraph::helpers::ReductionType::L2, -}; - -const std::vector reductionLogicalTypes = { - ngraph::helpers::ReductionType::LogicalOr, - ngraph::helpers::ReductionType::LogicalAnd -}; - -std::vector> inputShapes = { - {{{}, {{2, 19, 2, 9}}}}, - {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 9}}}}, -}; - -std::vector> inputShapes_5D = { - {{{}, {{2, 19, 2, 2, 9}}}}, - {{{{1, 5}, 19, {1, 5}, {1, 5}, {1, 5}}, {{2, 19, 2, 2, 2}, {2, 19, 3, 2, 2}}}}, -}; - -std::vector> inputShapes_6D = { - {{{}, {{2, 19, 2, 2, 2, 2}}}}, - {{{{1, 5}, 19, {1, 5}, {1, 5}, {1, 5}, {1, 5}}, {{2, 19, 2, 2, 2, 2}, {2, 19, 2, 2, 3, 2}}}}, -}; - -std::vector> inputShapes_Int32 = { - {{{}, {{2, 19, 2, 3}}}}, - {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 3}}}}, -}; - -std::vector> inputShapes_SmallChannel = { - {{{}, {{2, 3, 2, 9}}}}, - {{{{1, 5}, 3, {1, 5}, {1, 10}}, {{2, 3, 2, 2}, {2, 3, 2, 9}}}}, -}; - -std::vector> inputShapes_SingleBatch = { - {{{}, {{1, 19, 2, 9}}}}, - {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{1, 19, 2, 2}, {1, 19, 2, 9}}}}, -}; - -std::vector cpuParams_4D = { -#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) - CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}), - CPUSpecificParams({nhwc}, {nhwc}, {}, {}), -#endif - CPUSpecificParams({nchw}, {nchw}, {}, {}), -}; - std::vector cpuParams_5D = { -#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {}), CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}), -#endif CPUSpecificParams({ncdhw}, {ncdhw}, {}, {}), }; @@ -339,10 +99,20 @@ std::vector cpuParams_NHWC_4D = { CPUSpecificParams({nhwc}, {nhwc}, {}, {}) }; +const std::vector reductionLogicalTypes = { + ngraph::helpers::ReductionType::LogicalOr, + ngraph::helpers::ReductionType::LogicalAnd +}; + +const std::vector reductionTypesFusing = { + ngraph::helpers::ReductionType::Mean, + ngraph::helpers::ReductionType::Max, + 
ngraph::helpers::ReductionType::L2, +}; + const std::vector fusingParamsSet { /* activations */ fusingSwish, - /* FQ */ fusingFakeQuantizePerChannelRelu, fusingFakeQuantizePerTensorRelu, @@ -362,57 +132,89 @@ const std::vector fusingParamsSet_KeepNoDims { fusingScaleShift }; -/* ================================ 1.1 No fusion - Arithmetic ================================ */ const auto params_OneAxis = testing::Combine( testing::Combine( - testing::ValuesIn(axes), - testing::ValuesIn(opTypes), - testing::ValuesIn(keepDims), - testing::ValuesIn(reductionTypes), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(axes()), + testing::ValuesIn(opTypes()), + testing::ValuesIn(keepDims()), + testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes)), + testing::ValuesIn(inputShapes_dyn)), testing::Values(emptyCPUSpec), testing::Values(emptyFusingSpec)); const auto params_MultiAxis_4D = testing::Combine( testing::Combine( - testing::ValuesIn(axesND), + testing::ValuesIn(axesND()), testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(true), - testing::ValuesIn(reductionTypes), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes)), + testing::ValuesIn(inputShapes_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)), testing::Values(emptyFusingSpec)); +const auto params_Int32 = testing::Combine( + testing::Combine( + testing::ValuesIn(axes()), + testing::Values(CommonTestUtils::OpType::VECTOR), + testing::ValuesIn(keepDims()), + testing::ValuesIn(reductionTypesInt32()), + testing::Values(ElementType::i32), + testing::Values(ElementType::undefined), + testing::Values(ElementType::undefined), + testing::ValuesIn(inputShapes_Int32_dyn)), + 
testing::Values(emptyCPUSpec), + testing::Values(emptyFusingSpec)); + +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_OneAxis_CPU, + ReduceCPULayerTest, + params_OneAxis, + ReduceCPULayerTest::getTestCaseName +); + +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_MultiAxis_4D_CPU, + ReduceCPULayerTest, + params_MultiAxis_4D, + ReduceCPULayerTest::getTestCaseName +); + +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_Int32_CPU, + ReduceCPULayerTest, + params_Int32, + ReduceCPULayerTest::getTestCaseName +); + const auto params_MultiAxis_5D = testing::Combine( testing::Combine( testing::ValuesIn(axes5D), testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(true), - testing::ValuesIn(reductionTypes), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes_5D)), + testing::ValuesIn(inputShapes_5D_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)), testing::Values(emptyFusingSpec)); -#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) const auto params_MultiAxis_4D_Hybrid = testing::Combine( testing::Combine( - testing::ValuesIn(axesND), + testing::ValuesIn(axesND()), testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(false), - testing::ValuesIn(reductionTypes), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes)), + testing::ValuesIn(inputShapes_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)), testing::Values(emptyFusingSpec)); @@ -421,38 +223,24 @@ const auto params_MultiAxis_5D_Hybrid = testing::Combine( testing::ValuesIn(axes5D), testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(false), - testing::ValuesIn(reductionTypes), - testing::ValuesIn(inpOutPrc), + 
testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes_5D)), + testing::ValuesIn(inputShapes_5D_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)), testing::Values(emptyFusingSpec)); -#endif const auto params_MultiAxis_6D = testing::Combine( testing::Combine( testing::ValuesIn(axes6D), testing::Values(CommonTestUtils::OpType::VECTOR), - testing::ValuesIn(keepDims), - testing::ValuesIn(reductionTypes), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(keepDims()), + testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes_6D)), - testing::Values(emptyCPUSpec), - testing::Values(emptyFusingSpec)); - -const auto params_Int32 = testing::Combine( - testing::Combine( - testing::ValuesIn(axes), - testing::Values(CommonTestUtils::OpType::VECTOR), - testing::ValuesIn(keepDims), - testing::ValuesIn(reductionTypesInt32), - testing::Values(ElementType::i32), - testing::Values(ElementType::undefined), - testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes_Int32)), + testing::ValuesIn(inputShapes_6D_dyn)), testing::Values(emptyCPUSpec), testing::Values(emptyFusingSpec)); @@ -461,41 +249,27 @@ const auto params_NHWC_SmallChannel = testing::Combine( testing::ValuesIn(axesHW), testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(true), - testing::ValuesIn(reductionTypes), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes_SmallChannel)), + testing::ValuesIn(inputShapes_SmallChannel_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)), testing::Values(emptyFusingSpec)); const auto 
params_SingleBatch = testing::Combine( testing::Combine( - testing::ValuesIn(axes), + testing::ValuesIn(axes()), testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(true), - testing::ValuesIn(reductionTypes), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes_SingleBatch)), + testing::ValuesIn(inputShapes_SingleBatch_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)), testing::Values(emptyFusingSpec)); -INSTANTIATE_TEST_SUITE_P( - smoke_Reduce_OneAxis_CPU, - ReduceCPULayerTest, - params_OneAxis, - ReduceCPULayerTest::getTestCaseName -); - -INSTANTIATE_TEST_SUITE_P( - smoke_Reduce_MultiAxis_4D_CPU, - ReduceCPULayerTest, - params_MultiAxis_4D, - ReduceCPULayerTest::getTestCaseName -); - INSTANTIATE_TEST_SUITE_P( smoke_Reduce_MultiAxis_5D_CPU, ReduceCPULayerTest, @@ -503,7 +277,6 @@ INSTANTIATE_TEST_SUITE_P( ReduceCPULayerTest::getTestCaseName ); -#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) INSTANTIATE_TEST_SUITE_P( smoke_Reduce_MultiAxis_4D_Hybrid_CPU, ReduceCPULayerTest, @@ -517,7 +290,6 @@ INSTANTIATE_TEST_SUITE_P( params_MultiAxis_5D_Hybrid, ReduceCPULayerTest::getTestCaseName ); -#endif INSTANTIATE_TEST_SUITE_P( smoke_Reduce_MultiAxis_6D_CPU, @@ -526,13 +298,6 @@ INSTANTIATE_TEST_SUITE_P( ReduceCPULayerTest::getTestCaseName ); -INSTANTIATE_TEST_SUITE_P( - smoke_Reduce_Int32_CPU, - ReduceCPULayerTest, - params_Int32, - ReduceCPULayerTest::getTestCaseName -); - INSTANTIATE_TEST_SUITE_P( smoke_Reduce_NHWC_SmallChannel_CPU, ReduceCPULayerTest, @@ -550,27 +315,27 @@ INSTANTIATE_TEST_SUITE_P( /* ================================ 1.2 No fusion - Logical ================================ */ const auto params_OneAxis_Logical = testing::Combine( testing::Combine( - testing::ValuesIn(axes), - testing::ValuesIn(opTypes), - testing::ValuesIn(keepDims), - 
testing::ValuesIn(reductionLogicalTypes), + testing::ValuesIn(axes()), + testing::ValuesIn(opTypes()), + testing::ValuesIn(keepDims()), + testing::ValuesIn((reductionLogicalTypes)), testing::Values(ElementType::boolean), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes)), + testing::ValuesIn(inputShapes_dyn)), testing::Values(emptyCPUSpec), testing::Values(emptyFusingSpec)); const auto params_MultiAxis_4D_Logical = testing::Combine( testing::Combine( - testing::ValuesIn(axesND), + testing::ValuesIn(axesND()), testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(true), - testing::ValuesIn(reductionLogicalTypes), + testing::ValuesIn((reductionLogicalTypes)), testing::Values(ElementType::boolean), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes)), + testing::ValuesIn(inputShapes_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)), testing::Values(emptyFusingSpec)); @@ -579,25 +344,24 @@ const auto params_MultiAxis_5D_Logical = testing::Combine( testing::ValuesIn(axes5D), testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(true), - testing::ValuesIn(reductionLogicalTypes), + testing::ValuesIn((reductionLogicalTypes)), testing::Values(ElementType::boolean), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes_5D)), + testing::ValuesIn(inputShapes_5D_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)), testing::Values(emptyFusingSpec)); -#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) const auto params_MultiAxis_4D_Hybrid_Logical = testing::Combine( testing::Combine( - testing::ValuesIn(axesND), + testing::ValuesIn(axesND()), testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(false), - testing::ValuesIn(reductionLogicalTypes), + testing::ValuesIn((reductionLogicalTypes)), 
testing::Values(ElementType::boolean), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes)), + testing::ValuesIn(inputShapes_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)), testing::Values(emptyFusingSpec)); @@ -606,25 +370,24 @@ const auto params_MultiAxis_5D_Hybrid_Logical = testing::Combine( testing::ValuesIn(axes5D), testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(false), - testing::ValuesIn(reductionLogicalTypes), + testing::ValuesIn((reductionLogicalTypes)), testing::Values(ElementType::boolean), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes_5D)), + testing::ValuesIn(inputShapes_5D_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)), testing::Values(emptyFusingSpec)); -#endif const auto params_MultiAxis_6D_Logical = testing::Combine( testing::Combine( testing::ValuesIn(axes6D), testing::Values(CommonTestUtils::OpType::VECTOR), - testing::ValuesIn(keepDims), - testing::ValuesIn(reductionLogicalTypes), + testing::ValuesIn(keepDims()), + testing::ValuesIn((reductionLogicalTypes)), testing::Values(ElementType::boolean), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes_6D)), + testing::ValuesIn(inputShapes_6D_dyn)), testing::Values(emptyCPUSpec), testing::Values(emptyFusingSpec)); @@ -649,7 +412,6 @@ INSTANTIATE_TEST_SUITE_P( ReduceCPULayerTest::getTestCaseName ); -#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) INSTANTIATE_TEST_SUITE_P( smoke_Reduce_MultiAxis_4D_Hybrid_Logical_CPU, ReduceCPULayerTest, @@ -663,7 +425,6 @@ INSTANTIATE_TEST_SUITE_P( params_MultiAxis_5D_Hybrid_Logical, ReduceCPULayerTest::getTestCaseName ); -#endif INSTANTIATE_TEST_SUITE_P( smoke_Reduce_MultiAxis_6D_Logical_CPU, @@ -673,30 +434,29 @@ INSTANTIATE_TEST_SUITE_P( ); /* ================================ 2.1 
Fusion - KeepDims ================================ */ -#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) const auto params_OneAxis_fusing = testing::Combine( testing::Combine( - testing::ValuesIn(axes), - testing::ValuesIn(opTypes), + testing::ValuesIn(axes()), + testing::ValuesIn(opTypes()), testing::Values(true), testing::ValuesIn(reductionTypesFusing), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes)), + testing::ValuesIn(inputShapes_dyn)), testing::Values(emptyCPUSpec), testing::ValuesIn(fusingParamsSet)); const auto params_MultiAxis_4D_fusing = testing::Combine( testing::Combine( - testing::ValuesIn(axesND), + testing::ValuesIn(axesND()), testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(true), testing::ValuesIn(reductionTypesFusing), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes)), + testing::ValuesIn(inputShapes_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)), testing::ValuesIn(fusingParamsSet)); @@ -706,10 +466,10 @@ const auto params_MultiAxis_5D_fusing = testing::Combine( testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(true), testing::ValuesIn(reductionTypesFusing), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes_5D)), + testing::ValuesIn(inputShapes_5D_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)), testing::ValuesIn(fusingParamsSet)); @@ -737,14 +497,14 @@ INSTANTIATE_TEST_SUITE_P( /* ================================ 2.2 Fusion - KeepNoDims ================================ */ const auto params_OneAxis_fusing_KeepNoDims = testing::Combine( testing::Combine( - testing::ValuesIn(axes), 
- testing::ValuesIn(opTypes), + testing::ValuesIn(axes()), + testing::ValuesIn(opTypes()), testing::Values(false), testing::ValuesIn(reductionTypesFusing), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes)), + testing::ValuesIn(inputShapes_dyn)), testing::Values(emptyCPUSpec), testing::ValuesIn(fusingParamsSet_KeepNoDims)); @@ -754,10 +514,10 @@ const auto params_MultiAxis_4D_Hybrid_fusing_KeepNoDims = testing::Combine( testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(false), testing::ValuesIn(reductionTypesFusing), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes)), + testing::ValuesIn(inputShapes_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)), testing::ValuesIn(fusingParamsSet_KeepNoDims)); @@ -767,10 +527,10 @@ const auto params_MultiAxis_5D_Hybrid_fusing_KeepNoDims = testing::Combine( testing::Values(CommonTestUtils::OpType::VECTOR), testing::Values(false), testing::ValuesIn(reductionTypesFusing), - testing::ValuesIn(inpOutPrc), + testing::ValuesIn(inpOutPrc()), testing::Values(ElementType::undefined), testing::Values(ElementType::undefined), - testing::ValuesIn(inputShapes_5D)), + testing::ValuesIn(inputShapes_5D_dyn)), testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)), testing::ValuesIn(fusingParamsSet_KeepNoDims)); @@ -794,8 +554,7 @@ INSTANTIATE_TEST_SUITE_P( params_MultiAxis_5D_Hybrid_fusing_KeepNoDims, ReduceCPULayerTest::getTestCaseName ); -#endif } // namespace -} // namespace CPULayerTestsDefinitions - +} // namespace Reduce +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/mvn.cpp 
b/src/plugins/intel_cpu/tests/functional/single_layer_tests/mvn.cpp deleted file mode 100644 index f4b405e57a88b7..00000000000000 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/mvn.cpp +++ /dev/null @@ -1,553 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "ngraph_functions/builders.hpp" -#include "test_utils/cpu_test_utils.hpp" -#include "test_utils/fusing_test_utils.hpp" -#include "shared_test_classes/base/ov_subgraph.hpp" - -using namespace InferenceEngine; -using namespace CPUTestUtils; -using namespace ov::test; - -namespace CPULayerTestsDefinitions { - -using basicCpuMvnParams = std::tuple< - InputShape, // Input shapes - ElementType, // Input precision - ngraph::AxisSet, // Reduction axes - bool, // Across channels - bool, // Normalize variance - double>; // Epsilon - -using MvnLayerCPUTestParamSet = std::tuple< - basicCpuMvnParams, - CPUSpecificParams, - fusingSpecificParams, - ElementType, // CNNNetwork input precision - ElementType>; // CNNNetwork output precision - -class MvnLayerCPUTest : public testing::WithParamInterface, - virtual public SubgraphBaseTest, public CpuTestWithFusing { -public: - static std::string getTestCaseName(testing::TestParamInfo obj) { - basicCpuMvnParams basicParamsSet; - CPUSpecificParams cpuParams; - fusingSpecificParams fusingParams; - ElementType inputPrecision, outputPrecision; - std::tie(basicParamsSet, cpuParams, fusingParams, inputPrecision, outputPrecision) = obj.param; - - InputShape inputShapes; - ElementType netPrecision; - ngraph::AxisSet axes; - bool acrossChanels, normalizeVariance; - double eps; - std::tie(inputShapes, netPrecision, axes, acrossChanels, normalizeVariance, eps) = basicParamsSet; - - std::ostringstream result; - result << "IS=" << CommonTestUtils::partialShape2str({inputShapes.first}) << "_"; - result << "TS="; - for (const auto& shape : inputShapes.second) { - result << "(" << CommonTestUtils::vec2str(shape) 
<< ")_"; - } - result << "Precision=" << netPrecision << "_"; - if (!axes.empty()) { - result << "ReductionAxes=" << CommonTestUtils::vec2str(axes.to_vector()) << "_"; - } else { - result << "AcrossChannels=" << (acrossChanels ? "TRUE" : "FALSE") << "_"; - } - result << "NormalizeVariance=" << (normalizeVariance ? "TRUE" : "FALSE") << "_"; - result << "Epsilon=" << eps; - result << "_" << "CNNInpPrc=" << inputPrecision; - result << "_" << "CNNOutPrc=" << outputPrecision; - - result << CPUTestsBase::getTestCaseName(cpuParams); - - result << CpuTestWithFusing::getTestCaseName(fusingParams); - - return result.str(); - } -protected: - void SetUp() override { - targetDevice = CommonTestUtils::DEVICE_CPU; - - basicCpuMvnParams basicParamsSet; - CPUSpecificParams cpuParams; - fusingSpecificParams fusingParams; - ElementType inPrc; - ElementType outPrc; - std::tie(basicParamsSet, cpuParams, fusingParams, inPrc, outPrc) = this->GetParam(); - - std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; - std::tie(postOpMgrPtr, fusedOps) = fusingParams; - - InputShape inputShapes; - ElementType netPrecision; - ngraph::AxisSet axes; - bool acrossChanels, normalizeVariance; - double eps; - std::tie(inputShapes, netPrecision, axes, acrossChanels, normalizeVariance, eps) = basicParamsSet; - - init_input_shapes({inputShapes}); - - auto param = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(param)); - auto mvn = ngraph::builder::makeMVN(paramOuts[0], acrossChanels, normalizeVariance, eps); - if (!axes.empty()) { - mvn = ngraph::builder::makeMVN(paramOuts[0], axes, normalizeVariance, eps); - } - - selectedType = getPrimitiveType(); - selectedType = makeSelectedTypeStr(selectedType, netPrecision); - - function = makeNgraphFunction(netPrecision, param, mvn, "mvn"); - } -}; - -TEST_P(MvnLayerCPUTest, CompareWithRefs) { - run(); - CheckPluginRelatedResults(compiledModel, 
"MVN"); -} - -namespace { - -const std::vector inputShapes_1D = { - { {}, {{5}}}, - { {}, {{16}}}, - { - // dynamic - {-1}, - // target - { - {2}, - {16}, - {1}, - {2} - } - }, - { - // dynamic - {{1, 20}}, - // target - { - {1}, - {16}, - {4}, - {16} - } - } -}; - -const std::vector inputShapes_2D = { - { {}, {{1, 32}}}, - { {}, {{16, 64}}}, - - { - // dynamic - {-1, -1}, - // target - { - {2, 16}, - {4, 16}, - {1, 16}, - {4, 16} - } - }, - { - // dynamic - {{1, 5}, {1, 20}}, - // target - { - {1, 1}, - {2, 16}, - {4, 16}, - {2, 16} - } - } -}; - -const std::vector inputShapes_3D = { - { {}, {{1, 32, 17}}}, - { {}, {{1, 37, 9}}}, - { {}, {{1, 16, 4}}}, - { - // dynamic - {-1, -1, -1}, - // target - { - {2, 16, 6}, - {4, 16, 2}, - {2, 16, 6}, - {4, 16, 2} - } - }, - { - // dynamic - {{1, 5}, {1, 20}, {1, 7}}, - // target - { - {1, 1, 1}, - {2, 16, 6}, - {4, 16, 2}, - {2, 16, 6} - } - } -}; - -const std::vector inputShapes_4D = { - { {}, {{1, 16, 5, 8}}}, - { {}, {{2, 19, 5, 10}}}, - { {}, {{7, 32, 2, 8}}}, - { {}, {{5, 8, 3, 5}}}, - { {}, {{1, 2, 7, 5}}}, - { {}, {{1, 4, 5, 5}}}, - { {}, {{1, 7, 3, 5}}}, - { {}, {{1, 15, 9, 5}}}, - { {}, {{4, 41, 6, 9}}}, - { - // dynamic - {-1, -1, -1, -1}, - // target - { - {2, 16, 10, 6}, - {4, 16, 2, 2}, - {2, 16, 10, 6}, - {4, 16, 2, 2} - } - }, - { - // dynamic - {{1, 5}, {1, 20}, {1, 10}, {1, 7}}, - // target - { - {1, 1, 1, 1}, - {2, 16, 10, 6}, - {4, 16, 2, 2}, - {2, 16, 10, 6} - } - } -}; - -const std::vector inputShapes_5D = { - { {}, {{1, 32, 8, 1, 6}}}, - { {}, {{1, 9, 1, 15, 9}}}, - { {}, {{6, 64, 6, 1, 18}}}, - { {}, {{2, 31, 2, 9, 1}}}, - { {}, {{10, 16, 5, 10, 6}}}, - { - // dynamic - {-1, -1, -1, -1, -1}, - // target - { - {2, 16, 5, 10, 6}, - {4, 16, 7, 2, 2}, - {2, 16, 5, 10, 6}, - {4, 16, 7, 2, 2} - } - }, - { - // dynamic - {{1, 5}, {1, 20}, {1, 7}, {1, 10}, {1, 7}}, - // target - { - {1, 1, 1, 1, 1}, - {2, 16, 5, 10, 6}, - {4, 16, 7, 2, 2}, - {2, 16, 5, 10, 6} - } - } -}; - -const std::vector acrossChannels = 
{ - true, - false -}; - -const std::vector normalizeVariance = { - true, - false -}; - -const std::vector epsilon = { - 0.000000001 -}; - -const std::vector emptyReductionAxes = {{}}; - -std::vector inpPrc = { - ElementType::i8, - ElementType::f32, - #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) - ElementType::bf16 - #endif -}; -std::vector outPrc = { - ElementType::f32, - #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) - ElementType::bf16 - #endif -}; - -std::vector cpuParams_4D = { - CPUSpecificParams({nchw}, {nchw}, {}, {}), - #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) - // TODO: enable nspc test cases for ARM - CPUSpecificParams({nhwc}, {nhwc}, {}, {}), - CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}) - #endif -}; - -std::vector cpuParams_5D = { - CPUSpecificParams({ncdhw}, {ncdhw}, {}, {}), - #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) - // TODO: enable nspc test cases for ARM - CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}), - CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {}) - #endif -}; - -std::vector fusingParamsSet { - emptyFusingSpec, - #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) - /* activations */ - fusingRelu, - fusingElu, - fusingTanh, - fusingSwish, - /* FQ */ - fusingFakeQuantizePerTensorRelu, - /* another patterns */ - fusingAddPerTensor - #endif -}; - -std::vector fusingParamsSetStaticShape { - emptyFusingSpec, - #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) - /* FQ */ - fusingFakeQuantizePerChannel, - fusingFakeQuantizePerChannelRelu, - /* another patterns */ - fusingScaleShift, - #endif -}; - -const auto Mvn3D = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inputShapes_3D), - ::testing::Values(ElementType::f32), - ::testing::ValuesIn(emptyReductionAxes), - ::testing::ValuesIn(acrossChannels), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon)), - ::testing::Values(emptyCPUSpec), - 
::testing::ValuesIn(fusingParamsSet), - ::testing::ValuesIn(inpPrc), - ::testing::ValuesIn(outPrc)); - -INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn3D, MvnLayerCPUTest, Mvn3D, MvnLayerCPUTest::getTestCaseName); - -const auto Mvn4D = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inputShapes_4D), - ::testing::Values(ElementType::f32), - ::testing::ValuesIn(emptyReductionAxes), - ::testing::ValuesIn(acrossChannels), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon)), - ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)), - ::testing::ValuesIn(fusingParamsSet), - ::testing::ValuesIn(inpPrc), - ::testing::ValuesIn(outPrc)); - -INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn4D, MvnLayerCPUTest, Mvn4D, MvnLayerCPUTest::getTestCaseName); - -const auto Mvn5D = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inputShapes_5D), - ::testing::Values(ElementType::f32), - ::testing::ValuesIn(emptyReductionAxes), - ::testing::ValuesIn(acrossChannels), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon)), - ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)), - ::testing::ValuesIn(fusingParamsSet), - ::testing::ValuesIn(inpPrc), - ::testing::ValuesIn(outPrc)); - -INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn5D, MvnLayerCPUTest, Mvn5D, MvnLayerCPUTest::getTestCaseName); - -// 1D 2D case -std::vector fusingUnaryEltwiseParamsSet { - emptyFusingSpec, - #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) - /* activations */ - fusingRelu, - fusingElu, - fusingTanh, - fusingSwish, - #endif -}; - -const auto Mvn1D = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inputShapes_1D), - ::testing::Values(ElementType::f32), - ::testing::ValuesIn(emptyReductionAxes), - ::testing::ValuesIn(acrossChannels), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon)), - ::testing::Values(emptyCPUSpec), - ::testing::ValuesIn(fusingUnaryEltwiseParamsSet), - 
::testing::ValuesIn(inpPrc), - ::testing::ValuesIn(outPrc)); - -INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn1D, MvnLayerCPUTest, Mvn1D, MvnLayerCPUTest::getTestCaseName); - -// 2D no transformed -const auto Mvn2D = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inputShapes_2D), - ::testing::Values(ElementType::f32), - ::testing::ValuesIn(emptyReductionAxes), - ::testing::Values(false), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon)), - ::testing::Values(emptyCPUSpec), - ::testing::ValuesIn(fusingParamsSet), - ::testing::ValuesIn(inpPrc), - ::testing::ValuesIn(outPrc)); - -INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn2D, MvnLayerCPUTest, Mvn2D, MvnLayerCPUTest::getTestCaseName); - -// 2d transformed -const auto Mvn2DTrans = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inputShapes_2D), - ::testing::Values(ElementType::f32), - ::testing::ValuesIn(emptyReductionAxes), - ::testing::Values(true), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon)), - ::testing::Values(emptyCPUSpec), - ::testing::ValuesIn(fusingUnaryEltwiseParamsSet), - ::testing::ValuesIn(inpPrc), - ::testing::ValuesIn(outPrc)); - -INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn2DTrans, MvnLayerCPUTest, Mvn2DTrans, MvnLayerCPUTest::getTestCaseName); - -// no transformed with small spatial dim and i8 data and no fusion to cover model use case -const std::vector inputShapesSmallSpatial = { - { {}, {{4, 1}}}, - { {}, {{2, 2}}}, - { {}, {{1, 2, 1}}}, - { {}, {{3, 1, 1, 1}}}, -}; - -const auto MvnSmallSpatial = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inputShapesSmallSpatial), - ::testing::Values(ElementType::i8), - ::testing::ValuesIn(emptyReductionAxes), - ::testing::Values(false), - ::testing::Values(false), - ::testing::ValuesIn(epsilon)), - ::testing::Values(emptyCPUSpec), - ::testing::Values(emptyFusingSpec), - ::testing::Values(ElementType::i8), - 
::testing::Values(ElementType::f32)); - -INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_MvnSmallSpatial, MvnLayerCPUTest, MvnSmallSpatial, MvnLayerCPUTest::getTestCaseName); - -// Static shape test for some specific fusing parameters in fusingParamsSetStaticShape - -const std::vector inputShapesStatic_2D = { - {1}, - {16}, - {4} -}; - -const std::vector inputShapesStatic_3D = { - {2, 16, 6}, - {4, 16, 2}, - {1, 16, 4} -}; - -const std::vector inputShapesStatic_4D = { - {1, 7, 3, 5}, - {1, 15, 9, 5}, - {4, 41, 6, 9}, - // cover channel case 4*16*2+16+3=147 - {1, 147, 2, 2} -}; - -const std::vector inputShapesStatic_5D = { - {1, 32, 8, 1, 6}, - {1, 9, 1, 15, 9}, - {6, 64, 6, 1, 18}, - // cover channel case 4*16*2+16+9=153 - {6, 153, 2, 2, 2} -}; - -const auto Mvn2DStatic = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inputShapesStatic_2D), - ::testing::Values(ElementType::f32), - ::testing::ValuesIn(emptyReductionAxes), - ::testing::Values(false), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon)), - ::testing::Values(emptyCPUSpec), - ::testing::ValuesIn(fusingParamsSetStaticShape), - ::testing::ValuesIn(inpPrc), - ::testing::ValuesIn(outPrc)); - -const auto Mvn3DStatic = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic_3D)), - ::testing::Values(ElementType::f32), - ::testing::ValuesIn(emptyReductionAxes), - ::testing::ValuesIn(acrossChannels), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon)), - ::testing::Values(emptyCPUSpec), - ::testing::ValuesIn(fusingParamsSetStaticShape), - ::testing::ValuesIn(inpPrc), - ::testing::ValuesIn(outPrc)); - -INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn3D_Static, MvnLayerCPUTest, Mvn3DStatic, MvnLayerCPUTest::getTestCaseName); - -const auto Mvn4DStatic = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic_4D)), - 
::testing::Values(ElementType::f32), - ::testing::ValuesIn(emptyReductionAxes), - ::testing::ValuesIn(acrossChannels), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon)), - ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)), - ::testing::ValuesIn(fusingParamsSetStaticShape), - ::testing::ValuesIn(inpPrc), - ::testing::ValuesIn(outPrc)); - -INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn4D_Static, MvnLayerCPUTest, Mvn4DStatic, MvnLayerCPUTest::getTestCaseName); - -const auto Mvn5DStatic = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic_5D)), - ::testing::Values(ElementType::f32), - ::testing::ValuesIn(emptyReductionAxes), - ::testing::ValuesIn(acrossChannels), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon)), - ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)), - ::testing::ValuesIn(fusingParamsSetStaticShape), - ::testing::ValuesIn(inpPrc), - ::testing::ValuesIn(outPrc)); - -INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn5D_Static, MvnLayerCPUTest, Mvn5DStatic, MvnLayerCPUTest::getTestCaseName); - -} // namespace -} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/unit/streams_info/enable_ht_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info/enable_ht_test.cpp new file mode 100644 index 00000000000000..07d7963bdc6cbd --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/streams_info/enable_ht_test.cpp @@ -0,0 +1,289 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include + +#include "cpu_map_scheduling.hpp" +#include "cpu_streams_calculation.hpp" + +using namespace testing; +using namespace InferenceEngine; +using namespace ov; + +namespace { + +struct UseHTTestCase { + bool input_ht_value; + bool input_ht_changed; + std::string input_pm_hint; + std::vector> proc_type_table; + std::vector> result_table; + bool 
output_ht_value; +}; + +class UseHTTests : public CommonTestUtils::TestsCommon, public testing::WithParamInterface> { +public: + void SetUp() override { + auto test_data = std::get<0>(GetParam()); + + std::vector> test_result_table = + ov::intel_cpu::apply_hyper_threading(test_data.input_ht_value, + test_data.input_ht_changed, + test_data.input_pm_hint, + test_data.proc_type_table); + + ASSERT_EQ(test_data.result_table, test_result_table); + ASSERT_EQ(test_data.input_ht_value, test_data.output_ht_value); + } +}; + +UseHTTestCase _2sockets_false_latency = { + false, + true, + "LATENCY", + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, + false, +}; + +UseHTTestCase _2sockets_false_throughput = { + false, + true, + "THROUGHPUT", + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, + false, +}; + +UseHTTestCase _2sockets_true_latency = { + true, + true, + "LATENCY", + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + true, +}; + +UseHTTestCase _2sockets_true_throughput = { + true, + true, + "THROUGHPUT", + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + true, +}; + +UseHTTestCase _2sockets_default_1_latency = { + false, + false, + "LATENCY", + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, + false, +}; + +UseHTTestCase _2sockets_default_1_throughput = { + false, + false, + "THROUGHPUT", + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, + false, +}; + +UseHTTestCase _2sockets_default_2_latency = { + true, + false, + "LATENCY", + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, + false, +}; + +UseHTTestCase 
_2sockets_default_2_throughput = { + true, + false, + "THROUGHPUT", + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, + false, +}; + +UseHTTestCase _1sockets_1_false_latency = { + false, + true, + "LATENCY", + {{20, 6, 8, 6}}, + {{14, 6, 8, 0}}, + false, +}; + +UseHTTestCase _1sockets_1_false_throughput = { + false, + true, + "THROUGHPUT", + {{20, 6, 8, 6}}, + {{14, 6, 8, 0}}, + false, +}; + +UseHTTestCase _1sockets_1_true_latency = { + true, + true, + "LATENCY", + {{20, 6, 8, 6}}, + {{20, 6, 8, 6}}, + true, +}; + +UseHTTestCase _1sockets_1_true_throughput = { + true, + true, + "THROUGHPUT", + {{20, 6, 8, 6}}, + {{20, 6, 8, 6}}, + true, +}; + +UseHTTestCase _1sockets_1_default_1_latency = { + false, + false, + "LATENCY", + {{20, 6, 8, 6}}, + {{14, 6, 8, 0}}, + false, +}; + +UseHTTestCase _1sockets_1_default_1_throughput = { + false, + false, + "THROUGHPUT", + {{20, 6, 8, 6}}, + {{20, 6, 8, 6}}, + true, +}; + +UseHTTestCase _1sockets_1_default_2_latency = { + true, + false, + "LATENCY", + {{20, 6, 8, 6}}, + {{14, 6, 8, 0}}, + false, +}; + +UseHTTestCase _1sockets_1_default_2_throughput = { + true, + false, + "THROUGHPUT", + {{20, 6, 8, 6}}, + {{20, 6, 8, 6}}, + true, +}; + +UseHTTestCase _1sockets_2_false_latency = { + false, + true, + "LATENCY", + {{12, 6, 0, 6}}, + {{6, 6, 0, 0}}, + false, +}; + +UseHTTestCase _1sockets_2_false_throughput = { + false, + true, + "THROUGHPUT", + {{12, 6, 0, 6}}, + {{6, 6, 0, 0}}, + false, +}; + +UseHTTestCase _1sockets_2_true_latency = { + true, + true, + "LATENCY", + {{12, 6, 0, 6}}, + {{12, 6, 0, 6}}, + true, +}; + +UseHTTestCase _1sockets_2_true_throughput = { + true, + true, + "THROUGHPUT", + {{12, 6, 0, 6}}, + {{12, 6, 0, 6}}, + true, +}; + +UseHTTestCase _1sockets_2_default_1_latency = { + false, + false, + "LATENCY", + {{12, 6, 0, 6}}, + {{6, 6, 0, 0}}, + false, +}; + +UseHTTestCase _1sockets_2_default_1_throughput = { + false, + false, + "THROUGHPUT", + {{12, 6, 
0, 6}}, + {{12, 6, 0, 6}}, + true, +}; + +UseHTTestCase _1sockets_2_default_2_latency = { + true, + false, + "LATENCY", + {{12, 6, 0, 6}}, + {{6, 6, 0, 0}}, + false, +}; + +UseHTTestCase _1sockets_2_default_2_throughput = { + true, + false, + "THROUGHPUT", + {{12, 6, 0, 6}}, + {{12, 6, 0, 6}}, + true, +}; + +TEST_P(UseHTTests, UseHT) {} + +INSTANTIATE_TEST_SUITE_P(UseHTTable, + UseHTTests, + testing::Values(_2sockets_false_latency, + _2sockets_true_latency, + _2sockets_default_1_latency, + _2sockets_default_2_latency, + _1sockets_1_false_latency, + _1sockets_1_true_latency, + _1sockets_1_default_1_latency, + _1sockets_1_default_2_latency, + _1sockets_2_false_latency, + _1sockets_2_true_latency, + _1sockets_2_default_1_latency, + _1sockets_2_default_2_latency, + _2sockets_false_throughput, + _2sockets_true_throughput, + _2sockets_default_1_throughput, + _2sockets_default_2_throughput, + _1sockets_1_false_throughput, + _1sockets_1_true_throughput, + _1sockets_1_default_1_throughput, + _1sockets_1_default_2_throughput, + _1sockets_2_false_throughput, + _1sockets_2_true_throughput, + _1sockets_2_default_1_throughput, + _1sockets_2_default_2_throughput)); + +} // namespace \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/unit/streams_info/scheduling_core_type_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info/scheduling_core_type_test.cpp new file mode 100644 index 00000000000000..ac0c3ac1339239 --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/streams_info/scheduling_core_type_test.cpp @@ -0,0 +1,105 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include + +#include "cpu_map_scheduling.hpp" +#include "cpu_streams_calculation.hpp" + +using namespace testing; +using namespace InferenceEngine; +using namespace ov; + +namespace { + +struct SchedulingCoreTypeTestCase { + ov::hint::SchedulingCoreType input_type; + std::vector> proc_type_table; + std::vector> result_table; + 
ov::hint::SchedulingCoreType output_type; +}; + +class SchedulingCoreTypeTests : public CommonTestUtils::TestsCommon, + public testing::WithParamInterface> { +public: + void SetUp() override { + const auto& test_data = std::get<0>(GetParam()); + auto test_input_type = test_data.input_type; + + std::vector> test_result_table = + ov::intel_cpu::apply_scheduling_core_type(test_input_type, test_data.proc_type_table); + + ASSERT_EQ(test_data.result_table, test_result_table); + ASSERT_EQ(test_data.output_type, test_input_type); + } +}; + +SchedulingCoreTypeTestCase _2sockets_ALL = { + ov::hint::SchedulingCoreType::ANY_CORE, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + ov::hint::SchedulingCoreType::ANY_CORE, +}; + +SchedulingCoreTypeTestCase _2sockets_P_CORE_ONLY = { + ov::hint::SchedulingCoreType::PCORE_ONLY, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, +}; + +SchedulingCoreTypeTestCase _2sockets_E_CORE_ONLY = { + ov::hint::SchedulingCoreType::ECORE_ONLY, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + ov::hint::SchedulingCoreType::ANY_CORE, + // ov::hint::scheduling_core_type returns ANY_CORE because the platform has no Ecores available to satisfy the + // user's request. 
+}; + +SchedulingCoreTypeTestCase _1sockets_ALL = { + ov::hint::SchedulingCoreType::ANY_CORE, + {{20, 6, 8, 6}}, + {{20, 6, 8, 6}}, + ov::hint::SchedulingCoreType::ANY_CORE, +}; + +SchedulingCoreTypeTestCase _1sockets_P_CORE_ONLY = { + ov::hint::SchedulingCoreType::PCORE_ONLY, + {{20, 6, 8, 6}}, + {{12, 6, 0, 6}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, +}; + +SchedulingCoreTypeTestCase _1sockets_P_CORE_ONLY_1 = { + ov::hint::SchedulingCoreType::PCORE_ONLY, + {{8, 0, 8, 0}}, + {{8, 0, 8, 0}}, + ov::hint::SchedulingCoreType::ANY_CORE, + // ov::hint::scheduling_core_type returns ANY_CORE because the platform has no Pcore available to satisfy the + // user's request. +}; + +SchedulingCoreTypeTestCase _1sockets_E_CORE_ONLY = { + ov::hint::SchedulingCoreType::ECORE_ONLY, + {{20, 6, 8, 6}}, + {{8, 0, 8, 0}}, + ov::hint::SchedulingCoreType::ECORE_ONLY, +}; + +TEST_P(SchedulingCoreTypeTests, SchedulingCoreType) {} + +INSTANTIATE_TEST_SUITE_P(SchedulingCoreTypeTable, + SchedulingCoreTypeTests, + testing::Values(_2sockets_ALL, + _2sockets_P_CORE_ONLY, + _2sockets_E_CORE_ONLY, + _1sockets_ALL, + _1sockets_P_CORE_ONLY, + _1sockets_P_CORE_ONLY_1, + _1sockets_E_CORE_ONLY)); +} // namespace \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/unit/streams_info/streams_e2e_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info/streams_e2e_test.cpp new file mode 100644 index 00000000000000..99e02acf9f6d89 --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/streams_info/streams_e2e_test.cpp @@ -0,0 +1,527 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include + +#include "cpu_map_scheduling.hpp" +#include "cpu_streams_calculation.hpp" + +using namespace testing; +using namespace InferenceEngine; +using namespace ov; + +namespace { + +struct StreamGenerateionTestCase { + int input_stream; + bool input_stream_changed; + int input_thread; + int input_request; + int input_model_prefer; + 
ov::hint::SchedulingCoreType input_type; + bool input_ht_value; + bool input_ht_changed; + bool input_cpu_value; + bool input_cpu_changed; + ov::hint::PerformanceMode input_pm_hint; + ov::intel_cpu::Config::LatencyThreadingMode input_latency_threading_mode; + ov::threading::IStreamsExecutor::ThreadBindingType input_binding_type; + std::vector> input_proc_type_table; + ov::hint::SchedulingCoreType output_type; + bool output_ht_value; + bool output_cpu_value; + ov::hint::PerformanceMode output_pm_hint; + std::vector> output_proc_type_table; + std::vector> output_stream_info_table; +}; + +void make_config(StreamGenerateionTestCase& test_data, ov::intel_cpu::Config& config) { + config.schedulingCoreType = test_data.input_type; + config.enableCpuPinning = test_data.input_cpu_value; + config.changedCpuPinning = test_data.input_cpu_changed; + config.enableHyperThreading = test_data.input_ht_value; + config.changedHyperThreading = test_data.input_ht_changed; + config.perfHintsConfig.ovPerfHint = ov::util::to_string(test_data.input_pm_hint); + config.latencyThreadingMode = test_data.input_latency_threading_mode; + config.perfHintsConfig.ovPerfHintNumRequests = test_data.input_request; + config.streamExecutorConfig._streams = test_data.input_stream; + config.streamExecutorConfig._streams_changed = test_data.input_stream_changed; + config.streamExecutorConfig._threads = test_data.input_thread; + config.streamExecutorConfig._threadBindingType = test_data.input_binding_type; + config.streamExecutorConfig._orig_proc_type_table = test_data.input_proc_type_table; +} + +class StreamGenerationTests : public CommonTestUtils::TestsCommon, + public testing::WithParamInterface> { +public: + void SetUp() override { + auto test_data = std::get<0>(GetParam()); + ov::intel_cpu::Config config; + make_config(test_data, config); + + ov::intel_cpu::generate_stream_info(test_data.input_stream, nullptr, config, test_data.input_model_prefer); + + ASSERT_EQ(test_data.output_stream_info_table, 
config.streamExecutorConfig._streams_info_table); + ASSERT_EQ(test_data.output_proc_type_table, config.streamExecutorConfig._proc_type_table); + ASSERT_EQ(test_data.output_cpu_value, config.streamExecutorConfig._cpu_pinning); + ASSERT_EQ(test_data.output_ht_value, config.enableHyperThreading); + ASSERT_EQ(test_data.output_type, config.schedulingCoreType); + ASSERT_EQ(test_data.output_pm_hint, + ov::util::from_string(config.perfHintsConfig.ovPerfHint, ov::hint::performance_mode)); + } +}; + +TEST_P(StreamGenerationTests, StreamsGeneration) {} + +StreamGenerateionTestCase generation_latency_1sockets_14cores_1 = { + 1, // param[in]: simulated settting for streams number + false, // param[in]: simulated settting for streams number changed + 0, // param[in]: simulated setting for threads number + 0, // param[in]: simulated setting for inference request number + 0, // param[in]: simulated setting for model prefer threads number + ov::hint::SchedulingCoreType::ANY_CORE, // param[in]: simulated setting for scheduling core type + // (PCORE_ONLY/ECORE_ONLY/ANY_CORE) + true, // param[in]: simulated setting for enableHyperThreading + true, // param[in]: simulated settting for changedHyperThreading + true, // param[in]: simulated setting for enableCpuPinning + true, // param[in]: simulated setting for changedCpuPinning + ov::hint::PerformanceMode::LATENCY, // param[in]: simulated setting for performance mode (throughput/latency) + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, // param[in]: simulated setting for scope of candidate + // processors on latency mode + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, // param[in]: simulated setting for + // threadBindingType + {{20, 6, 8, 6, 0, 0}}, // param[in]: simulated proc_type_table for platform which has one socket, 6 Pcores, 8 + // Ecores and hyper threading enabled + ov::hint::SchedulingCoreType::ANY_CORE, // param[expected out]: scheduling core type needs to be the same as input + true, // 
param[expected out]: enableHyperThreading needs to be the same as input + true, // param[expected out]: enableCpuPinning needs to be the same as input + ov::hint::PerformanceMode::LATENCY, // param[expected out]: performance mode needs to be the same as input + {{20, 6, 8, 6, 0, 0}}, // param[expected out]: since hyper threading is enabled and all core types are used, + // proc_type_table needs to be the same as input + {{1, ALL_PROC, 20, 0, 0}, + {0, MAIN_CORE_PROC, 6, 0, 0}, + {0, EFFICIENT_CORE_PROC, 8, 0, 0}, + {0, HYPER_THREADING_PROC, 6, 0, 0}}, // param[expected out]: since performance mode is latency and all cores are + // used, the final number of streams is 1 +}; + +StreamGenerateionTestCase generation_latency_1sockets_14cores_2 = { + 1, + false, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + true, + true, + true, + ov::hint::PerformanceMode::LATENCY, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{14, 6, 8, 0, 0, 0}}, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + true, + ov::hint::PerformanceMode::LATENCY, + {{14, 6, 8, 0, 0, 0}}, + {{1, ALL_PROC, 14, 0, 0}, {0, MAIN_CORE_PROC, 6, 0, 0}, {0, EFFICIENT_CORE_PROC, 8, 0, 0}}, +}; + +StreamGenerateionTestCase generation_latency_1sockets_14cores_3 = { + 1, + false, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{14, 6, 8, 0, 0, 0}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + ov::hint::PerformanceMode::LATENCY, + {{6, 6, 0, 0, 0, 0}}, + {{1, MAIN_CORE_PROC, 6, 0, 0}}, +}; + +StreamGenerateionTestCase generation_latency_1sockets_14cores_4 = { + 1, + false, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, +
ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{20, 6, 8, 6, 0, 0}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + false, + ov::hint::PerformanceMode::LATENCY, + {{12, 6, 0, 6, 0, 0}}, + {{1, MAIN_CORE_PROC, 12, 0, 0}}, +}; + +StreamGenerateionTestCase generation_latency_1sockets_14cores_5 = { + 1, + false, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{20, 6, 8, 6, 0, 0}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + ov::hint::PerformanceMode::LATENCY, + {{6, 6, 0, 0, 0, 0}}, + {{1, MAIN_CORE_PROC, 6, 0, 0}}, +}; + +StreamGenerateionTestCase generation_latency_2sockets_48cores_6 = { + 1, + false, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + ov::hint::PerformanceMode::LATENCY, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{1, MAIN_CORE_PROC, 24, 0, 0}, {1, MAIN_CORE_PROC, 24, 1, 1}}, +}; + +StreamGenerateionTestCase generation_latency_2sockets_48cores_7 = { + 1, + false, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + 
ov::hint::PerformanceMode::LATENCY, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{1, MAIN_CORE_PROC, 24, 0, 0}, {1, MAIN_CORE_PROC, 24, 1, 1}}, +}; + +StreamGenerateionTestCase generation_latency_2sockets_48cores_8 = { + 1, + true, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + ov::hint::PerformanceMode::LATENCY, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{1, MAIN_CORE_PROC, 48, -1, -1}}, +}; + +StreamGenerateionTestCase generation_latency_2sockets_48cores_9 = { + 1, + true, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + ov::hint::PerformanceMode::LATENCY, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{1, MAIN_CORE_PROC, 48, -1, -1}}, +}; + +StreamGenerateionTestCase generation_tput_1sockets_14cores_1 = { + 0, + false, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + true, + true, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{20, 6, 8, 6, 0, 0}}, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + true, + ov::hint::PerformanceMode::THROUGHPUT, + {{20, 6, 8, 6, 0, 0}}, + {{2, MAIN_CORE_PROC, 3, 0, 0}, {2, EFFICIENT_CORE_PROC, 3, 0, 0}, {2, 
HYPER_THREADING_PROC, 3, 0, 0}}, +}; + +StreamGenerateionTestCase generation_tput_1sockets_14cores_2 = { + 0, + false, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{20, 6, 8, 6, 0, 0}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{6, 6, 0, 0, 0, 0}}, + {{2, MAIN_CORE_PROC, 3, 0, 0}}, +}; + +StreamGenerateionTestCase generation_tput_1sockets_14cores_3 = { + 10, + true, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{20, 6, 8, 6, 0, 0}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{12, 6, 0, 6, 0, 0}}, + {{6, MAIN_CORE_PROC, 1, 0, 0}, {4, HYPER_THREADING_PROC, 1, 0, 0}}, +}; + +StreamGenerateionTestCase generation_tput_1sockets_14cores_4 = { + 0, + false, + 10, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{20, 6, 8, 6, 0, 0}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{12, 6, 0, 6, 0, 0}}, + {{2, MAIN_CORE_PROC, 3, 0, 0}, {1, HYPER_THREADING_PROC, 3, 0, 0}}, +}; + +StreamGenerateionTestCase generation_tput_2sockets_48cores_5 = { + 0, + false, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + 
ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, + {{6, MAIN_CORE_PROC, 4, 0, 0}, + {6, MAIN_CORE_PROC, 4, 1, 1}, + {6, HYPER_THREADING_PROC, 4, 0, 0}, + {6, HYPER_THREADING_PROC, 4, 1, 1}}, +}; + +StreamGenerateionTestCase generation_tput_2sockets_48cores_6 = { + 0, + false, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{6, MAIN_CORE_PROC, 4, 0, 0}, {6, MAIN_CORE_PROC, 4, 1, 1}}, +}; + +StreamGenerateionTestCase generation_tput_2sockets_48cores_7 = { + 100, + true, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{24, MAIN_CORE_PROC, 1, 0, 0}, {24, MAIN_CORE_PROC, 1, 1, 1}}, +}; + +StreamGenerateionTestCase generation_tput_2sockets_48cores_8 = { + 2, + true, + 20, + 0, + 1, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + 
ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{2, MAIN_CORE_PROC, 10, 0, 0}}, +}; + +StreamGenerateionTestCase generation_tput_2sockets_48cores_9 = { + 0, + false, + 0, + 0, + 1, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + false, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{24, MAIN_CORE_PROC, 1, 0, 0}, {24, MAIN_CORE_PROC, 1, 1, 1}}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_StreamsGeneration, + StreamGenerationTests, + ::testing::Values(generation_latency_1sockets_14cores_1, + generation_latency_1sockets_14cores_2, + generation_latency_1sockets_14cores_3, + generation_latency_1sockets_14cores_4, + generation_latency_1sockets_14cores_5, + generation_latency_2sockets_48cores_6, + generation_latency_2sockets_48cores_7, + generation_latency_2sockets_48cores_8, + generation_latency_2sockets_48cores_9, + generation_tput_1sockets_14cores_1, + generation_tput_1sockets_14cores_2, + generation_tput_1sockets_14cores_3, + generation_tput_1sockets_14cores_4, + generation_tput_2sockets_48cores_5, + generation_tput_2sockets_48cores_6, + generation_tput_2sockets_48cores_7, + generation_tput_2sockets_48cores_8, + generation_tput_2sockets_48cores_9)); + +} // namespace \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp 
b/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp similarity index 53% rename from src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp rename to src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp index 0b5a480f0f5ff0..6f97911fe1da70 100644 --- a/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2022 Intel Corporation +// Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -16,363 +16,6 @@ using namespace ov; namespace { -struct SchedulingCoreTypeTestCase { - ov::hint::SchedulingCoreType input_type; - std::vector> proc_type_table; - std::vector> result_table; - ov::hint::SchedulingCoreType output_type; -}; - -class SchedulingCoreTypeTests : public CommonTestUtils::TestsCommon, - public testing::WithParamInterface> { -public: - void SetUp() override { - const auto& test_data = std::get<0>(GetParam()); - auto test_input_type = test_data.input_type; - - std::vector> test_result_table = - ov::intel_cpu::apply_scheduling_core_type(test_input_type, test_data.proc_type_table); - - ASSERT_EQ(test_data.result_table, test_result_table); - ASSERT_EQ(test_data.output_type, test_input_type); - } -}; - -SchedulingCoreTypeTestCase _2sockets_ALL = { - ov::hint::SchedulingCoreType::ANY_CORE, - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - ov::hint::SchedulingCoreType::ANY_CORE, -}; - -SchedulingCoreTypeTestCase _2sockets_P_CORE_ONLY = { - ov::hint::SchedulingCoreType::PCORE_ONLY, - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - ov::hint::SchedulingCoreType::PCORE_ONLY, -}; - -SchedulingCoreTypeTestCase _2sockets_E_CORE_ONLY = { - ov::hint::SchedulingCoreType::ECORE_ONLY, - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 
52, 0, 52}}, - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - ov::hint::SchedulingCoreType::ANY_CORE, - // ov::hint::scheduling_core_type returns ANY_CORE because the platform has no Ecores available to satisfy the - // user's request. -}; - -SchedulingCoreTypeTestCase _1sockets_ALL = { - ov::hint::SchedulingCoreType::ANY_CORE, - {{20, 6, 8, 6}}, - {{20, 6, 8, 6}}, - ov::hint::SchedulingCoreType::ANY_CORE, -}; - -SchedulingCoreTypeTestCase _1sockets_P_CORE_ONLY = { - ov::hint::SchedulingCoreType::PCORE_ONLY, - {{20, 6, 8, 6}}, - {{12, 6, 0, 6}}, - ov::hint::SchedulingCoreType::PCORE_ONLY, -}; - -SchedulingCoreTypeTestCase _1sockets_P_CORE_ONLY_1 = { - ov::hint::SchedulingCoreType::PCORE_ONLY, - {{8, 0, 8, 0}}, - {{8, 0, 8, 0}}, - ov::hint::SchedulingCoreType::ANY_CORE, - // ov::hint::scheduling_core_type returns ANY_CORE because the platform has no Pcore available to satisfy the - // user's request. -}; - -SchedulingCoreTypeTestCase _1sockets_E_CORE_ONLY = { - ov::hint::SchedulingCoreType::ECORE_ONLY, - {{20, 6, 8, 6}}, - {{8, 0, 8, 0}}, - ov::hint::SchedulingCoreType::ECORE_ONLY, -}; - -TEST_P(SchedulingCoreTypeTests, SchedulingCoreType) {} - -INSTANTIATE_TEST_SUITE_P(SchedulingCoreTypeTable, - SchedulingCoreTypeTests, - testing::Values(_2sockets_ALL, - _2sockets_P_CORE_ONLY, - _2sockets_E_CORE_ONLY, - _1sockets_ALL, - _1sockets_P_CORE_ONLY, - _1sockets_P_CORE_ONLY_1, - _1sockets_E_CORE_ONLY)); - -struct UseHTTestCase { - bool input_ht_value; - bool input_ht_changed; - std::string input_pm_hint; - std::vector> proc_type_table; - std::vector> result_table; - bool output_ht_value; -}; - -class UseHTTests : public CommonTestUtils::TestsCommon, public testing::WithParamInterface> { -public: - void SetUp() override { - auto test_data = std::get<0>(GetParam()); - - std::vector> test_result_table = - ov::intel_cpu::apply_hyper_threading(test_data.input_ht_value, - test_data.input_ht_changed, - test_data.input_pm_hint, - test_data.proc_type_table); - - 
ASSERT_EQ(test_data.result_table, test_result_table); - ASSERT_EQ(test_data.input_ht_value, test_data.output_ht_value); - } -}; - -UseHTTestCase _2sockets_false_latency = { - false, - true, - "LATENCY", - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, - false, -}; - -UseHTTestCase _2sockets_false_throughput = { - false, - true, - "THROUGHPUT", - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, - false, -}; - -UseHTTestCase _2sockets_true_latency = { - true, - true, - "LATENCY", - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - true, -}; - -UseHTTestCase _2sockets_true_throughput = { - true, - true, - "THROUGHPUT", - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - true, -}; - -UseHTTestCase _2sockets_default_1_latency = { - false, - false, - "LATENCY", - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, - false, -}; - -UseHTTestCase _2sockets_default_1_throughput = { - false, - false, - "THROUGHPUT", - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, - false, -}; - -UseHTTestCase _2sockets_default_2_latency = { - true, - false, - "LATENCY", - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, - false, -}; - -UseHTTestCase _2sockets_default_2_throughput = { - true, - false, - "THROUGHPUT", - {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, - {{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}}, - false, -}; - -UseHTTestCase _1sockets_1_false_latency = { - false, - true, - "LATENCY", - {{20, 6, 8, 6}}, - {{14, 6, 8, 0}}, - false, -}; - -UseHTTestCase _1sockets_1_false_throughput = { - false, - true, - 
"THROUGHPUT", - {{20, 6, 8, 6}}, - {{14, 6, 8, 0}}, - false, -}; - -UseHTTestCase _1sockets_1_true_latency = { - true, - true, - "LATENCY", - {{20, 6, 8, 6}}, - {{20, 6, 8, 6}}, - true, -}; - -UseHTTestCase _1sockets_1_true_throughput = { - true, - true, - "THROUGHPUT", - {{20, 6, 8, 6}}, - {{20, 6, 8, 6}}, - true, -}; - -UseHTTestCase _1sockets_1_default_1_latency = { - false, - false, - "LATENCY", - {{20, 6, 8, 6}}, - {{14, 6, 8, 0}}, - false, -}; - -UseHTTestCase _1sockets_1_default_1_throughput = { - false, - false, - "THROUGHPUT", - {{20, 6, 8, 6}}, - {{20, 6, 8, 6}}, - true, -}; - -UseHTTestCase _1sockets_1_default_2_latency = { - true, - false, - "LATENCY", - {{20, 6, 8, 6}}, - {{14, 6, 8, 0}}, - false, -}; - -UseHTTestCase _1sockets_1_default_2_throughput = { - true, - false, - "THROUGHPUT", - {{20, 6, 8, 6}}, - {{20, 6, 8, 6}}, - true, -}; - -UseHTTestCase _1sockets_2_false_latency = { - false, - true, - "LATENCY", - {{12, 6, 0, 6}}, - {{6, 6, 0, 0}}, - false, -}; - -UseHTTestCase _1sockets_2_false_throughput = { - false, - true, - "THROUGHPUT", - {{12, 6, 0, 6}}, - {{6, 6, 0, 0}}, - false, -}; - -UseHTTestCase _1sockets_2_true_latency = { - true, - true, - "LATENCY", - {{12, 6, 0, 6}}, - {{12, 6, 0, 6}}, - true, -}; - -UseHTTestCase _1sockets_2_true_throughput = { - true, - true, - "THROUGHPUT", - {{12, 6, 0, 6}}, - {{12, 6, 0, 6}}, - true, -}; - -UseHTTestCase _1sockets_2_default_1_latency = { - false, - false, - "LATENCY", - {{12, 6, 0, 6}}, - {{6, 6, 0, 0}}, - false, -}; - -UseHTTestCase _1sockets_2_default_1_throughput = { - false, - false, - "THROUGHPUT", - {{12, 6, 0, 6}}, - {{12, 6, 0, 6}}, - true, -}; - -UseHTTestCase _1sockets_2_default_2_latency = { - true, - false, - "LATENCY", - {{12, 6, 0, 6}}, - {{6, 6, 0, 0}}, - false, -}; - -UseHTTestCase _1sockets_2_default_2_throughput = { - true, - false, - "THROUGHPUT", - {{12, 6, 0, 6}}, - {{12, 6, 0, 6}}, - true, -}; - -TEST_P(UseHTTests, UseHT) {} - -INSTANTIATE_TEST_SUITE_P(UseHTTable, - 
UseHTTests, - testing::Values(_2sockets_false_latency, - _2sockets_true_latency, - _2sockets_default_1_latency, - _2sockets_default_2_latency, - _1sockets_1_false_latency, - _1sockets_1_true_latency, - _1sockets_1_default_1_latency, - _1sockets_1_default_2_latency, - _1sockets_2_false_latency, - _1sockets_2_true_latency, - _1sockets_2_default_1_latency, - _1sockets_2_default_2_latency, - _2sockets_false_throughput, - _2sockets_true_throughput, - _2sockets_default_1_throughput, - _2sockets_default_2_throughput, - _1sockets_1_false_throughput, - _1sockets_1_true_throughput, - _1sockets_1_default_1_throughput, - _1sockets_1_default_2_throughput, - _1sockets_2_false_throughput, - _1sockets_2_true_throughput, - _1sockets_2_default_1_throughput, - _1sockets_2_default_2_throughput)); - struct StreamsCalculationTestCase { int input_streams; bool input_streams_chaged; @@ -380,7 +23,7 @@ struct StreamsCalculationTestCase { int input_infer_requests; int model_prefer_threads; std::string input_perf_hint; - ov::intel_cpu::Config::LatencyThreadingMode scopeOflatencyCandidate; + ov::intel_cpu::Config::LatencyThreadingMode latencyThreadingMode; std::vector> proc_type_table; std::vector> stream_info_table; }; @@ -398,7 +41,7 @@ class StreamsCalculationTests : public CommonTestUtils::TestsCommon, test_data.input_infer_requests, test_data.model_prefer_threads, test_data.input_perf_hint, - test_data.scopeOflatencyCandidate, + test_data.latencyThreadingMode, test_data.proc_type_table); ASSERT_EQ(test_data.stream_info_table, test_stream_info_table); @@ -418,8 +61,9 @@ StreamsCalculationTestCase _2sockets_104cores_latency_platform_1 = { // simulation {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, - {104, 52, 0, 52, 1, 1}}, // param[in]: the proc_type_table in this simulation - {{1, MAIN_CORE_PROC, 208}}, // param[expected out]: the expected result of streams_info_table in this simulation + {104, 52, 0, 52, 1, 1}}, // param[in]: the proc_type_table in this simulation + {{1, 
MAIN_CORE_PROC, 208, -1, -1}}, // param[expected out]: the expected result of streams_info_table in this + // simulation }; StreamsCalculationTestCase _2sockets_104cores_latency_platform_2 = { @@ -431,9 +75,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_platform_2 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{104, 104, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 1}}, - {{1, MAIN_CORE_PROC, 104}}, + {{1, MAIN_CORE_PROC, 104, -1, -1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_platform_3 = { 1, false, @@ -447,9 +90,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_platform_3 = { {52, 26, 0, 26, 1, 0}, {52, 26, 0, 26, 2, 1}, {52, 26, 0, 26, 3, 1}}, - {{1, MAIN_CORE_PROC, 208}}, + {{1, MAIN_CORE_PROC, 208, -1, -1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_platform_4 = { 1, false, @@ -459,9 +101,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_platform_4 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{104, 104, 0, 0, -1, -1}, {26, 13, 0, 0, 0, 0}, {26, 13, 0, 0, 1, 0}, {26, 13, 0, 0, 2, 1}, {26, 13, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 104}}, + {{1, MAIN_CORE_PROC, 104, -1, -1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_socket_1 = { 1, false, @@ -471,9 +112,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_1 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{2, MAIN_CORE_PROC, 104}}, + {{1, MAIN_CORE_PROC, 104, 0, 0}, {1, MAIN_CORE_PROC, 104, 1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_socket_2 = { 1, false, @@ -483,9 +123,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_2 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, {{104, 104, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 1}}, - {{2, MAIN_CORE_PROC, 52}}, + {{1, 
MAIN_CORE_PROC, 52, 0, 0}, {1, MAIN_CORE_PROC, 52, 1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_socket_3 = { 1, false, @@ -499,9 +138,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_3 = { {52, 26, 0, 26, 1, 0}, {52, 26, 0, 26, 2, 1}, {52, 26, 0, 26, 3, 1}}, - {{2, MAIN_CORE_PROC, 104}}, + {{1, MAIN_CORE_PROC, 104, -1, 0}, {1, MAIN_CORE_PROC, 104, -1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_socket_4 = { 1, false, @@ -511,9 +149,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_4 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, {{104, 104, 0, 0, -1, -1}, {26, 13, 0, 0, 0, 0}, {26, 13, 0, 0, 1, 0}, {26, 13, 0, 0, 2, 1}, {26, 13, 0, 0, 3, 1}}, - {{2, MAIN_CORE_PROC, 52}}, + {{1, MAIN_CORE_PROC, 52, -1, 0}, {1, MAIN_CORE_PROC, 52, -1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_socket_5 = { 1, false, @@ -523,9 +160,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_5 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, {{60, 60, 0, 0, -1, -1}, {10, 10, 0, 0, 0, 0}, {10, 10, 0, 0, 1, 0}, {20, 20, 0, 0, 2, 1}, {20, 20, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 40}}, + {{1, MAIN_CORE_PROC, 40, -1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_socket_6 = { 1, false, @@ -535,9 +171,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_6 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, {{60, 60, 0, 0, -1, -1}, {10, 10, 0, 0, 0, 0}, {20, 20, 0, 0, 1, 1}, {10, 10, 0, 0, 2, 0}, {20, 20, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 40}}, + {{1, MAIN_CORE_PROC, 40, -1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_socket_7 = { 1, true, @@ -547,9 +182,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_7 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, {{104, 104, 0, 0, -1, -1}, {26, 13, 0, 0, 0, 0}, {26, 13, 0, 0, 1, 0}, {26, 13, 0, 
0, 2, 1}, {26, 13, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 104}}, + {{1, MAIN_CORE_PROC, 104, -1, -1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_node_1 = { 1, false, @@ -559,9 +193,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_node_1 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_NUMA_NODE, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{2, MAIN_CORE_PROC, 104}}, + {{1, MAIN_CORE_PROC, 104, 0, 0}, {1, MAIN_CORE_PROC, 104, 1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_node_2 = { 1, false, @@ -571,9 +204,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_node_2 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_NUMA_NODE, {{104, 104, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 1}}, - {{2, MAIN_CORE_PROC, 52}}, + {{1, MAIN_CORE_PROC, 52, 0, 0}, {1, MAIN_CORE_PROC, 52, 1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_node_3 = { 1, false, @@ -587,9 +219,11 @@ StreamsCalculationTestCase _2sockets_104cores_latency_node_3 = { {52, 26, 0, 26, 1, 0}, {52, 26, 0, 26, 2, 1}, {52, 26, 0, 26, 3, 1}}, - {{4, MAIN_CORE_PROC, 52}}, + {{1, MAIN_CORE_PROC, 52, 0, 0}, + {1, MAIN_CORE_PROC, 52, 1, 0}, + {1, MAIN_CORE_PROC, 52, 2, 1}, + {1, MAIN_CORE_PROC, 52, 3, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_node_4 = { 1, false, @@ -599,9 +233,11 @@ StreamsCalculationTestCase _2sockets_104cores_latency_node_4 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_NUMA_NODE, {{104, 104, 0, 0, -1, -1}, {26, 13, 0, 0, 0, 0}, {26, 13, 0, 0, 1, 0}, {26, 13, 0, 0, 2, 1}, {26, 13, 0, 0, 3, 1}}, - {{4, MAIN_CORE_PROC, 26}}, + {{1, MAIN_CORE_PROC, 26, 0, 0}, + {1, MAIN_CORE_PROC, 26, 1, 0}, + {1, MAIN_CORE_PROC, 26, 2, 1}, + {1, MAIN_CORE_PROC, 26, 3, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_node_5 = { 1, true, @@ -611,9 +247,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_node_5 = { 
"LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_NUMA_NODE, {{104, 104, 0, 0, -1, -1}, {26, 13, 0, 0, 0, 0}, {26, 13, 0, 0, 1, 0}, {26, 13, 0, 0, 2, 1}, {26, 13, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 104}}, + {{1, MAIN_CORE_PROC, 104, -1, -1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_1 = { 1, false, @@ -623,9 +258,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_1 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{1, MAIN_CORE_PROC, 20}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_2 = { 1, false, @@ -635,9 +269,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_2 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{1, MAIN_CORE_PROC, 20}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_3 = { 1, false, @@ -647,9 +280,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_3 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{1, MAIN_CORE_PROC, 208}}, + {{1, MAIN_CORE_PROC, 208, -1, -1}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_4 = { 1, true, @@ -659,9 +291,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_4 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{1, MAIN_CORE_PROC, 20}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_5 = { 1, true, @@ -671,9 +302,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_5 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 
0}, {104, 52, 0, 52, 1, 1}}, - {{1, MAIN_CORE_PROC, 20}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, }; - StreamsCalculationTestCase _2sockets_104cores_latency_6 = { 1, true, @@ -683,9 +313,8 @@ StreamsCalculationTestCase _2sockets_104cores_latency_6 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{1, MAIN_CORE_PROC, 208}}, + {{1, MAIN_CORE_PROC, 208, -1, -1}}, }; - StreamsCalculationTestCase _2sockets_104cores_tput_1 = { 1, false, @@ -695,9 +324,11 @@ StreamsCalculationTestCase _2sockets_104cores_tput_1 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{52, MAIN_CORE_PROC, 4}}, + {{13, MAIN_CORE_PROC, 4, 0, 0}, + {13, MAIN_CORE_PROC, 4, 1, 1}, + {13, HYPER_THREADING_PROC, 4, 0, 0}, + {13, HYPER_THREADING_PROC, 4, 1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_tput_2 = { 2, true, @@ -707,9 +338,8 @@ StreamsCalculationTestCase _2sockets_104cores_tput_2 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{2, MAIN_CORE_PROC, 104}}, + {{1, MAIN_CORE_PROC, 104, 0, 0}, {1, MAIN_CORE_PROC, 104, 1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_tput_3 = { 1, false, @@ -719,9 +349,8 @@ StreamsCalculationTestCase _2sockets_104cores_tput_3 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{5, MAIN_CORE_PROC, 4}}, + {{5, MAIN_CORE_PROC, 4, 0, 0}}, }; - StreamsCalculationTestCase _2sockets_104cores_tput_4 = { 2, true, @@ -731,9 +360,8 @@ StreamsCalculationTestCase _2sockets_104cores_tput_4 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - 
{{2, MAIN_CORE_PROC, 10}}, + {{2, MAIN_CORE_PROC, 10, 0, 0}}, }; - StreamsCalculationTestCase _2sockets_104cores_tput_5 = { 1, false, @@ -743,9 +371,11 @@ StreamsCalculationTestCase _2sockets_104cores_tput_5 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{208, MAIN_CORE_PROC, 1}}, + {{52, MAIN_CORE_PROC, 1, 0, 0}, + {52, MAIN_CORE_PROC, 1, 1, 1}, + {52, HYPER_THREADING_PROC, 1, 0, 0}, + {52, HYPER_THREADING_PROC, 1, 1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_tput_6 = { 1, false, @@ -755,9 +385,11 @@ StreamsCalculationTestCase _2sockets_104cores_tput_6 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{104, MAIN_CORE_PROC, 2}}, + {{26, MAIN_CORE_PROC, 2, 0, 0}, + {26, MAIN_CORE_PROC, 2, 1, 1}, + {26, HYPER_THREADING_PROC, 2, 0, 0}, + {26, HYPER_THREADING_PROC, 2, 1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_tput_7 = { 1, false, @@ -767,9 +399,55 @@ StreamsCalculationTestCase _2sockets_104cores_tput_7 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{26, MAIN_CORE_PROC, 8}}, + {{6, MAIN_CORE_PROC, 8, 0, 0}, + {6, MAIN_CORE_PROC, 8, 1, 1}, + {6, HYPER_THREADING_PROC, 8, 0, 0}, + {6, HYPER_THREADING_PROC, 8, 1, 1}, + {1, MAIN_CORE_PROC, 8, -1, -1}, + {1, HYPER_THREADING_PROC, 8, -1, -1}}, +}; +StreamsCalculationTestCase _2sockets_104cores_tput_7_1 = { + 26, + true, + 0, + 0, + 8, + "THROUGHPUT", + ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, + {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, + {{6, MAIN_CORE_PROC, 8, 0, 0}, + {6, MAIN_CORE_PROC, 8, 1, 1}, + {6, HYPER_THREADING_PROC, 8, 0, 0}, + {6, HYPER_THREADING_PROC, 8, 1, 1}, + {1, MAIN_CORE_PROC, 8, -1, -1}, + {1, 
HYPER_THREADING_PROC, 8, -1, -1}}, +}; +StreamsCalculationTestCase _2sockets_104cores_tput_7_2 = { + 1, + false, + 0, + 0, + 4, + "THROUGHPUT", + ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, + {{208, 104, 0, 104, -1, -1}, + {52, 26, 0, 26, 0, 0}, + {52, 26, 0, 26, 1, 0}, + {52, 26, 0, 26, 2, 1}, + {52, 26, 0, 26, 3, 1}}, + {{6, MAIN_CORE_PROC, 4, 0, 0}, + {6, MAIN_CORE_PROC, 4, 1, 0}, + {6, MAIN_CORE_PROC, 4, 2, 1}, + {6, MAIN_CORE_PROC, 4, 3, 1}, + {6, HYPER_THREADING_PROC, 4, 0, 0}, + {6, HYPER_THREADING_PROC, 4, 1, 0}, + {6, HYPER_THREADING_PROC, 4, 2, 1}, + {6, HYPER_THREADING_PROC, 4, 3, 1}, + {1, MAIN_CORE_PROC, 4, -1, 0}, + {1, MAIN_CORE_PROC, 4, -1, 1}, + {1, HYPER_THREADING_PROC, 4, -1, 0}, + {1, HYPER_THREADING_PROC, 4, -1, 1}}, }; - StreamsCalculationTestCase _2sockets_104cores_tput_8 = { 1, false, @@ -779,7 +457,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_8 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{5, MAIN_CORE_PROC, 8}}, + {{5, MAIN_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_9 = { @@ -791,7 +469,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_9 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{2, MAIN_CORE_PROC, 10}}, + {{2, MAIN_CORE_PROC, 10, 0, 0}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_10 = { @@ -803,7 +481,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_10 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{2, MAIN_CORE_PROC, 104}}, + {{1, MAIN_CORE_PROC, 104, 0, 0}, {1, MAIN_CORE_PROC, 104, 1, 1}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_11 = { @@ -815,7 +493,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_11 
= { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{2, MAIN_CORE_PROC, 104}}, + {{1, MAIN_CORE_PROC, 104, 0, 0}, {1, MAIN_CORE_PROC, 104, 1, 1}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_12 = { @@ -827,7 +505,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_12 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, - {{2, MAIN_CORE_PROC, 104}}, + {{1, MAIN_CORE_PROC, 104, 0, 0}, {1, MAIN_CORE_PROC, 104, 1, 1}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_13 = { @@ -839,7 +517,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_13 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{104, 104, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 1}}, - {{26, MAIN_CORE_PROC, 4}}, + {{13, MAIN_CORE_PROC, 4, 0, 0}, {13, MAIN_CORE_PROC, 4, 1, 1}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_14 = { @@ -851,7 +529,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_14 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{104, 104, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 1}}, - {{2, MAIN_CORE_PROC, 52}}, + {{1, MAIN_CORE_PROC, 52, 0, 0}, {1, MAIN_CORE_PROC, 52, 1, 1}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_15 = { @@ -863,7 +541,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_15 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{104, 104, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 1}}, - {{104, MAIN_CORE_PROC, 1}}, + {{52, MAIN_CORE_PROC, 1, 0, 0}, {52, MAIN_CORE_PROC, 1, 1, 1}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_16 = { @@ -875,7 +553,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_16 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{104, 
104, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 1}}, - {{52, MAIN_CORE_PROC, 2}}, + {{26, MAIN_CORE_PROC, 2, 0, 0}, {26, MAIN_CORE_PROC, 2, 1, 1}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_17 = { @@ -887,7 +565,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_17 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{104, 104, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 1}}, - {{13, MAIN_CORE_PROC, 8}}, + {{6, MAIN_CORE_PROC, 8, 0, 0}, {6, MAIN_CORE_PROC, 8, 1, 1}, {1, MAIN_CORE_PROC, 8, -1, -1}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_18 = { @@ -899,7 +577,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_18 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{104, 104, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 1}}, - {{2, MAIN_CORE_PROC, 52}}, + {{1, MAIN_CORE_PROC, 52, 0, 0}, {1, MAIN_CORE_PROC, 52, 1, 1}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_19 = { @@ -911,7 +589,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_19 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{104, 104, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 1}}, - {{2, MAIN_CORE_PROC, 52}}, + {{1, MAIN_CORE_PROC, 52, 0, 0}, {1, MAIN_CORE_PROC, 52, 1, 1}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_20 = { @@ -923,7 +601,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_20 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{104, 104, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 1}}, - {{2, MAIN_CORE_PROC, 52}}, + {{1, MAIN_CORE_PROC, 52, 0, 0}, {1, MAIN_CORE_PROC, 52, 1, 1}}, }; StreamsCalculationTestCase _2sockets_48cores_latency_1 = { @@ -935,7 +613,7 @@ StreamsCalculationTestCase _2sockets_48cores_latency_1 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{1, 
MAIN_CORE_PROC, 48}}, + {{1, MAIN_CORE_PROC, 48, -1, -1}}, }; StreamsCalculationTestCase _2sockets_48cores_tput_1 = { @@ -947,7 +625,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_1 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{12, MAIN_CORE_PROC, 4}}, + {{6, MAIN_CORE_PROC, 4, 0, 0}, {6, MAIN_CORE_PROC, 4, 1, 1}}, }; StreamsCalculationTestCase _2sockets_48cores_tput_2 = { @@ -959,7 +637,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_2 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{48, MAIN_CORE_PROC, 1}}, + {{24, MAIN_CORE_PROC, 1, 0, 0}, {24, MAIN_CORE_PROC, 1, 1, 1}}, }; StreamsCalculationTestCase _2sockets_48cores_tput_3 = { @@ -971,7 +649,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_3 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{12, MAIN_CORE_PROC, 4}}, + {{6, MAIN_CORE_PROC, 4, 0, 0}, {6, MAIN_CORE_PROC, 4, 1, 1}}, }; StreamsCalculationTestCase _2sockets_48cores_tput_4 = { @@ -983,7 +661,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_4 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{2, MAIN_CORE_PROC, 10}}, + {{2, MAIN_CORE_PROC, 10, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_1 = { @@ -995,7 +673,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_1 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{14, 6, 8, 0, 0, 0}}, - {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, + {{1, ALL_PROC, 14, 0, 0}, {0, MAIN_CORE_PROC, 6, 0, 0}, {0, EFFICIENT_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_2 = { @@ -1007,7 +685,7 
@@ StreamsCalculationTestCase _1sockets_14cores_latency_2 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 4}}, + {{1, ALL_PROC, 10, 0, 0}, {0, MAIN_CORE_PROC, 6, 0, 0}, {0, EFFICIENT_CORE_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_3 = { @@ -1019,7 +697,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_3 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, MAIN_CORE_PROC, 12}}, + {{1, MAIN_CORE_PROC, 12, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_4 = { @@ -1031,7 +709,10 @@ StreamsCalculationTestCase _1sockets_14cores_latency_4 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, ALL_PROC, 20}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 6}}, + {{1, ALL_PROC, 20, 0, 0}, + {0, MAIN_CORE_PROC, 6, 0, 0}, + {0, EFFICIENT_CORE_PROC, 8, 0, 0}, + {0, HYPER_THREADING_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_5 = { @@ -1043,7 +724,10 @@ StreamsCalculationTestCase _1sockets_14cores_latency_5 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, ALL_PROC, 20}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 6}}, + {{1, ALL_PROC, 20, 0, 0}, + {0, MAIN_CORE_PROC, 6, 0, 0}, + {0, EFFICIENT_CORE_PROC, 8, 0, 0}, + {0, HYPER_THREADING_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_6 = { @@ -1055,7 +739,10 @@ StreamsCalculationTestCase _1sockets_14cores_latency_6 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, ALL_PROC, 20}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 6}}, + {{1, ALL_PROC, 20, 0, 0}, + {0, MAIN_CORE_PROC, 6, 0, 0}, + {0, 
EFFICIENT_CORE_PROC, 8, 0, 0}, + {0, HYPER_THREADING_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_7 = { @@ -1067,7 +754,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_7 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{14, 6, 8, 0, 0, 0}}, - {{1, MAIN_CORE_PROC, 6}}, + {{1, MAIN_CORE_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_8 = { @@ -1079,7 +766,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_8 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{14, 6, 8, 0, 0, 0}}, - {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, + {{1, ALL_PROC, 14, 0, 0}, {0, MAIN_CORE_PROC, 6, 0, 0}, {0, EFFICIENT_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_9 = { @@ -1091,7 +778,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_9 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{14, 6, 8, 0, 0, 0}}, - {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, + {{1, ALL_PROC, 14, 0, 0}, {0, MAIN_CORE_PROC, 6, 0, 0}, {0, EFFICIENT_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_10 = { @@ -1103,7 +790,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_10 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{14, 6, 8, 0, 0, 0}}, - {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, + {{1, ALL_PROC, 14, 0, 0}, {0, MAIN_CORE_PROC, 6, 0, 0}, {0, EFFICIENT_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_11 = { @@ -1115,7 +802,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_11 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 4}}, + {{1, ALL_PROC, 10, 0, 0}, {0, MAIN_CORE_PROC, 6, 0, 0}, {0, EFFICIENT_CORE_PROC, 4, 0, 0}}, }; 
StreamsCalculationTestCase _1sockets_14cores_latency_12 = { @@ -1127,7 +814,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_12 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, MAIN_CORE_PROC, 12}}, + {{1, MAIN_CORE_PROC, 12, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_13 = { @@ -1139,7 +826,10 @@ StreamsCalculationTestCase _1sockets_14cores_latency_13 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, ALL_PROC, 20}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 6}}, + {{1, ALL_PROC, 20, 0, 0}, + {0, MAIN_CORE_PROC, 6, 0, 0}, + {0, EFFICIENT_CORE_PROC, 8, 0, 0}, + {0, HYPER_THREADING_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_14 = { @@ -1151,7 +841,10 @@ StreamsCalculationTestCase _1sockets_14cores_latency_14 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, ALL_PROC, 20}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 6}}, + {{1, ALL_PROC, 20, 0, 0}, + {0, MAIN_CORE_PROC, 6, 0, 0}, + {0, EFFICIENT_CORE_PROC, 8, 0, 0}, + {0, HYPER_THREADING_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_15 = { @@ -1163,7 +856,10 @@ StreamsCalculationTestCase _1sockets_14cores_latency_15 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, ALL_PROC, 20}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 6}}, + {{1, ALL_PROC, 20, 0, 0}, + {0, MAIN_CORE_PROC, 6, 0, 0}, + {0, EFFICIENT_CORE_PROC, 8, 0, 0}, + {0, HYPER_THREADING_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_16 = { @@ -1175,7 +871,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_16 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{14, 6, 8, 0, 0, 0}}, - {{1, MAIN_CORE_PROC, 
6}}, + {{1, MAIN_CORE_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_17 = { @@ -1187,7 +883,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_17 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{14, 6, 8, 0, 0, 0}}, - {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, + {{1, ALL_PROC, 14, 0, 0}, {0, MAIN_CORE_PROC, 6, 0, 0}, {0, EFFICIENT_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_latency_18 = { @@ -1199,7 +895,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_18 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{14, 6, 8, 0, 0, 0}}, - {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, + {{1, ALL_PROC, 14, 0, 0}, {0, MAIN_CORE_PROC, 6, 0, 0}, {0, EFFICIENT_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_1 = { @@ -1211,7 +907,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_1 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}}, + {{2, MAIN_CORE_PROC, 3, 0, 0}, {2, EFFICIENT_CORE_PROC, 3, 0, 0}, {2, HYPER_THREADING_PROC, 3, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_2 = { @@ -1223,7 +919,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_2 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}}, + {{1, MAIN_CORE_PROC, 6, 0, 0}, {1, EFFICIENT_CORE_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_3 = { @@ -1235,7 +931,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_3 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}}, + {{2, MAIN_CORE_PROC, 3, 0, 0}, {2, EFFICIENT_CORE_PROC, 3, 0, 0}}, }; 
StreamsCalculationTestCase _1sockets_14cores_tput_4 = { @@ -1247,7 +943,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_4 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}}, + {{2, MAIN_CORE_PROC, 3, 0, 0}, {2, EFFICIENT_CORE_PROC, 3, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_5 = { @@ -1259,7 +955,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_5 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{6, MAIN_CORE_PROC, 1}, {4, EFFICIENT_CORE_PROC, 2}, {6, HYPER_THREADING_PROC, 1}}, + {{6, MAIN_CORE_PROC, 1, 0, 0}, {4, EFFICIENT_CORE_PROC, 2, 0, 0}, {6, HYPER_THREADING_PROC, 1, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_6 = { @@ -1271,7 +967,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_6 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{3, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {3, HYPER_THREADING_PROC, 2}}, + {{3, MAIN_CORE_PROC, 2, 0, 0}, {4, EFFICIENT_CORE_PROC, 2, 0, 0}, {3, HYPER_THREADING_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_7 = { @@ -1283,7 +979,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_7 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{6, MAIN_CORE_PROC, 1}, {8, EFFICIENT_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}}, + {{6, MAIN_CORE_PROC, 1, 0, 0}, {8, EFFICIENT_CORE_PROC, 1, 0, 0}, {6, HYPER_THREADING_PROC, 1, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_8 = { @@ -1295,7 +991,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_8 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}}, + {{2, MAIN_CORE_PROC, 3, 0, 0}, {2, 
EFFICIENT_CORE_PROC, 3, 0, 0}, {2, HYPER_THREADING_PROC, 3, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_9 = { @@ -1307,7 +1003,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_9 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}}, + {{2, MAIN_CORE_PROC, 3, 0, 0}, {2, EFFICIENT_CORE_PROC, 3, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_10 = { @@ -1319,7 +1015,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_10 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}}, + {{2, MAIN_CORE_PROC, 3, 0, 0}, {2, EFFICIENT_CORE_PROC, 3, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_11 = { @@ -1331,7 +1027,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_11 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}}, + {{1, MAIN_CORE_PROC, 6, 0, 0}, {1, EFFICIENT_CORE_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_12 = { @@ -1343,7 +1039,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_12 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}}, + {{1, MAIN_CORE_PROC, 6, 0, 0}, {1, EFFICIENT_CORE_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_13 = { @@ -1355,7 +1051,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_13 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{1, MAIN_CORE_PROC, 1}}, + {{1, MAIN_CORE_PROC, 1, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_14 = { @@ -1367,7 +1063,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_14 = { "THROUGHPUT", 
ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{6, MAIN_CORE_PROC, 1}, {1, EFFICIENT_CORE_PROC, 2}}, + {{6, MAIN_CORE_PROC, 1, 0, 0}, {1, EFFICIENT_CORE_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_15 = { @@ -1379,7 +1075,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_15 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{6, MAIN_CORE_PROC, 1}, {3, EFFICIENT_CORE_PROC, 2}}, + {{6, MAIN_CORE_PROC, 1, 0, 0}, {3, EFFICIENT_CORE_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_16 = { @@ -1391,7 +1087,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_16 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 8, 6, 0, 0}}, - {{6, MAIN_CORE_PROC, 1}, {4, EFFICIENT_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 1}}, + {{6, MAIN_CORE_PROC, 1, 0, 0}, {4, EFFICIENT_CORE_PROC, 2, 0, 0}, {1, HYPER_THREADING_PROC, 1, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_latency_1 = { @@ -1403,7 +1099,10 @@ StreamsCalculationTestCase _1sockets_10cores_latency_1 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 2, 8, 2, 0, 0}}, - {{1, ALL_PROC, 12}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 2}}, + {{1, ALL_PROC, 12, 0, 0}, + {0, MAIN_CORE_PROC, 2, 0, 0}, + {0, EFFICIENT_CORE_PROC, 8, 0, 0}, + {0, HYPER_THREADING_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_latency_2 = { @@ -1415,7 +1114,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_2 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 2, 8, 2, 0, 0}}, - {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 6}}, + {{1, ALL_PROC, 8, 0, 0}, {0, MAIN_CORE_PROC, 2, 0, 0}, {0, EFFICIENT_CORE_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_latency_3 = { @@ -1427,7 +1126,7 @@ StreamsCalculationTestCase 
_1sockets_10cores_latency_3 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 2, 8, 2, 0, 0}}, - {{1, MAIN_CORE_PROC, 4}}, + {{1, MAIN_CORE_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_latency_4 = { @@ -1439,7 +1138,10 @@ StreamsCalculationTestCase _1sockets_10cores_latency_4 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 2, 8, 2, 0, 0}}, - {{1, ALL_PROC, 12}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 2}}, + {{1, ALL_PROC, 12, 0, 0}, + {0, MAIN_CORE_PROC, 2, 0, 0}, + {0, EFFICIENT_CORE_PROC, 8, 0, 0}, + {0, HYPER_THREADING_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_latency_5 = { @@ -1451,7 +1153,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_5 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{10, 2, 8, 0, 0, 0}}, - {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}}, + {{1, ALL_PROC, 10, 0, 0}, {0, MAIN_CORE_PROC, 2, 0, 0}, {0, EFFICIENT_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_latency_6 = { @@ -1463,7 +1165,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_6 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{10, 2, 8, 0, 0, 0}}, - {{1, MAIN_CORE_PROC, 2}}, + {{1, MAIN_CORE_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_latency_7 = { @@ -1475,7 +1177,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_7 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{10, 2, 8, 0, 0, 0}}, - {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}}, + {{1, ALL_PROC, 10, 0, 0}, {0, MAIN_CORE_PROC, 2, 0, 0}, {0, EFFICIENT_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_tput_1 = { @@ -1487,7 +1189,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_1 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 2, 8, 
2, 0, 0}}, - {{1, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}}, + {{1, MAIN_CORE_PROC, 2, 0, 0}, {4, EFFICIENT_CORE_PROC, 2, 0, 0}, {1, HYPER_THREADING_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_tput_2 = { @@ -1499,7 +1201,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_2 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 2, 8, 2, 0, 0}}, - {{1, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}}, + {{1, MAIN_CORE_PROC, 2, 0, 0}, {1, EFFICIENT_CORE_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_tput_3 = { @@ -1511,7 +1213,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_3 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 2, 8, 2, 0, 0}}, - {{1, MAIN_CORE_PROC, 2}, {3, EFFICIENT_CORE_PROC, 2}}, + {{1, MAIN_CORE_PROC, 2, 0, 0}, {3, EFFICIENT_CORE_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_tput_4 = { @@ -1523,7 +1225,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_4 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 2, 8, 2, 0, 0}}, - {{1, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}}, + {{1, MAIN_CORE_PROC, 2, 0, 0}, {2, EFFICIENT_CORE_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_tput_5 = { @@ -1535,7 +1237,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_5 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 2, 8, 2, 0, 0}}, - {{2, MAIN_CORE_PROC, 1}, {4, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 1}}, + {{2, MAIN_CORE_PROC, 1, 0, 0}, {4, EFFICIENT_CORE_PROC, 2, 0, 0}, {2, HYPER_THREADING_PROC, 1, 0, 0}}, }; StreamsCalculationTestCase _1sockets_10cores_tput_6 = { @@ -1547,7 +1249,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_6 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 2, 8, 2, 0, 0}}, - {{1, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {1, 
HYPER_THREADING_PROC, 2}}, + {{1, MAIN_CORE_PROC, 2, 0, 0}, {4, EFFICIENT_CORE_PROC, 2, 0, 0}, {1, HYPER_THREADING_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_latency_1 = { @@ -1559,7 +1261,10 @@ StreamsCalculationTestCase _1sockets_8cores_latency_1 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 4, 4, 4, 0, 0}}, - {{1, ALL_PROC, 12}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}, {0, HYPER_THREADING_PROC, 4}}, + {{1, ALL_PROC, 12, 0, 0}, + {0, MAIN_CORE_PROC, 4, 0, 0}, + {0, EFFICIENT_CORE_PROC, 4, 0, 0}, + {0, HYPER_THREADING_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_latency_2 = { @@ -1571,7 +1276,10 @@ StreamsCalculationTestCase _1sockets_8cores_latency_2 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 4, 4, 4, 0, 0}}, - {{1, ALL_PROC, 12}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}, {0, HYPER_THREADING_PROC, 4}}, + {{1, ALL_PROC, 12, 0, 0}, + {0, MAIN_CORE_PROC, 4, 0, 0}, + {0, EFFICIENT_CORE_PROC, 4, 0, 0}, + {0, HYPER_THREADING_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_latency_3 = { @@ -1583,7 +1291,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_3 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 4, 4, 4, 0, 0}}, - {{1, MAIN_CORE_PROC, 8}}, + {{1, MAIN_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_latency_4 = { @@ -1595,7 +1303,10 @@ StreamsCalculationTestCase _1sockets_8cores_latency_4 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 4, 4, 4, 0, 0}}, - {{1, ALL_PROC, 12}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}, {0, HYPER_THREADING_PROC, 4}}, + {{1, ALL_PROC, 12, 0, 0}, + {0, MAIN_CORE_PROC, 4, 0, 0}, + {0, EFFICIENT_CORE_PROC, 4, 0, 0}, + {0, HYPER_THREADING_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_latency_5 = { @@ -1607,7 +1318,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_5 = { 
"LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{8, 4, 4, 0, 0, 0}}, - {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}}, + {{1, ALL_PROC, 8, 0, 0}, {0, MAIN_CORE_PROC, 4, 0, 0}, {0, EFFICIENT_CORE_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_latency_6 = { @@ -1619,7 +1330,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_6 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{8, 4, 4, 0, 0, 0}}, - {{1, MAIN_CORE_PROC, 4}}, + {{1, MAIN_CORE_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_latency_7 = { @@ -1631,7 +1342,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_7 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{8, 4, 4, 0, 0, 0}}, - {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}}, + {{1, ALL_PROC, 8, 0, 0}, {0, MAIN_CORE_PROC, 4, 0, 0}, {0, EFFICIENT_CORE_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_tput_1 = { @@ -1643,7 +1354,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_1 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 4, 4, 4, 0, 0}}, - {{1, MAIN_CORE_PROC, 4}, {1, EFFICIENT_CORE_PROC, 4}, {1, HYPER_THREADING_PROC, 4}}, + {{1, MAIN_CORE_PROC, 4, 0, 0}, {1, EFFICIENT_CORE_PROC, 4, 0, 0}, {1, HYPER_THREADING_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_tput_2 = { @@ -1655,7 +1366,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_2 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 4, 4, 4, 0, 0}}, - {{1, MAIN_CORE_PROC, 4}, {1, EFFICIENT_CORE_PROC, 4}}, + {{1, MAIN_CORE_PROC, 4, 0, 0}, {1, EFFICIENT_CORE_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_tput_3 = { @@ -1667,7 +1378,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_3 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 4, 4, 4, 0, 0}}, - {{2, MAIN_CORE_PROC, 2}, {2, 
EFFICIENT_CORE_PROC, 2}}, + {{2, MAIN_CORE_PROC, 2, 0, 0}, {2, EFFICIENT_CORE_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_tput_4 = { @@ -1679,7 +1390,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_4 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 4, 4, 4, 0, 0}}, - {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}}, + {{2, MAIN_CORE_PROC, 2, 0, 0}, {2, EFFICIENT_CORE_PROC, 2, 0, 0}, {2, HYPER_THREADING_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_tput_5 = { @@ -1691,7 +1402,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_5 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 4, 4, 4, 0, 0}}, - {{2, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}}, + {{2, MAIN_CORE_PROC, 2, 0, 0}, {1, EFFICIENT_CORE_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_tput_6 = { @@ -1703,7 +1414,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_6 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 4, 4, 4, 0, 0}}, - {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}}, + {{2, MAIN_CORE_PROC, 2, 0, 0}, {2, EFFICIENT_CORE_PROC, 2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_tput_7 = { @@ -1715,7 +1426,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_7 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 4, 4, 4, 0, 0}}, - {{4, MAIN_CORE_PROC, 1}, {2, EFFICIENT_CORE_PROC, 2}, {4, HYPER_THREADING_PROC, 1}}, + {{4, MAIN_CORE_PROC, 1, 0, 0}, {2, EFFICIENT_CORE_PROC, 2, 0, 0}, {4, HYPER_THREADING_PROC, 1, 0, 0}}, }; StreamsCalculationTestCase _1sockets_8cores_tput_8 = { @@ -1727,7 +1438,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_8 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{8, 4, 4, 0, 0, 0}}, - {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}}, + {{2, MAIN_CORE_PROC, 2, 0, 0}, {2, EFFICIENT_CORE_PROC, 
2, 0, 0}}, }; StreamsCalculationTestCase _1sockets_6cores_latency_1 = { @@ -1739,7 +1450,7 @@ StreamsCalculationTestCase _1sockets_6cores_latency_1 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 6, 0, 6, 0, 0}}, - {{1, MAIN_CORE_PROC, 12}}, + {{1, MAIN_CORE_PROC, 12, 0, 0}}, }; StreamsCalculationTestCase _1sockets_6cores_latency_2 = { @@ -1751,7 +1462,7 @@ StreamsCalculationTestCase _1sockets_6cores_latency_2 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 6, 0, 6, 0, 0}}, - {{1, MAIN_CORE_PROC, 12}}, + {{1, MAIN_CORE_PROC, 12, 0, 0}}, }; StreamsCalculationTestCase _1sockets_6cores_latency_3 = { @@ -1763,7 +1474,7 @@ StreamsCalculationTestCase _1sockets_6cores_latency_3 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{6, 6, 0, 0, 0, 0}}, - {{1, MAIN_CORE_PROC, 6}}, + {{1, MAIN_CORE_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_6cores_tput_1 = { @@ -1775,7 +1486,7 @@ StreamsCalculationTestCase _1sockets_6cores_tput_1 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 6, 0, 6, 0, 0}}, - {{2, MAIN_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}}, + {{2, MAIN_CORE_PROC, 3, 0, 0}, {2, HYPER_THREADING_PROC, 3, 0, 0}}, }; StreamsCalculationTestCase _1sockets_6cores_tput_2 = { @@ -1787,7 +1498,7 @@ StreamsCalculationTestCase _1sockets_6cores_tput_2 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 6, 0, 6, 0, 0}}, - {{1, MAIN_CORE_PROC, 6}, {1, HYPER_THREADING_PROC, 6}}, + {{1, MAIN_CORE_PROC, 6, 0, 0}, {1, HYPER_THREADING_PROC, 6, 0, 0}}, }; StreamsCalculationTestCase _1sockets_6cores_tput_3 = { @@ -1799,7 +1510,7 @@ StreamsCalculationTestCase _1sockets_6cores_tput_3 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 6, 0, 6, 0, 0}}, - {{2, MAIN_CORE_PROC, 3}}, + {{2, MAIN_CORE_PROC, 3, 0, 0}}, }; StreamsCalculationTestCase _1sockets_6cores_tput_4 = { @@ -1811,7 +1522,7 @@ 
StreamsCalculationTestCase _1sockets_6cores_tput_4 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{12, 6, 0, 6, 0, 0}}, - {{6, MAIN_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}}, + {{6, MAIN_CORE_PROC, 1, 0, 0}, {6, HYPER_THREADING_PROC, 1, 0, 0}}, }; StreamsCalculationTestCase _1sockets_ecores_latency_1 = { @@ -1823,7 +1534,7 @@ StreamsCalculationTestCase _1sockets_ecores_latency_1 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{16, 0, 16, 0, 0, 0}}, - {{1, EFFICIENT_CORE_PROC, 16}}, + {{1, EFFICIENT_CORE_PROC, 16, 0, 0}}, }; StreamsCalculationTestCase _1sockets_ecores_latency_2 = { @@ -1835,7 +1546,7 @@ StreamsCalculationTestCase _1sockets_ecores_latency_2 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{16, 0, 16, 0, 0, 0}}, - {{1, EFFICIENT_CORE_PROC, 4}}, + {{1, EFFICIENT_CORE_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_ecores_latency_3 = { @@ -1847,7 +1558,7 @@ StreamsCalculationTestCase _1sockets_ecores_latency_3 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{16, 0, 16, 0, 0, 0}}, - {{1, EFFICIENT_CORE_PROC, 16}}, + {{1, EFFICIENT_CORE_PROC, 16, 0, 0}}, }; StreamsCalculationTestCase _1sockets_ecores_latency_4 = { @@ -1859,7 +1570,7 @@ StreamsCalculationTestCase _1sockets_ecores_latency_4 = { "LATENCY", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{16, 0, 16, 0, 0, 0}}, - {{1, EFFICIENT_CORE_PROC, 4}}, + {{1, EFFICIENT_CORE_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_ecores_tput_1 = { @@ -1871,7 +1582,7 @@ StreamsCalculationTestCase _1sockets_ecores_tput_1 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{16, 0, 16, 0, 0, 0}}, - {{16, EFFICIENT_CORE_PROC, 1}}, + {{16, EFFICIENT_CORE_PROC, 1, 0, 0}}, }; StreamsCalculationTestCase _1sockets_ecores_tput_2 = { @@ -1883,7 +1594,7 @@ StreamsCalculationTestCase _1sockets_ecores_tput_2 = { "THROUGHPUT", 
ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{16, 0, 16, 0, 0, 0}}, - {{4, EFFICIENT_CORE_PROC, 4}}, + {{4, EFFICIENT_CORE_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_ecores_tput_3 = { @@ -1895,7 +1606,7 @@ StreamsCalculationTestCase _1sockets_ecores_tput_3 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{16, 0, 16, 0, 0, 0}}, - {{2, EFFICIENT_CORE_PROC, 8}}, + {{2, EFFICIENT_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _1sockets_ecores_tput_4 = { @@ -1907,7 +1618,7 @@ StreamsCalculationTestCase _1sockets_ecores_tput_4 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{16, 0, 16, 0, 0, 0}}, - {{4, EFFICIENT_CORE_PROC, 4}}, + {{4, EFFICIENT_CORE_PROC, 4, 0, 0}}, }; StreamsCalculationTestCase _1sockets_ecores_tput_5 = { @@ -1919,7 +1630,7 @@ StreamsCalculationTestCase _1sockets_ecores_tput_5 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{16, 0, 16, 0, 0, 0}}, - {{2, EFFICIENT_CORE_PROC, 8}}, + {{2, EFFICIENT_CORE_PROC, 8, 0, 0}}, }; StreamsCalculationTestCase _1sockets_mock_tput_1 = { @@ -1931,7 +1642,7 @@ StreamsCalculationTestCase _1sockets_mock_tput_1 = { "THROUGHPUT", ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, {{20, 6, 7, 6, 0, 0}}, - {{6, MAIN_CORE_PROC, 1}, {3, EFFICIENT_CORE_PROC, 2}, {3, HYPER_THREADING_PROC, 1}}, + {{6, MAIN_CORE_PROC, 1, 0, 0}, {3, EFFICIENT_CORE_PROC, 2, 0, 0}, {3, HYPER_THREADING_PROC, 1, 0, 0}}, }; TEST_P(StreamsCalculationTests, StreamsCalculation) {} @@ -1967,6 +1678,8 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _2sockets_104cores_tput_5, _2sockets_104cores_tput_6, _2sockets_104cores_tput_7, + _2sockets_104cores_tput_7_1, + _2sockets_104cores_tput_7_2, _2sockets_104cores_tput_8, _2sockets_104cores_tput_9, _2sockets_104cores_tput_10, @@ -2065,510 +1778,4 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _1sockets_ecores_tput_5, _1sockets_mock_tput_1)); -struct StreamGenerateionTestCase { - int 
input_stream; - bool input_stream_changed; - int input_thread; - int input_request; - int input_model_prefer; - ov::hint::SchedulingCoreType input_type; - bool input_ht_value; - bool input_ht_changed; - bool input_cpu_value; - bool input_cpu_changed; - ov::hint::PerformanceMode input_pm_hint; - ov::intel_cpu::Config::LatencyThreadingMode input_latency_scope; - ov::threading::IStreamsExecutor::ThreadBindingType input_binding_type; - std::vector> input_proc_type_table; - ov::hint::SchedulingCoreType output_type; - bool output_ht_value; - bool output_cpu_value; - ov::hint::PerformanceMode output_pm_hint; - std::vector> output_proc_type_table; - std::vector> output_stream_info_table; -}; - -void make_config(StreamGenerateionTestCase& test_data, ov::intel_cpu::Config& config) { - config.schedulingCoreType = test_data.input_type; - config.enableCpuPinning = test_data.input_cpu_value; - config.changedCpuPinning = test_data.input_cpu_changed; - config.enableHyperThreading = test_data.input_ht_value; - config.changedHyperThreading = test_data.input_ht_changed; - config.perfHintsConfig.ovPerfHint = ov::util::to_string(test_data.input_pm_hint); - config.scopeOflatencyCandidate = test_data.input_latency_scope; - config.perfHintsConfig.ovPerfHintNumRequests = test_data.input_request; - config.streamExecutorConfig._streams = test_data.input_stream; - config.streamExecutorConfig._streams_changed = test_data.input_stream_changed; - config.streamExecutorConfig._threads = test_data.input_thread; - config.streamExecutorConfig._threadBindingType = test_data.input_binding_type; - config.streamExecutorConfig._orig_proc_type_table = test_data.input_proc_type_table; -} - -class StreamGenerationTests : public CommonTestUtils::TestsCommon, - public testing::WithParamInterface> { -public: - void SetUp() override { - auto test_data = std::get<0>(GetParam()); - ov::intel_cpu::Config config; - make_config(test_data, config); - - ov::intel_cpu::generate_stream_info(test_data.input_stream, 
nullptr, config, test_data.input_model_prefer); - - ASSERT_EQ(test_data.output_stream_info_table, config.streamExecutorConfig._streams_info_table); - ASSERT_EQ(test_data.output_proc_type_table, config.streamExecutorConfig._proc_type_table); - ASSERT_EQ(test_data.output_cpu_value, config.streamExecutorConfig._cpu_pinning); - ASSERT_EQ(test_data.output_ht_value, config.enableHyperThreading); - ASSERT_EQ(test_data.output_type, config.schedulingCoreType); - ASSERT_EQ(test_data.output_pm_hint, - ov::util::from_string(config.perfHintsConfig.ovPerfHint, ov::hint::performance_mode)); - } -}; - -TEST_P(StreamGenerationTests, StreamsGeneration) {} - -StreamGenerateionTestCase generation_latency_1sockets_14cores_1 = { - 1, // param[in]: simulated settting for streams number - false, // param[in]: simulated settting for streams number changed - 0, // param[in]: simulated setting for threads number - 0, // param[in]: simulated setting for inference request number - 0, // param[in]: simulated setting for model prefer threads number - ov::hint::SchedulingCoreType::ANY_CORE, // param[in]: simulated setting for scheduling core type - // (PCORE_ONLY/ECORE_ONLY/ANY_CORE) - true, // param[in]: simulated setting for enableHyperThreading - true, // param[in]: simulated settting for changedHyperThreading - true, // param[in]: simulated setting for enableCpuPinning - true, // param[in]: simulated setting for changedCpuPinning - ov::hint::PerformanceMode::LATENCY, // param[in]: simulated setting for performance mode (throughput/latency) - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, // param[in]: simulated setting for scope of candidate processors - // on latency mode - ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, // param[in]: simulated setting for - // threadBindingType - {{20, 6, 8, 6, 0, 0}}, // param[in]: simulated proc_type_table for platform which has one socket, 6 Pcores, 8 - // Ecores and hyper threading enabled - 
ov::hint::SchedulingCoreType::ANY_CORE, // param[expected out]: scheduling core type needs to be the same as input - true, // param[expected out]: enableHyperThreading needs to be the same as input - true, // param[expected out]: enableCpuPinning needs to be the same as input - ov::hint::PerformanceMode::LATENCY, // param[expected out]: performance mode needs to be the same as input - {{20, 6, 8, 6, 0, 0}}, // param[expected out]: since hyper threading is enabled and all core type is used, - // proc_type_table needs to be the same as input - {{1, ALL_PROC, 20}, - {0, MAIN_CORE_PROC, 6}, - {0, EFFICIENT_CORE_PROC, 8}, - {0, - HYPER_THREADING_PROC, - 6}}, // param[expected out]: since performance mode is latency and all cores is used, the final streams is 1 -}; - -StreamGenerateionTestCase generation_latency_1sockets_14cores_2 = { - 1, - false, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::ANY_CORE, - true, - true, - true, - true, - ov::hint::PerformanceMode::LATENCY, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, - {{14, 6, 8, 0, 0, 0}}, - ov::hint::SchedulingCoreType::ANY_CORE, - false, - true, - ov::hint::PerformanceMode::LATENCY, - {{14, 6, 8, 0, 0, 0}}, - {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, -}; - -StreamGenerateionTestCase generation_latency_1sockets_14cores_3 = { - 1, - false, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::PCORE_ONLY, - true, - true, - false, - true, - ov::hint::PerformanceMode::LATENCY, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, - {{14, 6, 8, 0, 0, 0}}, - ov::hint::SchedulingCoreType::PCORE_ONLY, - false, - false, - ov::hint::PerformanceMode::LATENCY, - {{6, 6, 0, 0, 0, 0}}, - {{1, MAIN_CORE_PROC, 6}}, -}; - -StreamGenerateionTestCase generation_latency_1sockets_14cores_4 = { - 1, - false, - 0, - 0, - 0, - 
ov::hint::SchedulingCoreType::PCORE_ONLY, - true, - true, - false, - true, - ov::hint::PerformanceMode::LATENCY, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, - {{20, 6, 8, 6, 0, 0}}, - ov::hint::SchedulingCoreType::PCORE_ONLY, - true, - false, - ov::hint::PerformanceMode::LATENCY, - {{12, 6, 0, 6, 0, 0}}, - {{1, MAIN_CORE_PROC, 12}}, -}; - -StreamGenerateionTestCase generation_latency_1sockets_14cores_5 = { - 1, - false, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::PCORE_ONLY, - false, - true, - false, - true, - ov::hint::PerformanceMode::LATENCY, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, - {{20, 6, 8, 6, 0, 0}}, - ov::hint::SchedulingCoreType::PCORE_ONLY, - false, - false, - ov::hint::PerformanceMode::LATENCY, - {{6, 6, 0, 0, 0, 0}}, - {{1, MAIN_CORE_PROC, 6}}, -}; - -StreamGenerateionTestCase generation_latency_2sockets_48cores_6 = { - 1, - false, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::PCORE_ONLY, - false, - true, - false, - true, - ov::hint::PerformanceMode::LATENCY, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, - {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, - ov::hint::SchedulingCoreType::PCORE_ONLY, - false, - false, - ov::hint::PerformanceMode::LATENCY, - {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{2, MAIN_CORE_PROC, 24}}, -}; - -StreamGenerateionTestCase generation_latency_2sockets_48cores_7 = { - 1, - false, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::PCORE_ONLY, - true, - true, - false, - true, - ov::hint::PerformanceMode::LATENCY, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, - {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - 
ov::hint::SchedulingCoreType::PCORE_ONLY, - false, - false, - ov::hint::PerformanceMode::LATENCY, - {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{2, MAIN_CORE_PROC, 24}}, -}; - -StreamGenerateionTestCase generation_latency_2sockets_48cores_8 = { - 1, - true, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::PCORE_ONLY, - false, - true, - false, - true, - ov::hint::PerformanceMode::LATENCY, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, - {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, - ov::hint::SchedulingCoreType::PCORE_ONLY, - false, - false, - ov::hint::PerformanceMode::LATENCY, - {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{1, MAIN_CORE_PROC, 48}}, -}; - -StreamGenerateionTestCase generation_latency_2sockets_48cores_9 = { - 1, - true, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::PCORE_ONLY, - true, - true, - false, - true, - ov::hint::PerformanceMode::LATENCY, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, - {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - ov::hint::SchedulingCoreType::PCORE_ONLY, - false, - false, - ov::hint::PerformanceMode::LATENCY, - {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{1, MAIN_CORE_PROC, 48}}, -}; - -StreamGenerateionTestCase generation_tput_1sockets_14cores_1 = { - 0, - false, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::ANY_CORE, - true, - true, - true, - true, - ov::hint::PerformanceMode::THROUGHPUT, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, - {{20, 6, 8, 6, 0, 0}}, - ov::hint::SchedulingCoreType::ANY_CORE, - true, - true, - ov::hint::PerformanceMode::THROUGHPUT, - {{20, 6, 8, 6, 0, 0}}, - {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, 
HYPER_THREADING_PROC, 3}}, -}; - -StreamGenerateionTestCase generation_tput_1sockets_14cores_2 = { - 0, - false, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::PCORE_ONLY, - false, - true, - false, - true, - ov::hint::PerformanceMode::THROUGHPUT, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::CORES, - {{20, 6, 8, 6, 0, 0}}, - ov::hint::SchedulingCoreType::PCORE_ONLY, - false, - false, - ov::hint::PerformanceMode::THROUGHPUT, - {{6, 6, 0, 0, 0, 0}}, - {{2, MAIN_CORE_PROC, 3}}, -}; - -StreamGenerateionTestCase generation_tput_1sockets_14cores_3 = { - 10, - true, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::PCORE_ONLY, - true, - true, - false, - true, - ov::hint::PerformanceMode::THROUGHPUT, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::CORES, - {{20, 6, 8, 6, 0, 0}}, - ov::hint::SchedulingCoreType::PCORE_ONLY, - true, - false, - ov::hint::PerformanceMode::THROUGHPUT, - {{12, 6, 0, 6, 0, 0}}, - {{6, MAIN_CORE_PROC, 1}, {4, HYPER_THREADING_PROC, 1}}, -}; - -StreamGenerateionTestCase generation_tput_1sockets_14cores_4 = { - 0, - false, - 10, - 0, - 0, - ov::hint::SchedulingCoreType::PCORE_ONLY, - true, - true, - false, - true, - ov::hint::PerformanceMode::THROUGHPUT, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::CORES, - {{20, 6, 8, 6, 0, 0}}, - ov::hint::SchedulingCoreType::PCORE_ONLY, - true, - false, - ov::hint::PerformanceMode::THROUGHPUT, - {{12, 6, 0, 6, 0, 0}}, - {{2, MAIN_CORE_PROC, 3}, {1, HYPER_THREADING_PROC, 3}}, -}; - -StreamGenerateionTestCase generation_tput_2sockets_48cores_5 = { - 0, - false, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::ANY_CORE, - true, - true, - false, - true, - ov::hint::PerformanceMode::THROUGHPUT, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::CORES, - {{96, 48, 0, 
48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, - ov::hint::SchedulingCoreType::ANY_CORE, - true, - false, - ov::hint::PerformanceMode::THROUGHPUT, - {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, - {{24, MAIN_CORE_PROC, 4}}, -}; - -StreamGenerateionTestCase generation_tput_2sockets_48cores_6 = { - 0, - false, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::ANY_CORE, - false, - true, - false, - true, - ov::hint::PerformanceMode::THROUGHPUT, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::CORES, - {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, - ov::hint::SchedulingCoreType::ANY_CORE, - false, - false, - ov::hint::PerformanceMode::THROUGHPUT, - {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{12, MAIN_CORE_PROC, 4}}, -}; - -StreamGenerateionTestCase generation_tput_2sockets_48cores_7 = { - 100, - true, - 0, - 0, - 0, - ov::hint::SchedulingCoreType::ANY_CORE, - false, - true, - false, - true, - ov::hint::PerformanceMode::THROUGHPUT, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::CORES, - {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, - ov::hint::SchedulingCoreType::ANY_CORE, - false, - false, - ov::hint::PerformanceMode::THROUGHPUT, - {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{48, MAIN_CORE_PROC, 1}}, -}; - -StreamGenerateionTestCase generation_tput_2sockets_48cores_8 = { - 2, - true, - 20, - 0, - 1, - ov::hint::SchedulingCoreType::ANY_CORE, - false, - true, - false, - true, - ov::hint::PerformanceMode::THROUGHPUT, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::CORES, - {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, - ov::hint::SchedulingCoreType::ANY_CORE, - false, - false, - 
ov::hint::PerformanceMode::THROUGHPUT, - {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{2, MAIN_CORE_PROC, 10}}, -}; - -StreamGenerateionTestCase generation_tput_2sockets_48cores_9 = { - 0, - false, - 0, - 0, - 1, - ov::hint::SchedulingCoreType::ANY_CORE, - true, - false, - false, - true, - ov::hint::PerformanceMode::THROUGHPUT, - ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, - ov::threading::IStreamsExecutor::ThreadBindingType::CORES, - {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, - ov::hint::SchedulingCoreType::ANY_CORE, - false, - false, - ov::hint::PerformanceMode::THROUGHPUT, - {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, - {{48, MAIN_CORE_PROC, 1}}, -}; - -INSTANTIATE_TEST_SUITE_P(smoke_StreamsGeneration, - StreamGenerationTests, - ::testing::Values(generation_latency_1sockets_14cores_1, - generation_latency_1sockets_14cores_2, - generation_latency_1sockets_14cores_3, - generation_latency_1sockets_14cores_4, - generation_latency_1sockets_14cores_5, - generation_latency_2sockets_48cores_6, - generation_latency_2sockets_48cores_7, - generation_latency_2sockets_48cores_8, - generation_latency_2sockets_48cores_9, - generation_tput_1sockets_14cores_1, - generation_tput_1sockets_14cores_2, - generation_tput_1sockets_14cores_3, - generation_tput_1sockets_14cores_4, - generation_tput_2sockets_48cores_5, - generation_tput_2sockets_48cores_6, - generation_tput_2sockets_48cores_7, - generation_tput_2sockets_48cores_8, - generation_tput_2sockets_48cores_9)); - } // namespace \ No newline at end of file diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 80477e9c972f8f..f8e81ab0799929 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -92,8 +92,8 @@ struct network { network(program::ptr program, stream::ptr 
stream, uint16_t stream_id); - network(cldnn::BinaryInputBuffer& ifs, stream::ptr stream, engine& engine, bool is_primary_stream = true); - network(cldnn::BinaryInputBuffer& ifs, const ExecutionConfig& config, stream::ptr stream, engine& engine, bool is_primary_stream = true); + network(cldnn::BinaryInputBuffer& ifs, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id); + network(cldnn::BinaryInputBuffer& ifs, const ExecutionConfig& config, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id); ~network(); @@ -215,6 +215,7 @@ struct network { void configure_primitives_second_output(); void build_insts_deps(); uint32_t get_id() const { return net_id; } + uint32_t get_local_id() const { return _local_net_id; } stream& get_stream() const { return *_stream; } stream::ptr get_stream_ptr() const { return _stream; } bool is_internal() const { return _internal; } @@ -254,6 +255,8 @@ struct network { bool _is_dynamic = false; bool _enable_profiling = false; bool _reset_arguments; + uint32_t _local_net_id = 0; // This is for thread-safe deserialization. 'net_id' is globally unique, + // but '_local_net_id' is unique only in each intel_gpu::Graph. 
std::unordered_map> _primitives; std::vector _in_out_shared_mem_types; diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp index dccbea3eaef456..9386e5240083c3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp @@ -37,7 +37,7 @@ class BinaryOutputBuffer : public OutputBuffer { class BinaryInputBuffer : public InputBuffer { public: BinaryInputBuffer(std::istream& stream, engine& engine) - : InputBuffer(this, engine), _stream(stream), _impl_params(nullptr), _num_networks(0), _stream_id(0) {} + : InputBuffer(this, engine), _stream(stream), _impl_params(nullptr) {} void read(void* const data, std::streamsize size) { auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast(data), size); @@ -62,18 +62,10 @@ class BinaryInputBuffer : public InputBuffer { std::streampos tellg() { return _stream.tellg(); } void seekg(std::streampos pos) { _stream.seekg(pos); } - void new_network_added() { _num_networks += 1; } - int get_num_networks() const { return _num_networks; } - - void set_stream_id(uint16_t stream_id) { _stream_id = stream_id; } - uint16_t get_stream_id() const { return _stream_id; } - private: std::istream& _stream; void* _impl_params; std::vector>> _const_data_map; - int _num_networks; - uint16_t _stream_id; }; template diff --git a/src/plugins/intel_gpu/src/graph/data.cpp b/src/plugins/intel_gpu/src/graph/data.cpp index 907c6a79434b10..00cd00bf90a3f8 100644 --- a/src/plugins/intel_gpu/src/graph/data.cpp +++ b/src/plugins/intel_gpu/src/graph/data.cpp @@ -87,7 +87,7 @@ void data_inst::load(BinaryInputBuffer& ib) { ib >> make_data(&data_size, sizeof(size_t)); if (!get_network().is_primary_stream()) { - _outputs[0] = ib.getConstData(get_network_id() - (ib.get_num_networks() * ib.get_stream_id()), id()); + _outputs[0] = 
ib.getConstData(_network.get_local_id(), id()); auto pos = ib.tellg(); pos += data_size; ib.seekg(pos); @@ -103,7 +103,7 @@ void data_inst::load(BinaryInputBuffer& ib) { _outputs[0]->copy_from(get_network().get_stream(), _buf.data()); } - ib.addConstData(get_network_id(), id(), _outputs[0]); + ib.addConstData(_network.get_local_id(), id(), _outputs[0]); } } diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index 281fa1b4d8674e..9755296009a308 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -158,9 +158,9 @@ std::vector fully_connected_inst::calc_output_layouts(fully_connected_no ov::op::v0::shape_infer(&op, input_shapes, output_shapes); bool is_static = input_layout.is_static() && weights_layout.is_static(); - - format::type output_format = is_static ? get_preferred_format(node, impl_param) : - input_layout.format.value; + bool allow_new_shape_infer = impl_param.get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + format::type output_format = is_static && !allow_new_shape_infer ? 
get_preferred_format(node, impl_param) : + input_layout.format.value; return { layout{output_shapes[0], output_type, output_format} }; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp index 2ac6b4b39b145c..480fc286b62aed 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp @@ -58,6 +58,11 @@ void compile_graph::run(program& p) { can_select_impl = false; } + // TODO: Remove this WA once we have shape agnostic conv kernl with specified auto_pad attributes + if (node->is_type() && node->is_dynamic() && !node->as().use_explicit_padding()) { + can_select_impl = false; + } + // TODO: need to come up with better handling of unsupported shape agnostic cases // e.g. process exceptions from choose_impl() and ignore those for dynamic parameters if (node->is_type() && node->is_dynamic() && node->get_output_pshape().size() > 3) diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/shape_of.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/shape_of.cpp index 72aadd3e65067f..4a16219c9dbd0a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/shape_of.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/shape_of.cpp @@ -58,13 +58,15 @@ struct shape_of_impl : public typed_primitive_impl { OPENVINO_THROW("[GPU] Couldn't execute shape_of operation: unsupported output data type (", output_dt , ")"); } - if (events.size() > 1) { - return stream.group_events(events); - } else if (events.size() == 1) { - return events[0]; - } else { - return stream.create_user_event(true); + if (stream.get_queue_type() == QueueTypes::out_of_order) { + if (events.size() > 1) { + return stream.group_events(events); + } else if (events.size() == 1) { + return events[0]; + } } + + return stream.create_user_event(true); } void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} diff --git 
a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index a3b923f124f0f0..16072a5e53ee2c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -7,6 +7,7 @@ #include "convolution_inst.h" #include "convolution/convolution_kernel_selector.h" #include "convolution/convolution_params.h" +#include "ngraph/validation_util.hpp" namespace cldnn { namespace ocl { @@ -42,7 +43,6 @@ struct convolution_impl : typed_primitive_impl_ocl { auto stride = primitive->stride; const auto& dilation = primitive->dilation; - const auto& pad = primitive->padding_begin; const auto& groups = primitive->groups; const auto& deformable_groups = primitive->deformable_groups; const auto transposed = primitive->transposed; @@ -73,15 +73,46 @@ struct convolution_impl : typed_primitive_impl_ocl { deform_conv_dep_offset++; const auto& weights_layout = impl_param.input_layouts[1 + 0 + deform_conv_dep_offset] - .convert_to_weights_layout(primitive->grouped_weights_shape); + .convert_to_weights_layout(primitive->grouped_weights_shape); + + const auto& input_layout = impl_param.get_input_layout(); + auto spatial_rank = input_layout.get_spatial_rank(); + std::vector dims; + for (size_t i = 0; i < spatial_rank; i++) { + dims.push_back(static_cast(weights_layout.spatial(i))); + } + ov::Shape kernel(dims.begin(), dims.end()); + ov::CoordinateDiff pads_begin(primitive->padding_begin.begin(), primitive->padding_begin.end()); + ov::CoordinateDiff pads_end(primitive->padding_end.begin(), primitive->padding_end.end()); + const auto auto_pad = primitive->auto_pad; + if (auto_pad == ov::op::PadType::SAME_UPPER || auto_pad == ov::op::PadType::SAME_LOWER) { + pads_begin.clear(); + pads_end.clear(); + OPENVINO_SUPPRESS_DEPRECATED_START + ngraph::try_apply_auto_padding(input_layout.get_partial_shape(), + kernel, + stride, + dilation, + auto_pad, + pads_end, + 
pads_begin); + OPENVINO_SUPPRESS_DEPRECATED_END + } + if (auto_pad == ov::op::PadType::VALID) { + pads_begin = ov::CoordinateDiff(pads_begin.size(), 0); + pads_end = ov::CoordinateDiff(pads_end.size(), 0); + } + pads_begin.resize(std::max(2, pads_begin.size()), 0); + pads_end.resize(std::max(2, pads_end.size()), 0); + uint32_t kx = weights_layout.spatial(0); uint32_t ky = weights_layout.spatial(1); uint32_t kz = weights_layout.spatial(2); conv_params.filterSize = { kx, ky, kz }; - uint32_t pad_z = std::max(pad.size() >= 3 ? pad[pad.size() - 3] : 0, 0); - uint32_t pad_y = std::max(pad.size() >= 2 ? pad[pad.size() - 2] : 0, 0); - uint32_t pad_x = std::max(pad.size() >= 1 ? pad[pad.size() - 1] : 0, 0); + uint32_t pad_z = std::max(pads_begin.size() >= 3 ? pads_begin[pads_begin.size() - 3] : 0, 0); + uint32_t pad_y = std::max(pads_begin.size() >= 2 ? pads_begin[pads_begin.size() - 2] : 0, 0); + uint32_t pad_x = std::max(pads_begin.size() >= 1 ? pads_begin[pads_begin.size() - 1] : 0, 0); conv_params.padding = {pad_x, pad_y, pad_z}; uint32_t stride_z = stride.size() >= 3 ? 
static_cast(stride[stride.size() - 3]) : 1; @@ -121,7 +152,7 @@ struct convolution_impl : typed_primitive_impl_ocl { auto can_swap = [](const kernel_selector::Tensor::DataTensor& dt) -> bool { auto x_channel_idx = kernel_selector::Tensor::DataTensor::Channelndex(dt.GetLayout(), kernel_selector::Tensor::DataChannelName::X); - auto x_axis_dim = dt.GetDims()[x_channel_idx]; + auto x_axis_dim = dt.GetDims()[static_cast(x_channel_idx)]; return (x_axis_dim.pad.Total() == 0 && x_axis_dim.v == 1); }; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index a1e9081e0e64fc..68f56f2379e8b9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -37,7 +37,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); - auto get_fc_input_layouts = [primitive](const std::vector& input_layouts) { + auto get_fc_input_layouts = [primitive](const std::vector& input_layouts, bool allow_new_shape_infer) { auto reshape_to_2d = [](const ov::PartialShape& shape, const ov::Dimension& feature, size_t rank) { if (shape.is_static()) { auto static_shape = shape.to_shape(); @@ -56,15 +56,21 @@ struct fully_connected_impl : typed_primitive_impl_ocl { auto input1_pshape = input1_layout.get_partial_shape(); ov::Dimension feature = input0_pshape[std::min(primitive->input_size, static_cast(4)) - 1ul]; + if (allow_new_shape_infer) { + feature = input0_pshape[primitive->input_size - 1ul]; + } if (primitive->input_size > 3) { input0_layout.set_partial_shape(reshape_to_2d(input0_pshape, feature, primitive->input_size)); + input0_layout.format = format::bfyx; } if (input1_pshape.size() != 2) { input1_layout.set_partial_shape(reshape_to_2d(input1_pshape, feature, 
primitive->weights_rank)); + input1_layout.format = format::bfyx; } std::vector layouts{input0_layout, input1_layout}; + return layouts; }; @@ -83,9 +89,10 @@ struct fully_connected_impl : typed_primitive_impl_ocl { return updated_out_layout; }; + bool allow_new_shape_infer = impl_param.get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer); auto updated_impl_param = impl_param; - const auto input_layouts = get_fc_input_layouts(impl_param.input_layouts); + const auto input_layouts = get_fc_input_layouts(impl_param.input_layouts, allow_new_shape_infer); updated_impl_param.input_layouts[0] = input_layouts[0]; updated_impl_param.input_layouts[1] = input_layouts[1]; updated_impl_param.weights_layout = input_layouts[1]; @@ -137,6 +144,10 @@ attach_fully_connected_impl::attach_fully_connected_impl() { std::make_tuple(data_types::f16, format::yxfb), std::make_tuple(data_types::f32, format::bfyx), std::make_tuple(data_types::f16, format::bfyx), + std::make_tuple(data_types::f32, format::bfzyx), + std::make_tuple(data_types::f16, format::bfzyx), + std::make_tuple(data_types::f32, format::bfwzyx), + std::make_tuple(data_types::f16, format::bfwzyx), std::make_tuple(data_types::f32, format::byxf), std::make_tuple(data_types::f16, format::byxf), std::make_tuple(data_types::i8, format::bfyx), diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp index c892906e3112b1..8291b94db4ceba 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp @@ -115,7 +115,7 @@ struct concatenation_onednn : typed_primitive_onednn_impl create(const concatenation_node& arg, const kernel_impl_params& impl_params) { auto& engine = impl_params.prog->get_engine(); auto& config = impl_params.prog->get_config(); - if (arg.can_be_optimized()) + if (impl_params.can_be_optimized()) return 
make_unique(engine, config); auto prim = impl_params.typed_desc(); auto attr = arg.get_onednn_primitive_attributes(); diff --git a/src/plugins/intel_gpu/src/graph/include/convolution_inst.h b/src/plugins/intel_gpu/src/graph/include/convolution_inst.h index 12ca93abc930dd..238e791e86eb89 100644 --- a/src/plugins/intel_gpu/src/graph/include/convolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/convolution_inst.h @@ -89,6 +89,7 @@ struct typed_program_node : public typed_program_node_baseweights_zero_points.size() > 0; } bool compensation_term() const { return get_primitive()->compensation.size() > 0; } bool activations_zero_points_term() const { return get_primitive()->activations_zero_points.size() > 0; } + bool use_explicit_padding() const { return get_primitive()->auto_pad == ov::op::PadType::EXPLICIT; } // Currently convolution with constant weight is only supported for dynamic shape std::vector get_shape_infer_dependencies() const override { return {}; } diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 3b43b021739536..4cb42270a54fab 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1727,6 +1727,15 @@ format layout_optimizer::get_preferred_format(program_node& node) { if (use_onednn_impls) { expected = node.get_preferred_output_fmt(); } + + if (!allow_new_shape_infer && node.is_type()) { + auto& fc_node = node.as(); + auto input_layout = fc_node.input().get_output_layout(); + if (input_layout.format.dimension() > 4) { + expected = format::bfyx; + node.set_preferred_input_fmt(0, format::bfyx); + } + } } if (allow_new_shape_infer && node.get_preferred_input_fmt() != format::any) { diff --git a/src/plugins/intel_gpu/src/graph/loop.cpp b/src/plugins/intel_gpu/src/graph/loop.cpp index 90a6595e822ffa..9de5b8b2d09ea1 100644 --- a/src/plugins/intel_gpu/src/graph/loop.cpp +++ 
b/src/plugins/intel_gpu/src/graph/loop.cpp @@ -529,7 +529,7 @@ void loop_inst::load(BinaryInputBuffer& ib) { ib >> _condition_id; ib >> _num_iteration_id; ib >> _max_iteration; - body_network = std::make_shared(ib, get_network().get_stream_ptr(), get_network().get_engine(), get_network().is_primary_stream()); + body_network = std::make_shared(ib, get_network().get_stream_ptr(), get_network().get_engine(), get_network().is_primary_stream(), 0); } } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 6b2cea271dca6d..013214ede8d447 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -359,10 +359,10 @@ network::network(program::ptr program, uint16_t stream_id) network::network(program::ptr program, stream::ptr stream, uint16_t stream_id) : network(program, program->get_config(), stream, false, stream_id == 0) {} -network::network(cldnn::BinaryInputBuffer& ib, stream::ptr stream, engine& engine, bool is_primary_stream) - : network(ib, ExecutionConfig{}, stream, engine, is_primary_stream) {} +network::network(cldnn::BinaryInputBuffer& ib, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id) + : network(ib, ExecutionConfig{}, stream, engine, is_primary_stream, local_net_id) {} -network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, stream::ptr stream, engine& engine, bool is_primary_stream) +network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id) : _program(nullptr) , _config(config) , _engine(engine) @@ -370,10 +370,9 @@ network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, st , _memory_pool(new memory_pool(engine)) , _internal(false) , _is_primary_stream(is_primary_stream) - , _reset_arguments(true) { + , _reset_arguments(true) + , _local_net_id(local_net_id) 
{ net_id = get_unique_net_id(); - if (is_primary_stream) - ib.new_network_added(); kernels_cache kernels_cache(get_engine(), config, 0, nullptr, {""}); ib >> kernels_cache; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 986793447b473d..6d5dea9790bfb2 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -800,7 +800,8 @@ primitive_inst::primitive_inst(network& network) , _outputs({memory::ptr()}) , _reordered_weights_cache(network.get_weights_cache_capacity()) , _output_changed(false) - , _mem_allocated(false) {} + , _mem_allocated(false) + , _type(nullptr) {} primitive_inst::primitive_inst(network& network, program_node const& node, bool allocate_memory) : _network(network) @@ -973,7 +974,7 @@ event::ptr primitive_inst::update_weights() { _impl_params->weights_layout = optional_layout(original_layout); } else { auto expected_layout = reorder_kernel_params->get_output_layout(); - // Set original patrial shape, because it may be lost during kernel_selector::weights_tensor -> layout conversion + // Set original partial shape, because it may be lost during kernel_selector::weights_tensor -> layout conversion expected_layout.set_partial_shape(original_layout.get_partial_shape()); _impl_params->weights_layout = optional_layout(expected_layout); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp index 102884aa8ad125..caff184325b79c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp @@ -372,8 +372,6 @@ ParamsKey Convolution_kernel_b_fs_zyx_fsv16_imad::GetSupportedKey() const { 
k.EnableGroupedConvolution(); k.EnableQuantization(QuantizationType::SYMMETRIC); k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA); - k.EnableQuantization(QuantizationType::ASYMMETRIC_WEIGHTS); - k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS); k.EnableDilation(); return k; } diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index d1e0892da7a002..056753ebcf2d24 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -106,9 +106,8 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const size_t num_networks; ib >> num_networks; - for (size_t i = 0; i < num_networks; ++i) { - ib.set_stream_id(m_stream_id); - m_networks.emplace_back(std::make_shared(ib, get_engine().create_stream(config), get_engine(), m_stream_id == 0)); + for (uint32_t i = 0; i < num_networks; ++i) { + m_networks.emplace_back(std::make_shared(ib, get_engine().create_stream(config), get_engine(), m_stream_id == 0, i)); } } diff --git a/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp b/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp index e3c8ce52d80f9f..0962c8ae008181 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp @@ -74,6 +74,7 @@ static void CreateConvolutionOp(Program& p, const std::shared_ptrget_dilations(); auto pads_begin = op->get_pads_begin(); auto pads_end = op->get_pads_end(); + auto auto_pad = op->get_auto_pad(); if (!op->is_dynamic()) { // Extend 1d vectors to 2d as 1d can't be handled properly by the graph optimizer for now @@ -91,7 +92,8 @@ static void CreateConvolutionOp(Program& p, const std::shared_ptr(CommonTestUtils::DEVICE_GPU)), ConvolutionLayerGPUTestDynamic::getTestCaseName); +// ==== Symmetric auto pad +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic2DSymAutoPad, ConvolutionLayerGPUTestDynamic, + ::testing::Combine( + 
::testing::Combine( + ::testing::Values(SizeVector{3, 3}), + ::testing::Values(SizeVector{1, 1}), + ::testing::Values(std::vector{0, 0}), + ::testing::Values(std::vector{0, 0}), + ::testing::Values(SizeVector{1, 1}), + ::testing::Values(10), + ::testing::ValuesIn({ngraph::op::PadType::SAME_LOWER, ngraph::op::PadType::SAME_UPPER})), + ::testing::Values(ElementType::f16), + ::testing::Values(ElementType::f16), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(dynInputShapes2D), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ConvolutionLayerGPUTestDynamic::getTestCaseName); + // ==== Asymmetric pad INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic2D_AsymPad, ConvolutionLayerGPUTestDynamic, ::testing::Combine( @@ -199,6 +217,24 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic3DSymPad, Convolut ::testing::Values(CommonTestUtils::DEVICE_GPU)), ConvolutionLayerGPUTestDynamic::getTestCaseName); +// ==== Symmetric auto pad +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic3DSymAutoPad, ConvolutionLayerGPUTestDynamic, + ::testing::Combine( + ::testing::Combine( + ::testing::Values(SizeVector{3, 3, 3}), + ::testing::Values(SizeVector{1, 1, 1}), + ::testing::Values(std::vector{0, 0, 0}), + ::testing::Values(std::vector{0, 0, 0}), + ::testing::Values(SizeVector{1, 1, 1}), + ::testing::Values(3), + ::testing::ValuesIn({ngraph::op::PadType::SAME_LOWER, ngraph::op::PadType::SAME_UPPER})), + ::testing::Values(ElementType::f16), + ::testing::Values(ElementType::f16), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(dynInputShapes3D), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ConvolutionLayerGPUTestDynamic::getTestCaseName); + // ==== Asymmetric pad INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic3DAsymPad, ConvolutionLayerGPUTestDynamic, ::testing::Combine( diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp 
b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index b7ed06ce9a23a7..2511d3178537f0 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1892,6 +1892,111 @@ TEST(fully_connected_gpu, dynamic) { ASSERT_EQ(3.0f, output_ptr[3]); } +TEST(fully_connected_gpu, dynamic_6d_input) { + auto& engine = get_test_engine(); + + const int32_t input_b = 1, input_f = 3, input_w = 2, input_z = 1, input_y = 1, input_x = 4; + const int32_t weight_b = 2; + + auto input_dyn_layout = layout{ov::PartialShape{ov::Dimension(-1), input_f, input_w, input_z, input_y, input_x}, data_types::f32, format::bfwzyx}; + auto input_data = engine.allocate_memory(layout{ov::PartialShape{input_b, input_f, input_w, input_z, input_y, input_x}, data_types::f32, format::bfwzyx}); + auto weights_data = engine.allocate_memory({ov::PartialShape{weight_b, input_x}, data_types::f32, format::bfyx }); + + set_values(input_data, {-0.5f, 2.0f, 0.5f, 1.f, -1.5f, 2.0f, 0.5f, 1.f, + -0.5f, 2.5f, 0.5f, 1.f, -0.5f, 3.0f, 0.5f, 1.f, + -0.5f, 2.0f, 0.5f, 1.f, -0.5f, 2.0f, 2.5f, 1.f}); + set_values(weights_data, {1.5f, 1.0f, -1.0f, 0.0f, + 0.5f, -0.5f, -0.5f, 1.0f, }); + + cldnn::topology topology{ + input_layout("input", input_dyn_layout), + data("weights", weights_data), + fully_connected("fc", input_info("input"), "weights", "", cldnn::padding(), input_dyn_layout.get_rank()) + }; + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); + network.set_input_data("input", input_data); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "fc"); + + auto output_prim_mem = outputs.begin()->second.get_memory(); + + auto out_l = 
network.get_output_layout(outputs.begin()->first); + ASSERT_EQ(output_prim_mem->get_layout().batch(), 1); + ASSERT_EQ(out_l.batch(), 1); + ASSERT_EQ(out_l.feature(), 3); + ASSERT_EQ(out_l.spatial(0), 2); + ASSERT_EQ(out_l.spatial(1), 1); + ASSERT_EQ(out_l.spatial(2), 1); + ASSERT_EQ(out_l.spatial(3), 2); + + std::vector expected_output = { + 0.75, -0.5, -0.75, -1, 1.25, -0.75, 1.75, -1, 0.75, -0.5, -1.25, -1.5 + }; + + cldnn::mem_lock output_ptr(output_prim_mem, get_test_stream()); + + for (size_t i = 0 ; i < out_l.get_linear_size(); i++) { + ASSERT_EQ(expected_output[i], output_ptr[i]); + } +} + +TEST(fully_connected_gpu, static_6d_input) { + auto& engine = get_test_engine(); + + const int32_t input_b = 1, input_f = 3, input_w = 2, input_z = 1, input_y = 1, input_x = 4; + const int32_t weight_b = 2; + + auto input_dyn_layout = layout{ov::PartialShape{input_b, input_f, input_w, input_z, input_y, input_x}, data_types::f32, format::bfwzyx}; + auto input_data = engine.allocate_memory(input_dyn_layout); + auto weights_data = engine.allocate_memory({ov::PartialShape{weight_b, input_x}, data_types::f32, format::bfyx }); + + set_values(input_data, {-0.5f, 2.0f, 0.5f, 1.f, -1.5f, 2.0f, 0.5f, 1.f, + -0.5f, 2.5f, 0.5f, 1.f, -0.5f, 3.0f, 0.5f, 1.f, + -0.5f, 2.0f, 0.5f, 1.f, -0.5f, 2.0f, 2.5f, 1.f}); + set_values(weights_data, {1.5f, 1.0f, -1.0f, 0.0f, + 0.5f, -0.5f, -0.5f, 1.0f, }); + + cldnn::topology topology{ + input_layout("input", input_dyn_layout), + data("weights", weights_data), + fully_connected("fc", input_info("input"), "weights", "", cldnn::padding(), input_dyn_layout.get_rank()), + }; + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); + network.set_input_data("input", input_data); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "fc"); + + auto output_prim_mem = 
outputs.begin()->second.get_memory(); + + auto out_l = network.get_output_layout(outputs.begin()->first); + ASSERT_EQ(output_prim_mem->get_layout().batch(), 6); + ASSERT_EQ(out_l.batch(), 6); + ASSERT_EQ(out_l.feature(), 2); + ASSERT_EQ(out_l.spatial(0), 1); + ASSERT_EQ(out_l.spatial(1), 1); + + std::vector expected_output = { + 0.75, -0.5, -0.75, -1, 1.25, -0.75, 1.75, -1, 0.75, -0.5, -1.25, -1.5 + }; + + cldnn::mem_lock output_ptr(output_prim_mem, get_test_stream()); + + for (size_t i = 0 ; i < out_l.get_linear_size(); i++) { + ASSERT_EQ(expected_output[i], output_ptr[i]); + } +} + TEST(fully_connected_gpu, dynamic_multi_inference_same_shape) { auto& engine = get_test_engine(); const int32_t input_f = 3, input_b = 1, weight_b = 4; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index 62dfeadb5029f4..5e09105488903f 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -138,7 +138,7 @@ class GemmGPUTest : public ::testing::TestWithParam { { std::istream in_mem(&mem_buf); BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine); - network = std::make_shared(ib, get_test_stream_ptr(), engine); + network = std::make_shared(ib, get_test_stream_ptr(), engine, true, 0); } } else { network = std::make_shared(engine, tp, get_test_default_config(engine)); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/proposal_cpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/proposal_cpu_test.cpp index 5ead5708e8d205..3dddfcf6c6009f 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/proposal_cpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/proposal_cpu_test.cpp @@ -101,7 +101,7 @@ TestRunnerProposal::TestRunnerProposal(cldnn::tensor image_in { std::istream in_mem(&mem_buf); BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine()); - _network.reset(new 
network(ib, get_test_stream_ptr(), get_test_engine())); + _network.reset(new network(ib, get_test_stream_ptr(), get_test_engine(), true, 0)); } } else { _network.reset(new network(get_test_engine(), _topology)); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/streams_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/streams_test.cpp index bc9349807bbea8..3bc4e705f81b40 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/streams_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/streams_test.cpp @@ -93,10 +93,9 @@ class gpu_streams: public ::testing::Test { std::istream in_mem(&mem_buf); BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine); auto pos = ib.tellg(); - network0 = std::make_shared(ib, get_test_stream_ptr(), engine, true); + network0 = std::make_shared(ib, get_test_stream_ptr(), engine, true, 0); ib.seekg(pos); - ib.set_stream_id(1); - network1 = std::make_shared(ib, get_test_stream_ptr(), engine, false); + network1 = std::make_shared(ib, get_test_stream_ptr(), engine, false, 0); } } } else { @@ -193,13 +192,12 @@ class gpu_streams: public ::testing::Test { { std::istream in_mem0(&mem_buf0); BinaryInputBuffer ib0 = BinaryInputBuffer(in_mem0, engine); - network0 = std::make_shared(ib0, get_test_stream_ptr(), engine, false); + network0 = std::make_shared(ib0, get_test_stream_ptr(), engine, false, 0); } { std::istream in_mem1(&mem_buf1); BinaryInputBuffer ib1 = BinaryInputBuffer(in_mem1, engine); - ib1.set_stream_id(1); - network1 = std::make_shared(ib1, get_test_stream_ptr(), engine, true); + network1 = std::make_shared(ib1, get_test_stream_ptr(), engine, true, 0); } } } else { diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h index 1c266850bc2cbf..45421420a0e5fa 100644 --- a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h +++ b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h @@ -771,7 +771,7 @@ inline cldnn::network::ptr 
get_network(cldnn::engine& engine, { std::istream in_mem(&mem_buf); cldnn::BinaryInputBuffer ib = cldnn::BinaryInputBuffer(in_mem, engine); - network = std::make_shared(ib, config, stream, engine); + network = std::make_shared(ib, config, stream, engine, true, 0); } } else { network = std::make_shared(engine, topology, config); diff --git a/src/plugins/intel_gpu/thirdparty/onednn_gpu b/src/plugins/intel_gpu/thirdparty/onednn_gpu index f27dedbfc093f5..ec0b2ee85fc2a2 160000 --- a/src/plugins/intel_gpu/thirdparty/onednn_gpu +++ b/src/plugins/intel_gpu/thirdparty/onednn_gpu @@ -1 +1 @@ -Subproject commit f27dedbfc093f51032a4580198bb80579440dc15 +Subproject commit ec0b2ee85fc2a2dbdeec10035c5ef5813d8fb5ea diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py index ae30a7e3ed8366..d69a5c33dbadaa 100644 --- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py +++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py @@ -50,8 +50,15 @@ def _test(self, model, ref_net, kind, ie_device, precision, ir_version, infer_ti else: inputs = self._prepare_input() - torch_inputs = [torch.from_numpy(inp) if isinstance( - inp, np.ndarray) else inp for inp in inputs] + def numpy_to_torch_recursively(x): + if isinstance(x, tuple): + return tuple(numpy_to_torch_recursively(y) for y in x) + elif isinstance(x, np.ndarray): + return torch.from_numpy(x) + else: + return x + + torch_inputs = [numpy_to_torch_recursively(inp) for inp in inputs] if 'custom_eps' in kwargs and kwargs['custom_eps'] is not None: custom_eps = kwargs['custom_eps'] @@ -61,6 +68,8 @@ def _test(self, model, ref_net, kind, ie_device, precision, ir_version, infer_ti def use_ts_backend(): return(os.environ.get('USE_TS_BACKEND', False)) + ov_inputs = flattenize_inputs(inputs) + if use_ts_backend(): self.ts_backend_test(model, torch_inputs, custom_eps) else: @@ -68,7 +77,7 @@ def use_ts_backend(): model.eval() trace_model = 
kwargs.get('trace_model', False) freeze_model = kwargs.get('freeze_model', True) - model, converted_model = self.convert_directly_via_frontend(model, torch_inputs, trace_model, dynamic_shapes, inputs, freeze_model) + model, converted_model = self.convert_directly_via_frontend(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) graph = model.inlined_graph if kind is not None and not isinstance(kind, (tuple, list)): @@ -80,7 +89,7 @@ def use_ts_backend(): # OV infer: core = Core() compiled = core.compile_model(converted_model, ie_device) - infer_res = compiled(deepcopy(inputs)) + infer_res = compiled(deepcopy(ov_inputs)) if hasattr(self, 'skip_framework') and self.skip_framework: warnings.warn('Framework is skipped') @@ -266,25 +275,33 @@ def get_params(ie_device=None, precision=None): return test_args -def flattenize_dict_outputs(res): +def flattenize_dict_outputs(res, types): if isinstance(res, dict): - return flattenize_outputs(res.values()) + return flattenize(res.values(), types) -def flattenize_outputs(res): +def flattenize(res, types: list): results = [] for res_item in res: # if None is at output we skip it if res_item is None: continue # If input is list or tuple flattenize it - if isinstance(res_item, (list, tuple)): - decomposed_res = flattenize_outputs(res_item) + if isinstance(res_item, (list, tuple)) and type(res_item) in types: + decomposed_res = flattenize(res_item, types) results.extend(decomposed_res) continue - if isinstance(res_item, dict): - decomposed_res = flattenize_dict_outputs(res_item) + if isinstance(res_item, dict) and type(res_item) in types: + decomposed_res = flattenize_dict_outputs(res_item, types) results.extend(decomposed_res) continue results.append(res_item) return results + + +def flattenize_outputs(res): + return flattenize(res, [list, tuple, dict]) + + +def flattenize_inputs(res): + return flattenize(res, [tuple]) diff --git a/tests/layer_tests/pytorch_tests/test_tuple_construct.py 
b/tests/layer_tests/pytorch_tests/test_tuple_construct.py index 9e782079965535..a8bd03731c644c 100644 --- a/tests/layer_tests/pytorch_tests/test_tuple_construct.py +++ b/tests/layer_tests/pytorch_tests/test_tuple_construct.py @@ -33,6 +33,11 @@ class prim_tuple_construct_with_list(torch.nn.Module): def forward(self, x): return (x, [None, x + x], None) + class prim_tuple_construct_with_tensor_tail(torch.nn.Module): + + def forward(self, x): + return ((x, x + x), x + x + x) + class prim_tuple_construct_with_list_and_tuple(torch.nn.Module): def forward(self, x): @@ -43,6 +48,7 @@ def forward(self, x): "multiple": prim_tuple_construct, "none": prim_tuple_construct_with_none, "list": prim_tuple_construct_with_list, + "tensor_tail": prim_tuple_construct_with_tensor_tail, "list_and_tuple": prim_tuple_construct_with_list_and_tuple } @@ -51,11 +57,11 @@ def forward(self, x): return model(), ref_net, "prim::TupleConstruct" - @pytest.mark.parametrize("case", ["single", "multiple", "none", "list", "list_and_tuple"]) + @pytest.mark.parametrize("case", ["single", "multiple", "none", "list", "tensor_tail", "list_and_tuple"]) @pytest.mark.nightly def test_tuple_construct(self, case, ie_device, precision, ir_version): self._test(*self.create_model(case), ie_device, precision, ir_version) - + class TestTupleConstructTupleUnpack(PytorchLayerTest): def _prepare_input(self): @@ -69,7 +75,7 @@ class prim_tuple_construct_tuple_unpack(torch.nn.Module): def forward(self, x): x1, x2, x3, x4, x5 = self.prepare_input(x) return x1, x2, x3, x4, x5 - + def prepare_input(self, x): return x, x + 2, None, x.reshape(-1), (x * 10).to(torch.int32) @@ -80,4 +86,106 @@ def prepare_input(self, x): @pytest.mark.nightly def test_tuple_construct_unpack(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, freeze_model=False) \ No newline at end of file + self._test(*self.create_model(), ie_device, precision, ir_version, freeze_model=False) + + +class 
TestTupleUnpackParameterSingle(PytorchLayerTest): + def _prepare_input(self): + def tensor_gen(): + return np.random.uniform(0, 50, (1, 2, 10)).astype(np.float32) + return ( (tensor_gen(), tensor_gen()), ) + + def create_model(self): + import torch + from typing import Tuple + + class model(torch.nn.Module): + + def forward(self, x: Tuple[torch.Tensor, torch.Tensor]): + x1, x2 = x + return x1, x2 + + + return model(), None, ["prim::TupleUnpack"] + + @pytest.mark.nightly + def test(self, ie_device, precision, ir_version): + self._test(*self.create_model(), ie_device, precision, ir_version) + + +class TestTupleUnpackParameterSingleMixed(PytorchLayerTest): + def _prepare_input(self): + def tensor_gen(): + return np.random.uniform(0, 50, (1, 2, 10)).astype(np.float32) + # generate tensor with a different shape for easier mismatch detection in case of mixed input order + def tensor_gen_2(): + return np.random.uniform(0, 50, (2, 3)).astype(np.float32) + return (tensor_gen_2(), (tensor_gen(), tensor_gen()), tensor_gen_2()) + + def create_model(self): + import torch + from typing import Tuple + + class model(torch.nn.Module): + + def forward(self, y1, x: Tuple[torch.Tensor, torch.Tensor], y2): + x1, x2 = x + return x1, x2, y1, y2 + + + return model(), None, ["prim::TupleUnpack"] + + @pytest.mark.nightly + def test(self, ie_device, precision, ir_version): + self._test(*self.create_model(), ie_device, precision, ir_version) + + +class TestTupleUnpackParameterNested(PytorchLayerTest): + def _prepare_input(self): + def tensor_gen(): + return np.random.uniform(0, 50, (1, 2, 10)).astype(np.float32) + return ( ((tensor_gen(), tensor_gen()), (tensor_gen(), tensor_gen())), ) + + def create_model(self): + import torch + from typing import Tuple + + class model(torch.nn.Module): + + def forward(self, x: Tuple[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor, torch.Tensor]]): + x1, x2 = x + y1, y2 = x1 + y3, y4 = x2 + return y1, y2, y3, y4 + + + return model(), None, 
["prim::TupleUnpack"] + + @pytest.mark.nightly + def test(self, ie_device, precision, ir_version): + self._test(*self.create_model(), ie_device, precision, ir_version) + + +class TestTupleUnpackParameterMultiple(PytorchLayerTest): + def _prepare_input(self): + def tensor_gen(): + return np.random.uniform(0, 50, (1, 2, 10)).astype(np.float32) + return ( (tensor_gen(), tensor_gen()), (tensor_gen(), tensor_gen()) ) + + def create_model(self): + import torch + from typing import Tuple + + class model(torch.nn.Module): + + def forward(self, x: Tuple[torch.Tensor, torch.Tensor], y: Tuple[torch.Tensor, torch.Tensor]): + z1, z2 = x + z3, z4 = y + return z1, z2, z3, z4 + + + return model(), None, ["prim::TupleUnpack"] + + @pytest.mark.nightly + def test(self, ie_device, precision, ir_version): + self._test(*self.create_model(), ie_device, precision, ir_version) diff --git a/thirdparty/dependencies.cmake b/thirdparty/dependencies.cmake index c439df4c569d13..3095e1c6416fe5 100644 --- a/thirdparty/dependencies.cmake +++ b/thirdparty/dependencies.cmake @@ -2,6 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 # +if(CMAKE_TOOLCHAIN_FILE MATCHES "vcpkg" OR DEFINED VCPKG_VERBOSE) + set(OV_VCPKG_BUILD ON) +elseif(CMAKE_TOOLCHAIN_FILE MATCHES "conan_toolchain" OR DEFINED CONAN_EXPORTED) + set(OV_CONAN_BUILD ON) +endif() + set(_old_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) set(_old_CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ${CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE}) @@ -363,8 +369,7 @@ endif() # if(ENABLE_SAMPLES OR ENABLE_TESTS) - if(CMAKE_TOOLCHAIN_FILE MATCHES "vcpkg " OR DEFINED VCPKG_VERBOSE OR - CMAKE_TOOLCHAIN_FILE MATCHES "conan_toolchain" OR DEFINED CONAN_EXPORTED) + if(OV_VCPKG_BUILD OR OV_CONAN_BUILD) # vcpkg contains only libs compiled with threads # conan case find_package(gflags QUIET) @@ -446,15 +451,16 @@ if(ENABLE_OV_PADDLE_FRONTEND OR ENABLE_OV_ONNX_FRONTEND OR ENABLE_OV_TF_FRONTEND if(CMAKE_VERBOSE_MAKEFILE) set(Protobuf_DEBUG ON) endif() - # Note: we also specify
'protobuf' in NAMES because of vcpkg - set(find_package_protobuf_names Protobuf protobuf) + if(OV_VCPKG_BUILD) + set(protobuf_config CONFIG) + endif() # try to find newer version first (major is changed) # see https://protobuf.dev/support/version-support/ and # https://github.com/protocolbuffers/protobuf/commit/d61f75ff6db36b4f9c0765f131f8edc2f86310fa - find_package(Protobuf 4.22.0 QUIET NAMES ${find_package_protobuf_names}) + find_package(Protobuf 4.22.0 QUIET ${protobuf_config}) if(NOT Protobuf_FOUND) # otherwise, fallback to existing default - find_package(Protobuf 3.20.3 REQUIRED NAMES ${find_package_protobuf_names}) + find_package(Protobuf 3.20.3 REQUIRED ${protobuf_config}) endif() set(PROTOC_EXECUTABLE protobuf::protoc) else() diff --git a/thirdparty/open_model_zoo b/thirdparty/open_model_zoo index c7a13a842d4133..744f799a2485af 160000 --- a/thirdparty/open_model_zoo +++ b/thirdparty/open_model_zoo @@ -1 +1 @@ -Subproject commit c7a13a842d41333397ea6c3f9bc5a7053da00eec +Subproject commit 744f799a2485af208e33c2b16db76e5eacf58219 diff --git a/tools/mo/requirements_mxnet.txt b/tools/mo/requirements_mxnet.txt index 339a09689c7537..8b6561f761b00d 100644 --- a/tools/mo/requirements_mxnet.txt +++ b/tools/mo/requirements_mxnet.txt @@ -1,5 +1,5 @@ -c ../constraints.txt -numpy>=1.16.6,<1.26 +numpy>=1.16.6,<1.24 mxnet networkx defusedxml diff --git a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py index ae9973ebab84b0..fd7c3ee7384aac 100644 --- a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +++ b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py @@ -43,7 +43,7 @@ def update_list_or_dict(container, name, idx, value): container[idx] = value return - + def get_value_from_list_or_dict(container, name, idx): if isinstance(container, dict): if name is None: @@ -87,8 +87,8 @@ def extract_input_info_from_example(args, inputs): example_dtype = 
pt_to_ov_type_map.get(str(dtype)) user_dtype = get_value_from_list_or_dict(data_types, input_name, input_id) if user_dtype is not None and example_dtype.to_dtype() != user_dtype: - raise Error(f"Defined input type {user_dtype} is not equal to provided example_input type {example_dtype.to_dtype()}") - + raise Error(f"Defined input type {user_dtype} is not equal to provided example_input type {example_dtype.to_dtype()}") + data_rank = getattr(example_input, "ndim", 0) user_input_shape = get_value_from_list_or_dict(input_shapes, input_name, input_id) if user_input_shape.rank.get_length() != data_rank: @@ -108,7 +108,7 @@ def extract_input_info_from_example(args, inputs): input_name = input_names[input_id] if input_names else None update_list_or_dict(input_shapes, input_name, input_id, input_shape) update_list_or_dict(data_types, input_name, input_id, ov_dtype.to_dtype()) - + args.placeholder_data_types = data_types args.placeholder_shapes = input_shapes if not args.input and input_names: @@ -126,6 +126,9 @@ def to_torch_tensor(tensor): return torch.tensor(tensor.data) if isinstance(tensor, (float, int, bool)): return tensor + if isinstance(tensor, tuple): + # TODO: Function to_torch_tensor should be renamed as it handles not only a tensor + return tuple(to_torch_tensor(x) for x in tensor) else: raise Error("Unexpected type of example_input. Supported types torch.Tensor, np.array or ov.Tensor. " "Got {}".format(type(tensor)))