From 3ec7f427e467d149a5f79500aaecd94b07185e85 Mon Sep 17 00:00:00 2001 From: Christophe Favergeon Date: Mon, 26 Feb 2024 13:50:57 +0100 Subject: [PATCH 1/5] First release of the CMSIS-DSP++ C++ extension --- Documentation/Doxygen/dsp.dxy.in | 19 +- Documentation/Doxygen/src/building.md | 29 + Documentation/Doxygen/src/code_size.md | 14 + Documentation/Doxygen/src/dsppp_main.md | 18 + Documentation/Doxygen/src/fusion.md | 39 + Documentation/Doxygen/src/guidelines.md | 1 + Documentation/Doxygen/src/introduction.md | 64 + Documentation/Doxygen/src/mainpage.md | 17 +- Documentation/Doxygen/src/matrix.md | 168 ++ Documentation/Doxygen/src/memory_allocator.md | 87 + .../Doxygen/src/memory_static_dynamic.md | 35 + Documentation/Doxygen/src/template.md | 60 + Documentation/Doxygen/src/vector.md | 112 + Documentation/Doxygen/src/vectorop.md | 100 + dsppp/.gitignore | 13 + dsppp/Examples/dot_product.cpp | 52 + dsppp/Examples/matrix_op.cpp | 109 + dsppp/Examples/vector_op.cpp | 83 + dsppp/Include/dsppp/DSP/basic.hpp | 256 +++ dsppp/Include/dsppp/DSP/matrix_multiply.hpp | 367 ++++ dsppp/Include/dsppp/DSP/memory.hpp | 98 + dsppp/Include/dsppp/DSP/num_features.hpp | 14 + dsppp/Include/dsppp/DSP/q15.hpp | 238 +++ dsppp/Include/dsppp/DSP/q7.hpp | 264 +++ dsppp/Include/dsppp/Helium/basic.hpp | 223 ++ dsppp/Include/dsppp/Helium/float.hpp | 426 ++++ dsppp/Include/dsppp/Helium/half.hpp | 520 +++++ .../Include/dsppp/Helium/matrix_multiply.hpp | 335 +++ .../dsppp/Helium/matrix_multiply_f16.hpp | 404 ++++ .../dsppp/Helium/matrix_multiply_f32.hpp | 270 +++ .../dsppp/Helium/matrix_multiply_fixed.hpp | 613 ++++++ dsppp/Include/dsppp/Helium/num_features.hpp | 17 + dsppp/Include/dsppp/Helium/q15.hpp | 461 ++++ dsppp/Include/dsppp/Helium/q31.hpp | 345 +++ dsppp/Include/dsppp/Helium/q7.hpp | 463 ++++ dsppp/Include/dsppp/Neon/basic.hpp | 133 ++ dsppp/Include/dsppp/Neon/float.hpp | 105 + dsppp/Include/dsppp/Neon/num_features.hpp | 5 + dsppp/Include/dsppp/Scalar/basic.hpp | 189 ++ .../Include/dsppp/Scalar/matrix_multiply.hpp | 134 ++ .../dsppp/Scalar/matrix_multiply_fixed.hpp | 124 ++ .../dsppp/Scalar/matrix_multiply_float.hpp | 119 ++ dsppp/Include/dsppp/algorithms.hpp | 269 +++ dsppp/Include/dsppp/arch.hpp | 64 + dsppp/Include/dsppp/arch_detection.hpp | 281 +++ dsppp/Include/dsppp/common.hpp | 73 + dsppp/Include/dsppp/fixed_point.hpp | 1036 +++++++++ dsppp/Include/dsppp/forward.hpp | 149 ++ dsppp/Include/dsppp/fusion.hpp | 760 +++++++ dsppp/Include/dsppp/fusion_ops.hpp | 358 ++++ dsppp/Include/dsppp/matrix.hpp | 647 ++++++ dsppp/Include/dsppp/matrix_impl.hpp | 612 ++++++ dsppp/Include/dsppp/matrix_view.hpp | 751 +++++++ dsppp/Include/dsppp/memory_pool.hpp | 259 +++ dsppp/Include/dsppp/num_features/double.hpp | 63 + dsppp/Include/dsppp/num_features/float.hpp | 77 + dsppp/Include/dsppp/num_features/group.hpp | 171 ++ dsppp/Include/dsppp/num_features/half.hpp | 76 + dsppp/Include/dsppp/num_features/q15.hpp | 66 + dsppp/Include/dsppp/num_features/q31.hpp | 65 + dsppp/Include/dsppp/num_features/q7.hpp | 57 + dsppp/Include/dsppp/number.hpp | 190 ++ dsppp/Include/dsppp/unroll.hpp | 247 +++ dsppp/Include/dsppp/vec.hpp | 442 ++++ dsppp/Include/dsppp/vector_impl.hpp | 576 +++++ dsppp/Include/dsppp/vector_view.hpp | 449 ++++ dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct | 80 + .../ARMCM0P/ARMCM0plus_ac6.sct.base@1.0.0 | 80 + dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld | 263 +++ .../ARMCM0P/ARMCM0plus_gcc.ld.base@2.2.0 | 263 +++ .../RTE/Device/ARMCM0P/ac6_linker_script.sct | 111 + .../RTE/Device/ARMCM0P/clang_linker_script.ld | 353 
++++ dsppp/RTE/Device/ARMCM0P/gcc_linker_script.ld | 294 +++ dsppp/RTE/Device/ARMCM0P/regions_ARMCM0P.h | 60 + dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c | 146 ++ .../ARMCM0P/startup_ARMCM0plus.c.base@3.0.0 | 146 ++ dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c | 69 + .../ARMCM0P/system_ARMCM0plus.c.base@2.0.0 | 69 + dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct | 80 + .../Device/ARMCM4/ARMCM4_ac6.sct.base@1.0.0 | 80 + dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld | 263 +++ .../Device/ARMCM4/ARMCM4_gcc.ld.base@2.2.0 | 263 +++ .../RTE/Device/ARMCM4/clang_linker_script.ld | 353 ++++ dsppp/RTE/Device/ARMCM4/regions_ARMCM4.h | 60 + dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c | 150 ++ .../Device/ARMCM4/startup_ARMCM4.c.base@3.0.0 | 150 ++ dsppp/RTE/Device/ARMCM4/system_ARMCM4.c | 79 + .../Device/ARMCM4/system_ARMCM4.c.base@2.0.0 | 79 + dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h | 84 + .../SSE-300-MPS3/RTE_Device.h.base@1.1.0 | 84 + .../Device/SSE-300-MPS3/cmsis_driver_config.h | 25 + .../cmsis_driver_config.h.base@1.1.1 | 25 + dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h | 149 ++ .../SSE-300-MPS3/device_cfg.h.base@1.1.3 | 149 ++ .../SSE-300-MPS3/linker_SSE300MPS3_secure.ld | 242 +++ .../linker_SSE300MPS3_secure.ld.base@1.0.0 | 242 +++ .../SSE-300-MPS3/linker_SSE300MPS3_secure.sct | 62 + .../linker_SSE300MPS3_secure.sct.base@1.1.0 | 62 + dsppp/RTE/Device/SSE-300-MPS3/region_defs.h | 44 + .../SSE-300-MPS3/region_defs.h.base@1.0.0 | 44 + dsppp/RTE/Device/SSE-300-MPS3/region_limits.h | 45 + .../SSE-300-MPS3/region_limits.h.base@1.0.0 | 45 + .../Device/SSE-300-MPS3/startup_SSE300MPS3.c | 375 ++++ .../startup_SSE300MPS3.c.base@1.1.1 | 375 ++++ .../Device/SSE-300-MPS3/system_SSE300MPS3.c | 93 + .../system_SSE300MPS3.c.base@1.1.1 | 93 + .../Device/SSE_300_MPS3/ac6_linker_script.sct | 111 + .../SSE_300_MPS3/clang_linker_script.ld | 353 ++++ .../Device/SSE_300_MPS3/gcc_linker_script.ld | 294 +++ .../regions_V2M_MPS3_SSE_300_FVP.h | 400 ++++ dsppp/RTE/_Release_IPSS_M0P/RTE_Components.h | 20 + dsppp/RTE/_Release_IPSS_M4/RTE_Components.h | 20 + .../RTE_Components.h | 23 + .../RTE_Components.h | 25 + .../RTE_Components.h | 20 + dsppp/RTE/_Release_VHT-M0P/RTE_Components.h | 20 + dsppp/RTE/_Release_VHT-M4/RTE_Components.h | 20 + dsppp/RTE/_Release_VHT_M0P/RTE_Components.h | 20 + dsppp/RTE/_Release_VHT_M4/RTE_Components.h | 20 + dsppp/allocator.cpp | 98 + dsppp/allocator.h | 124 ++ dsppp/cdefault.yml | 142 ++ dsppp/clang_sse300.c | 65 + dsppp/example.cproject.yml | 120 ++ dsppp/fvp_configs/VHT-Corstone-300.txt | 9 + dsppp/fvp_configs/VHT-M0P.txt | 3 + dsppp/fvp_configs/VHT-M4.txt | 3 + dsppp/getserial.py | 28 + dsppp/linker_scripts/ARMCM0P/region_defs.h | 60 + dsppp/linker_scripts/ARMCM4/region_defs.h | 60 + .../linker_scripts/SSE-300-MPS3/region_defs.h | 44 + .../SSE-300-MPS3/region_limits.h | 45 + dsppp/linker_scripts/ac6_m0p_mps3_s.sct | 111 + dsppp/linker_scripts/ac6_m4_mps3_s.sct | 111 + dsppp/linker_scripts/ac6_sse300_mps3_s.sct | 79 + dsppp/linker_scripts/ac6_sse310_mps3_s.sct | 60 + dsppp/linker_scripts/clang_m0p_mps3.ld | 353 ++++ dsppp/linker_scripts/clang_m4_mps3.ld | 353 ++++ dsppp/linker_scripts/clang_sse300_mps3.sct | 364 ++++ dsppp/linker_scripts/clang_sse310_mps3.sct | 363 ++++ dsppp/linker_scripts/gcc_m0p_mps3.ld | 294 +++ dsppp/linker_scripts/gcc_m4_mps3.ld | 294 +++ dsppp/linker_scripts/gcc_sse300_mps3.ld | 295 +++ dsppp/linker_scripts/gcc_sse310_mps3_s.ld | 295 +++ dsppp/main.c | 93 + dsppp/mps3run.py | 78 + dsppp/process.py | 137 ++ dsppp/run_all.py | 390 ++++ dsppp/test.cbuild-pack.yml | 17 + 
dsppp/test.cproject.yml | 146 ++ dsppp/test.csolution.yml | 108 + dsppp/test_config.h | 13 + dsppp/tests/bench.c | 3 + dsppp/tests/bench.h | 60 + dsppp/tests/cmsis_tests.h | 699 +++++++ dsppp/tests/cmsisdsp.cpp | 1146 ++++++++++ dsppp/tests/col_test.cpp | 112 + dsppp/tests/common_tests.cpp | 48 + dsppp/tests/common_tests.h | 282 +++ dsppp/tests/debug_mat.h | 738 +++++++ dsppp/tests/debug_test.cpp | 45 + dsppp/tests/debug_test_external.cpp | 56 + dsppp/tests/dot_test.cpp | 213 ++ dsppp/tests/filter_test.cpp | 657 ++++++ dsppp/tests/fusion_test.cpp | 247 +++ dsppp/tests/matrix_test.cpp | 1863 +++++++++++++++++ dsppp/tests/matrix_utils.h | 640 ++++++ dsppp/tests/row_test.cpp | 204 ++ dsppp/tests/test.h | 19 + dsppp/tests/vector_test.cpp | 219 ++ 170 files changed, 33958 insertions(+), 4 deletions(-) create mode 100644 Documentation/Doxygen/src/building.md create mode 100644 Documentation/Doxygen/src/code_size.md create mode 100644 Documentation/Doxygen/src/dsppp_main.md create mode 100644 Documentation/Doxygen/src/fusion.md create mode 100644 Documentation/Doxygen/src/guidelines.md create mode 100644 Documentation/Doxygen/src/introduction.md create mode 100644 Documentation/Doxygen/src/matrix.md create mode 100644 Documentation/Doxygen/src/memory_allocator.md create mode 100644 Documentation/Doxygen/src/memory_static_dynamic.md create mode 100644 Documentation/Doxygen/src/template.md create mode 100644 Documentation/Doxygen/src/vector.md create mode 100644 Documentation/Doxygen/src/vectorop.md create mode 100644 dsppp/.gitignore create mode 100644 dsppp/Examples/dot_product.cpp create mode 100644 dsppp/Examples/matrix_op.cpp create mode 100644 dsppp/Examples/vector_op.cpp create mode 100644 dsppp/Include/dsppp/DSP/basic.hpp create mode 100644 dsppp/Include/dsppp/DSP/matrix_multiply.hpp create mode 100644 dsppp/Include/dsppp/DSP/memory.hpp create mode 100644 dsppp/Include/dsppp/DSP/num_features.hpp create mode 100644 dsppp/Include/dsppp/DSP/q15.hpp create mode 100644 dsppp/Include/dsppp/DSP/q7.hpp create mode 100644 dsppp/Include/dsppp/Helium/basic.hpp create mode 100644 dsppp/Include/dsppp/Helium/float.hpp create mode 100644 dsppp/Include/dsppp/Helium/half.hpp create mode 100644 dsppp/Include/dsppp/Helium/matrix_multiply.hpp create mode 100644 dsppp/Include/dsppp/Helium/matrix_multiply_f16.hpp create mode 100644 dsppp/Include/dsppp/Helium/matrix_multiply_f32.hpp create mode 100644 dsppp/Include/dsppp/Helium/matrix_multiply_fixed.hpp create mode 100644 dsppp/Include/dsppp/Helium/num_features.hpp create mode 100644 dsppp/Include/dsppp/Helium/q15.hpp create mode 100644 dsppp/Include/dsppp/Helium/q31.hpp create mode 100644 dsppp/Include/dsppp/Helium/q7.hpp create mode 100644 dsppp/Include/dsppp/Neon/basic.hpp create mode 100644 dsppp/Include/dsppp/Neon/float.hpp create mode 100644 dsppp/Include/dsppp/Neon/num_features.hpp create mode 100644 dsppp/Include/dsppp/Scalar/basic.hpp create mode 100644 dsppp/Include/dsppp/Scalar/matrix_multiply.hpp create mode 100644 dsppp/Include/dsppp/Scalar/matrix_multiply_fixed.hpp create mode 100644 dsppp/Include/dsppp/Scalar/matrix_multiply_float.hpp create mode 100644 dsppp/Include/dsppp/algorithms.hpp create mode 100644 dsppp/Include/dsppp/arch.hpp create mode 100644 dsppp/Include/dsppp/arch_detection.hpp create mode 100644 dsppp/Include/dsppp/common.hpp create mode 100644 dsppp/Include/dsppp/fixed_point.hpp create mode 100644 dsppp/Include/dsppp/forward.hpp create mode 100644 dsppp/Include/dsppp/fusion.hpp create mode 100644 dsppp/Include/dsppp/fusion_ops.hpp 
create mode 100644 dsppp/Include/dsppp/matrix.hpp create mode 100644 dsppp/Include/dsppp/matrix_impl.hpp create mode 100644 dsppp/Include/dsppp/matrix_view.hpp create mode 100644 dsppp/Include/dsppp/memory_pool.hpp create mode 100644 dsppp/Include/dsppp/num_features/double.hpp create mode 100644 dsppp/Include/dsppp/num_features/float.hpp create mode 100644 dsppp/Include/dsppp/num_features/group.hpp create mode 100644 dsppp/Include/dsppp/num_features/half.hpp create mode 100644 dsppp/Include/dsppp/num_features/q15.hpp create mode 100644 dsppp/Include/dsppp/num_features/q31.hpp create mode 100644 dsppp/Include/dsppp/num_features/q7.hpp create mode 100644 dsppp/Include/dsppp/number.hpp create mode 100644 dsppp/Include/dsppp/unroll.hpp create mode 100644 dsppp/Include/dsppp/vec.hpp create mode 100644 dsppp/Include/dsppp/vector_impl.hpp create mode 100644 dsppp/Include/dsppp/vector_view.hpp create mode 100644 dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct create mode 100644 dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct.base@1.0.0 create mode 100644 dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld create mode 100644 dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld.base@2.2.0 create mode 100644 dsppp/RTE/Device/ARMCM0P/ac6_linker_script.sct create mode 100644 dsppp/RTE/Device/ARMCM0P/clang_linker_script.ld create mode 100644 dsppp/RTE/Device/ARMCM0P/gcc_linker_script.ld create mode 100644 dsppp/RTE/Device/ARMCM0P/regions_ARMCM0P.h create mode 100644 dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c create mode 100644 dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c.base@3.0.0 create mode 100644 dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c create mode 100644 dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c.base@2.0.0 create mode 100644 dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct create mode 100644 dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct.base@1.0.0 create mode 100644 dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld create mode 100644 dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld.base@2.2.0 create mode 100644 dsppp/RTE/Device/ARMCM4/clang_linker_script.ld create mode 100644 dsppp/RTE/Device/ARMCM4/regions_ARMCM4.h create mode 100644 dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c create mode 100644 dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c.base@3.0.0 create mode 100644 dsppp/RTE/Device/ARMCM4/system_ARMCM4.c create mode 100644 dsppp/RTE/Device/ARMCM4/system_ARMCM4.c.base@2.0.0 create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h.base@1.1.0 create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h.base@1.1.1 create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h.base@1.1.3 create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld.base@1.0.0 create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct.base@1.1.0 create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/region_defs.h create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/region_defs.h.base@1.0.0 create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/region_limits.h create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/region_limits.h.base@1.0.0 create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c.base@1.1.1 create mode 100644 
dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c create mode 100644 dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c.base@1.1.1 create mode 100644 dsppp/RTE/Device/SSE_300_MPS3/ac6_linker_script.sct create mode 100644 dsppp/RTE/Device/SSE_300_MPS3/clang_linker_script.ld create mode 100644 dsppp/RTE/Device/SSE_300_MPS3/gcc_linker_script.ld create mode 100644 dsppp/RTE/Device/SSE_300_MPS3/regions_V2M_MPS3_SSE_300_FVP.h create mode 100644 dsppp/RTE/_Release_IPSS_M0P/RTE_Components.h create mode 100644 dsppp/RTE/_Release_IPSS_M4/RTE_Components.h create mode 100644 dsppp/RTE/_Release_LLVM-Corstone-300/RTE_Components.h create mode 100644 dsppp/RTE/_Release_MPS3-Corstone-300/RTE_Components.h create mode 100644 dsppp/RTE/_Release_VHT-Corstone-300/RTE_Components.h create mode 100644 dsppp/RTE/_Release_VHT-M0P/RTE_Components.h create mode 100644 dsppp/RTE/_Release_VHT-M4/RTE_Components.h create mode 100644 dsppp/RTE/_Release_VHT_M0P/RTE_Components.h create mode 100644 dsppp/RTE/_Release_VHT_M4/RTE_Components.h create mode 100644 dsppp/allocator.cpp create mode 100644 dsppp/allocator.h create mode 100644 dsppp/cdefault.yml create mode 100644 dsppp/clang_sse300.c create mode 100644 dsppp/example.cproject.yml create mode 100644 dsppp/fvp_configs/VHT-Corstone-300.txt create mode 100644 dsppp/fvp_configs/VHT-M0P.txt create mode 100644 dsppp/fvp_configs/VHT-M4.txt create mode 100644 dsppp/getserial.py create mode 100644 dsppp/linker_scripts/ARMCM0P/region_defs.h create mode 100644 dsppp/linker_scripts/ARMCM4/region_defs.h create mode 100644 dsppp/linker_scripts/SSE-300-MPS3/region_defs.h create mode 100644 dsppp/linker_scripts/SSE-300-MPS3/region_limits.h create mode 100644 dsppp/linker_scripts/ac6_m0p_mps3_s.sct create mode 100644 dsppp/linker_scripts/ac6_m4_mps3_s.sct create mode 100644 dsppp/linker_scripts/ac6_sse300_mps3_s.sct create mode 100644 dsppp/linker_scripts/ac6_sse310_mps3_s.sct create mode 100644 dsppp/linker_scripts/clang_m0p_mps3.ld create mode 100644 dsppp/linker_scripts/clang_m4_mps3.ld create mode 100644 dsppp/linker_scripts/clang_sse300_mps3.sct create mode 100644 dsppp/linker_scripts/clang_sse310_mps3.sct create mode 100644 dsppp/linker_scripts/gcc_m0p_mps3.ld create mode 100644 dsppp/linker_scripts/gcc_m4_mps3.ld create mode 100644 dsppp/linker_scripts/gcc_sse300_mps3.ld create mode 100644 dsppp/linker_scripts/gcc_sse310_mps3_s.ld create mode 100644 dsppp/main.c create mode 100644 dsppp/mps3run.py create mode 100644 dsppp/process.py create mode 100644 dsppp/run_all.py create mode 100644 dsppp/test.cbuild-pack.yml create mode 100644 dsppp/test.cproject.yml create mode 100644 dsppp/test.csolution.yml create mode 100644 dsppp/test_config.h create mode 100644 dsppp/tests/bench.c create mode 100644 dsppp/tests/bench.h create mode 100644 dsppp/tests/cmsis_tests.h create mode 100644 dsppp/tests/cmsisdsp.cpp create mode 100644 dsppp/tests/col_test.cpp create mode 100644 dsppp/tests/common_tests.cpp create mode 100644 dsppp/tests/common_tests.h create mode 100644 dsppp/tests/debug_mat.h create mode 100644 dsppp/tests/debug_test.cpp create mode 100644 dsppp/tests/debug_test_external.cpp create mode 100644 dsppp/tests/dot_test.cpp create mode 100644 dsppp/tests/filter_test.cpp create mode 100644 dsppp/tests/fusion_test.cpp create mode 100644 dsppp/tests/matrix_test.cpp create mode 100644 dsppp/tests/matrix_utils.h create mode 100644 dsppp/tests/row_test.cpp create mode 100644 dsppp/tests/test.h create mode 100644 dsppp/tests/vector_test.cpp diff --git a/Documentation/Doxygen/dsp.dxy.in 
b/Documentation/Doxygen/dsp.dxy.in
index 73c175731..f514addf5 100644
--- a/Documentation/Doxygen/dsp.dxy.in
+++ b/Documentation/Doxygen/dsp.dxy.in
@@ -573,14 +573,14 @@ HIDE_UNDOC_MEMBERS = YES
 # if EXTRACT_ALL is enabled.
 # The default value is: NO.
-HIDE_UNDOC_CLASSES = NO
+HIDE_UNDOC_CLASSES = YES
 # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
 # declarations. If set to NO, these declarations will be included in the
 # documentation.
 # The default value is: NO.
-HIDE_FRIEND_COMPOUNDS = NO
+HIDE_FRIEND_COMPOUNDS = YES
 # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
 # documentation blocks found inside the body of a function. If set to NO, these
@@ -919,11 +919,24 @@ WARN_LOGFILE =
 # Note: If this tag is empty the current directory is searched.
 INPUT = ./src/mainpage.md \
+ ./src/dsppp_main.md \
+ ./src/introduction.md \
+ ./src/template.md \
+ ./src/guidelines.md \
+ ./src/vectorop.md \
+ ./src/memory_allocator.md \
+ ./src/memory_static_dynamic.md \
+ ./src/code_size.md \
+ ./src/fusion.md \
+ ./src/vector.md \
+ ./src/matrix.md \
+ ./src/building.md \
 ./src/history.md \
 ./src/history.txt \
 ../../Examples/ARM \
 ../../Include/ \
- ../../Source/ \
+ ../../Source/ \
+ ../../dsppp/Include
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
diff --git a/Documentation/Doxygen/src/building.md b/Documentation/Doxygen/src/building.md
new file mode 100644
index 000000000..0cdf6b41b
--- /dev/null
+++ b/Documentation/Doxygen/src/building.md
@@ -0,0 +1,29 @@
+# Building and running examples {#dsppp_building}
+
+## To build
+
+First time:
+
+```shell
+cbuild -O cprj test.csolution.yml --toolchain AC6 -c example.Release+VHT-Corstone-300 -p -r --update-rte
+
+```
+
+Other times:
+
+```shell
+cbuild -O cprj test.csolution.yml --toolchain AC6 -c example.Release+VHT-Corstone-300
+```
+
+If you want to select another test, edit the file `example.cproject.yml` and uncomment the test.
+
+## To run
+
+If the tools have been installed with `vcpkg`:
+
+```
+FVP_Corstone_SSE-300_Ethos-U55.exe -f fvp_configs/VHT-Corstone-300.txt -a cpu0=cprj\out\example\VHT-Corstone-300\Release\example.axf
+```
+
+Otherwise, you'll need to use the path to your FVP installation.
+
diff --git a/Documentation/Doxygen/src/code_size.md b/Documentation/Doxygen/src/code_size.md
new file mode 100644
index 000000000..f06cf696b
--- /dev/null
+++ b/Documentation/Doxygen/src/code_size.md
@@ -0,0 +1,14 @@
+# Code size {#dsppp_code_size}
+
+It was explained in previous sections that the types `Vector<T,NB1>` and `Vector<T,NB2>` are considered as different types if `NB1` and `NB2` are different.
+
+A template algorithm is like a code generator that will generate different code for different values of the template arguments : the types.
+
+If you use a template algorithm with two different vector datatypes, it will generate different code for those two datatypes. The generated code will be specialized for the specific datatypes used and thus is likely to be more efficient.
+
+But it also means you get several implementations, and so more code size.
+
+If you have a lot of different sizes in your system, then you're likely to get too much code size and in that case it may be better to use dynamic objects instead of static ones.
+
+Dynamic objects are less efficient, so it is a trade-off between code size and speed.
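+
+As a minimal illustration of this trade-off (a sketch only; the `clear_first` helper and the header name are assumptions, not part of the library), two different static lengths are two different types, so a templated routine is instantiated once per static length, whereas the dynamic variant is compiled only once:
+
+```cpp
+#include <dsppp/matrix.hpp>   // assumed header name from the Include/dsppp folder added by this patch
+
+using namespace arm_cmsis_dsp;
+
+template<typename V>
+void clear_first(V &v)
+{
+    v[0] = 0.0f;   // same source, but one copy of the generated code per distinct type V
+}
+
+void code_size_example()
+{
+    Vector<float32_t, 16> a16;       // static length 16: its own type
+    Vector<float32_t, 32> a32;       // static length 32: another type, more generated code
+    Vector<float32_t>     ad(64);    // dynamic length: one shared implementation
+
+    clear_first(a16);                // instantiation #1
+    clear_first(a32);                // instantiation #2
+    clear_first(ad);                 // instantiation #3, shared by every dynamic length
+}
+```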
+
diff --git a/Documentation/Doxygen/src/dsppp_main.md b/Documentation/Doxygen/src/dsppp_main.md
new file mode 100644
index 000000000..df5325dbc
--- /dev/null
+++ b/Documentation/Doxygen/src/dsppp_main.md
@@ -0,0 +1,18 @@
+# DSP++ extension {#dsppp_main}
+
+C++ extensions to CMSIS-DSP using C++ template meta-programming (headers only).
+
+The headers are not yet part of the CMSIS-DSP pack since they are experimental. You can get them from the [CMSIS-DSP GitHub](https://github.com/ARM-software/CMSIS-DSP/dsppp/Include). There is nothing to build. Just include the headers when you want to use this framework.
+
+* @subpage dsppp_intro "Introduction"
+* @subpage dsppp_template "C++ template for C programmer"
+* @subpage dsppp_vector_example "Vector operation example"
+* @subpage dsppp_memory_allocator "Memory allocation"
+* @subpage dsppp_memory_static_dynamic "Static / Dynamic objects"
+* @subpage dsppp_code_size "Code size"
+* @subpage dsppp_fusion "Fusion mechanism"
+* @subpage dsppp_vector "Vector operators"
+* @subpage dsppp_matrix "Matrix operators"
+* @subpage dsppp_building "Building and running examples"
+* @subpage dsppp_guidelines "Usage guidelines"
+
diff --git a/Documentation/Doxygen/src/fusion.md b/Documentation/Doxygen/src/fusion.md
new file mode 100644
index 000000000..cec2f4f89
--- /dev/null
+++ b/Documentation/Doxygen/src/fusion.md
@@ -0,0 +1,39 @@
+# Fusion {#dsppp_fusion}
+
+```cpp
+Vector<float32_t,NB> d = a + b * c;
+```
+
+With this line of code, there is loop fusion : instead of having one loop per operator there is one loop for the whole computation.
+
+It is important to have some idea of how it works to avoid some mistakes in the use of the library.
+
+In the above code, `a + b * c` is not computing anything!
+`a + b * c` is creating a representation of the expression : an abstract syntax tree (AST) at build time.
+
+When this AST is assigned to the variable `d`, it is evaluated.
+The evaluation forces the inlining of the expression operators in one loop. The generated code thus contains only one loop with a fusion of all the operators : `+` and `*`.
+
+The library supports virtual vectors. They are a view on an existing part of a vector. You can use a virtual vector, for instance, to read or write some samples with a stride. A virtual vector does not own its memory.
+
+If you write:
+```cpp
+d = a;
+```
+
+and `d` and `a` are virtual vectors, then nothing will be written to `d`!
+
+`d` will become `a` and `a` will no longer be valid.
+
+If you want to copy a virtual vector you need to make an expression and write:
+
+```cpp
+d = copy(a);
+```
+
+Note that this problem occurs only for virtual vectors that do not own their memory.
+
+For real vectors, a copy would occur. But since there is no overhead in adding `copy`, it is better to do it to avoid problems.
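+
+The following stand-alone sketch (illustration only, not the library's actual classes) shows the principle that makes this fusion possible: the operator builds a lightweight expression object, and the single loop only runs when the expression is assigned to a destination:
+
+```cpp
+#include <cstddef>
+
+// Expression representing "lhs + rhs"; nothing is computed when it is built.
+template<typename LHS, typename RHS>
+struct AddExpr {
+    const LHS &lhs;
+    const RHS &rhs;
+    // Evaluating element i evaluates both operands at i: no temporary vector.
+    float operator[](std::size_t i) const { return lhs[i] + rhs[i]; }
+};
+
+struct Vec {
+    float data[8];
+    float  operator[](std::size_t i) const { return data[i]; }
+    float &operator[](std::size_t i)       { return data[i]; }
+
+    // Assignment from any expression: the single loop where the work happens.
+    template<typename E>
+    Vec &operator=(const E &e)
+    {
+        for (std::size_t i = 0; i < 8; i++) { data[i] = e[i]; }
+        return *this;
+    }
+};
+
+// Building the expression does not compute anything yet.
+inline AddExpr<Vec, Vec> operator+(const Vec &a, const Vec &b) { return {a, b}; }
+```
+
+With such a scheme, `d = a + b;` first builds an `AddExpr<Vec,Vec>` and the loop only runs inside `operator=`. The library generalizes this idea to all its operators, virtual vectors and matrixes.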
+
+
+
diff --git a/Documentation/Doxygen/src/guidelines.md b/Documentation/Doxygen/src/guidelines.md
new file mode 100644
index 000000000..bd8163784
--- /dev/null
+++ b/Documentation/Doxygen/src/guidelines.md
@@ -0,0 +1 @@
+# Guidelines {#dsppp_guidelines}
diff --git a/Documentation/Doxygen/src/introduction.md b/Documentation/Doxygen/src/introduction.md
new file mode 100644
index 000000000..4814dab6b
--- /dev/null
+++ b/Documentation/Doxygen/src/introduction.md
@@ -0,0 +1,64 @@
+## Introduction {#dsppp_intro}
+
+### Dot product example
+
+If you want to compute the dot product:
+
+\f[
+
+<scale*(\overrightarrow{a}+\overrightarrow{b}),\overrightarrow{c}*\overrightarrow{d}>
+
+\f]
+
+with CMSIS-DSP, you would write:
+
+```c
+arm_add_f32(a,b,tmp1,NB);
+arm_scale_f32(tmp1,scale,tmp2,NB);
+arm_mult_f32(c,d,tmp3,NB);
+arm_dot_prod_f32(tmp2,tmp3,NB,&r);
+```
+
+There are several limitations with this way of writing the code:
+
+1. The code needs to be rewritten and the `_f32` suffix changed if the developer wants to use another datatype
+
+2. Temporary buffers need to be allocated and managed (`tmp1`,`tmp2`,`tmp3`)
+
+3. The four function calls are four different loops. It is not good for data locality and caches. The computation is not done in one pass
+
+4. Each loop contains a small number of instructions. For instance, for the `arm_add_f32`, two loads, an add instruction and a store. It is not enough to enable the compiler to reorder the instructions to improve the performance
+
+With this new C++ template library, you can write:
+
+
+```cpp
+r = dot(scale*(a+b),c*d);
+```
+
+The code generated by this line computes the dot product in one pass with all the operators (`+`, `*`) included in the loop.
+There are no longer any temporary buffers.
+
+### Vector operations
+
+Let's look at another example:
+
+\f[
+
+\overrightarrow{d} = \overrightarrow{a} + \overrightarrow{b} * \overrightarrow{c}
+
+\f]
+
+With the C++ library, it can be written as:
+
+
+```cpp
+Vector<float32_t,NB> d = a + b * c;
+```
+
+Here again, all the vector operations (`+`,`*`) are done in one pass with one loop. There is no longer any temporary buffer.
+
+If you're coming from C and do not know anything about C++ templates, we have a very quick introduction: @ref dsppp_template "The minimum you need to know about C++ template to use this library".
+
+You can also jump directly to an @ref dsppp_vector_example "example with vector operations".
+
diff --git a/Documentation/Doxygen/src/mainpage.md b/Documentation/Doxygen/src/mainpage.md
index f33a65b9e..d081f0a3b 100644
--- a/Documentation/Doxygen/src/mainpage.md
+++ b/Documentation/Doxygen/src/mainpage.md
@@ -1,5 +1,7 @@
 # Overview {#mainpage}
 
+## Introduction
+
 This user manual describes the CMSIS DSP software library, a suite of common compute processing functions for use on Cortex-M and Cortex-A processor based devices.
 
 The library is divided into a number of functions each covering a specific category:
 
@@ -26,9 +28,21 @@ The library is providing vectorized versions of most algorithms for Helium and o
 When using a vectorized version, provide a little bit of padding after the end of a buffer (3 words) because the vectorized code may read a little bit after the end of a buffer. You don't have to modify your buffers but just ensure that the end of buffer + padding is not outside of a memory region.
 
+## Related projects
+
+### Python wrapper
+
 A Python wrapper is also available with a Python API as close as possible to the C one. It can be used to start developing and testing an algorithm with NumPy and SciPy before writing the C version.
 It is available on [PyPI.org](https://pypi.org/project/cmsisdsp/). It can be installed with: `pip install cmsisdsp`.
 
-## Using the Library {#using}
+### Experimental C++ template extension
+
+This extension is a set of C++ headers. They just need to be included to start using the features.
+
+Those headers are not yet part of the pack and you need to get them from the [GitHub repository](https://github.com/ARM-software/CMSIS-DSP/tree/main/Include).
+
+More documentation about the @ref dsppp_main "DSP++" extension.
+
+## Using the CMSIS-DSP Library {#using}
 
 The library is released in source form. It is strongly advised to compile the library using `-Ofast` optimization to have the best performances.
 
@@ -56,6 +70,7 @@ The table below explains the content of **ARM::CMSIS-DSP** pack.
 
 📂 Include | Include files for using and building the lib
 📂 PrivateInclude | Private include files for building the lib
 📂 Source | Source files
+ 📂 dsppp | Experimental C++ template extension
 📄 ARM.CMSIS-DSP.pdsc | CMSIS-Pack description file
 📄 LICENSE | License Agreement (Apache 2.0)
diff --git a/Documentation/Doxygen/src/matrix.md b/Documentation/Doxygen/src/matrix.md
new file mode 100644
index 000000000..c3c983b0d
--- /dev/null
+++ b/Documentation/Doxygen/src/matrix.md
@@ -0,0 +1,168 @@
+# Matrix {#dsppp_matrix}
+
+Matrixes can be used similarly to vectors:
+
+```cpp
+Matrix<float32_t,ROWS,COLS> a;
+Matrix<float32_t,ROWS,COLS> b;
+```
+
+If the dimensions of the matrixes are not known at build time, you would instead write:
+
+```
+Matrix<float32_t> a(rows,cols);
+Matrix<float32_t> b(rows,cols);
+```
+
+Once you have matrixes, you need to initialize them. A matrix is also a vector, so you can initialize it by indexing into the vector:
+
+```cpp
+for(std::size_t i=0;i result = a * a + b;
+```
+
+The operators `+` and `*` are merged into the loop. `*` is the element-wise multiply. For the vector / matrix products you should use the operator `dot`.
+
+Note that fusion of operators will not work with `dot(Matrix, Matrix)`. It is only supported with vectors : `dot(Vector,Vector)` or `dot(Matrix,Vector)`.
+
+## VectorView
+
+We can create virtual vectors which are views of some slices of the matrix.
+
+### Row vector
+
+To set the second row to `0.0f`, you can do:
+
+```
+result.row(1) = 0.0f;
+```
+
+To set the odd elements of the 3rd row to `0.0f` we can do:
+
+```
+result.row<2>(2,1) = 0.0f;
+```
+
+The first argument `2` is the row number (starting from `0`).
+
+The second argument `1` is where the view starts in the row: element `1`.
+
+`<2>` is the stride, known at build time.
+
+The `row` API is:
+
+```cpp
+template
+VectorView row(const index_t i,const index_t start=0,const index_t stop=C)
+
+```
+
+`stop` is the index of the first element **after** the end of the view.
+
+`i` is the row index.
+
+### Column vector
+
+There is a similar API for columns.
+
+Let's set the odd elements of column 3 to `5.0f`:
+
+```
+result.col<2>(2,1) = 5.0f;
+```
+
+## MatrixView
+
+It is also possible to create a virtual matrix : a view onto a subset of the matrix.
+
+Let's add the bottom right corner of the matrix to itself:
+
+```cpp
+result.sub(4,8,4,8) = result.sub(4,8,4,8) + result.sub(4,8,4,8);
+```
+
+The API is:
+
+```cpp
+MatrixView sub(const index_t rs,
+               const index_t re,
+               const index_t cs,
+               const index_t ce)
+```
+
+You specify the row start and row end, then column start and column end.
+
+Note that the end is the first index **after** the end of your rows or columns.
+
+No stride is supported for matrix view in this version of the library.
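+
+As a short recap of those view APIs (a sketch, assuming `result` is an `8x8` matrix as suggested by the indexes used above):
+
+```cpp
+result.row(1) = 0.0f;           // whole 2nd row
+result.row<2>(2,1) = 0.0f;      // 3rd row, stride 2, starting at element 1: the odd elements
+result.col<2>(2,1) = 5.0f;      // 3rd column, stride 2, starting at element 1
+
+// Rows 4..7 and columns 4..7 (the end indexes 8 are exclusive).
+result.sub(4,8,4,8) = result.sub(4,8,4,8) + result.sub(4,8,4,8);
+```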
+ +## Matrix operations + +In addition to the vector operations `+`,`-` and `*`, matrixes are supporting more operations: + +* `dot` for vector / matrix products +* `diagonal` to create a diagonal matrix from a vector. +* `identity` to create an identity matrix +* `tranpose` to create the transposed matrix +* `outer` for the outer product of two vectors + +### dot + +```cpp +result = dot(a,b); +``` + +The compiler may use the move semantic to copy the temporary result of the `dot` function to `result`. + +In this case, no copy would occur and `result` after the assignment would be a vector allocated by `dot` so using the `TMP_ALLOC` . + +### diagonal + +```cpp +result = Matrix::diagonal(c); +``` + +### identity + +```cpp +result = Matrix::identity(); +``` + +### transpose + +```cpp +result = a.transpose(); +``` + +or + +```cpp +transposeTo(result,a); +``` + +### outer product + +```cpp +result = outer(c,c); +``` + diff --git a/Documentation/Doxygen/src/memory_allocator.md b/Documentation/Doxygen/src/memory_allocator.md new file mode 100644 index 000000000..a539a3109 --- /dev/null +++ b/Documentation/Doxygen/src/memory_allocator.md @@ -0,0 +1,87 @@ +# Memory allocation {#dsppp_memory_allocator} + +By default, `malloc` is used. + +```cpp +Vector +``` + +is allocating a vector of dimension `NB` (known at build time) and datatype `float32_t`. + +The definition of the `Vector` template is: + +```cpp +template typename Allocator = TMP_ALLOC> +struct Vector:Vector_Base

+```
+
+It means that by default the memory allocator is `TMP_ALLOC`.
+
+This `TMP_ALLOC` `#define` can be changed if you define it before including any header from the library.
+
+An allocator should implement a template like:
+
+```cpp
+template<int L>
+struct malloc_allocator {
+   /* Dynamic dimension allocations (L<0) */
+   static char* allocate ( vector_length_t sz) noexcept;
+
+   /* Dimension L known at build time (L > 0) */
+   static char* allocate ( ) noexcept;
+
+   static void destroy ( char* ptr ) noexcept;
+
+};
+```
+
+It has no state because in practice we observed that compilers were generating more efficient code without state in the memory allocator template.
+
+If you don't want to use a `malloc` based memory allocator, you can replace it with your own memory allocator and implement an API like the one just shown in `malloc_allocator`.
+
+For instance, often in DSP pipelines, the dimensions of the vectors and matrixes are fixed and known at build time.
+In that case, you could replace the memory allocator by one using memory pools.
+
+With memory pools, allocation is nearly cost free and there is no fragmentation.
+
+The test framework of the library is providing an example in `allocator.h` and `allocator.cpp`.
+
+There are two memory allocators:
+
+1. `stat_allocator` is a `malloc` based allocator that gathers statistics about the memory allocations and how many buffers of each dimension are required
+
+2. `pool_allocator` can use the data generated by `stat_allocator` to pre-allocate memory pools that will then be used for the memory allocations. The memory pools are also creating aligned buffers.
+
+It is no more difficult (and in fact less difficult) than allocating temporary buffers in CMSIS-DSP.
+
+You could define the `TMP_ALLOC` with:
+
+```cpp
+#if defined(POOL_ALLOCATOR)
+#define TMP_ALLOC pool_allocator
+#else
+#define TMP_ALLOC stat_allocator
+#endif
+```
+
+You use `stat_allocator` by default. When your code is working, you switch to `pool_allocator` to get better performance and determinism.
+
+Another possibility is to use different vector types:
+
+```cpp
+template<typename T,int L>
+using PVector = Vector<T,L,pool_allocator>;
+```
+
+Note that you cannot avoid using `TMP_ALLOC` because some functions in the library are creating temporary objects. For instance, if you want to make an identity matrix, you can use `mk_identity`, which will make a memory allocation using `TMP_ALLOC`.
+
+Also note that if you create a vector with:
+
+```cpp
+Vector<float32_t> v(NB);
+```
+
+then the dimension `NB` is a runtime parameter. The memory pool allocator given as example in this library is only working with dimensions known at build time. For runtime dimensions, it is still using a `malloc`.
+
diff --git a/Documentation/Doxygen/src/memory_static_dynamic.md b/Documentation/Doxygen/src/memory_static_dynamic.md
new file mode 100644
index 000000000..a1d911814
--- /dev/null
+++ b/Documentation/Doxygen/src/memory_static_dynamic.md
@@ -0,0 +1,35 @@
+# Static / dynamic {#dsppp_memory_static_dynamic}
+
+As we have seen in the previous sections, there are two kinds of vectors:
+
+* `Vector<T>` with a dimension known at runtime
+* `Vector<T,NB>` with a dimension known at build time
+
+The former vectors are called "dynamic" in this library. The latter are called "static".
+
+This naming "static" / "dynamic" refers to the dimension. With "dynamic" vectors the same code can, at runtime, create vectors of different lengths based on a runtime length.
+
+With "static" vectors, the length is fixed at build time and will never change at runtime.
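+
+For example (a sketch with `float32_t` elements, where `nb_samples` stands for any value only known at runtime):
+
+```cpp
+Vector<float32_t> dyn_vec(nb_samples);   // "dynamic": the length is a runtime value
+Vector<float32_t, 256> static_vec;       // "static": the length 256 is part of the type
+```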
+
+Note that the library also has "static" / "dynamic" matrixes. So, we are going to use "objects" to cover both cases.
+
+# Static objects
+
+The advantage of static objects is that the dimension is known at build time. The compiler can then generate an algorithm that is specialized for those dimensions and thus more efficient.
+
+With static objects it is also possible to use a different memory allocator with better performance and determinism.
+
+But, with static objects, objects of different dimensions are considered as different types. The compiler will generate different implementations, so it will have an impact on the code size.
+
+If you need lots of objects of different dimensions, or if the dimensions are not known at build time, then you need to use dynamic objects.
+
+# Dynamic objects
+
+With dynamic objects, the dimension is known at runtime. So objects of different dimensions have the same datatype and the compiler generates only one implementation for all those objects. It cannot generate specialized implementations based on the dimension. It is better for code size, but the implementations will be less efficient.
+
+Also, when the dimension is not known at build time, some instruction selection made by the C++ library at build time is no longer possible. It has an effect on performance since at runtime one must decide what's possible or not. It mostly impacts matrixes, where stride information is needed.
+
+With vector instructions one can use scatter / gather instructions and they require a stride. But there are constraints depending on the datatype, and when the stride is too big for those instructions, they cannot be used. This check has to be done at runtime for dynamic objects.
+
+Finally, with dynamic objects, memory allocation can be an issue. You can mitigate the problem by reusing temporaries in your algorithms instead of re-allocating them. But it makes the implementation more difficult. See the section about @ref dsppp_guidelines.
+
diff --git a/Documentation/Doxygen/src/template.md b/Documentation/Doxygen/src/template.md
new file mode 100644
index 000000000..16b4994ac
--- /dev/null
+++ b/Documentation/Doxygen/src/template.md
@@ -0,0 +1,60 @@
+# What you need to know about C++ templates {#dsppp_template}
+
+## What is a template useful for?
+
+In CMSIS-DSP, you have functions like:
+
+* `arm_add_f32`
+* `arm_add_f64`
+
+Without unrolling, the scalar implementation is the same but is duplicated (two different source files to maintain although they are nearly the same).
+
+One could try to reuse the same source for both functions using the C preprocessor. But we would still have two different functions with different names at the end (both generated from the same C + C preprocessor macros).
+
+With C++ templates, we can achieve the same result in a better way since the C++ compiler will check the templates and typecheck them. In addition to that, both functions can share the same name.
+
+With C++ templates, we could have a *generic* function `arm_add` taking as argument a pointer `T *pSrc` where `T` is a type variable!
+
+When the function is used with a `float32_t *`, the compiler would generate code for a function using `float32_t`.
+
+And if the function is used with a `float64_t *`, the compiler would generate code for a function using `float64_t`.
+
+The generic `arm_add` source code is a template used to generate different implementations. It is like a code generator.
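+
+For instance, such a generic function could look like the following sketch (an illustration, not the actual CMSIS-DSP source):
+
+```cpp
+#include <cstddef>
+
+// Hypothetical generic version of arm_add_f32 / arm_add_f64.
+// The compiler generates one specialization per element type T.
+template<typename T>
+void arm_add(const T *pSrcA, const T *pSrcB, T *pDst, std::size_t blockSize)
+{
+    for (std::size_t i = 0; i < blockSize; i++)
+    {
+        pDst[i] = pSrcA[i] + pSrcB[i];   // requires T to support operator+
+    }
+}
+```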
+ +And if the compiler is unable to generate an implementation because the type variable `T` is replaced by a type with no addition operator, then it would be detected by the compiler. + +## Templates for datatypes + +C++ templates also apply to structs and classes. + +For instance, we could have a template `Vector` and thus different types `Vector`, `Vector` ... + +There is another aspect of C++ templates that may be surprising : the types can contain numbers. + +For instance, one could have a type +`Vector` for a vector of `float` and of length `10`. The length being known at build time. + +The types `Vector` and `Vector` should be considered as different types because they have different lengths. The length is part of the type. + +What we said above for code generation applies. For a template algorithm using any kind of vector, the compiler would generate different code for different vector types. The code for a template algorithm using `Vector` would be different from the code for `Vector` because those two types are different. + + +## Implicit parameters + +A template can also have implicit parameters. + +For instance one could use `Vector` or `Vector`. + +In the first case, the length is an implicit parameter with a default value and it is equivalent to writing `Vector` where `DYNAMIC` could be a special value (negative for instance) used to tell the compiler that the length of the vector is not known at build time but only at runtime. + +Both variants may use totally different implementations. The `DYNAMIC` variant may contain a `length` field in the `struct` definition whereas other variants do not need this field since the length is known at build time. + +## How to use templates ? + +A template is just a C++ header. You only need to include this header to start using the template. There is nothing to build. + +## Example + +Now you can look at an @ref dsppp_vector_example "example with vector operations" showing how to use the library + + diff --git a/Documentation/Doxygen/src/vector.md b/Documentation/Doxygen/src/vector.md new file mode 100644 index 000000000..546338fee --- /dev/null +++ b/Documentation/Doxygen/src/vector.md @@ -0,0 +1,112 @@ +# Vector {#dsppp_vector} + +The use of vectors has been explained in @ref dsppp_vector_example "example with vector operations" and focusing on `float32_t`. + +The vector template is defined as: + +```cpp +template typename Allocator = TMP_ALLOC> +struct Vector:Vector_Base

+``` + +* `P` is the datatype of vector elements +* `L` is the static length of the vector (length known at build time). `L<0` when the length is dynamic and not known at build time. It is the default value. +* `Allocator` is the memory allocator. By default it is `TMP_ALLOC` that you can redefine since it is a macro +* `Vector_Base

` is providing the storage. A vector owns its storage buffer. + +## Q15 example + +Example with `Q15` is very similar: + +The vectors are defined: + +```cpp +Vector aQ15; +Vector bQ15; +Vector cQ15; +``` + +They are initialized: + +```cpp +for(int i = 0;i< NB;i++) +{ + aQ15[i] = bQ15[i] = cQ15[i] = Q15(i); +} +``` + +Here, the `Q15` value is initialized from the int value `i` and thus represents \f$ i/2^{15} \f$ + +Some computation is done + +```cpp +Vector dQ15 = aQ15 + bQ15 * cQ15; +``` + +The result is displayed: + +```cpp +std::cout << "Result = " << dQ15 ; +``` + +## VectorView + +A vector view is a virtual vector : a view of a vector. + +One can define a `VectorView` with: + +```cpp +auto subD = d.sub(2); +``` + +This is creating a virtual vector starting at index `2` (3rd element) of vector `d`. + +You can then operate with this virtual vector: + +```cpp +subD = subD + 2.0f; +``` + +If you display the vector `d`, you'll see that `2.0f` has been added to all elements starting from the 2rd one. + +`VectorView` do not own their memory. It is owned by the original vector. + +If you write: + +```cpp +x = y +``` + +and `x` and `y` are `VectorView`, no copy will occur. `x` will just reference the same data as `y`. If you want to copy you have to be explicit and write: + +```cpp +x = copy(y) +``` + +It is advised to always use the `copy` operator (even with normal vectors). + +Virtual vectors can have a stride: + +```cpp +d.sub<2>(1) = 0.0f; +``` + +This line sets the odd elements of the vector to `0.0f`. It is creating a vvirtual vector with stride `2` and starting at index `1` of first vector. + +Then, all elements of this virtual vector are set to `0.0f`. + +The `sub` API is: + +```cpp +template +VectorView sub(const index_t start=0,const index_t stop=L) +``` + +You can define: + +* The stride `S` : statically known and by default `1`. +* The start of the view (`0` by default) +* The end of the view (`L` by default : the length known at build time). Note that it is the first index **after** the end of the vector. + diff --git a/Documentation/Doxygen/src/vectorop.md b/Documentation/Doxygen/src/vectorop.md new file mode 100644 index 000000000..aed42944a --- /dev/null +++ b/Documentation/Doxygen/src/vectorop.md @@ -0,0 +1,100 @@ +# Vector operation example {#dsppp_vector_example} + +To compute: + +\f[ + +\overrightarrow{d} = \overrightarrow{a} + \overrightarrow{b} * \overrightarrow{c} + +\f] + +we need to: +1. Include the right header files +2. allocate the vectors +3. initialize the vectors +4. make the computation. + +# Include the headers + +The headers are not yet part of the CMSIS-DSP packs since they are experimental. You can get them from the [CMSIS-DSP github](https://github.com/ARM-software/CMSIS-DSP/CPP) + +```cpp +#include +#include + +using namespace arm_cmsis_dsp; +``` + +If fixed point datatypes are required, `#include ` should be used before `` + +Fixed point requires the use of CMSIS-DSP. + +# Creation of the vectors + +To create a vector `a` you would write: + +```cpp +constexpr int NB = 32; + +Vector a; +Vector b; +Vector c; +``` + +`Vector` is creating a vector of dimension `NB` (known at build time) and datatype `float32_t`. This creation is requiring some memory allocation and by default it is done with a `malloc`. + +It is possible to change the memory allocator for the vectors (and it is advised) to avoid using `malloc` and instead have deterministic allocation without fragmentation. + +See section @ref dsppp_memory_allocator "Memory allocation". 
+ +Vectors of different dimensions are considered as being different types. + +If you don't know the dimension at build time, you can use a different type of vector with: + +```cpp +Vector a(NB); +``` + +For the trade-off between vector with build time dimension or runtime dimension please see the section @ref dsppp_memory_static_dynamic . + +# Initialization of the vectors + +You can index the vectors as normal C arrays. + +```cpp +for(int i = 0;i< NB;i++) +{ + a[i] = b[i] = c[i] = i; +} +``` + +# Computation + +The computation can be written normally as : + +```cpp +Vector d = a + b * c; +``` + +Note that the computation can be parametrized with template arguments so the same computation could be used with any datatype or length. In that case you would have to define a template (and not just a normal function) and inside you would use something like: + +```cpp +Vector d = a + b * c; +``` + +where `T` is a type variable coming from the template. + +The operators `+`, `*` are computed in one pass with one loop : we have loop fusion and instead of having a loop per operator we have a loop for the whole computation. + +To understand fusion and how to extend it with new operators, see section @ref dsppp_fusion . + +For an overview of vector operators, see section @ref dsppp_vector . +For an overview of matrix operators, see section @ref dsppp_matrix . + +# Displaying the result + +The vectors can be displayed on `stdout` for debug purpose. + +```cpp +std::cout << "Result = " << d ; +``` diff --git a/dsppp/.gitignore b/dsppp/.gitignore new file mode 100644 index 000000000..0cd7f9a89 --- /dev/null +++ b/dsppp/.gitignore @@ -0,0 +1,13 @@ +build_* +allocation/* +out/ +tmp/ +__pycache__/ +**.DS_Store +*.cprj +cprj/*.cbuild*.yml +dump_* +run_*.bat +ac6_results/ +gcc_results/ +clang_results/ diff --git a/dsppp/Examples/dot_product.cpp b/dsppp/Examples/dot_product.cpp new file mode 100644 index 000000000..c1ee8146f --- /dev/null +++ b/dsppp/Examples/dot_product.cpp @@ -0,0 +1,52 @@ + +#include "RTE_Components.h" +#include CMSIS_device_header + +#if defined(MPS3) +#include "cmsis_driver_config.h" +#include "stdout_USART.h" +#endif + +#include + +#include +#include + +using namespace arm_cmsis_dsp; + + +int main(void) +{ +#if defined(MPS3) + stdout_init(); +#endif + + std::cout << "Dot product example\r\n"; + + constexpr int NB = 32; + + Vector a; + Vector b; + Vector c; + Vector d; + + float32_t scale = 0.5; + + for(int i = 0;i< NB;i++) + { + a[i] = b[i] = c[i] = d[i] = i; + } + + float32_t r; + + r = dot(scale*(a+b),c*d); + + std::cout << "Result = " << r << "\r\n"; + + +#if defined(MPS3) + while(1); +#endif +} + + diff --git a/dsppp/Examples/matrix_op.cpp b/dsppp/Examples/matrix_op.cpp new file mode 100644 index 000000000..f9fa12318 --- /dev/null +++ b/dsppp/Examples/matrix_op.cpp @@ -0,0 +1,109 @@ + +#include "RTE_Components.h" +#include CMSIS_device_header + +#if defined(MPS3) +#include "cmsis_driver_config.h" +#include "stdout_USART.h" +#endif + +#include + +#include +#include +#include + +using namespace arm_cmsis_dsp; + + +int main(void) +{ +#if defined(MPS3) + stdout_init(); +#endif + + std::cout << "Matrix operation examples\r\n"; + + constexpr int ROWS = 8; + constexpr int COLS = 8; + + Matrix a; + Matrix b; + + for(std::size_t i=0;i result = a * a + b; + + std::cout << "Result = " << std::endl << result ; + + // Vector views + + // Rows + result.row(1) = 0.0f; + std::cout << "Result = " << std::endl << result ; + + // Row with stride + // setting odd elements of 3rd row to 0 + 
result.row<2>(2,1) = 0.0f; + std::cout << "Result = " << std::endl << result ; + + // Column with stride + result.col<2>(2,1) = 5.0f; + std::cout << "Result = " << std::endl << result ; + + // Matrix view + result.sub(4,8,4,8) = result.sub(4,8,4,8) + result.sub(4,8,4,8); + std::cout << "Result = " << std::endl << result ; + + // operators + // dot + result = dot(a,b); + std::cout << "Result = " << std::endl << result ; + + // diagonal + Vector c; + + for(int i = 0;i< ROWS;i++) + { + c[i] = i; + } + result = Matrix::diagonal(c); + + std::cout << "Result = " << std::endl << result ; + + // identity matrix + result = Matrix::identity(); + + std::cout << "Result = " << std::endl << result ; + + // transpose matrix + result = a.transpose(); + + std::cout << "Result = " << std::endl << result ; + + transposeTo(result,a); + + std::cout << "Result = " << std::endl << result ; + + // outer product + result = outer(c,c); + std::cout << "Result = " << std::endl << result ; + + +#if defined(MPS3) + while(1); +#endif +} + + diff --git a/dsppp/Examples/vector_op.cpp b/dsppp/Examples/vector_op.cpp new file mode 100644 index 000000000..6964fc3b0 --- /dev/null +++ b/dsppp/Examples/vector_op.cpp @@ -0,0 +1,83 @@ + +#include "RTE_Components.h" +#include CMSIS_device_header + +#if defined(MPS3) +#include "cmsis_driver_config.h" +#include "stdout_USART.h" +#endif + +#include + +#include +#include +#include + +using namespace arm_cmsis_dsp; + + +int main(void) +{ +#if defined(MPS3) + stdout_init(); +#endif + + std::cout << "Vector operation examples\r\n"; + + constexpr int NB = 32; + + // float32 example + + Vector a; + Vector b; + Vector c; + + for(int i = 0;i< NB;i++) + { + a[i] = b[i] = c[i] = i; + } + + + Vector d = a + b * c; + + + std::cout << "Result = " << d ; + + // Vector view example 1 + auto subD = d.sub(2); + subD = subD + 2.0f; + + // d vector has been modified starting from the 3rd element + // (index 2) + std::cout << "Result = " << d ; + + // Now we set all odd elements to 0. 
+ d.sub<2>(1) = 0.0f; + std::cout << "Result = " << d ; + + + // Q15 example + Vector aQ15; + Vector bQ15; + Vector cQ15; + + for(int i = 0;i< NB;i++) + { + aQ15[i] = bQ15[i] = cQ15[i] = Q15(i); + } + + + Vector dQ15 = aQ15 + bQ15 * cQ15; + + + std::cout << "Result = " << dQ15 ; + + + + +#if defined(MPS3) + while(1); +#endif +} + + diff --git a/dsppp/Include/dsppp/DSP/basic.hpp b/dsppp/Include/dsppp/DSP/basic.hpp new file mode 100644 index 000000000..9032412e4 --- /dev/null +++ b/dsppp/Include/dsppp/DSP/basic.hpp @@ -0,0 +1,256 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_DSP +#undef ARM_MATH_MVEI +#undef ARM_MATH_MVEF +#undef ARM_MATH_NEON +#endif + +/** \addtogroup ARCHALG + * \addtogroup DSPALG DSP Extension specific algorithm + * \ingroup ARCHALG + * @{ + */ + +#if defined(ARM_MATH_DSP) +#if !defined(ARM_MATH_MVEI) && !defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON) + +#define DSP_UNROLL 1 + +template() && + IsVector::value && + SameElementType::value,bool>::type = true> +inline void _Fill(DST &v, + const T val, + vector_length_t l, + const DSP* = nullptr) +{ + constexpr int nb_lanes = vector_traits::nb_lanes; + index_t i; + + for(i=0 ; i <= l-(nb_lanes<() && + must_use_matrix_idx() && + SameElementType::value,bool>::type = true> +inline void _Fill2D(DST &v, + const T val, + const vector_length_t rows, + const vector_length_t cols, + const DSP* = nullptr) +{ + constexpr int nb_lanes = vector_traits::nb_lanes; + index_t row=0; + + for(; row <= rows-(1<() && + vector_idx_pair(),bool>::type = true> +inline void eval(DA &v, + const DB& other, + const vector_length_t l, + const DSP* = nullptr) +{ + using T = typename traits::Scalar; + constexpr int nb_lanes = vector_traits::nb_lanes; + constexpr unsigned int U = DSP_UNROLL; + index_t i; + + for(i=0 ; i <= l-(nb_lanes<() && + must_use_matrix_idx_pair(),bool>::type = true> +inline void eval2D(DA &v, + const DB& other, + const vector_length_t rows, + const vector_length_t cols, + const DSP* = nullptr) +{ + using T = typename traits::Scalar; + constexpr int nb_lanes = vector_traits::nb_lanes; + index_t row=0; + + for(; row <= rows-(1<() && + vector_idx_pair(),bool>::type = true> +inline DotResult _dot(const DA& a, + const DB& b, + const vector_length_t l, + const DSP* = nullptr) +{ + using Acc = DotResult; + using T = typename traits::Scalar; + using Temp = typename vector_traits::temp_accumulator; + constexpr int nb_lanes = vector_traits::nb_lanes; + constexpr unsigned int U = DSP_UNROLL; + index_t i; + + Acc acc = Acc{}; + Temp vacc = vector_traits::temp_acc_zero(); + + for(i=0 ; i <= l-(nb_lanes<() && + vector_idx_pair(),bool>::type = true> +inline void _swap(DA&& a, + DB&& b, + const vector_length_t l, + const DSP* = nullptr) +{ + using Scalar = typename ElementType::type; + using Vector = typename vector_traits::vector; + + constexpr int nb_lanes = vector_traits::type>::nb_lanes; + index_t i=0; + Vector tmpa,tmpb; + + for(i=0 ; i <= l-nb_lanes; i += nb_lanes) + { + tmpa = a.vector_op(i); + tmpb = b.vector_op(i); + b.vector_store(i,tmpa); + a.vector_store(i,tmpb); + } + + for(;i::Scalar,Q15>::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +__STATIC_INLINE void _arm_mat_trans( + const MA &src, + MB &dst, + const DSP* = nullptr) +{ + using T = typename traits::Scalar; + using VEC = typename vector_traits::vector; + constexpr int nb_lanes = vector_traits::nb_lanes; + + T *pIn = src.ptr(); /* input data matrix pointer */ + T *pOut = dst.ptr(); /* output data matrix pointer */ + uint16_t 
nRows = src.rows(); /* number of rows */ + uint16_t nCols = src.columns(); /* number of columns */ + uint32_t col, row = nRows, i = 0U; /* Loop counters */ + + VEC in; /* variable to hold temporary output */ + + /* Matrix transpose by exchanging the rows with columns */ + /* row loop */ + do + { + /* Pointer pOut is set to starting address of column being processed */ + pOut = dst.ptr() + i; + + + /* Loop unrolling: Compute 4 outputs at a time */ + col = nCols / (2*nb_lanes); + + while (col > 0U) /* column loop */ + { + /* Read two elements from row */ + in = inner::vload1<1>(pIn); + pIn += nb_lanes; + + /* Unpack and store one element in destination */ + *pOut = Q15(in.v); + /* Update pointer pOut to point to next row of transposed matrix */ + pOut += dst.stride(); + + /* Unpack and store second element in destination */ + *pOut = Q15((in.v & (q31_t) 0xffff0000) >> 16); + /* Update pointer pOut to point to next row of transposed matrix */ + pOut += dst.stride(); + + /* Read two elements from row */ + in = inner::vload1<1>(pIn); + pIn += nb_lanes; + + /* Unpack and store one element in destination */ + *pOut = Q15(in.v); + /* Update pointer pOut to point to next row of transposed matrix */ + pOut += dst.stride(); + + /* Unpack and store second element in destination */ + *pOut = Q15((in & (q31_t) 0xffff0000) >> 16); + /* Update pointer pOut to point to next row of transposed matrix */ + pOut += dst.stride(); + + /* Decrement column loop counter */ + col--; + } + + /* Loop unrolling: Compute remaining outputs */ + col = nCols & (2*nb_lanes-1); + while (col > 0U) + { + /* Read and store input element in destination */ + *pOut = *pIn++; + + /* Update pointer pOut to point to next row of transposed matrix */ + pOut += dst.stride(); + + /* Decrement column loop counter */ + col--; + } + + pIn += (src.stride()-nCols); + + i ++; + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); /* row loop end */ + + + +} + + +template::Scalar,Q31>::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline void _dot_m_v(RES &res, + const M&m,const V&v, + const DSP* = nullptr) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + constexpr int nb_lanes = vector_traits::nb_lanes; + + uint32_t numRows = m.rows(); + uint32_t numCols = m.columns(); + const T *pSrcA = m.ptr(); + const T *pInA1; /* input data matrix pointer A of Q15 type */ + const T *pInA2; /* input data matrix pointer A of Q15 type */ + const T *pInA3; /* input data matrix pointer A of Q15 type */ + const T *pInA4; /* input data matrix pointer A of Q15 type */ + T *px; /* Temporary output data matrix pointer */ + uint16_t i, row; /* loop counters */ + int16_t colCnt; + VEC matData, matData2, vecData, vecData2; + T tmpData; + + + /* Process 4 rows at a time */ + row = numRows >> 2; + i = 0u; + px = res.ptr(); + + /* The following loop performs the dot-product of each row in pSrcA with the vector */ + /* row loop */ + while (row > 0) { + /* Initialize accumulators */ + ACC sum1 = ACC{}; + ACC sum2 = ACC{}; + ACC sum3 = ACC{}; + ACC sum4 = ACC{}; + + /* For every row wise process, the pInVec pointer is set + ** to the starting address of the vector */ + + /* Loop unrolling: process 2 columns per iteration */ + + /* Initialize pointers to the starting address of the column being processed */ + pInA1 = pSrcA + i; + pInA2 = pInA1 + m.stride(); + pInA3 = pInA2 + m.stride(); + pInA4 = pInA3 + m.stride(); + + // Main loop: matrix-vector 
multiplication + for(colCnt = 0 ; colCnt <= (int16_t)numCols - nb_lanes; colCnt += nb_lanes) + { + // Read 2 values from vector + vecData = v.vector_op(colCnt); + + // Read 8 values from the matrix - 2 values from each of 4 rows, and do multiply accumulate + matData = inner::vload1<1> (pInA1); + pInA1 += nb_lanes; + sum1 = inner::vmacc(sum1, matData, vecData); + + matData = inner::vload1<1> (pInA2); + pInA2 += nb_lanes; + sum2 = inner::vmacc(sum2, matData, vecData); + + matData = inner::vload1<1> (pInA3); + pInA3 += nb_lanes; + sum3 = inner::vmacc(sum3, matData, vecData); + + matData = inner::vload1<1> (pInA4); + pInA4 += nb_lanes; + sum4 = inner::vmacc(sum4, matData, vecData); + + // Decrement the loop counter + } + + /* process any remaining columns */ + + for(; colCnt < (int16_t)numCols; colCnt ++) + { + tmpData = v[colCnt]; + sum1 = inner::mac(sum1,*pInA1++ , tmpData); + sum2 = inner::mac(sum2,*pInA2++ , tmpData); + sum3 = inner::mac(sum3,*pInA3++ , tmpData); + sum4 = inner::mac(sum4,*pInA4++ , tmpData); + } + + /* Saturate and store the result in the destination buffer */ + *px++ = inner::from_accumulator(sum1); + *px++ = inner::from_accumulator(sum2); + *px++ = inner::from_accumulator(sum3); + *px++ = inner::from_accumulator(sum4); + + i = i + m.stride() * 4; + + /* Decrement the row loop counter */ + row--; + } + + /* process any remaining rows */ + row = numRows & 3u; + while (row > 0) { + + ACC sum = ACC{}; + pInA1 = pSrcA + i; + + // loop unrolling - process 4 elements at a time + + for(colCnt = 0 ; colCnt <= (int16_t)numCols - 2*nb_lanes; colCnt += 2*nb_lanes) + { + vecData = v.vector_op(colCnt); + vecData2 = v.vector_op(colCnt+nb_lanes); + + matData = inner::vload1<1>(pInA1); + pInA1 += nb_lanes; + matData2 = inner::vload1<1>(pInA1); + pInA1 += nb_lanes; + sum = inner::vmacc(sum, matData, vecData); + sum = inner::vmacc(sum, matData2, vecData2); + } + + // process remainder of row + for(; colCnt < (int16_t)numCols; colCnt ++) + { + + sum = inner::mac(sum, *pInA1++ , v[colCnt]); + } + *px++ = inner::from_accumulator(sum); + i = i + m.stride(); + row--; + } +} + +template::Scalar,Q31>::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +__STATIC_INLINE void _dot_m_m(const MA&pSrcA,const MB&pSrcB, + RES &&pDst, + const TMP &BT, + const DSP* = nullptr) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + constexpr int nb_lanes = vector_traits::nb_lanes; + + ACC sum; /* Accumulator */ + + + T *pSrcBT = BT.ptr(); /* Input data matrix pointer for transpose */ + T *pInA = pSrcA.ptr(); /* Input data matrix pointer A of Q15 type */ + T *pInB = pSrcB.ptr(); /* Input data matrix pointer B of Q15 type */ + T *px; /* Temporary output data matrix pointer */ + uint16_t numRowsA = pSrcA.rows(); /* Number of rows of input matrix A */ + uint16_t numColsB = pSrcB.columns(); /* Number of columns of input matrix B */ + uint16_t numColsA = pSrcA.columns(); /* Number of columns of input matrix A */ + uint16_t numRowsB = pSrcB.rows(); /* Number of rows of input matrix B */ + uint32_t col, i = 0U, row = numRowsB, colCnt; /* Loop counters */ + + VEC inA1, inB1, inA2, inB2; + + + /* Reset variables for usage in following multiplication process */ + row = numRowsA; + i = 0U; + px = pDst.ptr(); + + /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ + /* row loop */ + do + { + /* For every row wise process, column loop counter is to be initiated */ + col = 
numColsB; + + /* For every row wise process, pIn2 pointer is set to starting address of transposed pSrcB data */ + pInB = pSrcBT; + + /* column loop */ + do + { + /* Set variable sum, that acts as accumulator, to zero */ + sum = ACC{}; + + /* Initiate pointer pInA to point to starting address of column being processed */ + pInA = pSrcA.ptr() + i; + + /* Apply loop unrolling and compute 2 MACs simultaneously. */ + colCnt = numColsA / (2*nb_lanes); + + /* matrix multiplication */ + while (colCnt > 0U) + { + /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */ + + /* read real and imag values from pSrcA and pSrcB buffer */ + inA1 = inner::vload1<1> (pInA); + pInA += nb_lanes; + inB1 = inner::vload1<1> (pInB); + pInB += nb_lanes; + + inA2 = inner::vload1<1> (pInA); + pInA += nb_lanes; + inB2 = inner::vload1<1> (pInB); + pInB += nb_lanes; + + /* Multiply and Accumulates */ + sum = inner::vmacc(sum, inA1, inB1); + sum = inner::vmacc(sum, inA2, inB2); + + /* Decrement loop counter */ + colCnt--; + } + + /* process remaining column samples */ + colCnt = numColsA & (2*nb_lanes-1); + + while (colCnt > 0U) + { + /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */ + sum = inner::mac(sum ,*pInA++ , *pInB++); + + /* Decrement loop counter */ + colCnt--; + } + + /* Saturate and store result in destination buffer */ + *px = inner::from_accumulator(sum); + px++; + + /* Decrement column loop counter */ + col--; + + } while (col > 0U); + + i = i + pSrcA.stride(); + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); + +} +#endif +#endif + +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/DSP/memory.hpp b/dsppp/Include/dsppp/DSP/memory.hpp new file mode 100644 index 000000000..6aa190579 --- /dev/null +++ b/dsppp/Include/dsppp/DSP/memory.hpp @@ -0,0 +1,98 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_DSP +#undef ARM_MATH_MVEI +#undef ARM_MATH_MVEF +#undef ARM_MATH_NEON +#endif + + +namespace arm_cmsis_dsp { + + +/** \addtogroup DSPALG + * @{ + */ + +#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) << 0) & (int32_t)0x000000FF) | \ + (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \ + (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \ + (((int32_t)(v3) << 24) & (int32_t)0xFF000000) ) + + +__STATIC_FORCEINLINE int32_t read_q15x2 ( + Q15 const * pQ15) +{ + int32_t val; + const int16_t *p=reinterpret_cast(pQ15); + +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (&val, p, 4); +#else + val = (p[1] << 16) | (p[0] & 0x0FFFF) ; +#endif + + return (val); +}; + + + +__STATIC_FORCEINLINE void write_q15x2 ( + Q15 * pQ15, + int32_t value) +{ + int32_t val = value; + int16_t *p=reinterpret_cast(pQ15); + +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (p, &val, 4); +#else + p[0] = (int16_t)(val & 0x0FFFF); + p[1] = (int16_t)(val >> 16); +#endif +}; + + +__STATIC_FORCEINLINE int32_t read_q7x4 ( + Q7 const * pQ7) +{ + int32_t val; + const int8_t *p=reinterpret_cast(pQ7); + +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (&val, p, 4); +#else + val =((p[3] & 0x0FF) << 24) | ((p[2] & 0x0FF) << 16) | ((p[1] & 0x0FF) << 8) | (p[0] & 0x0FF); +#endif + return (val); +}; + + + + + + +__STATIC_FORCEINLINE void write_q7x4 ( + Q7 *& pQ7, + int32_t value) +{ + int8_t *p=reinterpret_cast(pQ7); + int32_t val = value; +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (p, &val, 4); +#else + p[0] = (q7_t)(val & 0x0FF); + p[1] = (q7_t)((val >> 8) & 0x0FF); + p[2] = (q7_t)((val >> 16) & 0x0FF); + p[3] = (q7_t)((val >> 24) & 0x0FF); + +#endif +}; + +/*! 
@} */ + +} + diff --git a/dsppp/Include/dsppp/DSP/num_features.hpp b/dsppp/Include/dsppp/DSP/num_features.hpp new file mode 100644 index 000000000..e13f1a922 --- /dev/null +++ b/dsppp/Include/dsppp/DSP/num_features.hpp @@ -0,0 +1,14 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/* + +vreduce is going from vector accumulator to scalar accumulator +from_accumulator is going from scalar accumulator to scalar datatype + + +*/ + +#include "q7.hpp" +#include "q15.hpp" diff --git a/dsppp/Include/dsppp/DSP/q15.hpp b/dsppp/Include/dsppp/DSP/q15.hpp new file mode 100644 index 000000000..f10d2c140 --- /dev/null +++ b/dsppp/Include/dsppp/DSP/q15.hpp @@ -0,0 +1,238 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_DSP +#undef ARM_MATH_MVEI +#undef ARM_MATH_MVEF +#undef ARM_MATH_NEON +#endif + +/** \addtogroup DSPNumber DSP extension specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup DSPQ15Number Q15 + * \ingroup DSPNumber + * @{ + */ + +#if defined(ARM_MATH_DSP) +#if !defined(ARM_MATH_MVEI) && !defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON) + + +struct Q15DSPVector { + Q15DSPVector():v(0){}; + explicit Q15DSPVector(int32_t val):v(val){}; + operator int32_t(){return v;}; + +int32_t v; +}; + +template<> +struct vector_traits::type> +{ + typedef Q15 type; + typedef type::value_type storage_type; + typedef Q15DSPVector vector; + typedef Q<33,30> temp_accumulator; + + /* + + The evaluators are not using any predication and instead + use additional code after the loop to manage the tail. + + So, no inner function with predicate_t is required. + + Fusion operators still have call to inner operator with + predicate but they are not called in this context. + + */ + typedef uint32_t predicate_t; + + + static constexpr bool has_vector = true; + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + static constexpr bool has_predicate = false; + + static constexpr int nb_lanes = 2; + + static Q<33,30> temp_acc_zero() + { + return(Q<33,30>()); + } + + static constexpr int16_t zero_lane() {return 0;}; + + static constexpr int16_t lane_value(const Q15 x) {return x.v;}; + + +}; + + + +namespace inner { + + /* Needed to build but not used */ + template<> + struct vctpq{ + static uint32_t mk(uint32_t v) + { + return(v); + }; + }; + + __STATIC_FORCEINLINE Q15DSPVector vconst(Q15 val) + { + return(Q15DSPVector(__PKHBT(val.v, val.v, 16))); + } + + + __STATIC_FORCEINLINE Q15DSPVector vneg(const Q15DSPVector a) + { + return(Q15DSPVector(__QSUB16(0, a.v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vadd(const Q15DSPVector a, + const Q15DSPVector b) + { + return(Q15DSPVector(__QADD16(a.v,b.v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vadd(const Q15DSPVector a, + const Q15 b) + { + return(Q15DSPVector(__QADD16(a.v,vconst(b).v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vadd(const Q15 a, + const Q15DSPVector b) + { + return(Q15DSPVector(__QADD16(vconst(a).v,b.v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vsub(const Q15DSPVector a, + const Q15DSPVector b) + { + return(Q15DSPVector(__QSUB16(a.v,b.v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vsub(const Q15DSPVector a, + const Q15 b) + { + return(Q15DSPVector(__QSUB16(a.v,vconst(b).v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vsub(const Q15 a, + const Q15DSPVector b) + { + return(Q15DSPVector(__QSUB16(vconst(a).v,b.v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vmul(const Q15DSPVector a, + const Q15DSPVector b) + { + q31_t mul1,mul2; + q15_t out1,out2; + + mul1 = (q31_t) 
((q15_t) (a.v ) * (q15_t) (b.v )); + mul2 = (q31_t) ((q15_t) (a.v >> 16) * (q15_t) (b.v >> 16)); + + out1 = (q15_t) __SSAT(mul1 >> 15, 16); + out2 = (q15_t) __SSAT(mul2 >> 15, 16); + return(Q15DSPVector(__PKHBT(out1, out2, 16))); + }; + + + __STATIC_FORCEINLINE Q15DSPVector vmul(const Q15DSPVector a, + const Q15 b) + { + return(vmul(a,vconst(b))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vmul(const Q15 a, + const Q15DSPVector b) + { + return(vmul(vconst(a),b)); + }; + + + template::type = true> + inline Q15DSPVector vload1(const Q15 *p) + { + return(Q15DSPVector(read_q15x2(p))); + }; + + + template1),bool>::type = true> + inline Q15DSPVector vload1(const Q15 *p) + { + Q15 a = p[0]; + Q15 b = p[S]; + + return(Q15DSPVector(__PKHBT(a.v, b.v, 16))); + }; + + + // Dynamic stride + inline Q15DSPVector vload1(const Q15 *p,index_t stride) + { + Q15 a = p[0]; + Q15 b = *(p+stride); + + return(Q15DSPVector(__PKHBT(a.v, b.v, 16))); + } + + template::type = true> + inline void vstore1(Q15 *p,const Q15DSPVector val) + { + write_q15x2 (p, val.v); + }; + + template1),bool>::type = true> + inline void vstore1(Q15 *p,const Q15DSPVector val) + { + p[0] = Q15(val.v & 0x0FFFF); + p[S] = Q15(val.v >> 16); + }; + + // dynamic stride + inline void vstore1(Q15 *p,const index_t stride, + const Q15DSPVector val) + { + p[0] = Q15(val.v & 0x0FFFF); + *(p+stride) = Q15(val.v >> 16); + } + + __STATIC_FORCEINLINE Q<33,30> vmacc(const Q<33,30> sum, + const Q15DSPVector vala, + const Q15DSPVector valb) + { + return(Q<33,30>(__SMLALD(vala.v,valb.v,sum.v))); + }; + + __STATIC_FORCEINLINE Q<33,30> vmacc(const Q15DSPVector vala, + const Q15DSPVector valb) + { + return(Q<33,30>(__SMLALD(vala.v,valb.v,0))); + }; + + __STATIC_FORCEINLINE Q<33,30> vreduce(const Q<33,30> sum) + { + return(sum); + }; + + +}; + + +#endif +#endif + +/*! @} */ +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/DSP/q7.hpp b/dsppp/Include/dsppp/DSP/q7.hpp new file mode 100644 index 000000000..7c218294e --- /dev/null +++ b/dsppp/Include/dsppp/DSP/q7.hpp @@ -0,0 +1,264 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_DSP +#undef ARM_MATH_MVEI +#undef ARM_MATH_MVEF +#undef ARM_MATH_NEON +#endif + +/** \addtogroup DSPNumber DSP extension specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup DSPQ7Number Q7 + * \ingroup DSPNumber + * @{ + */ + +#if defined(ARM_MATH_DSP) +#if !defined(ARM_MATH_MVEI) && !defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON) + + +struct Q7DSPVector { + Q7DSPVector():v(0){}; + explicit Q7DSPVector(int32_t val):v(val){}; + operator int32_t(){return v;}; +int32_t v; +}; + +template<> +struct vector_traits::type> +{ + typedef Q7 type; + typedef type::value_type storage_type; + typedef Q7DSPVector vector; + typedef Q<17,14> temp_accumulator; + + /* + + The evaluators are not using any predication and instead + use additional code after the loop to manage the tail. + + So, no inner function with predicate_t is required. + + Fusion operators still have call to inner operator with + predicate but they are not called in this context. 
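+
+   As a rough illustration (not code from this header), a 4-lane Q7 evaluator
+   follows the pattern:
+
+       index_t i;
+       for (i = 0; i <= l - 4; i += 4)   // packed body, 4 Q7 values per int32_t
+          ... vector_op / vector_store ...
+       for (; i < l; i++)                // scalar tail, no predicate needed
+          ... scalar operation ...
+
+   so predicate_t is only declared here to satisfy the generic interface.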
+ + */ + typedef uint32_t predicate_t; + + + static constexpr bool has_vector = true; + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + static constexpr bool has_predicate = false; + + static constexpr int nb_lanes = 4; + + static Q<17,14> temp_acc_zero() + { + return(Q<17,14>()); + } + + static constexpr int8_t zero_lane() {return 0;}; + + static constexpr int8_t lane_value(const Q7 x) {return x.v;}; + + +}; + + + +namespace inner { + + /* Needed to build but not used */ + template<> + struct vctpq{ + static uint32_t mk(uint32_t v) + { + return(v); + }; + }; + + __STATIC_FORCEINLINE Q7DSPVector vconst(Q7 val) + { + return(Q7DSPVector(__PACKq7(val.v, val.v, val.v, val.v))); + } + + + __STATIC_FORCEINLINE Q7DSPVector vneg(const Q7DSPVector a) + { + return(Q7DSPVector(__QSUB8(0, a.v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vadd(const Q7DSPVector a, + const Q7DSPVector b) + { + return(Q7DSPVector(__QADD8(a.v,b.v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vadd(const Q7DSPVector a, + const Q7 b) + { + return(Q7DSPVector(__QADD8(a.v,vconst(b).v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vadd(const Q7 a, + const Q7DSPVector b) + { + return(Q7DSPVector(__QADD8(vconst(a).v,b.v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vsub(const Q7DSPVector a, + const Q7DSPVector b) + { + return(Q7DSPVector(__QSUB8(a.v,b.v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vsub(const Q7DSPVector a, + const Q7 b) + { + return(Q7DSPVector(__QSUB8(a.v,vconst(b).v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vsub(const Q7 a, + const Q7DSPVector b) + { + return(Q7DSPVector(__QSUB8(vconst(a).v,b.v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vmul(const Q7DSPVector a, + const Q7DSPVector b) + { + q7_t out1, out2, out3, out4; + q15_t mul1,mul2,mul3,mul4; + + mul1 = (q15_t) ((q7_t) (a.v ) * (q7_t) (b.v )); + mul2 = (q15_t) ((q7_t) (a.v >> 8) * (q7_t) (b.v >> 8)); + mul3 = (q15_t) ((q7_t) (a.v >> 16) * (q7_t) (b.v >> 16)); + mul4 = (q15_t) ((q7_t) (a.v >> 24) * (q7_t) (b.v >> 24)); + + out1 = (q7_t) __SSAT(mul1 >> 7, 8); + out2 = (q7_t) __SSAT(mul2 >> 7, 8); + out3 = (q7_t) __SSAT(mul3 >> 7, 8); + out4 = (q7_t) __SSAT(mul4 >> 7, 8); + return(Q7DSPVector(__PACKq7(out1,out2,out3,out4))); + }; + + + __STATIC_FORCEINLINE Q7DSPVector vmul(const Q7DSPVector a, + const Q7 b) + { + return(vmul(a,vconst(b))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vmul(const Q7 a, + const Q7DSPVector b) + { + return(vmul(vconst(a),b)); + }; + + + template::type = true> + inline Q7DSPVector vload1(const Q7 *p) + { + return(Q7DSPVector(read_q7x4(p))); + }; + + + template1),bool>::type = true> + inline Q7DSPVector vload1(const Q7 *p) + { + Q7 a = p[0]; + Q7 b = p[S]; + Q7 c = p[2*S]; + Q7 d = p[3*S]; + + return(Q7DSPVector(__PACKq7(a.v, b.v, c.v,d.v))); + }; + + + // Dynamic stride + inline Q7DSPVector vload1(const Q7 *p,index_t stride) + { + Q7 a = p[0]; + Q7 b = *(p+stride); + Q7 c = *(p+2*stride); + Q7 d = *(p+3*stride); + + return(Q7DSPVector(__PACKq7(a.v, b.v, c.v,d.v))); + } + + template::type = true> + inline void vstore1(Q7 *p,const Q7DSPVector val) + { + write_q7x4 (p, val.v); + }; + + template1),bool>::type = true> + inline void vstore1(Q7 *p,const Q7DSPVector val) + { + p[0] = Q7(val.v & 0x0FF); + p[S] = Q7(val.v >> 8); + p[2*S] = Q7(val.v >> 16); + p[3*S] = Q7(val.v >> 24); + }; + + // dynamic stride + inline void vstore1(Q7 *p,const index_t stride, + const Q7DSPVector val) + { + p[0] = Q7(val.v & 0x0FF); + *(p+stride) = Q7(val.v >> 8); + *(p+2*stride) = Q7(val.v >> 16); + *(p+3*stride) = 
Q7(val.v >> 24); + } + + __STATIC_FORCEINLINE Q<17,14> vmacc(const Q<17,14> sum, + const Q7DSPVector vala, + const Q7DSPVector valb) + { + q31_t inA1, inA2, inB1, inB2; + q31_t s; + inA1 = __SXTB16(__ROR(vala.v, 8)); + /* extract reminaing two samples */ + inA2 = __SXTB16(vala.v); + /* extract two q7_t samples to q15_t samples */ + inB1 = __SXTB16(__ROR(valb.v, 8)); + /* extract reminaing two samples */ + inB2 = __SXTB16(valb.v); + + /* multiply and accumulate two samples at a time */ + s = __SMLAD(inA1, inB1, sum.v); + s = __SMLAD(inA2, inB2, s); + + return(Q<17,14>(s)); + }; + + __STATIC_FORCEINLINE Q<17,14> vmacc(const Q7DSPVector vala, + const Q7DSPVector valb) + { + return(vmacc(Q<17,14>(0),vala,valb)); + }; + + __STATIC_FORCEINLINE Q<17,14> vreduce(const Q<17,14> sum) + { + return(sum); + }; + + +}; + + +#endif +#endif + + +/*! @} */ +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/basic.hpp b/dsppp/Include/dsppp/Helium/basic.hpp new file mode 100644 index 000000000..ac0529fce --- /dev/null +++ b/dsppp/Include/dsppp/Helium/basic.hpp @@ -0,0 +1,223 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include +#include + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#endif + +/** \addtogroup ARCHALG + * \addtogroup HELIUMALG Helium specific algorithm + * \ingroup ARCHALG + * @{ + */ + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) +template() && + IsVector::value && + SameElementType::value,bool>::type = true> +inline void _Fill(DST &v, + const T val, + const vector_length_t l, + const Helium* = nullptr) +{ + constexpr int nb_lanes = vector_traits::nb_lanes; + index_t i=0; + UNROLL_LOOP + for(i=0;i < l; i += nb_lanes) + { + v.vector_store_tail(i,l-i,inner::vconst_tail(val,inner::vctpq::mk(l-i))); + } +} + +template() && + must_use_matrix_idx() && + SameElementType::value,bool>::type = true> +inline void _Fill2D(DST &v, + const T val, + const vector_length_t rows, + const vector_length_t cols, + const Helium* = nullptr) +{ + constexpr int nb_lanes = vector_traits::nb_lanes; + + // Outer unroll factor in case inner loop does not have + // enough arithmetic instructions. 
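+  // (With the current setting U == 1 the outer loop below fills one row per
+  // iteration and the scalar row loop after it never has any work left to do;
+  // larger values of U would interleave the predicated stores of U rows.)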
+ // In future version this may be estimated from the + // complexity of the AST to evaluate + constexpr int U = 1; + index_t row=0; + + UNROLL_LOOP + for(; row <= rows-U;row += U) + { + + UNROLL_LOOP + for(index_t col=0; col < cols;col += nb_lanes) + { + for(int k=0;k::mk(cols-col))); + } + } + } + + for(; row < rows;row ++) + { + + UNROLL_LOOP + for(index_t col=0; col < cols;col += nb_lanes) + { + v.matrix_store_tail(row,col,cols-col,inner::vconst_tail(val,inner::vctpq::mk(cols-col))); + } + } +} + +template() && + vector_idx_pair(),bool>::type = true> +inline void eval(DA &v, + const DB& other, + const vector_length_t l, + const Helium* = nullptr) +{ + using T = typename traits::Scalar; + constexpr int nb_lanes = vector_traits::nb_lanes; + + index_t i=0; + + UNROLL_LOOP + for(i=0;i < l; i += nb_lanes) + { + v.vector_store_tail(i,l-i,other.vector_op_tail(i,l-i)); + } +} + + +template() && + must_use_matrix_idx_pair(),bool>::type = true> +inline void eval2D(DA &v, + const DB& other, + const vector_length_t rows, + const vector_length_t cols, + const Helium* = nullptr) +{ + using T = typename traits::Scalar; + constexpr int nb_lanes = vector_traits::nb_lanes; + // Attempt at computing the unrolling factor + // depending on the complexity of the AST + // (will have to rework this estimation) + constexpr int RU = 5 - Complexity::value; + constexpr int U = (RU <= 0) || (RU>=5) ? 1 : RU; + index_t row=0; + + UNROLL_LOOP + for(; row <= rows-U;row += U) + { + + UNROLL_LOOP + for(index_t col=0; col < cols;col += nb_lanes) + { + for(int k=0;k +void printt(const TupType& _tup, std::index_sequence) +{ + std::cout << "("; + (..., (std::cout << (I == 0? "" : ", ") << std::get(_tup))); + std::cout << ")\n"; +} + +template +void printt (const std::tuple& _tup) +{ + printt(_tup, std::make_index_sequence()); +} + +template() && + vector_idx_pair(),bool>::type = true> +inline DotResult _dot(const DA& a, + const DB& b, + const vector_length_t l, + const Helium* = nullptr) +{ + //using Res = DotResult; + // Vector scalar datatype + + using T = typename traits::Scalar; + using Temp = typename vector_traits::temp_accumulator; + + constexpr int nb_lanes = vector_traits::nb_lanes; + + Temp acc = vector_traits::temp_acc_zero(); + + UNROLL_LOOP + for(index_t i=0; i::mk(l-i)); + } + + return(inner::vreduce(acc)); +} + +template() && + vector_idx_pair(),bool>::type = true> +inline void _swap(DA&& a, + DB&& b, + const vector_length_t l, + const Helium* = nullptr) +{ + using Scalar = typename ElementType::type; + using Vector = typename vector_traits::vector; + + constexpr int nb_lanes = vector_traits::type>::nb_lanes; + index_t i=0; + Vector tmpa,tmpb; + + UNROLL_LOOP + for(i=0;i < l; i += nb_lanes) + { + tmpa = a.vector_op_tail(i,l-i); + tmpb = b.vector_op_tail(i,l-i); + b.vector_store_tail(i,l-i,tmpa); + a.vector_store_tail(i,l-i,tmpb); + } +} +#endif + +/*! 
@} */ + diff --git a/dsppp/Include/dsppp/Helium/float.hpp b/dsppp/Include/dsppp/Helium/float.hpp new file mode 100644 index 000000000..6f7861508 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/float.hpp @@ -0,0 +1,426 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#endif + +/** \addtogroup HeliumNumber Helium specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup HeliumFloatNumber Float + * \ingroup HeliumNumber + * @{ + */ + +/****************** + * + * Helium + * + */ + +#if defined(ARM_MATH_MVEF) + +/* + + +Arch is deriving from Helium + +*/ +template +struct vector_traits::value>::type > +{ + typedef float type; + typedef float storage_type; + typedef float32x4_t vector; + typedef float32x4_t temp_accumulator; + typedef mve_pred16_t predicate_t; + static constexpr bool has_vector = true; + static constexpr bool is_float = true; + static constexpr bool is_fixed = false; + static constexpr bool has_predicate = true; + + static constexpr int nb_lanes = 4; + + static float32x4_t temp_acc_zero() + { + return(vdupq_n_f32(0.0f)); + } + + static constexpr float zero_lane() {return 0.0f;}; + // Useful in fixed point since lane value is an int and not a Q something + static constexpr float lane_value(const float x) {return x;}; + +}; + + + +namespace inner { + + template<> + struct vctpq { + static mve_pred16_t mk(uint32_t v) + { + return(vctp32q(v)); + }; + }; + + __STATIC_FORCEINLINE float32x4_t vconst(const float v) + { + return(vdupq_n_f32(v)); + } + + __STATIC_FORCEINLINE float32x4_t vconst_tail(const float v, + const mve_pred16_t p0) + { + return(vdupq_x_n_f32(v,p0)); + } + + __STATIC_FORCEINLINE float32x4_t vneg(const float32x4_t a) + { + return(vnegq(a)); + }; + + __STATIC_FORCEINLINE float32x4_t vneg(const float32x4_t a, + const mve_pred16_t p0) + { + return(vnegq_x(a,p0)); + }; + + __STATIC_FORCEINLINE float32x4_t vadd(const float32x4_t a,const float32x4_t b) + { + return(vaddq(a,b)); + }; + + + __STATIC_FORCEINLINE float32x4_t vadd(const float32x4_t a,const float b) + { + return(vaddq_n_f32(a,b)); + }; + + __STATIC_FORCEINLINE float32x4_t vadd(const float a,const float32x4_t b) + { + return(vaddq_n_f32(b,a)); + }; + + __STATIC_FORCEINLINE float32x4_t vadd(const float32x4_t a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vaddq_x(a,b,p0)); + }; + + + __STATIC_FORCEINLINE float32x4_t vadd(const float32x4_t a,const float b, + const mve_pred16_t p0) + { + return(vaddq_x_n_f32(a,b,p0)); + }; + + __STATIC_FORCEINLINE float32x4_t vadd(const float a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vaddq_x_n_f32(b,a,p0)); + }; + + __STATIC_FORCEINLINE float32x4_t vsub(const float32x4_t a,const float32x4_t b) + { + return(vsubq(a,b)); + }; + + __STATIC_FORCEINLINE float32x4_t vsub(const float32x4_t a,const float b) + { + return(vsubq_n_f32(a,b)); + }; + + __STATIC_FORCEINLINE float32x4_t vsub(const float a,const float32x4_t b) + { + return(vsubq_n_f32(b,a)); + }; + + __STATIC_FORCEINLINE float32x4_t vsub(const float32x4_t a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vsubq_x(a,b,p0)); + }; + + __STATIC_FORCEINLINE float32x4_t vsub(const float32x4_t a,const float b, + const mve_pred16_t p0) + { + return(vsubq_x_n_f32(a,b,p0)); + }; + + __STATIC_FORCEINLINE float32x4_t vsub(const float a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vsubq_x_n_f32(b,a,p0)); + }; + + __STATIC_FORCEINLINE float32x4_t vmul(const float32x4_t a,const float32x4_t b) + { + return(vmulq(a,b)); 
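+  // The scalar-operand overloads of vmul below map directly to the _n_ forms of the
+  // intrinsics (vmulq_n_f32 and, under predication, vmulq_x_n_f32), so mixed
+  // vector/scalar products do not need an explicit vconst.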
+ }; + + __STATIC_FORCEINLINE float32x4_t vmul(const float32x4_t a,const float b) + { + return(vmulq_n_f32(a,b)); + }; + + __STATIC_FORCEINLINE float32x4_t vmul(const float a,const float32x4_t b) + { + return(vmulq_n_f32(b,a)); + }; + + __STATIC_FORCEINLINE float32x4_t vmul(const float32x4_t a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vmulq_x(a,b,p0)); + }; + + __STATIC_FORCEINLINE float32x4_t vmul(const float32x4_t a,const float b, + const mve_pred16_t p0) + { + return(vmulq_x_n_f32(a,b,p0)); + }; + + __STATIC_FORCEINLINE float32x4_t vmul(const float a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vmulq_x_n_f32(b,a,p0)); + }; + + __STATIC_FORCEINLINE float32x4_t vmacc(const float32x4_t acc,const float32x4_t a,const float32x4_t b) + { + return(vfmaq(acc,a,b)); + }; + + __STATIC_FORCEINLINE float32x4_t vmacc(const float32x4_t acc,const float32x4_t a,const float_t b) + { + return(vfmaq(acc,a,b)); + }; + + __STATIC_FORCEINLINE float32x4_t vmacc(const float32x4_t acc,const float32x4_t a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vfmaq_m(acc,a,b,p0)); + }; + + + + __STATIC_FORCEINLINE float vreduce(const float32x4_t in) + { + float acc = vgetq_lane(in, 0) + vgetq_lane(in, 1) + + vgetq_lane(in, 2) + vgetq_lane(in, 3); + return(acc); + }; + + + + + template::type = true> + inline float32x4_t vload1(const float32_t *p) + { + return(vld1q(p)); + }; + + template1),bool>::type = true> + inline float32x4_t vload1(const float32_t *p) + { + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + return(vldrwq_gather_shifted_offset_f32(p,offset)); + }; + + + // With dynamic stride + inline float32x4_t vload1(const float32_t *p,const index_t stride) + { + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + return(vldrwq_gather_shifted_offset_f32(p,offset)); + }; + + + + template::type = true> + inline float32x4_t vload1_z(const float32_t *p,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(p,p0)); + }; + + template1),bool>::type = true> + inline float32x4_t vload1_z(const float32_t *p,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + //uint32x4_t offset={0,1,2,3}; + //uint32x4_t offset = vidupq_u32((uint32_t)0,1); + //offset = vmulq_n_u32(offset,S); + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + return(vldrwq_gather_shifted_offset_z_f32(p,offset,p0)); + }; + + // With dynamic stride + inline float32x4_t vload1_z(const float32_t *p,const index_t stride,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + //uint32x4_t offset={0,1,2,3}; + //uint32x4_t offset = vidupq_u32((uint32_t)0,1); + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + return(vldrwq_gather_shifted_offset_z_f32(p,offset,p0)); + }; + + /* Generalized stride */ + template + struct vload1_gen_stride + { + static float32x4_t run(const float32_t *p) + { + constexpr uint32x4_t offset={S...}; + return(vldrwq_gather_shifted_offset_f32(p,offset)); + }; + }; + + template<> + struct vload1_gen_stride<0,1,2,3> + { + inline float32x4_t run(const float32_t *p) + { + return(vld1q(p)); + }; + }; + + /* Generalized stride */ + template + struct vload1_gen_stride_z + { + inline float32x4_t run(const float32_t *p,const std::size_t nb,const mve_pred16_t p0) + { + constexpr uint32x4_t offset={S...}; + (void)nb; + return(vldrwq_gather_shifted_offset_z_f32(p,offset,p0)); + }; + }; + + template<> + struct vload1_gen_stride_z<0,1,2,3> + { + inline float32x4_t run(const float32_t *p,const std::size_t 
nb,const mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(p,p0)); + }; + }; + + template::type = true> + inline void vstore1(float32_t *p,const float32x4_t val) + { + vst1q(p,val); + }; + + template1),bool>::type = true> + inline void vstore1(float32_t *p,const float32x4_t val) + { + //uint32x4_t offset={0,1,2,3}; + //uint32x4_t offset = vidupq_u32((uint32_t)0,1); + //offset = vmulq_n_u32(offset,S); + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + vstrwq_scatter_shifted_offset_f32(p,offset,val); + }; + + // with dynamic stride + inline void vstore1(float32_t *p,const index_t stride,const float32x4_t val) + { + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + vstrwq_scatter_shifted_offset_f32(p,offset,val); + }; + + + template::type = true> + inline void vstore1_z(float32_t *p,const float32x4_t val,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + vstrwq_p(p,val,p0); + }; + + template1),bool>::type = true> + inline void vstore1_z(float32_t *p,const float32x4_t val,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + //uint32x4_t offset={0,1,2,3}; + //uint32x4_t offset = vidupq_u32((uint32_t)0,1); + //offset = vmulq_n_u32(offset,S); + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + vstrwq_scatter_shifted_offset_p_f32(p,offset,val,p0); + }; + + // with dynamic stride + inline void vstore1_z(float32_t *p,const index_t stride,const float32x4_t val,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + vstrwq_scatter_shifted_offset_p_f32(p,offset,val,p0); + }; + + // Generalized stride + template + struct vstore1_gen_stride + { + static void run(float32_t *p,const float32x4_t val) + { + constexpr uint32x4_t offset={S...}; + vstrwq_scatter_shifted_offset_f32(p,offset,val); + }; + }; + + template<> + struct vstore1_gen_stride<0,1,2,3> + { + static void run(float32_t *p,const float32x4_t val) + { + vst1q(p,val); + }; + }; + + template + struct vstore1_gen_stride_z + { + static void vstore1_z(float32_t *p,const float32x4_t val,const std::size_t nb,const mve_pred16_t p0) + { + constexpr uint32x4_t offset={S...}; + (void)nb; + vstrwq_scatter_shifted_offset_p_f32(p,offset,val,p0); + } + }; + + template<> + struct vstore1_gen_stride_z<0,1,2,3> + { + static void vstore1_z(float32_t *p,const float32x4_t val,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + vstrwq_p(p,val,p0); + } + + }; + + + +}; + +#endif + +/*! @} */ +/*! 
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/half.hpp b/dsppp/Include/dsppp/Helium/half.hpp new file mode 100644 index 000000000..0df7f2418 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/half.hpp @@ -0,0 +1,520 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HeliumNumber Helium specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup HeliumHalfNumber Half + * \ingroup HeliumNumber + * @{ + */ + +#if defined(ARM_MATH_MVE_FLOAT16) +template +struct vector_traits::value>::type> +{ + typedef float16_t type; + typedef float16_t storage_type; + typedef float16x8_t vector; + typedef float16x8_t temp_accumulator; + typedef mve_pred16_t predicate_t; + + static constexpr bool has_vector = true; + static constexpr bool is_float = true; + static constexpr bool is_fixed = false; + static constexpr bool has_predicate = true; + + static constexpr int nb_lanes = 8; + + static float16x8_t temp_acc_zero() + { + return(vdupq_n_f16(0.0f)); + } + + static constexpr float16_t zero_lane() {return 0.0f;}; + // Useful in fixed point since lane value is an int and not a Q something + static constexpr float16_t lane_value(const float16_t x) {return x;}; + +}; + + +namespace inner { + + + template<> + struct vctpq{ + static mve_pred16_t mk(uint32_t v) + + { + return(vctp16q(v)); + }; + }; + + __STATIC_FORCEINLINE float16x8_t vconst(float16_t v) + { + return(vdupq_n_f16(v)); + } + + __STATIC_FORCEINLINE float16x8_t vconst_tail(const float16_t v, + const mve_pred16_t p0) + { + return(vdupq_x_n_f16(v,p0)); + } + + __STATIC_FORCEINLINE float16x8_t vneg(const float16x8_t a) + { + return(vnegq(a)); + }; + + __STATIC_FORCEINLINE float16x8_t vneg(const float16x8_t a, + const mve_pred16_t p0) + { + return(vnegq_x(a,p0)); + }; + + /* + + ADD + + */ + + __STATIC_FORCEINLINE float16x8_t vadd(const float16x8_t a, + const float16x8_t b) + { + return(vaddq(a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vadd(const float16x8_t a, + const float16_t b) + { + return(vaddq_n_f16(a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vadd(const float16_t a, + const float16x8_t b) + { + return(vaddq_n_f16(b,a)); + }; + + __STATIC_FORCEINLINE float16x8_t vadd(const float16x8_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vaddq_x(a,b,p0)); + }; + + __STATIC_FORCEINLINE float16x8_t vadd(const float16x8_t a, + const float16_t b, + const mve_pred16_t p0) + { + return(vaddq_x_n_f16(a,b,p0)); + }; + + __STATIC_FORCEINLINE float16x8_t vadd(const float16_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vaddq_x_n_f16(b,a,p0)); + }; + + /* + + SUB + + */ + + __STATIC_FORCEINLINE float16x8_t vsub(const float16x8_t a, + const float16x8_t b) + { + return(vsubq(a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vsub(const float16x8_t a, + const float16_t b) + { + return(vsubq_n_f16(a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vsub(const float16_t a, + const float16x8_t b) + { + return(vsubq_n_f16(b,a)); + }; + + __STATIC_FORCEINLINE float16x8_t vsub(const float16x8_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vsubq_x(a,b,p0)); + }; + + __STATIC_FORCEINLINE float16x8_t vsub(const float16x8_t a, + const float16_t b, + const mve_pred16_t p0) + { + return(vsubq_x_n_f16(a,b,p0)); + }; + + __STATIC_FORCEINLINE float16x8_t vsub(const float16_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vsubq_x_n_f16(b,a,p0)); + }; + + /* + + 
MUL + + */ + + __STATIC_FORCEINLINE float16x8_t vmul(const float16x8_t a, + const float16x8_t b) + { + return(vmulq(a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vmul(const float16x8_t a, + const float16_t b) + { + return(vmulq_n_f16(a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vmul(const float16_t a, + const float16x8_t b) + { + return(vmulq_n_f16(b,a)); + }; + + __STATIC_FORCEINLINE float16x8_t vmul(const float16x8_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vmulq_x(a,b,p0)); + }; + + __STATIC_FORCEINLINE float16x8_t vmul(const float16x8_t a, + const float16_t b, + const mve_pred16_t p0) + { + return(vmulq_x_n_f16(a,b,p0)); + }; + + __STATIC_FORCEINLINE float16x8_t vmul(const float16_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vmulq_x_n_f16(b,a,p0)); + }; + + /* + + vmacc + + */ + + __STATIC_FORCEINLINE float16x8_t vmacc(const float16x8_t acc, + const float16x8_t a, + const float16x8_t b) + { + return(vfmaq(acc,a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vmacc(const float16x8_t acc, + const float16x8_t a, + const float16_t b) + { + return(vfmaq(acc,a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vmacc(const float16x8_t acc, + const float16x8_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vfmaq_m(acc,a,b,p0)); + }; + + + + __STATIC_FORCEINLINE float16_t vreduce(float16x8_t in) + { + float16x8_t tmpVec; + _Float16 acc; + + tmpVec = (float16x8_t) vrev32q_s16((int16x8_t) in); + in = vaddq_f16(tmpVec, in); + tmpVec = (float16x8_t) vrev64q_s32((int32x4_t) in); + in = vaddq_f16(tmpVec, in); + acc = (_Float16)vgetq_lane_f16(in, 0) + (_Float16)vgetq_lane_f16(in, 4); + + return acc; + }; + + /* + + Load + + */ + + template::type = true> + inline float16x8_t vload1(const float16_t *p) + { + return(vld1q(p)); + }; + + template1) && (S<=65535),bool>::type = true> + inline float16x8_t vload1(const float16_t *p) + { + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + return(vldrhq_gather_shifted_offset_f16(p,offset)); + }; + + template65535),bool>::type = true> + inline float16x8_t vload1(const float16_t *p) + { + float16x8_t res; + for(std::size_t i=0;i<8;i++) + { + res[i] = *p; + p += S; + } + + return(res); + }; + + // With dynamic stride + inline float16x8_t vload1(const float16_t *p,const index_t stride) + { + if (stride <= 65535) + { + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,stride); + return(vldrhq_gather_shifted_offset_f16(p,offset)); + } + else + { + float16x8_t res; + for(std::size_t i=0;i<8;i++) + { + res[i] = *p; + p += stride; + } + return(res); + } + }; + + + + template::type = true> + inline float16x8_t vload1_z(const float16_t *p, + const std::size_t nb, + const mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(p,p0)); + }; + + template1)&& (S<=65535),bool>::type = true> + inline float16x8_t vload1_z(const float16_t *p, + const std::size_t nb, + const mve_pred16_t p0) + { + (void)nb; + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + return(vldrhq_gather_shifted_offset_z_f16(p,offset,p0)); + }; + + template65535),bool>::type = true> + inline float16x8_t vload1_z(const float16_t *p,std::size_t nb,mve_pred16_t p0) + { + (void)p0; + float16x8_t res; + std::size_t i=0; + for(;i::type = true> + inline void vstore1(float16_t *p,const float16x8_t val) + { + vst1q(p,val); + }; + + template1) && (S<=65535),bool>::type = true> + inline void vstore1(float16_t *p,const float16x8_t val) + { + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + 
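+    // The offsets are element indices: the "shifted offset" scatter scales each one
+    // by the element size (2 bytes for f16) before adding it to p, so lane k of val
+    // is written to p[k*S].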
vstrhq_scatter_shifted_offset_f16(p,offset,val); + }; + + template65535),bool>::type = true> + inline void vstore1(float16_t *p,const float16x8_t val) + { + for(std::size_t i=0;i<8;i++) + { + *p = val[i]; + p += S; + } + + }; + + // dynamic stride + inline void vstore1(float16_t *p, + const index_t stride, + const float16x8_t val) + { + if (stride <=65535) + { + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,stride); + vstrhq_scatter_shifted_offset_f16(p,offset,val); + } + else + { + for(std::size_t i=0;i<8;i++) + { + *p = val[i]; + p += stride; + } + } + } + + template::type = true> + inline void vstore1_z(float16_t *p, + const float16x8_t val, + std::size_t nb, + mve_pred16_t p0) + { + (void)nb; + vstrhq_p(p,val,p0); + }; + + template1) && (S<=65535),bool>::type = true> + inline void vstore1_z(float16_t *p, + const float16x8_t val, + std::size_t nb, + mve_pred16_t p0) + { + (void)nb; + + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + vstrhq_scatter_shifted_offset_p_f16(p,offset,val,p0); + }; + + + template65535),bool>::type = true> + inline void vstore1_z(float16_t *p, + const float16x8_t val, + std::size_t nb, + mve_pred16_t p0) + { + (void)p0; + for(std::size_t i=0;i +inline void _dot_m_v(RES &res, + const M&m,const V&v, + const Helium* = nullptr) +{ + + const vector_length_t nb_rows=m.rows(); + constexpr int U = 4; + + index_t row=0; + + DISABLE_LOOP_UNROLL + for(; row<=nb_rows-U; row += U) + { + results([&res,&row](index_t k){return &res[row+k];}) = + inner::from_accumulator(dot(unroll( + [&row,&m](index_t k){return m.row(row+k);}), + replicate(v) + )); + } + + switch (nb_rows-row) + { + case 3: + results<3>([&res,row](index_t k){return &res[row+k];}) = + inner::from_accumulator(dot(unroll<3>( + [row,&m](index_t k){return m.row(row+k);}), + replicate<3>(v) + )); + break; + case 2: + results<2>([&res,row](index_t k){return &res[row+k];}) = + inner::from_accumulator(dot(unroll<2>( + [row,&m](index_t k){return m.row(row+k);}), + replicate<2>(v) + )); + break; + case 1: + res[row] = inner::from_accumulator(dot(m.row(row),v)); + break; + } + +} + +#define MATRIX_DIM2 2 +#define MATRIX_DIM3 3 +#define MATRIX_DIM4 4 + +#if defined(ARM_MATH_MVEI) + +/* Fixed point specific cases*/ +#include "matrix_multiply_fixed.hpp" + +#endif + +#if defined(ARM_MATH_MVEF) + +/* Datatype specific cases*/ +#include "matrix_multiply_f16.hpp" +#include "matrix_multiply_f32.hpp" + +/* Generic float */ +template() && + number_traits::Scalar>::is_float,bool>::type = true> +__STATIC_INLINE void _dot_m_m(const MA&pSrcA,const MB&pSrcB, + RES &&pDst, + const Helium* = nullptr) + { + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + constexpr int nb_lanes = vector_traits::nb_lanes; + + T *pInB = pSrcB.ptr(); /* input data matrix pointer B */ + T *pInA = pSrcA.ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + int numRowsA = pSrcA.rows(); /* number of rows of input matrix A */ + int numColsB = pSrcB.columns(); /* number of columns of input matrix B */ + int numColsA = pSrcA.columns(); /* number of columns of input matrix A */ + uint32_t blkCnt; /* loop counters */ + uint32_t i; + + { + /* small squared matrix specialized routines */ + if(numRowsA == numColsB && numColsB == numColsA) { + if (numRowsA == 1) + { + pDst(0,0)= pSrcA(0,0) * pSrcB(0,0); + return; + } + else if(numRowsA == 2) + return _arm_mat_mult_2x2_mve(pSrcA, pSrcB, 
std::forward(pDst)); + else if(numRowsA == 3) + return _arm_mat_mult_3x3_mve(pSrcA, pSrcB, std::forward(pDst)); + else if(numRowsA == 4) + return _arm_mat_mult_4x4_mve(pSrcA, pSrcB, std::forward(pDst)); + } + + /* main loop process 4 rows */ + i = numRowsA >> 2; + while (i > 0U) + { + T *pInA0, *pInA1, *pInA2, *pInA3; + T *pInB0; + T *pOut0, *pOut1, *pOut2, *pOut3; + ACC vecMac0, vecMac1, vecMac2, vecMac3; + VEC vecInB; + + /* pointers to 4 consecutive output rows */ + pOut0 = pOut; + pOut1 = pOut0 + pDst.stride(); + pOut2 = pOut1 + pDst.stride(); + pOut3 = pOut2 + pDst.stride(); + pInB0 = pInB; + + uint32_t k = numColsB / nb_lanes; + while (k > 0U) + { + /* pointers to 4 consecutive Matrix A rows */ + pInA0 = pInA; + pInA1 = pInA0 + pSrcA.stride(); + pInA2 = pInA1 + pSrcA.stride(); + pInA3 = pInA2 + pSrcA.stride(); + + vecMac0 = vector_traits::temp_acc_zero(); + vecMac1 = vector_traits::temp_acc_zero(); + vecMac2 = vector_traits::temp_acc_zero(); + vecMac3 = vector_traits::temp_acc_zero(); + + blkCnt = numColsA; + + while (blkCnt > 0U) + { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = inner::vload1<1>(pInB0); /* vldrwq_f32(pInB0, 0); */ + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + vecMac3 = inner::vmacc(vecMac3, vecInB, *pInA3++); + + pInB0 = pInB0 + pSrcB.stride(); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + /* Store the results (4 x 4 block) in the destination buffer */ + inner::vstore1<1>(pOut0, vecMac0); + pOut0 += nb_lanes; + inner::vstore1<1>(pOut1, vecMac1); + pOut1 += nb_lanes; + inner::vstore1<1>(pOut2, vecMac2); + pOut2 += nb_lanes; + inner::vstore1<1>(pOut3, vecMac3); + pOut3 += nb_lanes; + + /* + * rewind + */ + pInB0 -= (pSrcB.stride() * numColsA) - nb_lanes; + k--; + } + + int colBLeft = numColsB & (nb_lanes - 1); + if (colBLeft) + { + pInA0 = pInA; + pInA1 = pInA0 + pSrcA.stride(); + pInA2 = pInA1 + pSrcA.stride(); + pInA3 = pInA2 + pSrcA.stride(); + + mve_pred16_t p0 = inner::vctpq::mk(colBLeft); + + vecMac0 = vector_traits::temp_acc_zero(); + vecMac1 = vector_traits::temp_acc_zero(); + vecMac2 = vector_traits::temp_acc_zero(); + vecMac3 = vector_traits::temp_acc_zero(); + + blkCnt = numColsA; + + while (blkCnt > 0U) + { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = inner::vload1_z<1>(pInB0, colBLeft,p0); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + vecMac3 = inner::vmacc(vecMac3, vecInB, *pInA3++); + + pInB0 = pInB0 + pSrcB.stride(); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + /* Store the results (4 x colBLeft block) in the destination buffer */ + inner::vstore1_z<1>(pOut0, vecMac0, colBLeft,p0); + inner::vstore1_z<1>(pOut1, vecMac1, colBLeft,p0); + inner::vstore1_z<1>(pOut2, vecMac2, colBLeft,p0); + inner::vstore1_z<1>(pOut3, vecMac3, colBLeft,p0); + } + + /* move to next rows */ + pInA += 4 * pSrcA.stride(); + pOut += 4 * pDst.stride(); + i--; + } + + /* + * non multiple of 4 rows for Matrix A + * process single row + */ + if (numRowsA & 3) + { + i = numRowsA & 3; + while (i > 0U) + { + T *pInA0; + T *pInB0; + T *pOut0; + VEC vecInB; + ACC vecMac0; + + pOut0 = pOut; + pInB0 = pInB; + + uint32_t k = numColsB / nb_lanes; + while (k > 0U) + { + pInA0 = pInA; + + vecMac0 = vector_traits::temp_acc_zero(); + blkCnt = numColsA; + while (blkCnt > 0U) 
+ { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = inner::vload1<1>(pInB0); /* vldrwq_f32(pInB0, 0); */ + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + + pInB0 = pInB0 + pSrcB.stride(); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + /* Store the results (1 x 4 block) in the destination buffer */ + inner::vstore1<1>(pOut0, vecMac0); + pOut0 += nb_lanes; + + /* + * rewind + */ + pInB0 -= (pSrcB.stride() * numColsA) - nb_lanes; + k--; + } + + int colBLeft = numColsB & (nb_lanes-1); + if (colBLeft) + { + pInA0 = pInA; + mve_pred16_t p0 = inner::vctpq::mk(colBLeft); + + vecMac0 = vector_traits::temp_acc_zero(); + blkCnt = numColsA; + while (blkCnt > 0U) + { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = inner::vload1_z<1>(pInB0, colBLeft,p0); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + + pInB0 = pInB0 + pSrcB.stride(); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* Store the results (1 x colBLeft block) in the destination buffer */ + inner::vstore1_z<1>(pOut0, vecMac0, colBLeft,p0); + } + + /* move to next row */ + pInA += 1 * pSrcA.stride(); + pOut += 1 * pDst.stride(); + i--; + } + + } + +} + +} + + +#undef MATRIX_DIM2 +#undef MATRIX_DIM3 +#undef MATRIX_DIM4 + +#endif + +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/matrix_multiply_f16.hpp b/dsppp/Include/dsppp/Helium/matrix_multiply_f16.hpp new file mode 100644 index 000000000..3671160f9 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/matrix_multiply_f16.hpp @@ -0,0 +1,404 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HELIUMALG + * @{ + */ + +#if defined(ARM_MATH_MVE_FLOAT16) + +/* + +This can't be used with stride bigger than 21845 +which for embedded is acceptable. + +No check is done at runtime or build time that the stride is not +too big. 
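+
+(The gather/scatter offsets are built in uint16_t lanes and the kernels below step
+them by up to three strides, so roughly 3*stride must fit in 16 bits:
+65535/3 = 21845.)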
+ +*/ + +template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_2x2_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + using T = typename traits::Scalar; + //using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + + const uint16_t offsetA[8] = { 0, 0, (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride(), + 0, 0, (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride() }; + /* offsetB allows to read and duplicate 1 row of B */ + const uint16_t offsetB[8] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + + /* {d00, d01, d10, d11} */ + const uint16_t offsetD[8] = { 0, 1, (uint16_t)pDst.stride(), (uint16_t)(pDst.stride()+1), + 0,0,0,0 }; + + uint16x8_t vecOffsA, vecOffsB,vecOffsD; + VEC vecInA, vecInB, vecDst; + T *pOut = pDst.ptr(); /* output data matrix pointer */ + + /* + * load initial offsets + */ + vecOffsA = vldrhq_u16((uint16_t const *) offsetA); + vecOffsB = vldrhq_u16((uint16_t const *) offsetB); + /* + * load {a00 a00 a10 a10 x x x x } + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * load {b00 b01 b00 b01 x x x x } + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 a00 b01 + * a10 b00 a10 b01 + * x x + * x x } + */ + vecDst = vmulq(vecInA, vecInB); + /* + * move to 2nd column of matrix A + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 1); + /* + * load {a01 a01 a11 a11 x x x x} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b10, b11, b10, b11, x x x x } + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 + a01 b10 a00 b01 + a01 b11 + * a10 b00 + a11 b10 a10 b01 + a11 b11 + * x x + * x x } + */ + vecDst = vfmaq(vecDst, vecInA, vecInB); + + mve_pred16_t p0 = vctp16q(2*2); + /* + * Store the result in the destination buffer + * (lower half of the vector) + */ + + vecOffsD = vldrhq_u16((uint16_t const *) offsetD); + + vstrhq_scatter_shifted_offset_p(pOut,vecOffsD,vecDst,p0); + +} + + +template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_3x3_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + const uint16_t offsetA[8] = { 0, 0, 0, + (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride(), + (uint16_t)(2U*pSrcA.stride()), (uint16_t)(2U*pSrcA.stride()) }; + /* offsetB allows to read and duplicate 1 row of B */ + const uint16_t offsetB[8] = { 0, 1, 2, 0, 1, 2, 0, 1 }; + const uint16_t offsetD[8] = { 0, 1, 2, + (uint16_t)(0+pDst.stride()), (uint16_t)(1+pDst.stride()), + (uint16_t)(2+pDst.stride()), + (uint16_t)(0+2*pDst.stride()), + (uint16_t)(1+2*pDst.stride()) }; + + uint16x8_t vecOffsA, vecOffsB,vecOffsD; + float16x8_t vecInA, vecInB, vecDst; + float16_t *pOut = pDst.ptr(); /* output data matrix pointer */ + + /* + * load initial offsets + */ + vecOffsA = vldrhq_u16((uint16_t const *) offsetA); + vecOffsB = vldrhq_u16((uint16_t const *) offsetB); + + /* + * load {a00 a00 a00 a10 a10 a10 a20 a20} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * load {b00 b01 b02 b00 b01 b02 b00 b01} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 a00 b01 a00 b02 + * a10 b00 a10 b01 a10 b02 + * a20 b00 a20 b01} + */ + vecDst = vmulq(vecInA, vecInB); + + /* + * move to 2nd column of matrix A + */ + vecOffsA = vaddq_n_u16(vecOffsA, 
(uint16_t) 1); + /* + * load {a01 a01 a01 a11 a11 a11 a21 a21} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b10, b11, b12, b10, b11, b12, b10, b11} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 + a01 b10 a00 b01 + a01 b11 a00 b02 + a01 b12 + * a10 b00 + a11 b10 a10 b01 + a11 b11 a10 b02 + a11 b12 + * a20 b00 + a21 b10 a20 b01 + a21 b11 } + */ + vecDst = vfmaq(vecDst, vecInA, vecInB); + /* + * move to 3rd column of matrix A + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 1); + /* + * load {a02 a02 a02 a12 a12 a12 a22 a22} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b20, b21, b22, b20, b21, b22, b20, b21} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * {a00 b00 + a01 b10 + a02 b20 a00 b01 + a01 b11 + a02 b21 a00 b02 + a01 b12 + a02 b22}, + * a10 b00 + a11 b10 + a12 b20 a10 b01 + a11 b11 + a12 b21 a10 b02 + a11 b12 + a12 b22}, + * a20 b00 + a21 b10 + a22 b20 a20 b01 + a21 b11 + a22 b21 } + */ + vecDst = vfmaq(vecDst, vecInA, vecInB); + + /* + * Store the result in the destination buffer + */ + vecOffsD = vldrhq_u16((uint16_t const *) offsetD); + + vstrhq_scatter_shifted_offset(pOut,vecOffsD,vecDst); + + pOut += 2*pDst.stride()+2; + + /* last element computed in scalar mode + * a20 b02 + a21 b12 + a22 b22 + */ + + const _Float16 * pA = (const _Float16 *)pSrcA.const_ptr(); + const _Float16 * pB = (const _Float16 *)pSrcB.const_ptr(); + const index_t sa =pSrcA.stride(); + const index_t sb =pSrcB.stride(); + *pOut = pA[2*sa] * pB[2] + pA[1+2*sa] * pB[2+sb] + pA[2+2*sa] * pB[2+2*sb]; + +} + + + +template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_4x4_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + /* offsetA allows to read and duplicate 2 successive column elements of A */ + const uint16_t offsetA[8] = { 0, 0, 0, 0, + (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride() }; + /* offsetB allows to read and duplicate 1 row of B */ + const uint16_t offsetB[8] = { 0, 1, 2, 3, 0, 1, 2, 3 }; + + const uint16_t offsetD[8] = { 0, 1, 2, 3, + (uint16_t)(0+pDst.stride()), (uint16_t)(1+pDst.stride()), + (uint16_t)(2+pDst.stride()), (uint16_t)(3+pDst.stride()) }; + + uint16x8_t vecOffsA, vecOffsB,vecOffsD; + float16x8_t vecInA, vecInB, vecDst0, vecDst1; + float16_t *pOut = pDst.ptr(); /* output data matrix pointer */ + + /* + * load initial offsets + */ + vecOffsA = vldrhq_u16((uint16_t const *) offsetA); + vecOffsB = vldrhq_u16((uint16_t const *) offsetB); + + /* + * load {a00 a00 a00 a00 a10 a10 a10 a10} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * load {b00 b01 b02 b03 b00 b01 b02 b03} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + + /* + * { a00 b00 a00 b01 a00 b02 a00 b03 + * a10 b00 a10 b01 a10 b02 a10 b03 } + */ + vecDst0 = vmulq(vecInA, vecInB); + /* + * jump 2 x A rows (2nd half of matrix) + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) pSrcA.stride()*2); + /* + * load {a20 a20 a20 a20 a30 a30 a30 a30} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * { a20 b00 a20 b01 a20 b02 a20 b03 + * a30 b00 a30 b01 a30 b02 + a31 b12 } + */ + 
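+   // Each 8-lane f16 vector holds two rows of the 4x4 result (4 columns each):
+   // vecDst0 accumulates rows 0-1 and vecDst1 rows 2-3, so the full product is
+   // built with two accumulators and four passes over the columns of A.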
vecDst1 = vmulq(vecInA, vecInB); + /* + * rewind back to top half of the A matrix (2nd column) + */ + vecOffsA = vsubq(vecOffsA, (uint16_t) (2*pSrcA.stride()-1)); + /* + * load {a01 a01 a01 a01 a11 a11 a11 a11} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b10, b11, b12, b13, b10, b11, b12, b13} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 + a01 b10 a00 b01 + a01 b11 a00 b02 + a01 b12 a00 b03 + a01 b13 + * a10 b00 + a11 b10 a10 b01 + a11 b11 a10 b02 + a11 b12 a10 b03 + a11 b13 } + */ + vecDst0 = vfmaq(vecDst0, vecInA, vecInB); + /* + * jump 2 x A rows (2nd half of matrix) + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) pSrcA.stride()*2); + /* + * load {a21 a21 a21 a21 a31 a31 a31 a31} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * {a20 b00 + a21 b10 a20 b01 + a21 b11 a20 b02 + a21 b12 a20 b03 + a21 b13 + * a30 b00 + a31 b10 a30 b01 + a31 b11 a30 b02 + a31 b12 a30 b03 + a31 b13 } + */ + vecDst1 = vfmaq(vecDst1, vecInA, vecInB); + + /* + * rewind back to top half of the A matrix (3rd column) + */ + vecOffsA = vsubq(vecOffsA, (uint16_t) (2*pSrcA.stride()-1)); + /* + * load {a02 a02 a02 a02 a12 a12 a12 a12} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b20, b21, b22, b23, b20, b21, b22, b23} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 + a01 b10 + a02 b20 a00 b01 + a01 b11 + a02 b21 a00 b02 + a01 b12 + a02 b22 a00 b03 + a01 b13 + a02 b23 + * a10 b00 + a11 b10 + a12 b20 a10 b01 + a11 b11 + a12 b21 a10 b02 + a11 b12 + a12 b22 a10 b03 + a11 b13 + a12 b23 } + */ + vecDst0 = vfmaq(vecDst0, vecInA, vecInB); + /* + * jump 2 x A rows + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 2*pSrcA.stride()); + + /* + * load {a22 a22 a22 a22 a32 a32 a32 a32} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * {a20 b00 + a21 b10 + a22 b20 a20 b01 + a21 b11 + a22 b21 a20 b02 + a21 b12 + a22 b22 a20 b03 + a21 b13 + a22 b23 + * a30 b00 + a31 b10 + a32 b20 a30 b01 + a31 b11 + a32 b21 a30 b02 + a31 b12 + a32 b22 a30 b03 + a31 b13 + a32 b23 } + */ + vecDst1 = vfmaq(vecDst1, vecInA, vecInB); + + /* + * rewind back to top half of the A matrix (4th column) + */ + vecOffsA = vsubq(vecOffsA, (uint16_t) (2*pSrcA.stride()-1)); + /* + * load {a03 a03 a03 a03 a13 a13 a13 a13} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b30, b31, b32, b33, b30, b31, b32, b33} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 +...+ a03 b30, a00 b01 +...+ a03 b31, a00 b02 +...+ a03 b32, a00 b03 +...+ a03 b33 + * a10 b00 +...+ a13 b30, a10 b01 +...+ a13 b31, a10 b02 +...+ a13 b32, a10 b03 +...+ a13 b33 } + */ + vecDst0 = vfmaq(vecDst0, vecInA, vecInB); + /* + * jump 2 x A rows + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) pSrcA.stride()*2); + /* + * load {a23 a23 a23 a23 a33 a33 a33 a33} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * {a20 b00 +...+ a23 b30, a20 b01 +...+ a23 b31, a20 b02 +...+ a23 b32, a20 b03 +...+ a23 b33 + * a30 b00 +...+ a33 b30, a30 b01 +...+ a33 b31, a30 
b02 +...+ a33 b32, a30 b03 +...+ a33 b33 } + */ + vecDst1 = vfmaq(vecDst1, vecInA, vecInB); + + /* + * Store the result in the destination buffer + */ + vecOffsD = vldrhq_u16((uint16_t const *) offsetD); + vstrhq_scatter_shifted_offset(pOut,vecOffsD,vecDst0); + pOut += 2*pDst.stride(); + vstrhq_scatter_shifted_offset(pOut,vecOffsD,vecDst1); + +} + +#endif + +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/matrix_multiply_f32.hpp b/dsppp/Include/dsppp/Helium/matrix_multiply_f32.hpp new file mode 100644 index 000000000..ecdfbc6c2 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/matrix_multiply_f32.hpp @@ -0,0 +1,270 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HELIUMALG + * @{ + */ + +template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_2x2_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + using T = typename traits::Scalar; + //using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + + /* {a00, a00, a10, a10} */ + const uint32_t offsetA0[4] = { 0, 0, pSrcA.stride(), pSrcA.stride() }; + /* {b00, b01, b00, b01} */ + const uint32_t offsetB0[4] = { 0, 1, 0, 1 }; + /* {a01, a01, a11, a11} */ + const uint32_t offsetA1[4] = { 1, 1, pSrcA.stride() + 1, pSrcA.stride() + 1 }; + /* {b10, b11, b10, b11} */ + const uint32_t offsetB1[4] = { pSrcB.stride(), pSrcB.stride()+1, pSrcB.stride(), pSrcB.stride()+1 }; + + /* {d00, d01, d10, d11} */ + const uint32_t offsetD[4] = { 0, 1, pDst.stride(), pDst.stride()+1 }; + + uint32x4_t vecOffsA, vecOffsB,vecOffsC; + VEC vecInA, vecInB, vecDst; + + if constexpr (!HasStaticStride::value) + { + vecOffsA = vldrwq_u32((uint32_t const *) offsetA0); + } + vecOffsB = vldrwq_u32((uint32_t const *) offsetB0); + + if constexpr (!HasStaticStride::value) + { + vecInA = vldrwq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + } + else + { + constexpr int s = StaticStride::value; + vecInA = inner::vload1_gen_stride<0, 0, s, s>::run(pSrcA.const_ptr()); + } + + if constexpr (!HasStaticStride::value) + { + vecInB = vldrwq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + } + else + { + vecInB = inner::vload1_gen_stride<0, 1, 0, 1>::run(pSrcB.const_ptr()); + } + vecDst = inner::vmul(vecInA, vecInB); + + if constexpr (!HasStaticStride::value) + { + vecOffsA = vldrwq_u32((uint32_t const *) offsetA1); + } + + if constexpr (!HasStaticStride::value) + { + vecOffsB = vldrwq_u32((uint32_t const *) offsetB1); + } + + if constexpr (!HasStaticStride::value) + { + vecInA = vldrwq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + } + else + { + constexpr int s = StaticStride::value; + vecInA = inner::vload1_gen_stride<1, 1, s+1, s+1>::run(pSrcA.const_ptr()); + + } + + if constexpr (!HasStaticStride::value) + { + vecInB = vldrwq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + } + else + { + constexpr int s = StaticStride::value; + vecInB = inner::vload1_gen_stride::run(pSrcB.const_ptr()); + } + + if constexpr (!HasStaticStride::value) + { + vecOffsC = vldrwq_u32((uint32_t const *) offsetD); + } + + vecDst = inner::vmacc(vecDst, vecInA, vecInB); + + //inner::vstore1<1>(pDst.ptr(), vecDst); + if constexpr (!HasStaticStride::value) + { + vstrwq_scatter_shifted_offset(pDst.ptr(),vecOffsC,vecDst); + } + else + { + constexpr int s = StaticStride::value; + inner::vstore1_gen_stride<0, 1, s, s+1>::run(pDst.ptr(),vecDst); + } + +} + 
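/*
 * Illustrative sketch only (not part of the kernel above): the scalar computation
 * that _arm_mat_mult_2x2_mve performs with one vmul and one vmacc over the gathered
 * lanes {a00,a00,a10,a10}*{b00,b01,b00,b01} + {a01,a01,a11,a11}*{b10,b11,b10,b11},
 * scattered back with offsets {0, 1, strideD, strideD+1}.
 * The function name and the plain pointer/stride parameters are simplified
 * assumptions for the example, not part of the library API.
 */
template<typename T>
void mat_mult_2x2_scalar_reference(const T *a, int strideA,
                                   const T *b, int strideB,
                                   T *d, int strideD)
{
    /* destination row 0 */
    d[0]           = a[0] * b[0]       + a[1] * b[strideB];
    d[1]           = a[0] * b[1]       + a[1] * b[strideB + 1];
    /* destination row 1 */
    d[strideD]     = a[strideA] * b[0] + a[strideA + 1] * b[strideB];
    d[strideD + 1] = a[strideA] * b[1] + a[strideA + 1] * b[strideB + 1];
}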
+template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_3x3_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + T *pInB = pSrcB.ptr(); /* input data matrix pointer B */ + T *pInA = pSrcA.ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + T *pInA0, *pInA1, *pInA2; + ACC vecMac0, vecMac1, vecMac2; + VEC vecInB; + T const *pSrBVec; + + pSrBVec = (float32_t const *) pInB; + + pInA0 = pInA; + pInA1 = pInA0 + pSrcA.stride(); + pInA2 = pInA1 + pSrcA.stride(); + /* enable predication to disable last (4th) vector element */ + mve_pred16_t p0 = inner::vctpq::mk(MATRIX_DIM3); + + /* + * load {b0,0, b0,1, b0,2, 0} + */ + vecInB = inner::vload1_z<1>(pSrBVec, MATRIX_DIM3,p0); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmul(vecInB, *pInA0++); + vecMac1 = inner::vmul(vecInB, *pInA1++); + vecMac2 = inner::vmul(vecInB, *pInA2++); + /* + * load {b1,0, b1,1, b1,2, 0} + */ + vecInB = inner::vload1_z<1>(pSrBVec, MATRIX_DIM3,p0); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + /* + * load {b2,0, b2,1 , b2,2, 0} + */ + vecInB = inner::vload1_z<1>(pSrBVec, MATRIX_DIM3,p0); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + + /* partial vector stores */ + inner::vstore1_z<1>(pOut, vecMac0, MATRIX_DIM3,p0); + pOut += pDst.stride(); + inner::vstore1_z<1>(pOut, vecMac1, MATRIX_DIM3,p0); + pOut += pDst.stride(); + inner::vstore1_z<1>(pOut, vecMac2, MATRIX_DIM3,p0); + /* + * Return to application + */ +} + +template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_4x4_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + T const *pSrBVec; + T *pInB = pSrcB.ptr(); /* input data matrix pointer B */ + T *pInA = pSrcA.ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + T *pInA0, *pInA1, *pInA2, *pInA3; + ACC vecMac0, vecMac1, vecMac2, vecMac3; + VEC vecInB; + + pSrBVec = (float32_t const *) pInB; + + pInA0 = pInA; + pInA1 = pInA0 + pSrcA.stride(); + pInA2 = pInA1 + pSrcA.stride(); + pInA3 = pInA2 + pSrcA.stride(); + /* + * load {b0,0, b0,1, b0,2, b0,3} + */ + vecInB = inner::vload1<1>(pSrBVec); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmul(vecInB, *pInA0++); + vecMac1 = inner::vmul(vecInB, *pInA1++); + vecMac2 = inner::vmul(vecInB, *pInA2++); + vecMac3 = inner::vmul(vecInB, *pInA3++); + /* + * load {b1,0, b1,1, b1,2, b1,3} + */ + vecInB = inner::vload1<1>(pSrBVec); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + vecMac3 = inner::vmacc(vecMac3, vecInB, *pInA3++); + /* + * load {b2,0, b2,1, b2,2, b2,3} + */ + vecInB = inner::vload1<1>(pSrBVec); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, 
*pInA2++); + vecMac3 = inner::vmacc(vecMac3, vecInB, *pInA3++); + /* + * load {b3,0, b3,1, b3,2, b3,3} + */ + vecInB = inner::vload1<1>(pSrBVec); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + vecMac3 = inner::vmacc(vecMac3, vecInB, *pInA3++); + + inner::vstore1<1>(pOut, vecMac0); + pOut += pDst.stride(); + inner::vstore1<1>(pOut, vecMac1); + pOut += pDst.stride(); + inner::vstore1<1>(pOut, vecMac2); + pOut += pDst.stride(); + inner::vstore1<1>(pOut, vecMac3); + +} + +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/matrix_multiply_fixed.hpp b/dsppp/Include/dsppp/Helium/matrix_multiply_fixed.hpp new file mode 100644 index 000000000..8169fc0f9 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/matrix_multiply_fixed.hpp @@ -0,0 +1,613 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HELIUMALG + * @{ + */ + +#if defined(ARM_MATH_MVEI) + + + +#define MVE_ASRL_SAT16(acc, shift) ((sqrshrl_sat48(acc, -(32-shift)) >> 32) & 0xffffffff) + + + +template() && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline void arm_mat_mult_2x2_mve( + const MA & pSrcA, + const MB & pSrcB, + RES && pDst) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + + const T *pInB = pSrcB.const_ptr(); /* input data matrix pointer B */ + const T *pInA = pSrcA.const_ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + const T *pInA0 = pInA; + const T *pInA1 = pInA0 + pSrcA.stride(); + ACC acc0, acc1; + VEC vecB, vecA0, vecA1; + mve_pred16_t p0 = inner::vctpq::mk(MATRIX_DIM2); + + + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM2,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM2,p0); + } + + + vecA0 = inner::vload1_z<1>(pInA0,MATRIX_DIM2,p0); + vecA1 = inner::vload1_z<1>(pInA1,MATRIX_DIM2,p0); + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + + pOut[0] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut++; + + /* move to next B column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM2,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM2,p0); + } + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + + pOut[0] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + +} + + +template() && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline void arm_mat_mult_3x3_mve( + const MA & pSrcA, + const MB & pSrcB, + RES && pDst) +{ + + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + + const T *pInB = pSrcB.const_ptr(); /* input data matrix pointer B */ + const T *pInA = pSrcA.const_ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + const T *pInA0 = pInA; + const T *pInA1 = pInA0 + pSrcA.stride(); + const T *pInA2 = pInA1 + pSrcA.stride(); + ACC acc0, acc1, acc2; + VEC vecB, vecA0, vecA1, vecA2; + 
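/* enable predication so that only the first 3 lanes (one row or column of the
   3x3 operands) take part in the predicated loads and MACs below */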
mve_pred16_t p0 = inner::vctpq::mk(MATRIX_DIM3); + + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM3,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM3,p0); + } + + vecA0 = inner::vload1_z<1>(pInA0,MATRIX_DIM3,p0); + vecA1 = inner::vload1_z<1>(pInA1,MATRIX_DIM3,p0); + vecA2 = inner::vload1_z<1>(pInA2,MATRIX_DIM3,p0); + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut++; + + /* move to next B column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM3,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM3,p0); + } + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut++; + + /* move to next B column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM3,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM3,p0); + } + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + +} + + +template() && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline void arm_mat_mult_4x4_mve( + const MA & pSrcA, + const MB & pSrcB, + RES && pDst) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + + const T *pInB = pSrcB.const_ptr(); /* input data matrix pointer B */ + const T *pInA = pSrcA.const_ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + const T *pInA0 = pInA; + const T *pInA1 = pInA0 + pSrcA.stride(); + const T *pInA2 = pInA1 + pSrcA.stride(); + const T *pInA3 = pInA2 + pSrcA.stride(); + ACC acc0, acc1, acc2, acc3; + VEC vecB, vecA0, vecA1, vecA2, vecA3; + mve_pred16_t p0 = inner::vctpq::mk(MATRIX_DIM4); + + + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM4,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM4,p0); + } + + vecA0 = inner::vload1_z<1>(pInA0,MATRIX_DIM4,p0); + vecA1 = inner::vload1_z<1>(pInA1,MATRIX_DIM4,p0); + vecA2 = inner::vload1_z<1>(pInA2,MATRIX_DIM4,p0); + vecA3 = inner::vload1_z<1>(pInA3,MATRIX_DIM4,p0); + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + acc3 = inner::vmacc(vecA3, vecB,p0); + + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut[3 * pDst.stride()] = 
inner::from_accumulator(inner::vreduce(acc3)); + pOut++; + + /* move to next B column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM4,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM4,p0); + } + + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + acc3 = inner::vmacc(vecA3, vecB,p0); + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut[3 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc3)); + + pOut++; + + /* move to next B column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM4,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM4,p0); + } + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + acc3 = inner::vmacc(vecA3, vecB,p0); + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut[3 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc3)); + + pOut++; + + /* move to next B column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM4,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM4,p0); + } + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + acc3 = inner::vmacc(vecA3, vecB,p0); + + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut[3 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc3)); + +} + + + +template() && + number_traits::Scalar>::is_fixed,bool>::type = true> + __STATIC_INLINE void _dot_m_m(const MA&pSrcA,const MB&pSrcB, + RES &&pDst, + const TMP &BT, + const Helium* = nullptr) + { + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + constexpr int nb_lanes = vector_traits::nb_lanes; + + const T *pInA = pSrcA.const_ptr(); /* input data matrix pointer A */ + const T *pInB = pSrcB.const_ptr(); + T *pOut = pDst.ptr(); /* input data matrix pointer B */ + T *px; /* Temporary output data matrix pointer */ + T *px2; /* Temporary output data matrix pointer */ + uint32_t numRowsA = pSrcA.rows(); /* number of rows of input matrix A */ + + uint32_t numColsB = pSrcB.columns(); /* number of columns of input matrix B */ + uint32_t numColsA = pSrcA.columns(); /* number of columns of input matrix A */ + + uint32_t strideA = pSrcA.stride(); /* number of columns of input matrix A */ + + uint32_t numRowsB = pSrcB.rows(); /* number of rows of input matrix A */ + uint32_t col, i = 0u, j, row = numRowsB; /* loop counters */ + + const T *pInA2; + const T *pInB2; + uint32_t blkCnt; /* loop counters */ + + + + { + /* small squared matrix specialized routines */ + if (numRowsA == numColsB && numColsB == numColsA) { + + if (numRowsA == 1) { + pDst(0,0) = pSrcA(0,0) * pSrcB(0,0); + 
return; + } else if (numRowsA == 2) + return arm_mat_mult_2x2_mve(pSrcA, pSrcB, std::forward(pDst)); + else if (numRowsA == 3) + return arm_mat_mult_3x3_mve(pSrcA, pSrcB, std::forward(pDst)); + else if (numRowsA == 4) + return arm_mat_mult_4x4_mve(pSrcA, pSrcB, std::forward(pDst)); + } + + /* + * Matrix transpose + */ + + const T *pSrcBT = BT.const_ptr(); + + + /* + * Reset the variables for the usage in the following multiplication process + */ + i = 0; + row = numRowsA >> 1; + px = pOut; + px2 = px + pDst.stride(); + + /* + * The following loop performs the dot-product of each row in pSrcA with each column in pSrcB + */ + + /* + * row loop + */ + while (row > 0u) { + /* + * For every row wise process, the column loop counter is to be initiated + */ + col = numColsB >> 1; + /* + * For every row wise process, the pIn2 pointer is set + * to the starting address of the transposed pSrcB data + */ + pInB = pSrcBT; + pInB2 = pInB + numRowsB; + j = 0; + + /* + * column loop + */ + while (col > 0u) { + T const *pSrcAVec, *pSrcBVec, *pSrcA2Vec, *pSrcB2Vec; + VEC vecA, vecA2, vecB, vecB2; + ACC acc0, acc1, acc2, acc3; + + /* + * Initiate the pointer pIn1 to point to the starting address of the column being processed + */ + pInA = pSrcA.const_ptr() + i; + pInA2 = pInA + strideA; + pInB = pSrcBT + j; + pInB2 = pInB + numRowsB; + + + pSrcAVec = (T const *) pInA; + pSrcA2Vec = (T const *) pInA2; + pSrcBVec = (T const *) pInB; + pSrcB2Vec = (T const *) pInB2; + + acc0 = vector_traits::temp_acc_zero(); + acc1 = vector_traits::temp_acc_zero(); + acc2 = vector_traits::temp_acc_zero(); + acc3 = vector_traits::temp_acc_zero(); + + vecA = inner::vload1<1>(pSrcAVec); + pSrcAVec += nb_lanes; + + blkCnt = numColsA / nb_lanes; + while (blkCnt > 0U) { + vecB = inner::vload1<1>(pSrcBVec); + pSrcBVec += nb_lanes; + acc0 = inner::vmacc(acc0, vecA, vecB); + vecA2 = inner::vload1<1>(pSrcA2Vec); + pSrcA2Vec += nb_lanes; + acc1 = inner::vmacc(acc1, vecA2, vecB); + vecB2 = inner::vload1<1>(pSrcB2Vec); + pSrcB2Vec += nb_lanes; + acc2 = inner::vmacc(acc2, vecA, vecB2); + vecA = inner::vload1<1>(pSrcAVec); + pSrcAVec += nb_lanes; + acc3 = inner::vmacc(acc3, vecA2, vecB2); + + blkCnt--; + } + /* + * tail + */ + blkCnt = numColsA & (nb_lanes-1); + if (blkCnt > 0U) { + mve_pred16_t p0 = inner::vctpq::mk(blkCnt); + vecB = inner::vload1<1>(pSrcBVec); + acc0 = inner::vmacc(acc0, vecA, vecB, p0); + vecA2 = inner::vload1<1>(pSrcA2Vec); + acc1 = inner::vmacc(acc1, vecA2, vecB, p0); + vecB2 = inner::vload1<1>(pSrcB2Vec); + acc2 = inner::vmacc(acc2, vecA, vecB2, p0); + vecA = inner::vload1<1>(pSrcAVec); + acc3 = inner::vmacc(acc3, vecA2, vecB2, p0); + } + + *px++ = inner::from_accumulator(inner::vreduce(acc0)); + *px++ = inner::from_accumulator(inner::vreduce(acc2)); + *px2++ = inner::from_accumulator(inner::vreduce(acc1)); + *px2++ = inner::from_accumulator(inner::vreduce(acc3)); + + j += numRowsB * 2; + /* + * Decrement the column loop counter + */ + col--; + + } + + i = i + strideA * 2; + px = px2 + (numColsB & 1u); + px2 = px + pDst.stride(); + /* + * Decrement the row loop counter + */ + row--; + } + + /* + * Compute remaining row and/or column below + */ + + if (numColsB & 1u) { + row = numRowsA & (~0x1); //avoid redundant computation + px = pOut + + pDst.stride() - 1; + i = 0; + + /* + * row loop + */ + while (row > 0) { + + + T const *pSrcAVec, *pSrcBVec; + VEC vecA, vecB; + ACC acc0; + + /* + * point to last column in matrix B + */ + pInB = pSrcBT + numRowsB * (numColsB - 1); + pInA = pSrcA.const_ptr() + i; + + pSrcAVec = (T 
const *) pInA; + pSrcBVec = (T const *) pInB; + + acc0 = vector_traits::temp_acc_zero(); + blkCnt = (numColsA) / nb_lanes; + while (blkCnt > 0U) { + vecA = inner::vload1<1>(pSrcAVec); + pSrcAVec += nb_lanes; + vecB = inner::vload1<1>(pSrcBVec); + pSrcBVec += nb_lanes; + acc0 = inner::vmacc(acc0, vecA, vecB); + + blkCnt--; + } + /* + * tail + */ + blkCnt = (numColsA & (nb_lanes-1)); + if (blkCnt > 0U) { + mve_pred16_t p0 = inner::vctpq::mk(blkCnt); + vecA = inner::vload1<1>(pSrcAVec); + vecB = inner::vload1<1>(pSrcBVec); + acc0 = inner::vmacc(acc0, vecA, vecB, p0); + } + + *px = inner::from_accumulator(inner::vreduce(acc0)); + + px += pDst.stride(); + + i += strideA; + /* + * Decrement the row loop counter + */ + row--; + } + } + + if (numRowsA & 1u) { + col = numColsB; + i = 0u; + /* + * point to last row in output matrix + */ + px = pOut + pDst.stride() * (numRowsA - 1); + /* + * col loop + */ + while (col > 0) { + + T const *pSrcAVec, *pSrcBVec; + VEC vecA, vecB; + ACC acc0; + + /* + * point to last row in matrix A + */ + pInA = pSrcA.const_ptr() + (numRowsA - 1) * strideA; + pInB = pSrcBT + i; + + /* + * Set the variable sum, that acts as accumulator, to zero + */ + pSrcAVec = (T const *) pInA; + pSrcBVec = (T const *) pInB; + acc0 = vector_traits::temp_acc_zero(); + + blkCnt = ((numColsA) / nb_lanes); + while (blkCnt > 0U) { + vecA = inner::vload1<1>(pSrcAVec); + pSrcAVec += nb_lanes; + vecB = inner::vload1<1>(pSrcBVec); + pSrcBVec += nb_lanes; + acc0 = inner::vmacc(acc0, vecA, vecB); + + blkCnt--; + } + /* + * tail + */ + blkCnt = (numColsA & 7); + if (blkCnt > 0U) { + mve_pred16_t p0 = inner::vctpq::mk(blkCnt); + vecA = inner::vload1<1>(pSrcAVec); + vecB = inner::vload1<1>(pSrcBVec); + acc0 = inner::vmacc(acc0, vecA, vecB, p0); + } + + *px++ = inner::from_accumulator(inner::vreduce(acc0)); + + i += numColsA; + + /* + * Decrement the col loop counter + */ + col--; + } + } + + } + +} + +#endif + +/*! 
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/num_features.hpp b/dsppp/Include/dsppp/Helium/num_features.hpp new file mode 100644 index 000000000..1f3b34d55 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/num_features.hpp @@ -0,0 +1,17 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/* + +vreduce is going from vector accumulator to scalar accumulator +from_accumulator is going from scalar accumulator to scalar datatype + + +*/ + +#include "float.hpp" +#include "half.hpp" +#include "q31.hpp" +#include "q15.hpp" +#include "q7.hpp" diff --git a/dsppp/Include/dsppp/Helium/q15.hpp b/dsppp/Include/dsppp/Helium/q15.hpp new file mode 100644 index 000000000..2da379ac2 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/q15.hpp @@ -0,0 +1,461 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HeliumNumber Helium specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup HeliumQ15Number Q15 + * \ingroup HeliumNumber + * @{ + */ + + +/****************** + * + * Helium + * + */ +#if defined(ARM_MATH_MVEI) + + +template +struct vector_traits::value>::type > +{ + typedef Q15 type; + typedef type::value_type storage_type; + typedef int16x8_t vector; + typedef Q<33,30> temp_accumulator; + typedef mve_pred16_t predicate_t; + + static constexpr bool has_vector = true; + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + static constexpr bool has_predicate = true; + + static constexpr int nb_lanes = 8; + + + static Q<33,30> temp_acc_zero() + { + return(Q<33,30>()); + } + + static constexpr int16_t zero_lane() {return 0;}; + + static constexpr int16_t lane_value(const Q15 x) {return x.v;}; + +}; + +namespace inner { + + + template<> + struct vctpq{ + static mve_pred16_t mk(uint32_t v) + { + return(vctp16q(v)); + }; + }; + + __STATIC_FORCEINLINE int16x8_t vconst(Q15 val) + { + return(vdupq_n_s16(val.v)); + } + + __STATIC_FORCEINLINE int16x8_t vconst_tail(Q15 val, + const mve_pred16_t p0) + { + return(vdupq_x_n_s16(val.v,p0)); + } + + + __STATIC_FORCEINLINE int16x8_t vneg(const int16x8_t a) + { + return(vqnegq(a)); + }; + + __STATIC_FORCEINLINE int16x8_t vneg(const int16x8_t a, + const mve_pred16_t p0) + { + return(vqnegq_m(vuninitializedq_s16(),a,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vadd(const int16x8_t a,const int16x8_t b) + { + return(vqaddq(a,b)); + }; + + __STATIC_FORCEINLINE int16x8_t vadd(const int16x8_t a,const Q15 b) + { + return(vqaddq_n_s16(a,b.v)); + }; + + __STATIC_FORCEINLINE int16x8_t vadd(const Q15 a,const int16x8_t b) + { + return(vqaddq_n_s16(b,a.v)); + }; + + + __STATIC_FORCEINLINE int16x8_t vadd(const int16x8_t a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqaddq_m(vuninitializedq_s16(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vadd(const int16x8_t a,const Q15 b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s16(vuninitializedq_s16(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vadd(const Q15 a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s16(vuninitializedq_s16(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vsub(const int16x8_t a,const int16x8_t b) + { + return(vqsubq(a,b)); + }; + + __STATIC_FORCEINLINE int16x8_t vsub(const int16x8_t a,const Q15 b) + { + return(vqsubq_n_s16(a,b.v)); + }; + + __STATIC_FORCEINLINE int16x8_t vsub(const Q15 a,const int16x8_t b) + { + return(vqsubq_n_s16(b,a.v)); + }; + + __STATIC_FORCEINLINE int16x8_t 
vsub(const int16x8_t a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqsubq_m(vuninitializedq_s16(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vsub(const int16x8_t a,const Q15 b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s16(vuninitializedq_s16(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vsub(const Q15 a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s16(vuninitializedq_s16(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vmul(const int16x8_t a,const int16x8_t b) + { + return(vqdmulhq(a,b)); + }; + + + __STATIC_FORCEINLINE int16x8_t vmul(const int16x8_t a,const Q15 b) + { + return(vqdmulhq_n_s16(a,b.v)); + }; + + __STATIC_FORCEINLINE int16x8_t vmul(const Q15 a,const int16x8_t b) + { + return(vqdmulhq_n_s16(b,a.v)); + }; + + __STATIC_FORCEINLINE int16x8_t vmul(const int16x8_t a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqdmulhq_m(vuninitializedq_s16(),a,b,p0)); + }; + + + __STATIC_FORCEINLINE int16x8_t vmul(const int16x8_t a,const Q15 b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s16(vuninitializedq_s16(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vmul(const Q15 a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s16(vuninitializedq_s16(),b,a.v,p0)); + }; + + template::type = true> + inline int16x8_t vload1(const Q15 *p) + { + return(vld1q(reinterpret_cast(p))); + }; + + /* + + 7*S must be <= 65535 so + S <= 9362 + + */ + template1) && (S<=9362),bool>::type = true> + inline int16x8_t vload1(const Q15 *p) + { + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + //uint16x8_t offset = vidupq_u16((uint16_t)0,1); + //offset = vmulq_n_u16(offset,S); + return(vldrhq_gather_shifted_offset_s16(reinterpret_cast(p),offset)); + }; + + template9362),bool>::type = true> + inline int16x8_t vload1(const Q15 *p) + { + int16x8_t res; + for(std::size_t i=0;i<8;i++) + { + res[i] = p->v; + p += S; + } + + return(res); + }; + + // Dynamic stride + inline int16x8_t vload1(const Q15 *p,index_t stride) + { + if (stride <= 9362) + { + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,stride); + return(vldrhq_gather_shifted_offset_s16(reinterpret_cast(p),offset)); + } + else + { + int16x8_t res; + for(std::size_t i=0;i<8;i++) + { + res[i] = p->v; + p += stride; + } + return(res); + } + } + + template::type = true> + inline int16x8_t vload1_z(const Q15 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(reinterpret_cast(p),p0)); + + }; + + template1) && (S<=9362),bool>::type = true> + inline int16x8_t vload1_z(const Q15 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,S); + return(vldrhq_gather_shifted_offset_z_s16(reinterpret_cast(p),offset,p0)); + }; + + template9362),bool>::type = true> + inline int16x8_t vload1_z(const Q15 *p,std::size_t nb,mve_pred16_t p0) + { + (void)p0; + int16x8_t res; + std::size_t i=0; + for(;iv; + p += S; + } + + for(;i<8;i++) + { + res[i] = 0; + p += S; + } + + return(res); + + }; + + // Dynamic stride + inline int16x8_t vload1_z(const Q15 *p,index_t stride,std::size_t nb,mve_pred16_t p0) + { + + if (stride <= 9362) + { + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,stride); + return(vldrhq_gather_shifted_offset_z_s16(reinterpret_cast(p),offset,p0)); + } + else + { + int16x8_t res; + std::size_t i=0; + for(;iv; + p += stride; + } + + for(;i<8;i++) + { + res[i] = 0; + p += stride; + } + return(res); + } + }; + + 
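/*
 * Usage sketch (hypothetical, not part of the header): how the Q15 vload1
 * overloads above are selected by the compile-time stride S.
 *   S == 1          -> contiguous vld1q load
 *   1 < S <= 9362   -> gather load with 16-bit shifted offsets (7*S must be <= 65535)
 *   S > 9362        -> lane-by-lane scalar fallback
 * The function name, buffer and strides below are made up for illustration.
 */
inline void q15_strided_load_sketch(const Q15 *buf)
{
    int16x8_t contiguous = vload1<1>(buf);      /* vld1q path                              */
    int16x8_t strided    = vload1<16>(buf);     /* gather with constexpr offsets {0,16,...,112} */
    int16x8_t large      = vload1<10000>(buf);  /* stride too large for 16-bit offsets: scalar loop */
    int16x8_t dyn        = vload1(buf, 16);     /* runtime stride, offsets built with vidupq/vmulq  */
    (void)contiguous; (void)strided; (void)large; (void)dyn;
}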
+ template::type = true> + inline void vstore1(Q15 *p,const int16x8_t val) + { + vst1q(reinterpret_cast(p),val); + }; + + template1) && (S<=9362),bool>::type = true> + inline void vstore1(Q15 *p,const int16x8_t val) + { + //uint16x8_t offset={0,1,2,3,4,5,6,7}; + //uint16x8_t offset = vidupq_u16((uint16_t)0,1); + //offset = vmulq_n_u16(offset,S); + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + return(vstrhq_scatter_shifted_offset_s16(reinterpret_cast(p),offset,val)); + }; + + template9362),bool>::type = true> + inline void vstore1(Q15 *p,const int16x8_t val) + { + for(std::size_t i=0;i<8;i++) + { + *p = Q15(val[i]); + p += S; + } + + }; + + // dynamic stride + inline void vstore1(Q15 *p,const index_t stride,const int16x8_t val) + { + if (stride <=9362) + { + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,stride); + return(vstrhq_scatter_shifted_offset_s16(reinterpret_cast(p),offset,val)); + } + else + { + for(std::size_t i=0;i<8;i++) + { + *p = Q15(val[i]); + p += stride; + } + } + } + + template::type = true> + inline void vstore1_z(Q15 *p,const int16x8_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vstrhq_p(reinterpret_cast(p),val,p0)); + }; + + template1) && (S<=9362),bool>::type = true> + inline void vstore1_z(Q15 *p,const int16x8_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + //uint16x8_t offset={0,1,2,3,4,5,6,7}; + //uint16x8_t offset = vidupq_u16((uint16_t)0,1); + //offset = vmulq_n_u16(offset,S); + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + return(vstrhq_scatter_shifted_offset_p_s16(reinterpret_cast(p),offset,val,p0)); + }; + + + template9362),bool>::type = true> + inline void vstore1_z(Q15 *p,const int16x8_t val,std::size_t nb,mve_pred16_t p0) + { + (void)p0; + for(std::size_t i=0;i(p),offset,val,p0)); + } + else + { + for(std::size_t i=0;i vmacc(const Q<33,30> sum, + const int16x8_t vala, + const int16x8_t valb) + { + return(Q<33,30>(vmlaldavaq(sum.v,vala,valb))); + }; + + __STATIC_FORCEINLINE Q<33,30> vmacc(const Q<33,30> sum, + const int16x8_t vala, + const int16x8_t valb, + const mve_pred16_t p0) + { + return(Q<33,30>(vmlaldavaq_p(sum.v,vala,valb,p0))); + }; + + __STATIC_FORCEINLINE Q<33,30> vmacc(const int16x8_t vala, + const int16x8_t valb) + { + return(Q<33,30>(vmlaldavq(vala,valb))); + }; + + __STATIC_FORCEINLINE Q<33,30> vmacc(const int16x8_t vala, + const int16x8_t valb, + const mve_pred16_t p0) + { + return(Q<33,30>(vmlaldavq_p(vala,valb,p0))); + }; + + __STATIC_FORCEINLINE Q<33,30> vreduce(const Q<33,30> sum) + { + return(sum); + }; + +}; +#endif + +/*! @} */ +/*! 
@} */ diff --git a/dsppp/Include/dsppp/Helium/q31.hpp b/dsppp/Include/dsppp/Helium/q31.hpp new file mode 100644 index 000000000..6f8eaae6e --- /dev/null +++ b/dsppp/Include/dsppp/Helium/q31.hpp @@ -0,0 +1,345 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HeliumNumber Helium specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup HeliumQ31Number Q31 + * \ingroup HeliumNumber + * @{ + */ + + +/****************** + * + * Helium + * + */ +#if defined(ARM_MATH_MVEI) + + + +template +struct vector_traits::value>::type > +{ + typedef Q31 type; + typedef type::value_type storage_type; + typedef int32x4_t vector; + typedef Q<9,54> temp_accumulator; + typedef mve_pred16_t predicate_t; + + static constexpr bool has_vector = true; + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + static constexpr bool has_predicate = true; + + static constexpr int nb_lanes = 4; + + + static Q<9,54> temp_acc_zero() + { + return(Q<9,54>()); + } + + static constexpr int16_t zero_lane() {return 0;}; + + static constexpr int16_t lane_value(const Q31 x) {return x.v;}; + +}; + +namespace inner { + + template<> + struct vctpq{ + static mve_pred16_t mk(uint32_t v) + { + return(vctp32q(v)); + }; + }; + + __STATIC_FORCEINLINE int32x4_t vconst(Q31 val) + { + return(vdupq_n_s32(val.v)); + } + + __STATIC_FORCEINLINE int32x4_t vconst_tail(Q31 val, + const mve_pred16_t p0) + { + return(vdupq_x_n_s32(val.v,p0)); + } + + + __STATIC_FORCEINLINE int32x4_t vneg(const int32x4_t a) + { + return(vqnegq(a)); + }; + + __STATIC_FORCEINLINE int32x4_t vneg(const int32x4_t a, + const mve_pred16_t p0) + { + return(vqnegq_m(vuninitializedq_s32(),a,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vadd(const int32x4_t a,const int32x4_t b) + { + return(vqaddq(a,b)); + }; + + __STATIC_FORCEINLINE int32x4_t vadd(const int32x4_t a,const Q31 b) + { + return(vqaddq_n_s32(a,b.v)); + }; + + __STATIC_FORCEINLINE int32x4_t vadd(const Q31 a,const int32x4_t b) + { + return(vqaddq_n_s32(b,a.v)); + }; + + + __STATIC_FORCEINLINE int32x4_t vadd(const int32x4_t a,const int32x4_t b, + const mve_pred16_t p0) + { + return(vqaddq_m(vuninitializedq_s32(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vadd(const int32x4_t a,const Q31 b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s32(vuninitializedq_s32(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vadd(const Q31 a,const int32x4_t b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s32(vuninitializedq_s32(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const int32x4_t a,const int32x4_t b) + { + return(vqsubq(a,b)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const int32x4_t a,const Q31 b) + { + return(vqsubq_n_s32(a,b.v)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const Q31 a,const int32x4_t b) + { + return(vqsubq_n_s32(b,a.v)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const int32x4_t a,const int32x4_t b, + const mve_pred16_t p0) + { + return(vqsubq_m(vuninitializedq_s32(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const int32x4_t a,const Q31 b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s32(vuninitializedq_s32(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const Q31 a,const int32x4_t b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s32(vuninitializedq_s32(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vmul(const int32x4_t a,const int32x4_t b) + { + return(vqdmulhq(a,b)); + }; + + + 
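/*
 * Illustrative sketch only (not part of the header): the per-lane arithmetic that
 * vqdmulhq performs for Q31 operands, and why a single doubling-high multiply keeps
 * the product in Q31 format. The helper name is made up for the example.
 */
__STATIC_FORCEINLINE int32_t q31_mul_lane_model(int32_t a, int32_t b)
{
    int64_t p = (int64_t)a * (int64_t)b;   /* Q2.62 product                                  */
    int64_t r = p >> 31;                   /* same as doubling then keeping the high 32 bits:
                                              back to Q1.31                                  */
    if (r > (int64_t)0x7FFFFFFF)           /* saturate the -1 * -1 corner case               */
    {
        r = 0x7FFFFFFF;
    }
    return (int32_t)r;
}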
__STATIC_FORCEINLINE int32x4_t vmul(const int32x4_t a,const Q31 b) + { + return(vqdmulhq_n_s32(a,b.v)); + }; + + __STATIC_FORCEINLINE int32x4_t vmul(const Q31 a,const int32x4_t b) + { + return(vqdmulhq_n_s32(b,a.v)); + }; + + __STATIC_FORCEINLINE int32x4_t vmul(const int32x4_t a,const int32x4_t b, + const mve_pred16_t p0) + { + return(vqdmulhq_m(vuninitializedq_s32(),a,b,p0)); + }; + + + __STATIC_FORCEINLINE int32x4_t vmul(const int32x4_t a,const Q31 b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s32(vuninitializedq_s32(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vmul(const Q31 a,const int32x4_t b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s32(vuninitializedq_s32(),b,a.v,p0)); + }; + + template::type = true> + inline int32x4_t vload1(const Q31 *p) + { + return(vld1q(reinterpret_cast(p))); + }; + + template1),bool>::type = true> + inline int32x4_t vload1(const Q31 *p) + { + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + return(vldrwq_gather_shifted_offset_s32(reinterpret_cast(p),offset)); + }; + + + // Dynamic stride + inline int32x4_t vload1(const Q31 *p,index_t stride) + { + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + return(vldrwq_gather_shifted_offset_s32(reinterpret_cast(p),offset)); + + } + + template::type = true> + inline int32x4_t vload1_z(const Q31 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(reinterpret_cast(p),p0)); + + }; + + template1),bool>::type = true> + inline int32x4_t vload1_z(const Q31 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,S); + return(vldrwq_gather_shifted_offset_z_s32(reinterpret_cast(p),offset,p0)); + }; + + + + // Dynamic stride + inline int32x4_t vload1_z(const Q31 *p,index_t stride,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + return(vldrwq_gather_shifted_offset_z_s32(reinterpret_cast(p),offset,p0)); + + }; + + + template::type = true> + inline void vstore1(Q31 *p,const int32x4_t val) + { + vst1q(reinterpret_cast(p),val); + }; + + template1) ,bool>::type = true> + inline void vstore1(Q31 *p,const int32x4_t val) + { + + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + return(vstrwq_scatter_shifted_offset_s32(reinterpret_cast(p),offset,val)); + }; + + + + // dynamic stride + inline void vstore1(Q31 *p,const index_t stride,const int32x4_t val) + { + + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + return(vstrwq_scatter_shifted_offset_s32(reinterpret_cast(p),offset,val)); + } + + template::type = true> + inline void vstore1_z(Q31 *p,const int32x4_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vstrwq_p(reinterpret_cast(p),val,p0)); + }; + + template1),bool>::type = true> + inline void vstore1_z(Q31 *p,const int32x4_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + vstrwq_scatter_shifted_offset_p_s32(reinterpret_cast(p),offset,val,p0); + }; + + + + // dynamic stride + inline void vstore1_z(Q31 *p,const index_t stride,const int32x4_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + vstrwq_scatter_shifted_offset_p_s32(reinterpret_cast(p),offset,val,p0); + + }; + + __STATIC_FORCEINLINE Q<9,54> vmacc(const Q<9,54> sum, + const int32x4_t vala, + const int32x4_t valb) + { + 
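/* vrmlaldavhaq sums the Q31 x Q31 lane products into a 64-bit accumulator,
   effectively dropping the low 8 bits of the full-precision total, which is
   why the accumulator is typed Q<9,54> (62 - 8 fractional bits) and vreduce
   below only needs a further shift to reach Q<15,48>. */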
return(Q<9,54>(vrmlaldavhaq(sum.v,vala,valb))); + }; + + __STATIC_FORCEINLINE Q<9,54> vmacc(const Q<9,54> sum, + const int32x4_t vala, + const int32x4_t valb, + const mve_pred16_t p0) + { + return(Q<9,54>(vrmlaldavhaq_p(sum.v,vala,valb,p0))); + }; + + __STATIC_FORCEINLINE Q<9,54> vmacc(const int32x4_t vala, + const int32x4_t valb) + { + return(Q<9,54>(vrmlaldavhq(vala,valb))); + }; + + __STATIC_FORCEINLINE Q<9,54> vmacc(const int32x4_t vala, + const int32x4_t valb, + const mve_pred16_t p0) + { + return(Q<9,54>(vrmlaldavhq_p(vala,valb,p0))); + }; + + __STATIC_FORCEINLINE Q<15,48> vreduce(const Q<9,54> sum) + { + return(Q<15,48>(asrl(sum.v, 6))); + }; + +}; + +#endif + +/*! @} */ +/*! @} */ diff --git a/dsppp/Include/dsppp/Helium/q7.hpp b/dsppp/Include/dsppp/Helium/q7.hpp new file mode 100644 index 000000000..5d9f4cc25 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/q7.hpp @@ -0,0 +1,463 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HeliumNumber Helium specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup HeliumQ7Number Q7 + * \ingroup HeliumNumber + * @{ + */ + + +/****************** + * + * Helium + * + */ +#if defined(ARM_MATH_MVEI) + + +template +struct vector_traits::value>::type > +{ + typedef Q7 type; + typedef type::value_type storage_type; + typedef int8x16_t vector; + typedef Q<17,14> temp_accumulator; + typedef mve_pred16_t predicate_t; + + static constexpr bool has_vector = true; + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + static constexpr bool has_predicate = true; + + static constexpr int nb_lanes = 16; + + + static Q<17,14> temp_acc_zero() + { + return(Q<17,14>()); + } + + static constexpr int8_t zero_lane() {return 0;}; + + static constexpr int8_t lane_value(const Q7 x) {return x.v;}; + +}; + +namespace inner { + + + template<> + struct vctpq{ + static mve_pred16_t mk(uint32_t v) + { + return(vctp8q(v)); + }; + }; + + __STATIC_FORCEINLINE int8x16_t vconst(Q7 val) + { + return(vdupq_n_s8(val.v)); + } + + __STATIC_FORCEINLINE int8x16_t vconst_tail(Q7 val, + const mve_pred16_t p0) + { + return(vdupq_x_n_s8(val.v,p0)); + } + + + __STATIC_FORCEINLINE int8x16_t vneg(const int8x16_t a) + { + return(vqnegq(a)); + }; + + __STATIC_FORCEINLINE int8x16_t vneg(const int8x16_t a, + const mve_pred16_t p0) + { + return(vqnegq_m(vuninitializedq_s8(),a,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vadd(const int8x16_t a,const int8x16_t b) + { + return(vqaddq(a,b)); + }; + + __STATIC_FORCEINLINE int8x16_t vadd(const int8x16_t a,const Q7 b) + { + return(vqaddq_n_s8(a,b.v)); + }; + + __STATIC_FORCEINLINE int8x16_t vadd(const Q7 a,const int8x16_t b) + { + return(vqaddq_n_s8(b,a.v)); + }; + + + __STATIC_FORCEINLINE int8x16_t vadd(const int8x16_t a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqaddq_m(vuninitializedq_s8(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vadd(const int8x16_t a,const Q7 b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s8(vuninitializedq_s8(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vadd(const Q7 a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s8(vuninitializedq_s8(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const int8x16_t a,const int8x16_t b) + { + return(vqsubq(a,b)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const int8x16_t a,const Q7 b) + { + return(vqsubq_n_s8(a,b.v)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const Q7 
a,const int8x16_t b) + { + return(vqsubq_n_s8(b,a.v)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const int8x16_t a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqsubq_m(vuninitializedq_s8(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const int8x16_t a,const Q7 b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s8(vuninitializedq_s8(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const Q7 a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s8(vuninitializedq_s8(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vmul(const int8x16_t a,const int8x16_t b) + { + return(vqdmulhq(a,b)); + }; + + + __STATIC_FORCEINLINE int8x16_t vmul(const int8x16_t a,const Q7 b) + { + return(vqdmulhq_n_s8(a,b.v)); + }; + + __STATIC_FORCEINLINE int8x16_t vmul(const Q7 a,const int8x16_t b) + { + return(vqdmulhq_n_s8(b,a.v)); + }; + + __STATIC_FORCEINLINE int8x16_t vmul(const int8x16_t a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqdmulhq_m(vuninitializedq_s8(),a,b,p0)); + }; + + + __STATIC_FORCEINLINE int8x16_t vmul(const int8x16_t a,const Q7 b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s8(vuninitializedq_s8(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vmul(const Q7 a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s8(vuninitializedq_s8(),b,a.v,p0)); + }; + + template::type = true> + inline int8x16_t vload1(const Q7 *p) + { + return(vld1q(reinterpret_cast(p))); + }; + + /* + + 15*S <= 255 => S <= 17 + + */ + template1) && (S<=17),bool>::type = true> + inline int8x16_t vload1(const Q7 *p) + { + constexpr uint8x16_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S, + 8*S,9*S,10*S,11*S,12*S,13*S,14*S,15*S}; + //uint8x16_t offset = vidupq_u8((uint16_t)0,1); + //offset = vmulq_n_u8(offset,S); + return(vldrbq_gather_offset_s8(reinterpret_cast(p),offset)); + }; + + template17),bool>::type = true> + inline int8x16_t vload1(const Q7 *p) + { + int8x16_t res; + for(std::size_t i=0;i<16;i++) + { + res[i] = p->v; + p += S; + } + + return(res); + }; + + // Dynamic stride + inline int8x16_t vload1(const Q7 *p,index_t stride) + { + if (stride <= 17) + { + uint8x16_t offset = vidupq_u8((uint32_t)0,1); + offset = vmulq_n_u8(offset,stride); + return(vldrbq_gather_offset_s8(reinterpret_cast(p),offset)); + } + else + { + int8x16_t res; + for(std::size_t i=0;i<16;i++) + { + res[i] = p->v; + p += stride; + } + return(res); + } + } + + template::type = true> + inline int8x16_t vload1_z(const Q7 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(reinterpret_cast(p),p0)); + + }; + + template1) && (S<=17),bool>::type = true> + inline int8x16_t vload1_z(const Q7 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + uint8x16_t offset = vidupq_u8((uint32_t)0,1); + offset = vmulq_n_u8(offset,S); + return(vldrbq_gather_offset_z_s8(reinterpret_cast(p),offset,p0)); + }; + + template17),bool>::type = true> + inline int8x16_t vload1_z(const Q7 *p,std::size_t nb,mve_pred16_t p0) + { + (void)p0; + int8x16_t res; + std::size_t i=0; + for(;iv; + p += S; + } + + for(;i<16;i++) + { + res[i] = 0; + p += S; + } + + return(res); + + }; + + // Dynamic stride + inline int8x16_t vload1_z(const Q7 *p,index_t stride,std::size_t nb,mve_pred16_t p0) + { + + if (stride <= 17) + { + uint8x16_t offset = vidupq_u8((uint32_t)0,1); + offset = vmulq_n_u8(offset,stride); + return(vldrbq_gather_offset_z_s8(reinterpret_cast(p),offset,p0)); + } + else + { + int8x16_t res; + std::size_t i=0; + for(;iv; + p += stride; + } + + for(;i<16;i++) + { + res[i] = 0; + 
p += stride; + } + return(res); + } + }; + + + template::type = true> + inline void vstore1(Q7 *p,const int8x16_t val) + { + vst1q(reinterpret_cast(p),val); + }; + + template1) && (S<=17),bool>::type = true> + inline void vstore1(Q7 *p,const int8x16_t val) + { + //uint8x16_t offset={0,1,2,3,4,5,6,7}; + //uint8x16_t offset = vidupq_u8((uint16_t)0,1); + //offset = vmulq_n_u8(offset,S); + constexpr uint8x16_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S, + 8*S,9*S,10*S,11*S,12*S,13*S,14*S,15*S}; + return(vstrbq_scatter_offset_s8(reinterpret_cast(p),offset,val)); + }; + + template17),bool>::type = true> + inline void vstore1(Q7 *p,const int8x16_t val) + { + for(std::size_t i=0;i<16;i++) + { + *p = Q7(val[i]); + p += S; + } + + }; + + // dynamic stride + inline void vstore1(Q7 *p,const index_t stride,const int8x16_t val) + { + if (stride <=17) + { + uint8x16_t offset = vidupq_u8((uint32_t)0,1); + offset = vmulq_n_u8(offset,stride); + return(vstrbq_scatter_offset_s8(reinterpret_cast(p),offset,val)); + } + else + { + for(std::size_t i=0;i<16;i++) + { + *p = Q7(val[i]); + p += stride; + } + } + } + + template::type = true> + inline void vstore1_z(Q7 *p,const int8x16_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vstrbq_p(reinterpret_cast(p),val,p0)); + }; + + template1) && (S<=17),bool>::type = true> + inline void vstore1_z(Q7 *p,const int8x16_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + //uint8x16_t offset={0,1,2,3,4,5,6,7}; + //uint8x16_t offset = vidupq_u8((uint16_t)0,1); + //offset = vmulq_n_u8(offset,S); + constexpr uint8x16_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S, + 8*S,9*S,10*S,11*S,12*S,13*S,14*S,15*S}; + return(vstrbq_scatter_offset_p_s8(reinterpret_cast(p),offset,val,p0)); + }; + + + template17),bool>::type = true> + inline void vstore1_z(Q7 *p,const int8x16_t val,std::size_t nb,mve_pred16_t p0) + { + (void)p0; + for(std::size_t i=0;i(p),offset,val,p0)); + } + else + { + for(std::size_t i=0;i vmacc(const Q<17,14> sum, + const int8x16_t vala, + const int8x16_t valb) + { + return(Q<17,14>(vmladavaq(sum.v,vala,valb))); + }; + + __STATIC_FORCEINLINE Q<17,14> vmacc(const Q<17,14> sum, + const int8x16_t vala, + const int8x16_t valb, + const mve_pred16_t p0) + { + return(Q<17,14>(vmladavaq_p(sum.v,vala,valb,p0))); + }; + + __STATIC_FORCEINLINE Q<17,14> vmacc(const int8x16_t vala, + const int8x16_t valb) + { + return(Q<17,14>(vmladavq(vala,valb))); + }; + + __STATIC_FORCEINLINE Q<17,14> vmacc(const int8x16_t vala, + const int8x16_t valb, + const mve_pred16_t p0) + { + return(Q<17,14>(vmladavq_p(vala,valb,p0))); + }; + + __STATIC_FORCEINLINE Q<17,14> vreduce(const Q<17,14> sum) + { + return(sum); + }; + +}; +#endif + +/*! @} */ +/*! 
@} */ diff --git a/dsppp/Include/dsppp/Neon/basic.hpp b/dsppp/Include/dsppp/Neon/basic.hpp new file mode 100644 index 000000000..828c96489 --- /dev/null +++ b/dsppp/Include/dsppp/Neon/basic.hpp @@ -0,0 +1,133 @@ +// -*- C++ -*- + +#pragma once + +#include +#include +#include +#if 0 + +template +void _Add(const T* pSrcA, + const T* pSrcB, + T* pDst, + const std::size_t l, + const Neon* = nullptr, + typename std::enable_if::is_float && + vector_traits::has_vector,T>::type* = nullptr) +{ + using num = vector_traits; + using VecType = typename num::vector; + constexpr int nb_lanes = num::nb_lanes; + constexpr int lanes_shift = shiftFromValue(nb_lanes); + constexpr int lanes_mask = maskFromShift(lanes_shift); + + //std::cout << "Neon float\r\n" ; + + uint32_t blkCnt; /* Loop counter */ + + VecType vec1; + VecType vec2; + VecType res; + + /* Compute several lanes at a time */ + blkCnt = l >> lanes_shift; + + while (blkCnt > 0U) + { + /* C = A + B */ + + /* Add and then store the results in the destination buffer. */ + vec1 = vld1q(pSrcA); + vec2 = vld1q(pSrcB); + res = vaddq(vec1, vec2); + vst1q(pDst, res); + + /* Increment pointers */ + pSrcA += nb_lanes; + pSrcB += nb_lanes; + pDst += nb_lanes; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Tail */ + blkCnt = l & lanes_mask; + + while (blkCnt > 0U) + { + /* C = A + B */ + + /* Add and store result in destination buffer. */ + *pDst++ = (*pSrcA++) + (*pSrcB++); + + /* Decrement loop counter */ + blkCnt--; + } + + +}; + + + + +template +void _Add(const T* pSrcA_Q, + const T* pSrcB_Q, + T* pDst_Q, + const std::size_t l, + const Neon* = nullptr, + typename std::enable_if::is_fixed && + vector_traits::has_vector,T>::type* = nullptr) +{ + using num = vector_traits; + using VecType = typename num::vector; + using value_type = typename T::value_type; + constexpr int nb_lanes = num::nb_lanes; + constexpr int lanes_shift = shiftFromValue(nb_lanes); + constexpr int lanes_mask = maskFromShift(lanes_shift); + const value_type *pSrcA = reinterpret_cast(pSrcA_Q); + const value_type *pSrcB = reinterpret_cast(pSrcB_Q); + value_type *pDst = reinterpret_cast(pDst_Q); + + uint32_t blkCnt; /* loop counters */ + VecType vecA; + VecType vecB; + + /* Compute 8 outputs at a time */ + blkCnt = l >> lanes_shift; + while (blkCnt > 0U) + { + /* + * C = A + B + * Add and then store the results in the destination buffer. 
+ */ + vecA = vld1q(pSrcA); + vecB = vld1q(pSrcB); + vst1q(pDst, vqaddq(vecA, vecB)); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + /* + * advance vector source and destination pointers + */ + pSrcA += nb_lanes; + pSrcB += nb_lanes; + pDst += nb_lanes; + } + /* + * tail + */ + blkCnt = l & lanes_mask; + if (blkCnt > 0U) + { + mve_pred16_t p0 = num::vctpq(blkCnt); + vecA = vld1q(pSrcA); + vecB = vld1q(pSrcB); + vstrq_p(pDst, vqaddq(vecA, vecB), p0); + } +} + +#endif \ No newline at end of file diff --git a/dsppp/Include/dsppp/Neon/float.hpp b/dsppp/Include/dsppp/Neon/float.hpp new file mode 100644 index 000000000..0dc95759b --- /dev/null +++ b/dsppp/Include/dsppp/Neon/float.hpp @@ -0,0 +1,105 @@ +// -*- C++ -*- + +#pragma once + +/****************** + * + * Neon + * + */ +#if defined(ARM_MATH_NEON) + +template +struct vector_traits::value>::type> +{ + typedef float type; + typedef float storage_type; + typedef float32x4_t vector; + static constexpr bool has_vector = true; + static constexpr bool is_float = true; + static constexpr bool is_fixed = false; + + static constexpr int nb_lanes = 4; + + static constexpr float zero_lane() {return 0.0f;}; + + +}; + +namespace inner { + + + + __STATIC_FORCEINLINE float32x4_t vadd(const float32x4_t a,const float32x4_t b) + { + return(vaddq_f32(a,b)); + }; + + __STATIC_FORCEINLINE float32x4_t vmul(const float32x4_t a,const float32x4_t b) + { + return(vmulqq_f32(a,b)); + }; + + __STATIC_FORCEINLINE float32x4_t vconst(const float v) + { + const float32x4_t t = vdupq_n_f32(v) + return(t); + } + + template::type = true> + inline float32x4_t vload1(const float32_t *p) + { + return(vld1q(p)); + }; + + template1),bool>::type = true> + inline float32x4_t vload1(const float32_t *p) + { + float32x4_t res; + res[0] = *p; + p += S; + + res[1] = *p; + p += S; + + res[2] = *p; + p += S; + + res[3] = *p; + p += S; + + return(res); + }; + + template::type = true> + inline void vstore1(float32_t *p,const float32x4_t val) + { + return(vst1q(p,val)); + }; + + template1),bool>::type = true> + inline void vstore1(float32_t *p,const float32x4_t val) + { + *p = val[0]; + p += S; + + *p = val[1]; + p += S; + + *p = val[2]; + p += S; + + *p = val[3]; + p += S; + }; + + + +}; + +#endif diff --git a/dsppp/Include/dsppp/Neon/num_features.hpp b/dsppp/Include/dsppp/Neon/num_features.hpp new file mode 100644 index 000000000..142d4607f --- /dev/null +++ b/dsppp/Include/dsppp/Neon/num_features.hpp @@ -0,0 +1,5 @@ +// -*- C++ -*- + +#pragma once + +#include "float.hpp" diff --git a/dsppp/Include/dsppp/Scalar/basic.hpp b/dsppp/Include/dsppp/Scalar/basic.hpp new file mode 100644 index 000000000..cace44b47 --- /dev/null +++ b/dsppp/Include/dsppp/Scalar/basic.hpp @@ -0,0 +1,189 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup ARCHALG Architecture specific algorithm + * \ingroup DSPPP + * \addtogroup SCALARALG Scalar algorithm + * \ingroup ARCHALG + * @{ + */ + + +#define SCALAR_UNROLL 2 + +template::value && + SameElementType::value,bool>::type = true> +inline void _Fill(DST &v, + const T val, + vector_length_t l, + const Scalar* = nullptr) +{ + constexpr unsigned int U = SCALAR_UNROLL; + index_t i; + + UNROLL_LOOP + for(i=0 ; i <= l-(1<() && + SameElementType::value,bool>::type = true> +inline void _Fill2D(DST &v, + const T val, + const vector_length_t rows, + const vector_length_t cols, + const Scalar* = nullptr) +{ + constexpr unsigned int U = SCALAR_UNROLL; + index_t row=0; + + + for(; row <= rows-(1<(),bool>::type = true> +inline void eval(DA &v, + 
const DB& other, + const vector_length_t l, + const Scalar* = nullptr) +{ + constexpr unsigned int U = SCALAR_UNROLL; + index_t i=0; + + for(i=0 ; i <= l-(1<(),bool>::type = true> +inline void eval2D(DA &v, + const DB& other, + const vector_length_t rows, + const vector_length_t cols, + const Scalar* = nullptr) +{ + constexpr unsigned int U = SCALAR_UNROLL; + index_t row=0; + + + for(; row <= rows-(1<(),bool>::type = true> +inline DotResult _dot(const DA& a, + const DB& b, + const vector_length_t l, + const Scalar* = nullptr) +{ + using Acc = DotResult; + constexpr unsigned int U = SCALAR_UNROLL; + index_t i; + + Acc acc = Acc{}; + + for(i=0 ; i <= l-(1<(),bool>::type = true> +inline void _swap(DA&& a, + DB&& b, + const vector_length_t l, + const Scalar* = nullptr) +{ + for(index_t i=0;i +__STATIC_INLINE void _arm_mat_trans( + const MA &src, + MB &dst, + const Scalar* = nullptr) +{ + DISABLE_LOOP_UNROLL + for(index_t r=0;r < dst.rows() ; r++) + { + dst.row(r) = copy(src.col(r)); + } +} + +template +inline void _dot_m_v(RES &res, + const M&m,const V&v, + const Scalar* = nullptr) +{ + using T = typename traits::Scalar; + using Acc = typename number_traits::accumulator; + uint32_t numRows = m.rows(); + uint32_t numCols = m.columns(); + const T *pSrcA = m.ptr(); + const T *pInA1; /* input data matrix pointer A of Q31 type */ + const T *pInA2; /* input data matrix pointer A of Q31 type */ + const T *pInA3; /* input data matrix pointer A of Q31 type */ + const T *pInA4; /* input data matrix pointer A of Q31 type */ + T *px; /* Temporary output data matrix pointer */ + uint32_t i; + uint16_t row, colCnt; /* loop counters */ + T matData, matData2, vecData, vecData2; + + + /* Process 4 rows at a time */ + row = numRows >> 2; + i = 0u; + px = res.ptr(); + + /* The following loop performs the dot-product of each row in pSrcA with the vector */ + /* row loop */ + while (row > 0) { + /* Initialize accumulators */ + Acc sum1 = Acc{}; + Acc sum2 = Acc{}; + Acc sum3 = Acc{}; + Acc sum4 = Acc{}; + + + /* Loop unrolling: process 2 columns per iteration */ + //colCnt = numCols; + + /* Initialize pointers to the starting address of the column being processed */ + pInA1 = pSrcA + i; + pInA2 = pInA1 + m.stride(); + pInA3 = pInA2 + m.stride(); + pInA4 = pInA3 + m.stride(); + + + // Main loop: matrix-vector multiplication + for(colCnt = 0 ; colCnt < numCols; colCnt ++) + { + // Read 2 values from vector + vecData = v[colCnt]; + // Read 8 values from the matrix - 2 values from each of 4 rows, and do multiply accumulate + matData = *(pInA1)++; + sum1 = inner::mac(sum1, matData, vecData); + matData = *(pInA2)++; + sum2 = inner::mac(sum2, matData, vecData); + matData = *(pInA3)++; + sum3 = inner::mac(sum3, matData, vecData); + matData = *(pInA4)++; + sum4 = inner::mac(sum4, matData, vecData); + } + + /* Saturate and store the result in the destination buffer */ + *px++ = inner::from_accumulator(sum1); + *px++ = inner::from_accumulator(sum2); + *px++ = inner::from_accumulator(sum3); + *px++ = inner::from_accumulator(sum4); + + i = i + m.stride() * 4; + + /* Decrement the row loop counter */ + row--; + } + + /* process any remaining rows */ + row = numRows & 3u; + while (row > 0) { + + Acc sum = Acc{}; + pInA1 = pSrcA + i; + + int32_t k=0; + for(k=0; k <= (int)numCols-2; k += 2) + { + vecData = v[k]; + vecData2 = v[k+1]; + matData = *(pInA1)++; + matData2 = *(pInA1)++; + sum = inner::mac(sum, matData, vecData); + sum = inner::mac(sum, matData2, vecData2); + } + // process remainder of row + + + for(; k < (int)numCols; 
k ++) + { + sum = inner::mac(sum ,*pInA1++, v[k]); + } + + *px++ = inner::from_accumulator(sum); + i = i + m.stride(); + row--; + } +} + +#include "matrix_multiply_fixed.hpp" +#include "matrix_multiply_float.hpp" + +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Scalar/matrix_multiply_fixed.hpp b/dsppp/Include/dsppp/Scalar/matrix_multiply_fixed.hpp new file mode 100644 index 000000000..461e63a76 --- /dev/null +++ b/dsppp/Include/dsppp/Scalar/matrix_multiply_fixed.hpp @@ -0,0 +1,124 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup SCALARALG + * @{ + */ + +template::Scalar>::is_fixed,bool>::type = true> +__STATIC_INLINE void _dot_m_m(const MA&pSrcA,const MB&pSrcB, + RES &&pDst, + const TMP &BT, + const Scalar* = nullptr) +{ + using T = typename traits::Scalar; + using Acc = typename number_traits::accumulator; + + T *pIn1 = pSrcA.ptr(); /* Input data matrix pointer A */ + T *pIn2 = pSrcB.ptr(); /* Input data matrix pointer B */ + T *pInA = pSrcA.ptr(); /* Input data matrix pointer A */ + T *pInB = pSrcB.ptr(); /* Input data matrix pointer B */ + T *pOut = pDst.ptr(); /* Output data matrix pointer */ + T *px; /* Temporary output data matrix pointer */ + Acc sum; /* Accumulator */ + uint16_t numRowsA = pSrcA.rows(); /* Number of rows of input matrix A */ + uint16_t numColsB = pSrcB.columns(); /* Number of columns of input matrix B */ + uint16_t numColsA = pSrcA.columns(); /* Number of columns of input matrix A */ + uint32_t col, i = 0U, row = numRowsA, colCnt; /* Loop counters */ + + (void)BT; + /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ + /* row loop */ + do + { + /* Output pointer is set to starting address of row being processed */ + px = pOut + i; + + /* For every row wise process, column loop counter is to be initiated */ + col = numColsB; + + /* For every row wise process, pIn2 pointer is set to starting address of pSrcB data */ + pIn2 = pSrcB.ptr(); + + /* column loop */ + do + { + /* Set the variable sum, that acts as accumulator, to zero */ + sum = Acc{}; + + /* Initialize pointer pIn1 to point to starting address of column being processed */ + pIn1 = pInA; + + + /* Loop unrolling: Compute 4 MACs at a time. */ + colCnt = numColsA >> 2U; + + /* matrix multiplication */ + while (colCnt > 0U) + { + /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */ + + /* Perform the multiply-accumulates */ + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum,*pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + /* Decrement loop counter */ + colCnt--; + } + + /* Loop unrolling: Compute remaining MACs */ + colCnt = numColsA % 0x4U; + + + while (colCnt > 0U) + { + /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p) * b(p,n) */ + + /* Perform the multiply-accumulates */ + sum = inner::mac(sum ,*pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + /* Decrement loop counter */ + colCnt--; + } + + /* Convert result from 2.62 to 1.31 format and store in destination buffer */ + *px++ = inner::from_accumulator(sum); + + /* Decrement column loop counter */ + col--; + + /* Update pointer pIn2 to point to starting address of next column */ + pIn2 = pInB + (numColsB - col) ; + + } while (col > 0U); + + /* Update pointer pInA to point to starting address of next row */ + i = i + pDst.stride(); + pInA = pInA + pSrcA.stride(); + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); + + +} + + +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Scalar/matrix_multiply_float.hpp b/dsppp/Include/dsppp/Scalar/matrix_multiply_float.hpp new file mode 100644 index 000000000..96cf08cf3 --- /dev/null +++ b/dsppp/Include/dsppp/Scalar/matrix_multiply_float.hpp @@ -0,0 +1,119 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup SCALARALG + * @{ + */ + +template::Scalar>::is_float,bool>::type = true> +__STATIC_INLINE void _dot_m_m(const MA&pSrcA,const MB&pSrcB, + RES &&pDst, + const Scalar* = nullptr) +{ + using T = typename traits::Scalar; + using Acc = typename number_traits::accumulator; + //using Comp = typename number_traits::compute_type; + T *pIn1 = pSrcA.ptr(); /* Input data matrix pointer A */ + T *pIn2 = pSrcB.ptr(); /* Input data matrix pointer B */ + T *pInA = pSrcA.ptr(); /* Input data matrix pointer A */ + T *pInB = pSrcB.ptr(); /* Input data matrix pointer B */ + T *pOut = pDst.ptr(); /* Output data matrix pointer */ + T *px; /* Temporary output data matrix pointer */ + Acc sum; /* Accumulator */ + uint16_t numRowsA = pSrcA.rows(); /* Number of rows of input matrix A */ + uint16_t numColsB = pSrcB.columns(); /* Number of columns of input matrix B */ + uint16_t numColsA = pSrcA.columns(); /* Number of columns of input matrix A */ + uint32_t col, i = 0U, row = numRowsA, colCnt; /* Loop counters */ + + + /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ + /* row loop */ + do + { + /* Output pointer is set to starting address of row being processed */ + px = pOut + i; + + /* For every row wise process, column loop counter is to be initiated */ + col = numColsB; + + /* For every row wise process, pIn2 pointer is set to starting address of pSrcB data */ + pIn2 = pSrcB.ptr(); + + /* column loop */ + do + { + /* Set the variable sum, that acts as accumulator, to zero */ + sum = Acc{}; + + /* Initialize pointer pIn1 to point to starting address of column being processed */ + pIn1 = pInA; + + + /* Loop unrolling: Compute 4 MACs at a time. */ + colCnt = numColsA >> 2U; + + /* matrix multiplication */ + while (colCnt > 0U) + { + /* c(m,p) = a(m,1) * b(1,p) + a(m,2) * b(2,p) + .... + a(m,n) * b(n,p) */ + + /* Perform the multiply-accumulates */ + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + /* Decrement loop counter */ + colCnt--; + } + + /* Loop unrolling: Compute remaining MACs */ + colCnt = numColsA % 0x4U; + + while (colCnt > 0U) + { + /* c(m,p) = a(m,1) * b(1,p) + a(m,2) * b(2,p) + .... 
+ a(m,n) * b(n,p) */ + + /* Perform the multiply-accumulates */ + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + /* Decrement loop counter */ + colCnt--; + } + + /* Store result in destination buffer */ + *px++ = inner::from_accumulator(sum); + + /* Decrement column loop counter */ + col--; + + /* Update pointer pIn2 to point to starting address of next column */ + pIn2 = pInB + (numColsB - col); + + } while (col > 0U); + + /* Update pointer pInA to point to starting address of next row */ + i = i + pDst.stride(); + pInA = pInA + pSrcA.stride(); + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); + + +} + +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/algorithms.hpp b/dsppp/Include/dsppp/algorithms.hpp new file mode 100644 index 000000000..6f2f205ab --- /dev/null +++ b/dsppp/Include/dsppp/algorithms.hpp @@ -0,0 +1,269 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup DSPPP C++ extension + * C++ template extension to CMSIS-DSP. It is not yet part of + * the pack but the headers can be found on the + * [CMSIS-DSP github](https://github.com/ARM-software/CMSIS-DSP/dsppp/Include) + * The principles are described in this @ref dsppp_main "page" + * @{ + * @} + */ + + +/** +In this file we have kernels that are written in an +architecture independant way (using operators of the library) + +*/ + +namespace arm_cmsis_dsp { + +/** \addtogroup ALGO Architecture independent algorithms + * \ingroup DSPPP + * @{ + */ + +/* + +Matrix transpose + +*/ + + +/** + * Transpose a matrix. + * + * @param dst Destination matrix. + * @param src Source matrix. + * + */ +template::value && + HasMatrixIndexing::value /*&& + SameElementType::value*/,bool>::type = true> +inline void transposeTo(MA &dst, + const MB& src) +{ + _arm_mat_trans(src,dst,CURRENT_ARCH); +} + + +/* + +Init a diagonal matrix (0 outside of diagonal) + +*/ +template typename A, + typename VB, +typename std::enable_if::value && + SameElementType::value,bool>::type = true> +inline void _diagonal(Matrix &v, + const VB& other, + const vector_length_t rows) +{ + UNROLL_LOOP + for(index_t r=0;r < rows ; r++) + { + v.row(r) = P{}; + v(r,r) = other[r]; + } +} + + +/* + + +Fill diagonal of an existing matrix + +*/ +template typename A, + typename VB, +typename std::enable_if::value && + SameElementType::value,bool>::type = true> +inline void _fill_diagonal(Matrix &v, + const VB& other, + const vector_length_t rows) +{ + for(index_t r=0;r < rows ; r++) + { + v(r,r) = other[r]; + } +} + +template typename A> +inline void _identity(Matrix &v, + const vector_length_t rows) +{ + UNROLL_LOOP + for(index_t r=0;r < rows ; r++) + { + v.row(r) = P{}; + v(r,r) = number_traits
<P>
::one(); + } +} + + + +template::value,bool>::type = true> +inline typename OutputVector::type dot(const M&m,const V&v) +{ + typename OutputVector::type res; + _dot_m_v(res,m,v,CURRENT_ARCH); + return(res); +} + +template::value,bool>::type = true> +inline typename OutputVector::type dot(const M&m,const V&v) +{ + typename OutputVector::type res(m.rows()); + _dot_m_v(res,m,v,CURRENT_ARCH); + return(res); +} + +template::value,bool>::type = true> +inline void dot(RES && res,const M&m,const V&v) +{ + //typename OutputVector::type res(m.rows()); + _dot_m_v(res,m,v,CURRENT_ARCH); +} + +template::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb) +{ + + typename OutputMatrix::type res; + auto BT = mb.transpose(); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(BT)>; + _dot_m_m(ma,mb,res,BT,CURRENT_ARCH); + return(res); +} + +template::value && + number_traits::Scalar>::is_float,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb) +{ + + typename OutputMatrix::type res; + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(BT)>; + _dot_m_m(ma,mb,res,CURRENT_ARCH); + return(res); +} + +template::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb) +{ + typename OutputMatrix::type res(ma.rows(),mb.columns()); + auto BT = mb.transpose(); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(BT)>; + _dot_m_m(ma,mb,res,BT,CURRENT_ARCH); + return(res); +} + +template::value && + number_traits::Scalar>::is_float,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb) +{ + typename OutputMatrix::type res(ma.rows(),mb.columns()); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(BT)>; + _dot_m_m(ma,mb,res,CURRENT_ARCH); + return(res); +} + +/* + + +Get res matrix as argument to avoid memory allocation when +assigning the result to a different type of Matrix (like a Matrix view). + +*/ +template::value && + number_traits::Scalar>::is_float,bool>::type = true> +inline void dot(RES &&res,const MA&ma,const MB&mb) +{ + //typename OutputMatrix::type res(ma.rows(),mb.columns()); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(BT)>; + _dot_m_m(ma,mb,std::forward(res),CURRENT_ARCH); +} + +template::value && + number_traits::Scalar>::is_float,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb) +{ + typename OutputMatrix::type res(ma.rows(),mb.columns()); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(mbt)>; + _dot_m_m(ma,mb,res,CURRENT_ARCH); + return(res); +} + +template::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb,const TMP &mbt) +{ + typename OutputMatrix::type res(ma.rows(),mb.columns()); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(mbt)>; + _dot_m_m(ma,mb,res,mbt,CURRENT_ARCH); + return(res); +} + + + +template +Matrix mk_identity(const vector_length_t l) +{ + Matrix res(l,l); + _identity(res,l); + return(res); +}; + + +template +Matrix mk_identity() +{ + Matrix res; + _identity(res,L); + return(res); +}; + +/*! 
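+   Example (illustrative sketch only; Matrix, float32_t and the TMP_ALLOC
+   allocator are provided or configured by other headers in this patch):
+
+       Matrix<float32_t,4,4> a;                 // static 4x4 matrix
+       Matrix<float32_t,4,4> b;
+       auto id = mk_identity<float32_t,4>();    // 4x4 identity matrix
+       auto c  = dot(a, b);                     // matrix * matrix product
+       Matrix<float32_t,4,4> at;
+       transposeTo(at, a);                      // transpose kernel selected through CURRENT_ARCH
+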
@} */ + +} diff --git a/dsppp/Include/dsppp/arch.hpp b/dsppp/Include/dsppp/arch.hpp new file mode 100644 index 000000000..7326ba182 --- /dev/null +++ b/dsppp/Include/dsppp/arch.hpp @@ -0,0 +1,64 @@ +// -*- C++ -*- +/** @file */ +#pragma once + + +namespace arm_cmsis_dsp { + +/** \addtogroup ARCH Architecture detection + * \ingroup DSPPP + * @{ + */ + +/** + * Scalar architecture + */ +class Scalar {}; + +/** + * Architecture supporting DSP extensions + */ +class DSP:public Scalar {}; + +/** + * v8.1M Architecture + */ +class Helium:public DSP {}; + +/** + * v8.2M Architecture + */ +class Helium82:public Helium {}; + +/** + * Architecture supporting Neon + */ +class Neon:public Scalar {}; + +/*! @} */ + +} + +#include "arch_detection.hpp" + + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) +#define ARCH Helium82 +#elif defined(ARM_MATH_DSP) +#define ARCH DSP +#elif defined(ARM_MATH_NEON) +#define ARCH Neon +#else +#define ARCH Scalar +#endif + +#define CURRENT_ARCH (ARCH*)nullptr + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_DSP) || defined(ARM_MATH_NEON) +#define HAS_VECTOR +#endif + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) +#define HAS_PREDICATED_LOOP +#endif + diff --git a/dsppp/Include/dsppp/arch_detection.hpp b/dsppp/Include/dsppp/arch_detection.hpp new file mode 100644 index 000000000..d8194b407 --- /dev/null +++ b/dsppp/Include/dsppp/arch_detection.hpp @@ -0,0 +1,281 @@ +// -*- C++ -*- + +#pragma once + + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* Compiler specific diagnostic adjustment */ +#if defined ( __CC_ARM ) + +#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + +#elif defined ( __APPLE_CC__ ) + #pragma GCC diagnostic ignored "-Wold-style-cast" + +#elif defined ( __GNUC__ ) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wsign-conversion" + #pragma GCC diagnostic ignored "-Wconversion" + #pragma GCC diagnostic ignored "-Wunused-parameter" + #define GCC_COMPILER + +#elif defined ( __ICCARM__ ) + +#elif defined ( __TI_ARM__ ) + +#elif defined ( __CSMC__ ) + +#elif defined ( __TASKING__ ) + +#elif defined ( _MSC_VER ) + +#else + #error Unknown compiler +#endif + + +/* Included for instrinsics definitions */ +#if defined (_MSC_VER ) +#include +#define __STATIC_FORCEINLINE static __forceinline +#define __STATIC_INLINE static __inline +#define __ALIGNED(x) __declspec(align(x)) +#define __WEAK +#elif defined ( __APPLE_CC__ ) +#include +#define __ALIGNED(x) __attribute__((aligned(x))) +#define __STATIC_FORCEINLINE static inline __attribute__((always_inline)) +#define __STATIC_INLINE static inline +#define __WEAK +#elif defined (__GNUC_PYTHON__) +#include +#define __ALIGNED(x) __attribute__((aligned(x))) +#define __STATIC_FORCEINLINE static inline __attribute__((always_inline)) +#define __STATIC_INLINE static inline +#define __WEAK +#else +#include "cmsis_compiler.h" +#endif + + + +#include +#include +#include +#include + +/* evaluate ARM DSP feature */ +#if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)) + #define ARM_MATH_DSP 1 +#endif + +#if defined(ARM_MATH_NEON) + #if defined(_MSC_VER) && defined(_M_ARM64EC) + #include + #else + #include + #endif + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + #if !defined(ARM_MATH_NEON_FLOAT16) + #define ARM_MATH_NEON_FLOAT16 + #endif + #endif +#endif + +#if !defined(ARM_MATH_AUTOVECTORIZE) + + +#if defined(__ARM_FEATURE_MVE) +#if __ARM_FEATURE_MVE + #if !defined(ARM_MATH_MVEI) + #define ARM_MATH_MVEI + 
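+  /* Illustrative note (sketch only, not used by the build): the architecture
+     classes and the CURRENT_ARCH macro from arch.hpp drive overload-based
+     dispatch of the kernels in this patch. A kernel is typically declared
+     once per architecture with a trailing tag pointer, e.g.
+
+         template<typename T>
+         void my_kernel(T *p, vector_length_t l, const Scalar* = nullptr);  // generic path
+         template<typename T>
+         void my_kernel(T *p, vector_length_t l, const Helium* = nullptr);  // MVE path
+
+     and call sites pass CURRENT_ARCH so that the most derived architecture
+     class selects the most specialized overload at compile time (my_kernel
+     is a hypothetical name used only for this illustration). */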
#endif +#endif + +#if (__ARM_FEATURE_MVE & 2) + #if !defined(ARM_MATH_MVEF) + #define ARM_MATH_MVEF + #endif + #if !defined(ARM_MATH_MVE_FLOAT16) + #define ARM_MATH_MVE_FLOAT16 + #endif +#endif + +#endif /*defined(__ARM_FEATURE_MVE)*/ +#endif /*!defined(ARM_MATH_AUTOVECTORIZE)*/ + + +#if defined (ARM_MATH_HELIUM) + #if !defined(ARM_MATH_MVEF) + #define ARM_MATH_MVEF + #endif + + #if !defined(ARM_MATH_MVEI) + #define ARM_MATH_MVEI + #endif + + #if !defined(ARM_MATH_MVE_FLOAT16) + #define ARM_MATH_MVE_FLOAT16 + #endif +#endif + + + +#if defined ( __CC_ARM ) + /* Enter low optimization region - place directly above function definition */ + #if defined( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_ENTER \ + _Pragma ("push") \ + _Pragma ("O1") + #else + #define LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_EXIT \ + _Pragma ("pop") + #else + #define LOW_OPTIMIZATION_EXIT + #endif + + /* Enter low optimization region - place directly above function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + + /* Exit low optimization region - place directly after end of function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined (__ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __APPLE_CC__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __GNUC__ ) + #define LOW_OPTIMIZATION_ENTER \ + __attribute__(( optimize("-O1") )) + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __ICCARM__ ) + /* Enter low optimization region - place directly above function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_ENTER \ + _Pragma ("optimize=low") + #else + #define LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #define LOW_OPTIMIZATION_EXIT + + /* Enter low optimization region - place directly above function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \ + _Pragma ("optimize=low") + #else + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __TI_ARM__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __CSMC__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __TASKING__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT +#endif + + + +/* Compiler specific diagnostic adjustment */ +#if defined ( __CC_ARM ) + +#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 
6010050 ) + +#elif defined ( __APPLE_CC__ ) + +#elif defined ( __GNUC__ ) +#pragma GCC diagnostic pop + +#elif defined ( __ICCARM__ ) + +#elif defined ( __TI_ARM__ ) + +#elif defined ( __CSMC__ ) + +#elif defined ( __TASKING__ ) + +#elif defined ( _MSC_VER ) + +#else + #error Unknown compiler +#endif + +#ifdef __cplusplus +} +#endif + +#if defined(__ARM_FEATURE_MVE) && __ARM_FEATURE_MVE +#include +#endif + +#if !(__ARM_FEATURE_MVE & 2) + #if !defined(DISABLEFLOAT16) + #if defined(__ARM_FP16_FORMAT_IEEE) || defined(__ARM_FP16_FORMAT_ALTERNATIVE) + typedef __fp16 float16_t; + #define ARM_FLOAT16_SUPPORTED + #endif + #endif +#else + /* When Vector float16, this flag is always defined and can't be disabled */ + #define ARM_FLOAT16_SUPPORTED +#endif + +#if defined(ARM_FLOAT16_SUPPORTED) + +#if defined(__ICCARM__) + +#define F16INFINITY ((float16_t) INFINITY) + +#else + +#define F16INFINITY ((float16_t)__builtin_inf()) + +#endif + +#endif + + + + diff --git a/dsppp/Include/dsppp/common.hpp b/dsppp/Include/dsppp/common.hpp new file mode 100644 index 000000000..cd272c03e --- /dev/null +++ b/dsppp/Include/dsppp/common.hpp @@ -0,0 +1,73 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include + +// For compiler detection +#include "arch.hpp" + + +#define ARM_COMPUTE_DISABLE_UNROLL +// For loop (not for fusion unrolling functions) +#define MEMORY_POOL_ALIGNMENT 128 +//#define MEMORY_ALLOCATION_DEBUG + +// TMP_ALLOC must be defined to use the library +// It is generally defined in an external header not +// part of the library. +// By default it is using the malloc allocator + +#ifndef TMP_ALLOC +#define TMP_ALLOC malloc_allocator +#endif + +#if !defined(GCC_COMPILER) +// clang / AC6 +#if defined(ARM_COMPUTE_DISABLE_UNROLL) +#define UNROLL_LOOP _Pragma ("clang loop unroll(disable)") +#else +#define UNROLL_LOOP _Pragma("clang loop unroll_count(4)") +#endif + +#define DISABLE_LOOP_UNROLL _Pragma("clang loop unroll(disable)") + +#else +// GCC +#define UNROLL_LOOP +#define DISABLE_LOOP_UNROLL +#endif + +namespace arm_cmsis_dsp { + +/** \addtogroup COMMON Common types and constants + * \ingroup DSPPP + * @{ + */ + // Dynamic objects (dimensions only known at runtime) + constexpr int DYNAMIC = -1; + + // Dynamic objects (dimensions only known at runtime) but + // with some constraints (like stride == nb_cols) + constexpr int CONSTRAINED_DYNAMIC = -2; + + // It must be a signed datatype + typedef int32_t index_t; + typedef int32_t vector_length_t; + +/*! @} */ + +/** \addtogroup DEBUG Tools for debugging + * \ingroup DSPPP + * @{ + */ +template +void PrintType(void) +{ + //T t; + std::cout << __PRETTY_FUNCTION__ << "\r\n"; +}; + +/*! @} */ +} diff --git a/dsppp/Include/dsppp/fixed_point.hpp b/dsppp/Include/dsppp/fixed_point.hpp new file mode 100644 index 000000000..82bc2d437 --- /dev/null +++ b/dsppp/Include/dsppp/fixed_point.hpp @@ -0,0 +1,1036 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include "arch.hpp" +#include + +#include +#include +namespace arm_cmsis_dsp { + +/** \addtogroup FIXED Fixed point datatypes + * \ingroup DSPPP + * @{ + */ + +/* + +Normally those kind of definitions are in a compiler file +in Core or Core_A. + +But for MSVC compiler it is a bit special. The goal is very specific +to CMSIS-DSP and only to allow the use of this library from other +systems like Python or Matlab. + +MSVC is not going to be used to cross-compile to ARM. So, having a MSVC +compiler file in Core or Core_A would not make sense. 
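+
+For reference, the fallbacks defined below follow the usual ARM saturation
+semantics: for example __SSAT(40000, 16) returns 32767, __SSAT(-40000, 16)
+returns -32768, __USAT(300, 8) returns 255 and __USAT(-5, 8) returns 0.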
+ +*/ +#if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) || defined(__APPLE_CC__) +__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data) +{ + if (data == 0U) { return 32U; } + + uint32_t count = 0U; + uint32_t mask = 0x80000000U; + + while ((data & mask) == 0U) + { + count += 1U; + mask = mask >> 1U; + } + return count; +} + +__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat) +{ + if ((sat >= 1U) && (sat <= 32U)) + { + const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U); + const int32_t min = -1 - max ; + if (val > max) + { + return max; + } + else if (val < min) + { + return min; + } + } + return val; +} + +__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat) +{ + if (sat <= 31U) + { + const uint32_t max = ((1U << sat) - 1U); + if (val > (int32_t)max) + { + return max; + } + else if (val < 0) + { + return 0U; + } + } + return (uint32_t)val; +} +#endif + +#if !defined(ARM_MATH_DSP) +__STATIC_FORCEINLINE int32_t clip_int64_to_q31( + int64_t x) + { + return ((int32_t) (x >> 32) != ((int32_t) x >> 31)) ? + ((0x7FFFFFFF ^ ((int32_t) (x >> 63)))) : (int32_t) x; + } + +__STATIC_FORCEINLINE int32_t __QADD( + int32_t x, + int32_t y) + { + return ((int32_t)(clip_int64_to_q31((int64_t)x + (int32_t)y))); + } + + + /* + * @brief C custom defined QSUB + */ + __STATIC_FORCEINLINE int32_t __QSUB( + int32_t x, + int32_t y) + { + return ((int32_t)(clip_int64_to_q31((int64_t)x - (int32_t)y))); + } + + +#endif + + +constexpr bool test64(const int M,const int F,const int S){return((M+F+S)>32 && (M+F+S)<=64);} +constexpr bool test32(const int M,const int F,const int S){return((M+F+S)>16 && (M+F+S)<=32);} +constexpr bool test16(const int M,const int F,const int S){return((M+F+S)>8 && (M+F+S)<=16);} +constexpr bool test8 (const int M,const int F,const int S){return((M+F+S)<=8);} + +template +struct fixed_storage_type +{ +}; + +template +struct fixed_storage_type +{ + typedef int64_t value_type; + typedef int64_t wider_type; + typedef int32_t narrow_type; +}; + +template +struct fixed_storage_type +{ + typedef uint64_t value_type; + typedef uint64_t wider_type; + typedef uint32_t narrow_type; +}; + + +template +struct fixed_storage_type +{ + typedef int32_t value_type; + typedef int64_t wider_type; + typedef int16_t narrow_type; +}; + +template +struct fixed_storage_type +{ + typedef uint32_t value_type; + typedef uint64_t wider_type; + typedef uint16_t narrow_type; +}; + + + +template +struct fixed_storage_type +{ + typedef int16_t value_type; + typedef int32_t wider_type; + typedef int8_t narrow_type; +}; + +template +struct fixed_storage_type +{ + typedef uint16_t value_type; + typedef uint32_t wider_type; + typedef uint8_t narrow_type; +}; + + +template +struct fixed_storage_type +{ + typedef int8_t value_type; + typedef int16_t wider_type; + typedef int8_t narrow_type; +}; + +template +struct fixed_storage_type +{ + typedef uint8_t value_type; + typedef uint16_t wider_type; + typedef uint8_t narrow_type; +}; + + +template::value_type> +struct Q {}; + +template +struct Q { + constexpr static int fracBits = F; + constexpr static int mantissaBits = M; + constexpr static bool isSigned = true; + using value_type = typename fixed_storage_type::value_type; + using wider_type = typename fixed_storage_type::wider_type; + constexpr static value_type maxVal = 0x7FFFFFFFFFFFFFFFLL; + constexpr static value_type minVal = 0x8000000000000000LL; + + + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= -1.0f ? 
minVal : value_type(f * (float)((maxVal >> (63 - F)) )))); + }; + + value_type v; + constexpr Q():v(0){}; + constexpr explicit Q(const value_type x):v(x){}; + constexpr static Q f(const float x){return Q(convert(x));} + + constexpr static Q one() {return f(1.0f);}; + + Q(Q&& other)=default; + Q(const Q& other)=default; + Q& operator=(Q&& other)=default; + Q& operator=(const Q& other)=default; + + + constexpr explicit Q(const Q&other) + :v{value_type(other.v)} {}; + + bool operator==(const Q& b) const + { + return(v == b.v); + } + + bool operator!=(const Q& b) const + { + return(v != b.v); + } + + bool operator<(const Q& b) const + { + return(v < b.v); + } + + bool operator>(const Q& b) const + { + return(v > b.v); + } + + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + + Q & operator+=(const Q other) + { + v += other.v; + return(*this); + } + + Q & operator-=(const Q other) + { + v -= other.v; + return(*this); + } + + + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / (maxVal >> (63 - F))) << "_Q(" << M << "," << F << ")";; + return(stream); + } + +}; + +template +struct Q { + constexpr static int fracBits = F; + constexpr static int mantissaBits = M; + constexpr static bool isSigned = false; + using value_type = typename fixed_storage_type::value_type; + using wider_type = typename fixed_storage_type::wider_type; + constexpr static value_type maxVal = 0xFFFFFFFFFFFFFFFFLL; + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= 0.0f ? 0LL : value_type(f * (float)((maxVal >> (64 - F)))))); + }; + + value_type v; + constexpr Q():v(0){}; + constexpr explicit Q(const value_type x):v(x){}; + constexpr static Q f(const float x){return Q(convert(x));} + + constexpr static Q one() {return f(1.0f);}; + + Q(Q&& other)=default; + Q(const Q& other)=default; + Q& operator=(Q&& other)=default; + Q& operator=(const Q& other)=default; + + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / (maxVal >> (64 - F))) << "_UQ(" << M << "," << F << ")";; + return(stream); + } + + bool operator==(const Q& b) const + { + return(v == b.v); + } + + bool operator!=(const Q& b) const + { + return(v != b.v); + } + + bool operator<(const Q& b) const + { + return(v < b.v); + } + + bool operator>(const Q& b) const + { + return(v > b.v); + } + + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + +}; + + +template +struct Q { + constexpr static int fracBits = F; + constexpr static int mantissaBits = M; + constexpr static bool isSigned = true; + using value_type = typename fixed_storage_type::value_type; + using wider_type = typename fixed_storage_type::wider_type; + constexpr static value_type maxVal = 0x7FFFFFFFL; + constexpr static value_type minVal = 0x80000000L; + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? maxVal : (i<(value_type)minVal ? minVal : i)); + }; + + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= -1.0f ? 
minVal : value_type(f * (float)((wider_type)1<&other): + v{value_type(other.v)} {}; + + bool operator==(const Q& b) const + { + return(v == b.v); + } + + bool operator!=(const Q& b) const + { + return(v != b.v); + } + + bool operator<(const Q& b) const + { + return(v < b.v); + } + + bool operator>(const Q& b) const + { + return(v > b.v); + } + + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + Q & operator+=(const Q other) + { + v = __QADD(v,other.v); + return(*this); + } + + Q & operator-=(const Q other) + { + v = __QSUB(v,other.v); + return(*this); + } + + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / ((wider_type)1< +struct Q { + constexpr static int fracBits = F; + constexpr static int mantissaBits = M; + constexpr static bool isSigned = false; + using value_type = typename fixed_storage_type::value_type; + using wider_type = typename fixed_storage_type::wider_type; + constexpr static value_type maxVal = 0xFFFFFFFFL; + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? maxVal : i); + }; + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= 0.0f ? 0 : value_type(f * (float)((wider_type)1<(const Q& b) const + { + return(v > b.v); + } + + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + bool operator>=(const Q& b) const + { + return(v >= b.v); + } +}; + +template +struct Q { + constexpr static int fracBits = F; + constexpr static int mantissaBits = M; + constexpr static bool isSigned = true; + using value_type = typename fixed_storage_type::value_type; + using wider_type = typename fixed_storage_type::wider_type; + constexpr static value_type maxVal = 0x7FFF; + constexpr static value_type minVal = 0x8000; + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? maxVal : (i<(value_type)minVal ? minVal : i)); + }; + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= -1.0f ? minVal : value_type(f * (float)((wider_type)1<&other):v{value_type(other.v)} {}; + + bool operator==(const Q& b) const + { + return(v == b.v); + } + + bool operator!=(const Q& b) const + { + return(v != b.v); + } + + bool operator<(const Q& b) const + { + return(v < b.v); + } + + bool operator>(const Q& b) const + { + return(v > b.v); + } + + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + Q & operator+=(const Q other) + { + #if !defined(ARM_MATH_DSP) + v = __SSAT((value_type)v + other.v,16); + #else + v = (value_type) __QADD16(v, other.v); + #endif + return(*this); + } + + Q & operator-=(const Q other) + { + #if !defined(ARM_MATH_DSP) + v = __SSAT((value_type)v - other.v,16); + #else + v = (value_type) __QSUB16(v, other.v); + #endif + return(*this); + } + + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / (((wider_type)1)< +struct Q { + constexpr static int fracBits = F; + constexpr static int mantissaBits = M; + constexpr static bool isSigned = false; + using value_type = typename fixed_storage_type::value_type; + using wider_type = typename fixed_storage_type::wider_type; + constexpr static value_type maxVal = 0xFFFF; + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? 
maxVal : i); + }; + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= 0.0f ? 0 : value_type(f * (float)((wider_type)1<(const Q& b) const + { + return(v > b.v); + } + + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + Q & operator+=(const Q other) + { + v = __USAT((value_type)v + other.v,16); + return(*this); + } + + + + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / ((wider_type)1< +struct Q { + constexpr static int fracBits = F; + constexpr static int mantissaBits = M; + constexpr static bool isSigned = true; + using value_type = typename fixed_storage_type::value_type; + using wider_type = typename fixed_storage_type::wider_type; + constexpr static value_type maxVal = 0x7F; + constexpr static value_type minVal = 0x80; + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? maxVal : (i<(value_type)minVal ? minVal : i)); + }; + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= -1.0f ? minVal : value_type(f * (float)((wider_type)1<&other):v{value_type(other.v)} {}; + + bool operator==(const Q& b) const + { + return(v == b.v); + } + + bool operator!=(const Q& b) const + { + return(v != b.v); + } + + bool operator<(const Q& b) const + { + return(v < b.v); + } + + bool operator>(const Q& b) const + { + return(v > b.v); + } + + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + Q & operator+=(const Q other) + { + #if !defined(ARM_MATH_DSP) + v = __SSAT((value_type)v + other.v,8); + #else + v = (value_type) __QADD8(v, other.v); + #endif + return(*this); + } + + Q & operator-=(const Q other) + { + #if !defined(ARM_MATH_DSP) + v = __SSAT((value_type)v + other.v,8); + #else + v = (value_type) __QSUB8(v, other.v); + #endif + return(*this); + } + + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / ((wider_type)1< +struct Q { + constexpr static int fracBits = F; + constexpr static int mantissaBits = M; + constexpr static bool isSigned = false; + using value_type = typename fixed_storage_type::value_type; + using wider_type = typename fixed_storage_type::wider_type; + constexpr static value_type maxVal = 0xFF; + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? maxVal : i); + }; + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= 0.0f ? 
0 : value_type(f * (float)((wider_type)1<(const Q& b) const + { + return(v > b.v); + } + + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + Q & operator+=(const Q other) + { + v = __USAT((value_type)v + other.v,8); + return(*this); + } + + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / ((wider_type)1<; +using Q31 = Q<0,31>; +using Q15 = Q<0,15>; +using Q7 = Q<0,7>; + +constexpr Q63 operator ""_q63(long double x){return Q63(Q63::convert((float)x));} +constexpr Q31 operator ""_q31(long double x){return Q31(Q31::convert((float)x));} +constexpr Q15 operator ""_q15(long double x){return Q15(Q15::convert((float)x));} +constexpr Q7 operator ""_q7(long double x){return Q7(Q7::convert((float)x));} + + + +template +inline Q< MA+MB+1 , FA+FB,SA || SB> mult(const Q &a, + const Q &b) +{ + /* + + Why mantissa of result is MA + MB + 1. + If we take as example, Q7 * Q7 and we multiply + 0x80 * 0x80 (-128 * -128) we get 0x4000 and if we shift right by 7 + we get 0x080 (on 9 bits). If the additional mantissa bit was not + kept, we would get 0x80 (on 8 bits) which would mean a negative + number. + + Saturation of 0x080 (on 9 bits) will give 0x7F whereas + saturation of 0x80 (on 8 bits) would keep 0x80 and thus + the wrong sign. + + By using MA + MB + 1 we ensure that Q7 * Q7 is Q<1,14> + and not Q<0,14>. + + To convert Q<1,14> to Q<0,7> we need a toFract and a saturate. + + */ + using ResType = typename Q< MA+MB+1 , FA+FB,SA || SB>::value_type; + ResType res = ((ResType)a.v * (ResType)b.v); + return(Q(res)); +} + + +template +inline Q operator+(const Q &a,const Q &b) +{ + Q ret(a); + ret+=b; + return ret; +} + +template +inline Q operator-(const Q &a,const Q &b) +{ + Q ret(a); + ret-=b; + return ret; +} + +template +inline Q operator-(const Q &a) +{ + Q ret; + ret-=a; + return ret; +} + +// Unsaturating add +template +inline Q add(const Q &a,const Q &b) +{ + return Q(a.v + b.v); +} + +// Unsaturating sub +template +inline Q sub(const Q &a,const Q &b) +{ + return Q(a.v - b.v); +} + + +template +constexpr std::integral_constant i_{}; + +template +inline Q operator >>(const Q &a, std::integral_constant) noexcept { + return Q(a.v >> N); +} + +template +inline Q< M+N , F,S> operator <<(const Q &a, std::integral_constant) noexcept { + using ResType = typename Q::value_type; + return Q(ResType(a.v) << N); +} + +template +inline Q saturate(const Q &src, + typename std::enable_if<(MD < MS) && ((MD+F)<31)>::type* = nullptr) +{ + return(Q(__SSAT(src.v, MD+F+1))); +} + + +template +inline Q saturate(const Q &src,typename std::enable_if<(MD < MS) && ((MD+F)<31)>::type* = nullptr) +{ + return(Q(__USAT(src.v, MD+F+1))); +} + + +template +struct FixedCastShift {}; + +/* Positive shift */ +template +struct FixedCastShiftFS)> { + constexpr static Q shift(const Q &src) + { + using DstType = typename Q::value_type; + return(Q(DstType(src.v) << (FD-FS))); + } +}; + +template +struct FixedCastShift { + constexpr static Q shift(const Q &src) + { + using DstType = typename Q::value_type; + using SrcType = typename Q::value_type; + + return(Q(DstType(SrcType(src.v) >> (FS-FD)))); + } +}; + +template +inline Q toFrac(const Q &src) +{ + return(FixedCastShift::shift(src)); +} + + +template +struct Accumulate; + +template +struct Accumulate { + static Q acc (const Q &a,const Q &b) + { + using DstType = typename Q::value_type; + return(Q(DstType(a.v) + DstType(b.v))); + } +}; + +template +inline Q 
accumulate(const Q &a,const Q &b) +{ + return(AccumulateMS)>::acc(a,b)); +} + + +template +inline Q _abs(const Q a) +{ + using DestType = typename Q::value_type; + return(Q(DestType(abs(a.v)))); +} + +inline Q7 operator*(const Q7 &a,const Q7 &b) +{ + return(saturate(toFrac<7>(mult(a,b)))); +} + +inline Q15 operator*(const Q15 &a,const Q15 &b) +{ + return (saturate(toFrac<15>(mult(a,b)))); +} + +inline Q31 operator*(const Q31 &a,const Q31 &b) +{ + return (toFrac<31>(saturate(toFrac<30>(mult(a,b))))); +} + +template +inline bool operator>(const Q &a,const Q &b) +{ + return(a.v>b.v); +} + +template +inline bool operator<(const Q &a,const Q &b) +{ + return(a.v +inline bool operator>=(const Q &a,const Q &b) +{ + return(a.v>=b.v); +} + + +template +inline bool operator<=(const Q &a,const Q &b) +{ + return(a.v<=b.v); +} + +template +inline bool operator==(const Q a,const Q b) +{ + return(a.v==b.v); +} + +template +inline bool operator!=(const Q a,const Q b) +{ + return(a.v!=b.v); +} + + +template +inline Q operator/(const Q a,const int32_t b) +{ + return(Q(a.v / b)); +} + + +template +inline Q operator+(const Q &a) +{ + return(a); +} + +/*! @} */ + +} \ No newline at end of file diff --git a/dsppp/Include/dsppp/forward.hpp b/dsppp/Include/dsppp/forward.hpp new file mode 100644 index 000000000..012953dcc --- /dev/null +++ b/dsppp/Include/dsppp/forward.hpp @@ -0,0 +1,149 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +namespace arm_cmsis_dsp { + +template +struct Vector_Base; + +template +struct VectorView; + +template typename Allocator> +struct Vector; + +template typename Allocator> +struct Matrix; + +template +struct MatrixView; + +template +struct NbRows; + +template +struct NbCols; + +template +struct Complexity; + +template +struct OutputVectorDim; + +template +struct CompatibleStaticMatMatProduct; + +template +struct CompatibleStaticMatVecProduct; + +template +struct CompatibleDynamicMatVecProduct; + +template +struct CompatibleDynamicMatMatProductStaticStride; + +template +struct CompatibleDynamicMatMatProductDynamicStride; + +template +struct CompatibleDynamicMatMatProduct; + +template +struct OutputVector; + +template +struct OutputMatrix; + + + +/* + +Identifications + +*/ + +/* + +Is a contiguous array in memory with scalar indexing +(operator[]) +It can be an _Expr +Vector has a length + +Generally used whe scalar indexing is required or length + +*/ +template +struct IsVector; + +/* + +Has matrix indexing (operator()) +and matrix operations like transpose, identity. +So it cannot be an _Expr because _Expr has no transpose, identity +Has rows, columns +Matrix may be vectors Vectors (with above definition) + +Generally used when transpose or identity are required. + +*/ +template +struct IsMatrix; + +/* + +Has matrix indexing (operator()) +but no matrix operator like transpose. 
+It can be an Expr +Has rows, columns +It may not always be a Vector (MatrixView are not contiguous) + +Generally used only when matrix indexing is mandatory + +*/ +template +struct HasMatrixIndexing; + +/* + + +Type Matrix : IsVector, IsMatrix, HasMatrixIndexing +Type MatrixView : , IsMatrix, HasMatrixIndexing +Type _Expr with Matrix : IsVector, , HasMatrixIndexing +Type _Expr with some MatrixView : HasMatrixIndexing + +*/ + + +/* + +Dimensions only known at runtime + +*/ +template +struct IsDynamic; + +/* + +StaticLength if known at build time otherwise 0 +*/ +template +struct StaticLength; + +/* + +Type of elements in vector or matrix + +*/ +template +struct ElementType; + +template +struct HasStaticStride; + +template +struct StaticStride; + +} diff --git a/dsppp/Include/dsppp/fusion.hpp b/dsppp/Include/dsppp/fusion.hpp new file mode 100644 index 000000000..db8864c4a --- /dev/null +++ b/dsppp/Include/dsppp/fusion.hpp @@ -0,0 +1,760 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +namespace arm_cmsis_dsp { + +/** \addtogroup FUSION Abstract syntax tree for fusion + * \ingroup DSPPP + * @{ + */ + +template struct traits +{ + typedef T Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits::vector Vector; +#endif +}; + +template +struct Complexity +{ + constexpr static int value = 0; +}; + +/* + +An unregular datatype has different strides like MatrixView +and can only be assigned to a MatrixView using a 2D functions. +So all normal eval functions will reject unregular structures. + +*/ +template +struct HasMatrixIndexing +{ + constexpr static bool value = false; +}; + +template +struct HasStaticStride +{ + constexpr static bool value = false; +}; + + + +template +struct IsVector +{ + constexpr static bool value = false; +}; + +template +struct IsMatrix +{ + constexpr static bool value = false; +}; + + + +template +struct StaticLength +{ + constexpr static vector_length_t value = 0; +}; + +template +struct ElementType +{ + typedef T type; +}; + + +template +using SameElementType=std::is_same::type,typename ElementType::type>; + +template +constexpr bool has_vector_inst() {return (vector_traits::type>::has_vector);} + +template +constexpr bool has_predicate_inst() {return (vector_traits::type>::has_predicate);} + +template +constexpr bool is_scalar() {return (!IsVector::value && + !HasMatrixIndexing::value);} + +template +constexpr bool must_use_matrix_idx() {return (!IsVector::value && + HasMatrixIndexing::value);} +template +constexpr bool vector_idx_pair() {return (IsVector::value && + IsVector::value && + SameElementType::value);} + +// By default scalar has no vector size so can't be used +// to infer a size at build time. They are considered as dynamic +// Otherwise, by default vectors are considered static +// except is there is a specialization of this template +// (and that is the case for dynamic vectors) +template +struct IsDynamic +{ + constexpr static bool value = is_scalar(); +}; + +/* + +Vector only not including matrixes (which are also vectors) + +*/ +template +constexpr bool is_only_vector() {return (IsVector::value && + !HasMatrixIndexing::value);} + +template +constexpr bool must_use_matrix_idx_pair() {return ((must_use_matrix_idx() || must_use_matrix_idx()) && + SameElementType::value);} + + +/* + +Static length is 0 for scalar and Dynamic vectors +*/ +template +constexpr vector_length_t static_length() { + return ((StaticLength::value==0) ? 
StaticLength::value : StaticLength::value); +} + +/* + +False only when DA and DB are static vector and with differet size +Anyother case is ok. + +*/ + +template +constexpr bool same_static_length() +{ + return((StaticLength::value == 0) || /* Scalar or dynamic case */ + (StaticLength::value == 0) || /* Scalar or dynamic case */ + (StaticLength::value == StaticLength::value)); +} +/* + +Vector operators at instruction level + +*/ +#include "fusion_ops.hpp" + + +template +struct _Expr { + + using Scalar = typename traits::Scalar; +#if defined(HAS_VECTOR) + using Vector = typename traits::Vector; +#endif + + T& derived() {return(static_cast(*this));} + + T const& derived() const {return(static_cast(*this));} + + Scalar const operator[](const index_t i) const {return(this->derived()[i]);} + + Scalar const operator()(const index_t r,const index_t c) const {return(this->derived()(r,c));} + +#if defined(HAS_VECTOR) + Vector const vector_op(const index_t i) const {return(this->derived().vector_op(i));} + + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const {return(this->derived().vector_op_tail(i,remaining));} + + Vector const matrix_op(const index_t r,const index_t c) const {return(this->derived().matrix_op(r,c));} + + Vector const matrix_op_tail(const index_t r,const index_t c,const vector_length_t remaining) const {return(this->derived().matrix_op_tail(r,c,remaining));} +#endif + + vector_length_t length() const {return(this->derived().length());} + vector_length_t rows() const {return(this->derived().rows());} + vector_length_t columns() const {return(this->derived().columns());} + + virtual ~_Expr(){}; + +protected: + _Expr() = default; + _Expr(const _Expr&) = default; + _Expr(_Expr&&) = default; + _Expr& operator=(const _Expr& other) = delete; + _Expr& operator=(_Expr&& other) = delete; +}; + +/***************** + * + * BINARY AST + */ + +template +struct _Binary: _Expr<_Binary> +{ + using Scalar = typename traits::Scalar; +#if defined(HAS_VECTOR) + using Vector = typename traits::Vector; +#endif + _Binary(const LHS &lhs, + const RHS &rhs, + const _BinaryOperator &op): + lhs_(lhs),rhs_(rhs),op_(op){ + } + + + _Binary(const _Binary &other): + lhs_(other.lhs_),rhs_(other.rhs_),op_(other.op_){ + } + + _Binary& operator=(const _Binary& other) = delete; + _Binary& operator=(_Binary&& other) = delete; + + _Binary(_Binary &&other): + lhs_(std::move(other.lhs_)),rhs_(std::move(other.rhs_)),op_(std::move(other.op_)) + { + } + + template::value,bool>::type = true> + vector_length_t length() const { + return(lhs_.length()); + } + + template::value && IsVector::value,bool>::type = true> + vector_length_t length() const { + return(rhs_.length()); + } + + template::value,bool>::type = true> + vector_length_t rows() const { + return(lhs_.rows()); + } + + template::value && HasMatrixIndexing::value,bool>::type = true> + vector_length_t rows() const { + return(rhs_.rows()); + } + + template::value,bool>::type = true> + vector_length_t columns() const { + return(lhs_.columns()); + } + + template::value && HasMatrixIndexing::value,bool>::type = true> + vector_length_t columns() const { + return(rhs_.columns()); + } + + + + template::value && + IsVector::value,bool>::type = true> + Scalar const operator[](const index_t i) const { + return(op_(lhs_[i],rhs_[i])); + } + + template::value && + is_scalar(),bool>::type = true> + Scalar const operator[](const index_t i) const { + return(op_(lhs_[i],rhs_)); + } + + template() && + IsVector::value,bool>::type = true> + Scalar const 
operator[](const index_t i) const { + return(op_(lhs_,rhs_[i])); + } + + template::value && + HasMatrixIndexing::value,bool>::type = true> + Scalar const operator()(const index_t r,const index_t c) const + { + return(op_(lhs_(r,c),rhs_(r,c))); + } + + template() && + HasMatrixIndexing::value,bool>::type = true> + Scalar const operator()(const index_t r,const index_t c) const + { + return(op_(lhs_,rhs_(r,c))); + } + + template::value && + is_scalar(),bool>::type = true> + Scalar const operator()(const index_t r,const index_t c) const + { + return(op_(lhs_(r,c),rhs_)); + } + +#if defined(HAS_VECTOR) + /* V + V */ + template::value && + IsVector::value,bool>::type = true> + Vector const vector_op(const index_t i) const + { + return(op_(lhs_.vector_op(i),rhs_.vector_op(i))); + } + + template() && + IsVector::value && + IsVector::value,bool>::type = true> + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(op_(lhs_.vector_op_tail(i,remaining),rhs_.vector_op_tail(i,remaining),inner::vctpq::mk(remaining))); + } + + /* V + S */ + template::value && + is_scalar(),bool>::type = true> + Vector const vector_op(const index_t i) const + { + return(op_(lhs_.vector_op(i),rhs_)); + } + + template() && + IsVector::value && + is_scalar(),bool>::type = true> + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(op_(lhs_.vector_op_tail(i,remaining),rhs_,inner::vctpq::mk(remaining))); + } + + + + /* S + V */ + template() && + IsVector::value,bool>::type = true> + Vector const vector_op(const index_t i) const + { + return(op_(lhs_,rhs_.vector_op(i))); + } + + template() && + is_scalar() && + IsVector::value,bool>::type = true> + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(op_(lhs_,rhs_.vector_op_tail(i,remaining),inner::vctpq::mk(remaining))); + } + + + /************* + * + * For matrix + * + */ + + /* V + V */ + template::value && + HasMatrixIndexing::value,bool>::type = true> + Vector const matrix_op(const index_t r,const index_t c) const + { + return(op_(lhs_.matrix_op(r,c),rhs_.matrix_op(r,c))); + } + + template() && + HasMatrixIndexing::value && + HasMatrixIndexing::value,bool>::type = true> + Vector const matrix_op_tail(const index_t r,const index_t c,const vector_length_t remaining) const + { + return(op_(lhs_.matrix_op_tail(r,c,remaining),rhs_.matrix_op_tail(r,c,remaining),inner::vctpq::mk(remaining))); + } + + /* V + S */ + template::value && + is_scalar(),bool>::type = true> + Vector const matrix_op(const index_t r,const index_t c) const + { + return(op_(lhs_.matrix_op(r,c),rhs_)); + } + + template() && + HasMatrixIndexing::value && + is_scalar(),bool>::type = true> + Vector const matrix_op_tail(const index_t r,const index_t c,const vector_length_t remaining) const + { + return(op_(lhs_.matrix_op_tail(r,c,remaining),rhs_,inner::vctpq::mk(remaining))); + } + + + + /* S + V */ + template() && + HasMatrixIndexing::value,bool>::type = true> + Vector const matrix_op(const index_t r,const index_t c) const + { + return(op_(lhs_,rhs_.matrix_op(r,c))); + } + + template() && + is_scalar() && + HasMatrixIndexing::value,bool>::type = true> + Vector const matrix_op_tail(const index_t r,const index_t c,const vector_length_t remaining) const + { + return(op_(lhs_,rhs_.matrix_op_tail(r,c,remaining),inner::vctpq::mk(remaining))); + } + + +#endif + const LHS lhs_; + const RHS rhs_; + const _BinaryOperator op_; +}; + +template +struct Complexity<_Expr> +{ + constexpr static int 
value = Complexity::value; +}; + +template +struct ElementType<_Expr> +{ + typedef typename ElementType::type type; +}; + +template +struct Complexity<_Binary> +{ + constexpr static int lhsv = Complexity::value; + constexpr static int rhsv = Complexity::value; + constexpr static int value = lhsv + rhsv + 1; +}; + +template +struct ElementType<_Binary> +{ + typedef typename ElementType::type type; +}; + + +template +struct IsVector<_Expr> +{ + constexpr static bool value = IsVector::value; +}; + +template +struct HasMatrixIndexing<_Expr> +{ + constexpr static bool value = HasMatrixIndexing::value; +}; + +template +struct IsVector<_Binary> +{ + constexpr static bool value = + (IsVector::value && IsVector::value) || + (IsVector::value && is_scalar()) || + (is_scalar() && IsVector::value); +}; + +template +struct HasMatrixIndexing<_Binary> +{ + constexpr static bool value = + (HasMatrixIndexing::value && HasMatrixIndexing::value) || + (HasMatrixIndexing::value && is_scalar()) || + (is_scalar() && HasMatrixIndexing::value); +}; + +template +struct IsDynamic<_Expr> +{ + constexpr static bool value = IsDynamic::value; +}; + +template +struct IsDynamic<_Binary> +{ + constexpr static bool value = IsDynamic::value && IsDynamic::value; +}; + +template +struct StaticLength<_Expr> +{ + constexpr static vector_length_t value = StaticLength::value; +}; + +template +struct StaticLength<_Binary> +{ + constexpr static vector_length_t value = static_length(); + +}; + +template +struct traits<_Expr> +{ + typedef typename traits::Scalar Scalar; +#if defined(HAS_VECTOR) + typedef typename traits::Vector Vector; +#endif +}; + +template +struct traits<_Binary> +{ + typedef typename traits::Scalar Scalar; +#if defined(HAS_VECTOR) + typedef typename traits::Vector Vector; +#endif +}; + + +/***************** + * + * UNARY AST + */ + +template +struct _Unary: _Expr<_Unary> +{ + using Scalar = typename traits::Scalar; +#if defined(HAS_VECTOR) + using Vector = typename traits::Vector; +#endif + _Unary(const LHS &lhs, + const _UnaryOperator &op): + lhs_(lhs),op_(op){ + } + + _Unary(const _Unary &other): + lhs_(other.lhs_),op_(other.op_){ + } + + _Unary(LHS &&other): + lhs_(std::move(other.lhs_)),op_(std::move(other.op_)){ + } + + _Unary& operator=(const _Unary& other) = delete; + _Unary& operator=(_Unary&& other) = delete; + + + vector_length_t length() const { + return(lhs_.length()); + } + + template::value,bool>::type = true> + vector_length_t rows() const { + return(lhs_.rows()); + } + + template::value,bool>::type = true> + vector_length_t columns() const { + return(lhs_.columns()); + } + + template::value ,bool>::type = true> + Scalar const operator[](const index_t i) const { + return(op_(lhs_[i])); + } + + template::value ,bool>::type = true> + Scalar const operator()(const index_t r,const index_t c) const + { + return(op_(lhs_(r,c))); + } + + +#if defined(HAS_VECTOR) + /* V */ + template::value ,bool>::type = true> + Vector const vector_op(const index_t i) const + { + return(op_(lhs_.vector_op(i))); + } + + template() && + IsVector::value ,bool>::type = true> + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(op_(lhs_.vector_op_tail(i,remaining),inner::vctpq::mk(remaining))); + } + + /* + + For Matrix + + */ + + /* V */ + template::value ,bool>::type = true> + Vector const matrix_op(const index_t r,const index_t c) const + { + return(op_(lhs_.matrix_op(r,c))); + } + + template() && + HasMatrixIndexing::value ,bool>::type = true> + Vector const matrix_op_tail(const 
index_t r,const index_t c,const vector_length_t remaining) const + { + return(op_(lhs_.matrix_op_tail(r,c,remaining),inner::vctpq::mk(remaining))); + } + + +#endif + const LHS lhs_; + const _UnaryOperator op_; +}; + +template +struct Complexity<_Unary> +{ + constexpr static int value = 1 + Complexity::value; +}; + +template +struct ElementType<_Unary> +{ + typedef typename ElementType::type type; +}; + +template +struct IsVector<_Unary> +{ + constexpr static bool value = IsVector::value; +}; + +template +struct HasMatrixIndexing<_Unary> +{ + constexpr static bool value = HasMatrixIndexing::value; +}; + +template +struct IsDynamic<_Unary> +{ + constexpr static bool value = IsDynamic::value; +}; + +template +struct StaticLength<_Unary> +{ + constexpr static vector_length_t value = StaticLength::value; +}; + + +template +struct traits<_Unary> +{ + typedef typename traits::Scalar Scalar; +#if defined(HAS_VECTOR) + typedef typename traits::Vector Vector; +#endif +}; + + + + +/* + +Dot product + +*/ + +template +using DotResult = typename number_traits::Scalar>::accumulator; + + + + +template() && + is_only_vector() && + is_only_vector() && + (!IsDynamic::value || !IsDynamic::value),bool>::type = true> +inline DotResult dot(const VA& a, + const VB& b) +{ + constexpr vector_length_t l = static_length(); + return(_dot(a,b,l,CURRENT_ARCH)); +} + +template() && + is_only_vector() && + is_only_vector() && + (IsDynamic::value && IsDynamic::value),bool>::type = true> +inline DotResult dot(const VA& a, + const VB& b) +{ + const vector_length_t l = a.length(); + + return(_dot(a,b,l,CURRENT_ARCH)); +} + + + + + +template() && + (!IsDynamic::value || !IsDynamic::value),bool>::type = true> +inline void swap(VA&& a, + VB&& b) +{ + constexpr vector_length_t l = static_length(); + + _swap(std::forward(a),std::forward(b),l,CURRENT_ARCH); +} + +template() && + (IsDynamic::value && IsDynamic::value),bool>::type = true> +inline void swap(VA&& a, + VB&& b) +{ + const vector_length_t l = a.length(); + + _swap(std::forward(a),std::forward(b),l,CURRENT_ARCH); +} + +/*! 
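+   Example (illustrative sketch only; Vector, float32_t and the element-wise
+   operator overloads are provided by other headers in this patch):
+
+       Vector<float32_t,8> a,b,c;
+       c = a + b * 0.5f;      // builds a _Binary expression tree; the whole
+                              // right-hand side is evaluated in one fused loop
+                              // when it is assigned to c
+       auto s = dot(a, b);    // reduction using the DotResult accumulator type
+       swap(a, b);            // element-wise swap of the two vectors
+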
@} */ + +} + diff --git a/dsppp/Include/dsppp/fusion_ops.hpp b/dsppp/Include/dsppp/fusion_ops.hpp new file mode 100644 index 000000000..a1a83d932 --- /dev/null +++ b/dsppp/Include/dsppp/fusion_ops.hpp @@ -0,0 +1,358 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup FUSION + * @{ + */ + + +template +struct _UnaryOperator{ + Derived& derived() {return(static_cast(*this));} + + Derived const& derived() const {return(static_cast(*this));} + + Scalar const operator()(const Scalar lhs) const + { + return(this->derived()(lhs)); + } + + #if defined(HAS_VECTOR) + using Vector= typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs) const + { + return(this->derived()(lhs)); + } + + /* + + Predicated operation when exists (Helium) + + */ + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs,const pred_t p0) const + { + return(this->derived()(lhs,p0)); + } + + /* + Vector const to_vector(const Scalar lhs) const + { + return(this->derived().to_vector(lhs)); + } + */ +#endif +}; + +template +struct _BinaryOperator{ + Derived& derived() {return(static_cast(*this));} + + Derived const& derived() const {return(static_cast(*this));} + + Scalar const operator()(const Scalar lhs, + const Scalar rhs) const + { + return(this->derived()(lhs,rhs)); + } + + #if defined(HAS_VECTOR) + using Vector= typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + + Vector const operator()(const Vector lhs, + const Vector rhs) const + { + return(this->derived()(lhs,rhs)); + } + + Vector const operator()(const Vector lhs, + const Scalar rhs) const + { + return(this->derived()(lhs,rhs)); + } + + Vector const operator()(const Scalar lhs, + const Vector rhs) const + { + return(this->derived()(lhs,rhs)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Vector rhs, + const pred_t p0) const + { + return(this->derived()(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Scalar rhs, + const pred_t p0) const + { + return(this->derived()(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Scalar lhs, + const Vector rhs, + const pred_t p0) const + { + return(this->derived()(lhs,rhs,p0)); + } +#endif +}; + +/***************** + * + * BINARY + * + */ +template +struct _AddOp:_BinaryOperator> +{ + Scalar const operator()(const Scalar lhs, + const Scalar rhs) const { + return(lhs + rhs); + } + +#if defined(HAS_VECTOR) + using Vector=typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs, + const Vector rhs) const + { + return(inner::vadd(lhs,rhs)); + } + + Vector const operator()(const Vector lhs, + const Scalar rhs) const + { + return(inner::vadd(lhs,rhs)); + } + + Vector const operator()(const Scalar lhs, + const Vector rhs) const + { + return(inner::vadd(lhs,rhs)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Vector rhs, + const pred_t p0) const + { + return(inner::vadd(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Scalar rhs, + const pred_t p0) const + { + return(inner::vadd(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Scalar lhs, + const Vector rhs, + const 
pred_t p0) const + { + return(inner::vadd(lhs,rhs,p0)); + } +#endif +}; + +template +struct _SubOp:_BinaryOperator> +{ + Scalar const operator()(const Scalar lhs, + const Scalar rhs) const { + return(lhs - rhs); + } + +#if defined(HAS_VECTOR) + using Vector=typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs, + const Vector rhs) const + { + return(inner::vsub(lhs,rhs)); + } + + Vector const operator()(const Vector lhs, + const Scalar rhs) const + { + return(inner::vsub(lhs,rhs)); + } + + Vector const operator()(const Scalar lhs, + const Vector rhs) const + { + return(inner::vsub(lhs,rhs)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Vector rhs, + const pred_t p0) const + { + return(inner::vsub(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Scalar rhs, + const pred_t p0) const + { + return(inner::vsub(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Scalar lhs, + const Vector rhs, + const pred_t p0) const + { + return(inner::vsub(lhs,rhs,p0)); + } +#endif +}; + +template +struct _MulOp:_BinaryOperator> +{ + Scalar const operator()(const Scalar lhs, + const Scalar rhs) const { + return(lhs * rhs); + } + +#if defined(HAS_VECTOR) + using Vector= typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs, + const Vector rhs) const + { + return(inner::vmul(lhs,rhs)); + } + + Vector const operator()(const Vector lhs, + const Scalar rhs) const + { + return(inner::vmul(lhs,rhs)); + } + + Vector const operator()(const Scalar lhs, + const Vector rhs) const + { + return(inner::vmul(lhs,rhs)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Vector rhs, + const pred_t p0) const + { + return(inner::vmul(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Scalar rhs, + const pred_t p0) const + { + return(inner::vmul(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Scalar lhs, + const Vector rhs, + const pred_t p0) const + { + return(inner::vmul(lhs,rhs,p0)); + } +#endif +}; + +/***************** + * + * UNARY + * + */ +template +struct _NegOp:_UnaryOperator> +{ + Scalar const operator()(const Scalar lhs) const { + return(-lhs); + } + +#if defined(HAS_VECTOR) + using Vector= typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs) const + { + return(inner::vneg(lhs)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const pred_t p0) const + { + return(inner::vneg(lhs,p0)); + } + + +#endif +}; + +template +struct _NoOp:_UnaryOperator> +{ + Scalar const operator()(const Scalar lhs) const { + return(lhs); + } + +#if defined(HAS_VECTOR) + using Vector= typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs) const + { + return(lhs); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const pred_t p0) const + { + (void)p0; + return(lhs); + } + +#endif +}; + +/*! 
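+
+   These functors are what the _Unary and _Binary expression nodes of fusion.hpp
+   apply element by element (and lane by lane through inner::vadd, inner::vsub,
+   inner::vmul and inner::vneg when HAS_VECTOR is defined). A scalar-only sketch of
+   a new operator following the same CRTP pattern as _NegOp (the name _AbsOp and
+   this exact base-class signature are assumptions):
+
+     template<typename Scalar>
+     struct _AbsOp : _UnaryOperator<Scalar,_AbsOp<Scalar>>
+     {
+        Scalar const operator()(const Scalar lhs) const {
+           return(lhs < Scalar{} ? -lhs : lhs);
+        }
+        // vector and predicated overloads (built on an inner:: helper) would be
+        // needed to benefit from HAS_VECTOR builds
+     };
+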
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/matrix.hpp b/dsppp/Include/dsppp/matrix.hpp new file mode 100644 index 000000000..7836d6bec --- /dev/null +++ b/dsppp/Include/dsppp/matrix.hpp @@ -0,0 +1,647 @@ +// -*- C++ -*- +/** @file */ +#pragma once + + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" +#include "vec.hpp" +#include "matrix_impl.hpp" +#include "matrix_view.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup Matrix Matrixes + * \ingroup DSPPP + * @{ + */ + +template typename A> +struct traits> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits
<P>
::vector Vector; +#endif +}; + +template<typename P,int S> +struct traits<MatrixView<P,S>> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits
<P>
::vector Vector; +#endif +}; + +template<typename P,int R,int C,template<int> typename A> +struct traits<Matrix<P,R,C,A>&> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits
<P>
::vector Vector; +#endif +}; + +template<typename P,int S> +struct traits<MatrixView<P,S>&> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits
<P>
::vector Vector; +#endif +}; + + +template typename Allocator> +struct IsVector> +{ + constexpr static bool value = true; +}; + + + +template typename Allocator> +struct HasStaticStride> +{ + constexpr static bool value = (C>0); +}; + +template typename Allocator> +struct StaticStride> +{ + constexpr static index_t value = C; +}; + +template typename Allocator> +struct IsMatrix> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct HasMatrixIndexing> +{ + constexpr static bool value = true; +}; + +template +struct IsMatrix> +{ + constexpr static bool value = true; +}; + +template +struct HasStaticStride> +{ + constexpr static bool value = (S>0); +}; + +template +struct StaticStride> +{ + constexpr static index_t value = S; +}; + +template +struct HasMatrixIndexing> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct IsVector&> +{ + constexpr static bool value = true; +}; + + +template +struct IsVector> +{ + constexpr static bool value = true; +}; + +template +struct IsVector&> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct HasStaticStride&> +{ + constexpr static bool value = (C>0); +}; + +template typename Allocator> +struct StaticStride&> +{ + constexpr static index_t value = C; +}; + +template typename Allocator> +struct HasMatrixIndexing&> +{ + constexpr static bool value = true; +}; + + +template +struct IsMatrix&> +{ + constexpr static bool value = true; +}; + +template +struct HasMatrixIndexing&> +{ + constexpr static bool value = true; +}; + +template +struct HasStaticStride&> +{ + constexpr static bool value = (S>0); +}; + +template +struct StaticStride&> +{ + constexpr static index_t value = S; +}; + +template typename Allocator> +struct ElementType> +{ + typedef P type; +}; + + +template +struct ElementType> +{ + typedef P type; +}; + +template typename Allocator> +struct ElementType&> +{ + typedef P type; +}; + +template +struct ElementType&> +{ + typedef P type; +}; + +template typename Allocator> +struct StaticLength> +{ + constexpr static vector_length_t value = (R*C<0) ? 0 : R*C; +}; + +template +struct StaticLength> +{ + constexpr static vector_length_t value = 0; +}; + +template typename Allocator> +struct StaticLength&> +{ + constexpr static vector_length_t value = (R*C<0) ? 
0 : R*C; +}; + +template +struct StaticLength&> +{ + constexpr static vector_length_t value = 0 ; +}; + +template typename Allocator> +struct IsDynamic> +{ + constexpr static bool value = (R<0) || (C<0); +}; + +template +struct IsDynamic> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct IsDynamic&> +{ + constexpr static bool value = (R<0) || (C<0); +}; + +template +struct IsDynamic&> +{ + constexpr static bool value = true; +}; + +/* + + +Compatibility of vector and matrix dimensions at build time + +*/ + +template +struct NbRows +{ + constexpr static vector_length_t value = DYNAMIC; +}; + +template typename Allocator> +struct NbRows> +{ + constexpr static vector_length_t value = R; +}; + +template typename Allocator> +struct NbRows&> +{ + constexpr static vector_length_t value = R; +}; + +template +struct NbCols +{ + constexpr static vector_length_t value = DYNAMIC; +}; + +template typename Allocator> +struct NbCols> +{ + constexpr static vector_length_t value = C; +}; + +template typename Allocator> +struct NbCols&> +{ + constexpr static vector_length_t value = C; +}; + + +template +struct CompatibleStaticMatVecProduct +{ + constexpr static bool value = + is_only_vector() && + HasMatrixIndexing::value && + (NbCols::value == StaticLength::value) && + !IsDynamic::value + && SameElementType::value; + +}; + +/* MB IsMatrix because we need transpose operator */ +template +struct CompatibleStaticMatMatProduct +{ + constexpr static bool value = + HasMatrixIndexing::value && + IsMatrix::value && + (NbCols::value == NbRows::value) && + !IsDynamic::value && + SameElementType::value; + +}; + +template +struct CompatibleDynamicMatVecProduct +{ + constexpr static bool value = + HasMatrixIndexing::value && + IsDynamic::value && + is_only_vector() && + SameElementType::value; + +}; + +/* MB IsMatrix because we need transpose operator */ +template +struct CompatibleDynamicMatMatProductStaticStride +{ + constexpr static bool value = + HasMatrixIndexing::value && + IsMatrix::value && + IsDynamic::value && + HasStaticStride::value && + SameElementType::value; +}; + +template +struct CompatibleDynamicMatMatProductDynamicStride +{ + constexpr static bool value = + HasMatrixIndexing::value && + IsMatrix::value && + IsDynamic::value && + !HasStaticStride::value && + SameElementType::value; +}; + +template +struct CompatibleDynamicMatMatProduct +{ + constexpr static bool value = + HasMatrixIndexing::value && + IsMatrix::value && + IsDynamic::value && + SameElementType::value; +}; + +template +struct OutputVector { + typedef Vector::Scalar, + OutputVectorDim::value,TMP_ALLOC> type; +}; + +template +struct OutputMatrix { + constexpr static bool dynamic = (NbRows::value < 0) || (NbCols::value < 0); + constexpr static vector_length_t nbrows = dynamic ? DYNAMIC : NbRows::value; + constexpr static vector_length_t nbcols = dynamic ? 
DYNAMIC : NbCols::value; + + typedef Matrix::Scalar,nbrows,nbcols,TMP_ALLOC> type; +}; + + + +template +struct OutputVectorDim +{ + constexpr static vector_length_t value = DYNAMIC; +}; + +template typename Allocator> +struct OutputVectorDim> +{ + constexpr static vector_length_t value = R; +}; + +template typename Allocator> +struct OutputVectorDim&> +{ + constexpr static vector_length_t value = R; +}; + + +template +struct VecRef> +{ + typedef MatrixView type; + static type ref(const MatrixView&a){ + return(a); + }; +}; + +template typename A> +struct VecRef,((R>0) && (C>0))> +{ + typedef const Matrix& type; + static type ref(const Matrix&a,typename std::enable_if<(R>0) && (C>0)>::type* = nullptr){ + return(a); + }; +}; + +template typename A> +struct VecRef,((R<0) || (C<0))> +{ + typedef MatrixView type; + static type ref(const Matrix&a,typename std::enable_if<(R<0) || (C<0)>::type* = nullptr){ + return(type(a,a.rows(),a.columns())); + }; +}; + + +/***************** + * + * + * Fused matrix operators + * + ****************/ + +template +struct _Outer: _Expr<_Outer> +{ + using Scalar = typename traits::Scalar; +#if defined(HAS_VECTOR) + using Vector = typename traits::Vector; +#endif + _Outer(const LHS &lhs, + const RHS &rhs, + const _BinaryOperator &op): + lhs_(lhs),rhs_(rhs),op_(op){ + } + + + _Outer(const _Outer &other): + lhs_(other.lhs_),rhs_(other.rhs_),op_(other.op_){ + } + + _Outer& operator=(const _Outer& other) = delete; + _Outer& operator=(_Outer&& other) = delete; + + _Outer(_Outer &&other): + lhs_(std::move(other.lhs_)),rhs_(std::move(other.rhs_)),op_(std::move(other.op_)) + { + } + + + + template::value && IsVector::value,bool>::type = true> + vector_length_t length() const { + return(lhs_.length() * rhs_.length()); + } + + template::value,bool>::type = true> + vector_length_t rows() const { + return(lhs_.length()); + } + + + + template::value,bool>::type = true> + vector_length_t columns() const { + return(rhs_.length()); + } + + + + template::value && + IsVector::value,bool>::type = true> + Scalar const operator()(const index_t r,const index_t c) const + { + return(op_(lhs_[r],rhs_[c])); + } + + +#if defined(HAS_VECTOR) + /************* + * + * For matrix + * + */ + + /* V + V */ + template::value && + IsVector::value,bool>::type = true> + Vector const matrix_op(const index_t r,const index_t c) const + { + return(op_(lhs_[r],rhs_.vector_op(c))); + } + + template::value && + IsVector::value,bool>::type = true> + Vector const matrix_op_tail(const index_t r,const index_t c,const vector_length_t remaining) const + { + return(op_(lhs_[r],rhs_.vector_op_tail(c,remaining),inner::vctpq::mk(remaining))); + } + + +#endif + const LHS lhs_; + const RHS rhs_; + const _BinaryOperator op_; +}; + +template +struct IsVector<_Outer> +{ + constexpr static bool value = false; +}; + +template +struct HasMatrixIndexing<_Outer> +{ + constexpr static bool value = true; +}; + +template +struct StaticLength<_Outer> +{ + constexpr static vector_length_t value = StaticLength::value * StaticLength::value; +}; + +template +struct IsDynamic<_Outer> +{ + constexpr static vector_length_t value = IsDynamic::value || IsDynamic::value; +}; + +template +struct Complexity<_Outer> +{ + constexpr static int lhsv = Complexity::value; + constexpr static int rhsv = Complexity::value; + constexpr static int value = lhsv + rhsv + 1; +}; + +template +struct ElementType<_Outer> +{ + typedef typename ElementType::type type; +}; + +template +struct traits<_Outer> +{ + typedef typename traits::Scalar Scalar; +#if 
defined(HAS_VECTOR) + typedef typename traits::Vector Vector; +#endif +}; + +template +struct VecRef<_Outer> +{ + typedef _Outer type; + static type ref(const _Outer&a){ + return(a); + }; +}; + +template +struct NbRows<_Outer> +{ + constexpr static vector_length_t value = NbRows::value; +}; + + +template +struct NbCols<_Outer> +{ + constexpr static vector_length_t value = NbCols::value; +}; + + +template(),bool>::type = true> +inline auto outer(const VA&a,const VB&b) +{ + //constexpr int NBROWS = StaticLength::value; + //constexpr int NBCOLS = StaticLength::value; + + //using T = typename traits::Scalar; + + //Matrix res; + //_outer(res,a,b); + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + using VecRHS = VecRef; + + return(_Outer>(VecLHS::ref(a),VecRHS::ref(b),_MulOp())); + + +} + +/*! @} */ +} diff --git a/dsppp/Include/dsppp/matrix_impl.hpp b/dsppp/Include/dsppp/matrix_impl.hpp new file mode 100644 index 000000000..759e11cbc --- /dev/null +++ b/dsppp/Include/dsppp/matrix_impl.hpp @@ -0,0 +1,612 @@ +// -*- C++ -*- +/** @file */ +#pragma once + + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" +#include "vec.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup Matrix + * @{ + */ + +/******************** + * + * MATRIX + * + ********************/ + +struct Slice +{ + Slice(const index_t s,const index_t e):start(s),stop(e){}; + + const index_t start; + const index_t stop; +}; + +template typename Allocator = TMP_ALLOC> +struct Matrix:Vector +{ + constexpr vector_length_t rows() const {return(R);} + constexpr vector_length_t columns() const {return(C);} + constexpr uint32_t stride() const {return(C);} + + + Matrix():Vector(){}; + explicit Matrix(P init_val):Vector(init_val){}; + + Matrix(const Matrix& other) = default; + Matrix(Matrix&& other) = default; + + template typename OtherAllocator> + explicit Matrix(const Matrix& other):Vector() + { + eval(*this,+other,(vector_length_t)(R*C),CURRENT_ARCH); + }; + + /* Applies only when the AST does not contain any dynamic MatrixView */ + template::value,bool>::type = true> + Matrix(const _Expr& other):Vector(other) + { + }; + + /* Applies only when AST is containing any dynamic MatrixView */ + template(),bool>::type = true> + Matrix(const _Expr& other):Vector() + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + }; + + template::value,bool>::type = true> + Matrix& operator=(const _Expr& other) + { + eval(*this,other.derived(),(vector_length_t)R*C,CURRENT_ARCH); + return(*this); + } + + /* Applies only when AST is containing any dynamic MatrixView */ + template(),bool>::type = true> + Matrix& operator=(const _Expr& other) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + } + + MatrixView sub(const index_t rs,const index_t cs) + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs),nb_rows,nb_cols)); + } + + const MatrixView sub(const index_t rs,const index_t cs) const + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs),nb_rows,nb_cols)); + } + + MatrixView sub(const Slice &rs,const index_t cs) + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs.start*stride()+cs),nb_rows,nb_cols)); + } + + const MatrixView sub(const Slice &rs,const index_t cs) const + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs.start*stride()+cs),nb_rows,nb_cols)); + } + + MatrixView sub(const index_t rs,const Slice &cs) + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs.start),nb_rows,nb_cols)); + } + + const MatrixView sub(const index_t rs,const Slice &cs) const + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs.start),nb_rows,nb_cols)); + } + + MatrixView sub(const Slice& rs,const Slice& cs) + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base
<P>
::ptr(rs.start*stride()+cs.start),nb_rows,nb_cols)); + } + + const MatrixView sub(const Slice& rs,const Slice& cs) const + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base
<P>
::ptr(rs.start*stride()+cs.start),nb_rows,nb_cols)); + } + + MatrixView sub(const index_t rs, + const index_t re, + const index_t cs, + const index_t ce) + { + const vector_length_t nb_rows = re - rs; + const vector_length_t nb_cols = ce - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs),nb_rows,nb_cols)); + } + + const MatrixView sub(const index_t rs, + const index_t re, + const index_t cs, + const index_t ce) const + { + const vector_length_t nb_rows = re - rs; + const vector_length_t nb_cols = ce - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs),nb_rows,nb_cols)); + } + + + Matrix& operator=(const Matrix& other) = default; + + Matrix& operator=(Matrix&& other) = default; + + P& operator()(const index_t r,const index_t c) + { + return(Vector_Base
<P>
::ptr()[r*C+c]); + } + + P& operator()(const index_t r,const index_t c) const + { + return(Vector_Base
<P>
::ptr()[r*C+c]); + } + + + friend std::ostream& operator<< (std::ostream& stream, const Matrix& other) { + int c=0; + for(index_t k=0;k + VectorView row(const index_t i,const index_t start=0,const index_t stop=C) + { + return(VectorView(*this,i*stride()+start,i*stride()+stop)); + } + + template + const VectorView row(const index_t i,const index_t start=0,const index_t stop=C) const + { + return(VectorView(*this,i*stride()+start,i*stride()+stop)); + } + + + template + VectorView col(const index_t i,const index_t start=0,const index_t stop=R) + { + return(VectorView(*this,i+stride()*start,i+stride()*stop)); + } + + template + const VectorView col(const index_t i,const index_t start=0,const index_t stop=R) const + { + return(VectorView(*this,i+stride()*start,i+stride()*stop)); + } + + template::value && + (RA == CA) && (RA>0) && + SameElementType::value,bool>::type = true> + static Matrix diagonal(const VA& a) + { + Matrix res; + _diagonal(res,a,RA); + return(res); + } + + template::value && + (RA == CA) && (RA>0) && + SameElementType::value,bool>::type = true> + void fill_diagonal(const VA& a) + { + _fill_diagonal(*this,a,RA); + } + + template0),bool>::type = true> + static Matrix identity() + { + Matrix res; + _identity(res,RA); + return(res); + } + + Matrix create() const + { + Matrix res; + return(res); + } + + Matrix transpose() const + { + Matrix res; + transposeTo(res,*this); + return(res); + } + +#if defined(HAS_VECTOR) + using VectorType = typename vector_traits
<P>
::vector; + void matrix_store(const index_t row, + const index_t col, + const VectorType val) const + { + Vector_Base
<P>
::vector_store(row*C + col,val); + } + +#if defined(HAS_PREDICATED_LOOP) + void matrix_store_tail(const index_t row, + const index_t col, + const vector_length_t remaining, + const VectorType val) const + { + Vector_Base
<P>
::vector_store_tail(row*C + col,remaining,val); + } + + VectorType const matrix_op_tail(const index_t row, + const index_t col, + const vector_length_t remaining) const + { + return(Vector_Base
<P>
::vector_op_tail(row*C + col,remaining)); + } +#endif + + VectorType const matrix_op(const index_t row, + const index_t col) const + { + return(Vector_Base
<P>
::vector_op(row*C + col)); + } +#endif + +}; + +template typename Allocator> +struct Matrix:Vector +{ + vector_length_t rows() const {return(rows_);} + vector_length_t columns() const {return(columns_);} + uint32_t stride() const {return(columns_);} + + + explicit Matrix(vector_length_t r,vector_length_t c): + Vector(r*c),rows_(r),columns_(c){}; + explicit Matrix(vector_length_t r,vector_length_t c,P init_val): + Vector(r*c,init_val),rows_(r),columns_(c){}; + + Matrix(const Matrix& other) = default; + Matrix(Matrix&& other) = default; + + P& operator()(const index_t r,const index_t c) + { + return(Vector_Base
<P>
::ptr()[r*columns()+c]); + } + + P& operator()(const index_t r,const index_t c) const + { + return(Vector_Base
<P>
::ptr()[r*columns()+c]); + } + + template typename OtherAllocator> + explicit Matrix(const Matrix& other): + Vector(other.rows()*other.columns()), + rows_(other.rows()),columns_(other.columns()) + { + if ((other.rows() == rows()) && (other.columns() == columns())) + { + eval(*this,+other,(vector_length_t)(other.rows()*other.columns()),CURRENT_ARCH); + } + }; + + template::value,bool>::type = true> + Matrix(const _Expr& other):Vector(other), + rows_(other.rows()),columns_(other.columns()) + { + }; + + template(),bool>::type = true> + Matrix(const _Expr& other): + Vector(other.rows()*other.columns()), + rows_(other.rows()),columns_(other.columns()) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + }; + + template::value,bool>::type = true> + Matrix& operator=(const _Expr& other) + { + eval(*this,other.derived(),rows()*columns(),CURRENT_ARCH); + return(*this); + }; + + + template(),bool>::type = true> + Matrix& operator=(const _Expr& other) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + Matrix& operator=(const Matrix& other) = default; + + Matrix& operator=(Matrix&& other) = default; + + friend std::ostream& operator<< (std::ostream& stream, const Matrix& other) { + int c=0; + for(index_t k=0;k::value && + SameElementType::value,bool>::type = true> + static Matrix diagonal(const VA& a) + { + Matrix res(a.length(),a.length()); + _diagonal(res,a,a.length()); + return(res); + } + + template::value && + SameElementType::value,bool>::type = true> + void fill_diagonal(const VA& a) + { + _fill_diagonal(*this,a,this->length()); + } + + static Matrix identity(const vector_length_t l) + { + Matrix res(l,l); + _identity(res,l); + return(res); + } + + Matrix create() const + { + Matrix res(rows(),columns()); + return(res); + } + + Matrix transpose() const + { + Matrix res(columns(),rows()); + transposeTo(res,*this); + return(res); + } + + VectorView row(const index_t i,const index_t start=0) + { + return(VectorView(*this,i*this->stride()+start,i*this->stride()+this->columns())); + } + + VectorView row(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(*this,i*this->stride()+start,i*this->stride()+stop)); + } + + const VectorView row(const index_t i,const index_t start=0) const + { + return(VectorView(*this,i*this->stride()+start,i*this->stride()+this->columns())); + } + + const VectorView row(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(*this,i*this->stride()+start,i*this->stride()+stop)); + } + + template + VectorView col(const index_t i,const index_t start=0) + { + return(VectorView(*this,i+this->stride()*start,i+this->stride()*this->rows(),this->stride()*CS)); + } + + template + VectorView col(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(*this,i+this->stride()*start,i+this->stride()*stop,this->stride()*CS)); + } + + template + const VectorView col(const index_t i,const index_t start=0) const + { + return(VectorView(*this,i+this->stride()*start,i+this->stride()*this->rows(),this->stride()*CS)); + } + + template + const VectorView col(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(*this,i+this->stride()*start,i+this->stride()*stop,this->stride()*CS)); + } + +#if defined(HAS_VECTOR) + using VectorType = typename vector_traits
<P>
::vector; + void matrix_store(const index_t row, + const index_t col, + const VectorType val) const + { + Vector_Base
<P>
::vector_store(row*stride() + col,val); + } + +#if defined(HAS_PREDICATED_LOOP) + void matrix_store_tail(const index_t row, + const index_t col, + const vector_length_t remaining, + const VectorType val) const + { + Vector_Base
<P>
::vector_store_tail(row*stride() + col,remaining,val); + } + + VectorType const matrix_op_tail(const index_t row, + const index_t col, + const vector_length_t remaining) const + { + return(Vector_Base
<P>
::vector_op_tail(row*stride() + col,remaining)); + } +#endif + + VectorType const matrix_op(const index_t row, + const index_t col) const + { + return(Vector_Base
<P>
::vector_op(row*stride() + col)); + } +#endif + + MatrixView sub(const index_t rs,const index_t cs) + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs),nb_rows,nb_cols,stride())); + } + + const MatrixView sub(const index_t rs,const index_t cs) const + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs),nb_rows,nb_cols,stride())); + } + + MatrixView sub(const Slice &rs,const index_t cs) + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs.start*stride()+cs),nb_rows,nb_cols,stride())); + } + + const MatrixView sub(const Slice &rs,const index_t cs) const + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs.start*stride()+cs),nb_rows,nb_cols,stride())); + } + + MatrixView sub(const index_t rs,const Slice &cs) + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs.start),nb_rows,nb_cols,stride())); + } + + const MatrixView sub(const index_t rs,const Slice &cs) const + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs.start),nb_rows,nb_cols,stride())); + } + + MatrixView sub(const Slice& rs,const Slice& cs) + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base
<P>
::ptr(rs.start*stride()+cs.start),nb_rows,nb_cols,stride())); + } + + const MatrixView sub(const Slice& rs,const Slice& cs) const + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base
<P>
::ptr(rs.start*stride()+cs.start),nb_rows,nb_cols,stride())); + } + + MatrixView sub(const index_t rs, + const index_t re, + const index_t cs, + const index_t ce) + { + const vector_length_t nb_rows = re - rs; + const vector_length_t nb_cols = ce - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs),nb_rows,nb_cols,stride())); + } + + const MatrixView sub(const index_t rs, + const index_t re, + const index_t cs, + const index_t ce) const + { + const vector_length_t nb_rows = re - rs; + const vector_length_t nb_cols = ce - cs; + + return(MatrixView(Vector_Base
<P>
::ptr(rs*stride()+cs),nb_rows,nb_cols,stride())); + } + +protected: + vector_length_t rows_,columns_; +}; + + +/*! @} */ +} \ No newline at end of file diff --git a/dsppp/Include/dsppp/matrix_view.hpp b/dsppp/Include/dsppp/matrix_view.hpp new file mode 100644 index 000000000..d37bf9dce --- /dev/null +++ b/dsppp/Include/dsppp/matrix_view.hpp @@ -0,0 +1,751 @@ +// -*- C++ -*- +/** @file */ +#pragma once + + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" +#include "vec.hpp" +#include "matrix_impl.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup Matrix + * @{ + */ + +template +struct MatrixView +{ + vector_length_t rows() const {return(nb_rows_);} + vector_length_t columns() const {return(nb_cols_);} + constexpr uint32_t stride() const {return(S);} + + explicit MatrixView(T* v, + const vector_length_t rows, + const vector_length_t cols): + v_(v),nb_rows_(rows),nb_cols_(cols){}; + + explicit MatrixView(const Vector_Base &v, + const vector_length_t rows, + const vector_length_t cols): + v_(v.ptr()),nb_rows_(rows),nb_cols_(cols){}; + + virtual ~MatrixView() {}; + + MatrixView(const MatrixView& other): + v_(other.v_), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_){}; + + MatrixView(MatrixView&& other) : + v_(other.v_), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_){}; + + + MatrixView& operator=(const MatrixView& other) = delete; + MatrixView& operator=(MatrixView&& other) = delete; + + T& operator()(const index_t r,const index_t c) + { + return(v_[r*stride()+c]); + } + + T const operator()(const index_t r,const index_t c) const + { + return(v_[r*stride()+c]); + } + + + template + MatrixView& operator=(const _Expr&other) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + } + + MatrixView& operator=(const T val) + { + _Fill2D(*this,val,rows(),columns(),CURRENT_ARCH); + + return(*this); + } + + + template + MatrixView& operator +=(const _Expr& other) + { + eval2D(*this,*this + other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator +=(const MatrixView& other) + { + eval2D(*this,*this + other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator +=(const T other) + { + eval2D(*this,*this + other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + template + MatrixView& operator -=(const _Expr& other) + { + eval2D(*this,*this - other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + + MatrixView& operator -=(const MatrixView& other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator -=(const T other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + template + MatrixView& operator *=(const _Expr& other) + { + eval2D(*this,*this * other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator *=(const MatrixView& other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator *=(const T other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + friend std::ostream& operator<< (std::ostream& stream, const MatrixView& other) { + for(index_t row=0;row row(const index_t i,const index_t start=0) + { + return(VectorView(v_,i*stride()+start,i*stride()+columns())); + } + + VectorView 
row(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(v_,i*stride()+start,i*stride()+stop)); + } + + const VectorView row(const index_t i,const index_t start=0) const + { + return(VectorView(v_,i*stride()+start,i*stride()+columns())); + } + + const VectorView row(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(v_,i*stride()+start,i*stride()+stop)); + } + + template + VectorView col(const index_t i,const index_t start=0) + { + return(VectorView(v_,i+stride()*start,i+stride()*rows())); + } + + template + VectorView col(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(v_,i+stride()*start,i+stride()*stop)); + } + + template + const VectorView col(const index_t i,const index_t start=0) const + { + return(VectorView(v_,i+stride()*start,i+stride()*rows())); + } + + template + const VectorView col(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(v_,i+stride()*start,i+stride()*stop)); + } + + #if defined(HAS_VECTOR) + using VectorType = typename vector_traits::vector; + void matrix_store(const index_t row, + const index_t col, + const VectorType val) const + { + inner::vstore1<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),val); + } + +#if defined(HAS_PREDICATED_LOOP) + void matrix_store_tail(const index_t row, + const index_t col, + const vector_length_t remaining, + const VectorType val) const + { + inner::vstore1_z<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),val,remaining,inner::vctpq::mk(remaining)); + } + + VectorType const matrix_op_tail(const index_t row, + const index_t col, + const vector_length_t remaining) const + { + return(inner::vload1_z<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),remaining,inner::vctpq::mk(remaining))); + } +#endif + + VectorType const matrix_op(const index_t row, + const index_t col) const + { + return(inner::vload1<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]))); + } +#endif + + template::value && + SameElementType::value,bool>::type = true> + void fill_diagonal(const VA& a) + { + _fill_diagonal(*this,a,this->length()); + } + + Matrix transpose() const + { + Matrix res(columns(),rows()); + transposeTo(res,*this); + return(res); + } + + Matrix create() const + { + Matrix res(rows(),columns()); + return(res); + } + + T* ptr() const {return(v_);} + const T* const_ptr() const {return(v_);} + +protected: + T* const v_; + const vector_length_t nb_rows_; + const vector_length_t nb_cols_; +}; + +/* + +When the stride if not known at build time AND different +from the nb_cols_ + +*/ +template +struct MatrixView +{ + vector_length_t rows() const {return(nb_rows_);} + vector_length_t columns() const {return(nb_cols_);} + uint32_t stride() const {return(stride_);} + + explicit MatrixView(T* v, + const vector_length_t rows, + const vector_length_t cols, + const uint32_t stride): + v_(v),nb_rows_(rows),nb_cols_(cols),stride_(stride){}; + + explicit MatrixView(const Vector_Base &v, + const vector_length_t rows, + const vector_length_t cols, + const uint32_t stride): + v_(v.ptr()),nb_rows_(rows),nb_cols_(cols),stride_(stride){}; + + virtual ~MatrixView() {}; + + MatrixView(const MatrixView& other): + v_(other.v_), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_),stride_(other.stride_){}; + + MatrixView(MatrixView&& other) : + v_(other.v_), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_),stride_(other.stride_){}; + + + MatrixView& operator=(const MatrixView& other) = delete; + 
MatrixView& operator=(MatrixView&& other) = delete; + + T& operator()(const index_t r,const index_t c) + { + return(v_[r*stride()+c]); + } + + T const operator()(const index_t r,const index_t c) const + { + return(v_[r*stride()+c]); + } + + + template + MatrixView& operator=(const _Expr&other) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + } + + MatrixView& operator=(const T val) + { + _Fill2D(*this,val,rows(),columns(),CURRENT_ARCH); + + return(*this); + } + + + template + MatrixView& operator +=(const _Expr& other) + { + eval2D(*this,*this + other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator +=(const MatrixView& other) + { + eval2D(*this,*this + other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator +=(const T other) + { + eval2D(*this,*this + other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + template + MatrixView& operator -=(const _Expr& other) + { + eval2D(*this,*this - other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + + MatrixView& operator -=(const MatrixView& other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator -=(const T other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + template + MatrixView& operator *=(const _Expr& other) + { + eval2D(*this,*this * other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator *=(const MatrixView& other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator *=(const T other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + friend std::ostream& operator<< (std::ostream& stream, const MatrixView& other) { + for(index_t row=0;row row(const index_t i,const index_t start=0) + { + return(VectorView(v_,i*stride()+start,i*stride()+columns())); + } + + VectorView row(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(v_,i*stride()+start,i*stride()+stop)); + } + + const VectorView row(const index_t i,const index_t start=0) const + { + return(VectorView(v_,i*stride()+start,i*stride()+columns())); + } + + const VectorView row(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(v_,i*stride()+start,i*stride()+stop)); + } + + template + VectorView col(const index_t i,const index_t start=0) + { + return(VectorView(v_,i+stride()*start,i+stride()*rows(),stride()*CS)); + } + + template + VectorView col(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(v_,i+stride()*start,i+stride()*stop,stride()*CS)); + } + + template + const VectorView col(const index_t i,const index_t start=0) const + { + return(VectorView(v_,i+stride()*start,i+stride()*rows(),stride()*CS)); + } + + template + const VectorView col(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(v_,i+stride()*start,i+stride()*stop,stride()*CS)); + } + + #if defined(HAS_VECTOR) + using VectorType = typename vector_traits::vector; + void matrix_store(const index_t row, + const index_t col, + const VectorType val) const + { + inner::vstore1<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),val); + } + +#if defined(HAS_PREDICATED_LOOP) + void matrix_store_tail(const index_t row, + const index_t col, + const vector_length_t remaining, + const VectorType val) const + { + 
inner::vstore1_z<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),val,remaining,inner::vctpq::mk(remaining)); + } + + VectorType const matrix_op_tail(const index_t row, + const index_t col, + const vector_length_t remaining) const + { + return(inner::vload1_z<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),remaining,inner::vctpq::mk(remaining))); + } +#endif + + VectorType const matrix_op(const index_t row, + const index_t col) const + { + return(inner::vload1<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]))); + } +#endif + + template::value && + SameElementType::value,bool>::type = true> + void fill_diagonal(const VA& a) + { + _fill_diagonal(*this,a,this->length()); + } + + Matrix transpose() const + { + Matrix res(columns(),rows()); + transposeTo(res,*this); + return(res); + } + + Matrix create() const + { + Matrix res(rows(),columns()); + return(res); + } + + T* ptr() const {return(v_);} + + const T* const_ptr() const {return(v_);} + + +protected: + T* const v_; + const vector_length_t nb_rows_; + const vector_length_t nb_cols_; + const uint32_t stride_; +}; + +/* + + +Dynamic but with stride == nb_cols_ + +*/ + +template +struct MatrixView:VectorView +{ + vector_length_t rows() const {return(nb_rows_);} + vector_length_t columns() const {return(nb_cols_);} + uint32_t stride() const {return(nb_cols_);} + + explicit MatrixView(T* v, + const vector_length_t rows, + const vector_length_t cols): + VectorView(v,0,rows*cols), + nb_rows_(rows),nb_cols_(cols){}; + + explicit MatrixView(const Vector_Base &v, + const vector_length_t rows, + const vector_length_t cols): + VectorView(v.ptr(),0,rows*cols), + nb_rows_(rows),nb_cols_(cols){}; + + virtual ~MatrixView() {}; + + MatrixView(const MatrixView& other): + VectorView(other), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_){}; + + MatrixView(MatrixView&& other) : + VectorView(std::forward(other)), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_){}; + + + MatrixView& operator=(const MatrixView& other) = delete; + MatrixView& operator=(MatrixView&& other) = delete; + + T& operator()(const index_t r,const index_t c) + { + return(&(*this)[r*stride()+c]); + } + + T const operator()(const index_t r,const index_t c) const + { + return((*this)[r*stride()+c]); + } + + + template + MatrixView& operator=(const _Expr&other) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + } + + MatrixView& operator=(const T val) + { + _Fill2D(*this,val,rows(),columns(),CURRENT_ARCH); + + return(*this); + } + + + template + MatrixView& operator +=(const _Expr& other) + { + eval2D(*this,*this + other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator +=(const MatrixView& other) + { + eval2D(*this,*this + other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator +=(const T other) + { + eval2D(*this,*this + other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + template + MatrixView& operator -=(const _Expr& other) + { + eval2D(*this,*this - other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + + MatrixView& operator -=(const MatrixView& other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator -=(const T other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + template + MatrixView& operator *=(const _Expr& other) + { + eval2D(*this,*this * other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); 
+ }; + + MatrixView& operator *=(const MatrixView& other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + MatrixView& operator *=(const T other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + friend std::ostream& operator<< (std::ostream& stream, const MatrixView& other) { + for(index_t row=0;row row(const index_t i,const index_t start=0) + { + return(VectorView(this->ptr(),i*stride()+start,i*stride()+columns())); + } + + VectorView row(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(this->ptr(),i*stride()+start,i*stride()+stop)); + } + + const VectorView row(const index_t i,const index_t start=0) const + { + return(VectorView(this->ptr(),i*stride()+start,i*stride()+columns())); + } + + const VectorView row(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(this->ptr(),i*stride()+start,i*stride()+stop)); + } + + template + VectorView col(const index_t i,const index_t start=0) + { + return(VectorView(this->ptr(),i+stride()*start,i+stride()*rows(),stride()*CS)); + } + + template + VectorView col(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(this->ptr(),i+stride()*start,i+stride()*stop,stride()*CS)); + } + + template + const VectorView col(const index_t i,const index_t start=0) const + { + return(VectorView(this->ptr(),i+stride()*start,i+stride()*rows(),stride()*CS)); + } + + template + const VectorView col(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(this->ptr(),i+stride()*start,i+stride()*stop,stride()*CS)); + } + + #if defined(HAS_VECTOR) + using VectorType = typename vector_traits::vector; + void matrix_store(const index_t row, + const index_t col, + const VectorType val) const + { + inner::vstore1<1>((typename std::remove_cv::type*)(ptr(row*stride() + col)),val); + } + +#if defined(HAS_PREDICATED_LOOP) + void matrix_store_tail(const index_t row, + const index_t col, + const vector_length_t remaining, + const VectorType val) const + { + inner::vstore1_z<1>((typename std::remove_cv::type*)(ptr(row*stride() + col)),val,remaining,inner::vctpq::mk(remaining)); + } + + VectorType const matrix_op_tail(const index_t row, + const index_t col, + const vector_length_t remaining) const + { + return(inner::vload1_z<1>((typename std::remove_cv::type*)(VectorView::ptr(row*stride() + col)),remaining,inner::vctpq::mk(remaining))); + } +#endif + + VectorType const matrix_op(const index_t row, + const index_t col) const + { + return(inner::vload1<1>((typename std::remove_cv::type*)(VectorView::ptr(row*stride() + col)))); + } +#endif + + template::value && + SameElementType::value,bool>::type = true> + void fill_diagonal(const VA& a) + { + _fill_diagonal(*this,a,this->length()); + } + + Matrix transpose() const + { + Matrix res(columns(),rows()); + transposeTo(res,*this); + return(res); + } + + Matrix create() const + { + Matrix res(rows(),columns()); + return(res); + } + + +protected: + const vector_length_t nb_rows_; + const vector_length_t nb_cols_; +}; + +/*! 
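+
+   Usage sketch (illustrative only; element type, sizes and the default allocator
+   of Matrix are assumptions):
+
+     Matrix<float32_t,4,4> m(1.0f);           // static dimensions, filled with 1.0f
+     auto block = m.sub(1,1);                 // 3x3 MatrixView sharing m's storage
+     block = 0.0f;                            // _Fill2D over the view only
+     Matrix<float32_t,4,4> t = m.transpose();
+
+     Matrix<float32_t,DYNAMIC,DYNAMIC> d(5,5,0.0f);
+     auto r0 = d.row(0);                      // VectorView of the first row
+     Matrix<float32_t,DYNAMIC,DYNAMIC> o = outer(r0,r0);  // _Outer expression,
+                                              // evaluated in one eval2D pass
+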
@} */ + +} \ No newline at end of file diff --git a/dsppp/Include/dsppp/memory_pool.hpp b/dsppp/Include/dsppp/memory_pool.hpp new file mode 100644 index 000000000..7b60b4f83 --- /dev/null +++ b/dsppp/Include/dsppp/memory_pool.hpp @@ -0,0 +1,259 @@ +// -*- C++ -*- +/** @file */ +#pragma once + + +#include +#include +#include +#include "common.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup MEMORY Memory allocator + * \ingroup DSPPP + * @{ + */ + +/* + +Buffer allocator + +Can be used to build memory allocators foe vector +and matrix. + +For instance, it is usedin the Memory pool allocator + +*/ + +struct default_user_allocator_malloc_free +{ + static char * malloc(const std::size_t bytes) + { + #if !defined(MEMORY_ALLOCATION_DEBUG) + return reinterpret_cast(std::malloc(bytes)); + #else + char *ret=reinterpret_cast(std::malloc(bytes)); + if (ret==nullptr) + { + std::cout << "out of memory for " << bytes << " bytes\r\n"; + } + return(ret); + #endif + } + static void free(char * const block) + { + #if defined(MEMORY_ALLOCATION_DEBUG) + if (block==nullptr) + { + std::cout << "free null ptr \r\n"; + } + #endif + std::free(block); + } +}; + +inline void* aligned_malloc(std::size_t alignment, std::size_t size) +{ + void *ptr=std::malloc(size+alignment+sizeof(void*)); + void *aligned = + reinterpret_cast( + (reinterpret_cast(ptr)+sizeof(void*)+alignment) & ~(alignment-1) + ); + + *(static_cast(aligned) - 1) = ptr; + return(aligned); +} + +inline void +aligned_free(void* ptr) +{ + if (ptr) { + std::free(*(static_cast(ptr) - 1)); + } +}; + + +struct user_allocator_aligned_malloc +{ + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + static char * malloc(const size_type bytes) + { + #if !defined(MEMORY_ALLOCATION_DEBUG) + return reinterpret_cast(aligned_malloc(MEMORY_POOL_ALIGNMENT, bytes)); + #else + char *ret = reinterpret_cast(aligned_malloc(MEMORY_POOL_ALIGNMENT, bytes)); + if (ret==nullptr) + { + std::cout << "out of memory for " << bytes << " bytes\r\n"; + } + return(ret); + #endif + } + static void free(char * const block) + { + #if defined(MEMORY_ALLOCATION_DEBUG) + if (block==nullptr) + { + std::cout << "free null ptr \r\n"; + } + #endif + aligned_free(block); + } +}; + +/* + +Memory allocator for vector and matrix. + +*/ + +// Default allocator +// Other allocator must be provided by user of the library +template +struct malloc_allocator { + /* Dynamic size allocations */ + static char* allocate ( vector_length_t sz) noexcept{ + char *res; + res=reinterpret_cast(std::malloc(sz)); + #if defined(MEMORY_ALLOCATION_DEBUG) + if (res==nullptr) + { + std::cout << "out of memory for " << sz << " bytes\r\n"; + } + #endif + return(res); + } + + /* Size know at build time */ + static char* allocate ( ) noexcept{ + char *res; + res=reinterpret_cast(std::malloc(L)); + #if defined(MEMORY_ALLOCATION_DEBUG) + if (res==nullptr) + { + std::cout << "out of memory for " << L << " bytes\r\n"; + } + #endif + return(res); + } + + static void destroy ( char* ptr ) noexcept { + #if defined(MEMORY_ALLOCATION_DEBUG) + if (ptr==nullptr) + { + std::cout << "free null ptr \r\n"; + } + #endif + std::free(ptr); + } + +}; + + +/* + +Memory pool + +Memory pool is using a buffer +allocator (aligned or normal malloc) + +A memory pool can be used to by a memory allocator for +vectors and matrixes. 
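+
+  Usage sketch (illustrative only; the template-argument order shown here, buffer
+  size first and then the user allocator, is an assumption):
+
+    MemoryPool<128,user_allocator_aligned_malloc> pool(10); // ten 128-byte aligned buffers
+
+    char *buf = pool.get_new_buffer();   // O(1): pops the head of the free list
+    ...                                  // use the buffer
+    pool.recycle_buffer(buf);            // O(1): pushes it back onto the free list
+    pool.reset();                        // rebuilds the free list: all buffers available again
+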
+ + +*/ + +struct ListElem; + +struct ListElem { + ListElem *next; +}; + +template +class MemoryPool { +public: + explicit MemoryPool(const uint16_t nbBufs) + { + buffer_list.reserve(nbBufs); + buffer_list.assign(nbBufs,nullptr); + for(auto p=buffer_list.begin();p != buffer_list.end(); ++p) + { + *p = UserAllocator::malloc(BUF_SIZE < sizeof(ListElem) ? sizeof(ListElem) : BUF_SIZE); + } + reset(); + }; + + ~MemoryPool() + { + for(auto p=buffer_list.begin();p != buffer_list.end(); ++p) + { + UserAllocator::free(*p); + } + } + + MemoryPool(const MemoryPool& other) = delete; + + MemoryPool(MemoryPool&& other) = delete; + + + MemoryPool& operator=(const MemoryPool& other) = delete; + + MemoryPool& operator=(MemoryPool&& other) = delete; + + char* get_new_buffer() noexcept + { + /* No error handling. + The sizing of the pool must have been done, for + instance, with a statistic allocator. + Allocation is thus assumed to succeed */ + char* res = reinterpret_cast(free); + free = free->next; + #if defined(MEMORY_ALLOCATION_DEBUG) + if (res == nullptr) + { + std::cout << "memory pool alloc error " << BUF_SIZE << " bytes\r\n"; + } + #endif + return(res); + } + + void recycle_buffer(char* buf) noexcept + { + ListElem *l = reinterpret_cast(buf); + #if defined(MEMORY_ALLOCATION_DEBUG) + if (l == nullptr) + { + std::cout << "memory pool free error " << BUF_SIZE << " bytes\r\n"; + } + #endif + l->next = free; + free = l; + } + + void reset() noexcept + { + const int nbBufs = buffer_list.size(); + for(int i=0;i(buffer_list[i]); + l->next = reinterpret_cast(buffer_list[i+1]); + } + ListElem *l=reinterpret_cast(buffer_list[nbBufs-1]); + l->next = nullptr; + free = reinterpret_cast(buffer_list[0]); + } + + + +protected: + ListElem *free; + std::vector buffer_list; +}; + + +/*! @} */ + +} \ No newline at end of file diff --git a/dsppp/Include/dsppp/num_features/double.hpp b/dsppp/Include/dsppp/num_features/double.hpp new file mode 100644 index 000000000..1e3c78ae7 --- /dev/null +++ b/dsppp/Include/dsppp/num_features/double.hpp @@ -0,0 +1,63 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber Scalar number definitions + * \ingroup NUMBER + * @{ + * \addtogroup GenericDoubleNumber Double + * \ingroup GenericNumber + * @{ + */ + + +template<> +struct number_traits +{ + static constexpr bool is_float = true; + static constexpr bool is_fixed = false; + typedef double accumulator; + static constexpr double one() {return 1.0;}; + typedef double compute_type; +}; + +template +struct vector_traits { + typedef double type; + typedef double storage_type; + + // No vector type but must still be defined + typedef bool vector; + typedef bool temp_accumulator; + typedef uint32_t predicate_t; + + static constexpr bool has_vector = false; + static constexpr bool is_float = true; + static constexpr bool is_fixed = false; + static constexpr bool has_predicate = false; +}; + +namespace inner { + __STATIC_FORCEINLINE double from_accumulator(const double a) + { + return(a); + }; + + __STATIC_FORCEINLINE double mac(const double acc,const double a,const double b) + { + return(acc+a*b); + }; + + __STATIC_FORCEINLINE void accumulate(double &a,const double &b) +{ + a += b; +} + +__STATIC_FORCEINLINE double mult(double &a,const double &b) +{ + return(a*b); +} +} + +/*! @} */ +/*! 
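+
+  Since has_vector is false for double, expression evaluation falls back to the
+  scalar loops, and an accumulation step reduces to code of this shape
+  (illustrative sketch; a, b and n stand for any indexable operands and length):
+
+    double acc = 0.0;
+    for(index_t i=0; i<n ; i++) {
+       acc = inner::mac(acc,a[i],b[i]);   // acc + a[i]*b[i]
+    }
+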
@} */ diff --git a/dsppp/Include/dsppp/num_features/float.hpp b/dsppp/Include/dsppp/num_features/float.hpp new file mode 100644 index 000000000..bf7838302 --- /dev/null +++ b/dsppp/Include/dsppp/num_features/float.hpp @@ -0,0 +1,77 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericFloatNumber Float + * \ingroup GenericNumber + * @{ + */ + +template<> +struct number_traits +{ + static constexpr bool is_float = true; + static constexpr bool is_fixed = false; + typedef float accumulator; + static constexpr float one() {return 1.0f;}; + typedef float compute_type; +}; + + +/* + +If arch is not deriving from Neon or Helium, then there are +no vectors for float + +*/ +template +struct vector_traits::value && + !std::is_base_of::value>::type> { + typedef float type; + typedef float storage_type; + + // No vector type but must still be defined + typedef bool vector; + typedef bool temp_accumulator; + typedef uint32_t predicate_t; + + + static constexpr bool has_vector = false; + static constexpr bool is_float = true; + static constexpr bool is_fixed = false; + static constexpr bool has_predicate = false; + +}; + +namespace inner { + __STATIC_FORCEINLINE float from_accumulator(const float a) + { + return(a); + }; + + __STATIC_FORCEINLINE float mac(const float acc,const float a,const float b) + { + return(acc+a*b); + }; + +__STATIC_FORCEINLINE void accumulate(float &a,const float &b) +{ + a += b; +} + +__STATIC_FORCEINLINE float mult(float &a,const float &b) +{ + return(a*b); +} + +} + + +/*! @} */ +/*! @} */ + + diff --git a/dsppp/Include/dsppp/num_features/group.hpp b/dsppp/Include/dsppp/num_features/group.hpp new file mode 100644 index 000000000..f55d98770 --- /dev/null +++ b/dsppp/Include/dsppp/num_features/group.hpp @@ -0,0 +1,171 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericTUPLENumber Tuple + * Tuples of numbers or expressions used for unrolling + * \ingroup GenericNumber + * @{ + */ + +template +struct number_traits> +{ + static constexpr bool is_float = false; + static constexpr bool is_fixed = false; + typedef std::tuple::accumulator...> accumulator; + typedef std::tuple::compute_type...> compute_type; + + static std::tuple::accumulator...> one() + { + return(std::make_tuple(vector_traits::one()...)); + } + +}; + +/* + +Assume that all E are using the same scalar type or coherent types +like f32 and q13 that have same number of lanes. + +Any other mix will not work and won't be catched at build time. + +*/ +template +struct vector_traits,arch> { + using RefScalar = typename std::tuple_element<0,std::tuple>::type; + + typedef std::tuple::temp_accumulator...> temp_accumulator; + typedef std::tuple::vector...> vector; + typedef std::tuple::predicate_t...> predicate_t; + + static constexpr int nb_lanes = vector_traits::nb_lanes; + + static constexpr bool has_vector = vector_traits::has_vector; + static constexpr bool is_float = vector_traits::is_float; + static constexpr bool is_fixed = vector_traits::is_fixed; + static constexpr bool has_predicate = vector_traits::has_predicate; + + static temp_accumulator temp_acc_zero() + { + return(std::make_tuple(vector_traits::temp_acc_zero()...)); + } + +}; + +namespace inner { + + + + /* + + Assume that the vctpq is the same for all tuple elements. + If it is not the case, we can't get a predicated loop and + the code contains additional VPSTTTT and it is not + efficient. 
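+
+     For instance, both elements of a std::tuple<Q31,Q31> expression share the same
+     4-lane vctp32q predicate, so a single predicated tail covers the whole tuple;
+     mixing lane counts (for example float32_t with Q15 on Helium) breaks this
+     assumption, as explained above (illustrative example).
+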
+ + */ +#if defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) + template + struct vctpq> + { + static auto mk(const uint32_t v/*, + typename std::enable_if<(vector_traits::nb_lanes == ...),bool>::type* = nullptr*/) + { + return(vctpq>>::mk(v)); + }; + }; +#endif + /* + + Typical configuration is vmacc between tuple and tuple + but also very common is vmacc between tuple and vector + + */ + + template + __STATIC_FORCEINLINE A vmacc_impl(const A &acc,const V &a,const V &b, std::index_sequence) + { + return(std::make_tuple(vmacc(std::get(acc),std::get(a),std::get(b))...)); + }; + + template + __STATIC_FORCEINLINE A + vmacc(const A &acc,const std::tuple &a,const std::tuple &b) + { + return(vmacc_impl(acc,a,b,std::make_index_sequence())); + }; + + template + __STATIC_FORCEINLINE A vmacc_impl(const A &acc,const V &a,const V &b, const B p0,std::index_sequence) + { + return(std::make_tuple(vmacc(std::get(acc),std::get(a),std::get(b),p0)...)); + }; + + template + __STATIC_FORCEINLINE A + vmacc(const A &acc,const std::tuple &a,const std::tuple &b,const B p0) + { + return(vmacc_impl(acc,a,b,p0,std::make_index_sequence())); + }; + + + + template + __STATIC_FORCEINLINE auto vreduce_impl(const A &acc, std::index_sequence) + { + return(std::make_tuple(vreduce(std::get(acc))...)); + }; + + template + __STATIC_FORCEINLINE auto vreduce(const std::tuple &acc) + { + return(vreduce_impl(acc,std::make_index_sequence())); + }; + + template + __STATIC_FORCEINLINE auto from_accumulator_impl(const A &acc, std::index_sequence) + { + return(std::make_tuple(from_accumulator(std::get(acc))...)); + }; + + template + __STATIC_FORCEINLINE auto from_accumulator(const std::tuple &acc) + { + return(from_accumulator_impl(acc,std::make_index_sequence())); + }; + + template + __STATIC_FORCEINLINE A mac_impl(const A &acc,const V &a,const V &b, std::index_sequence) + { + return(std::make_tuple(mac(std::get(acc),std::get(a),std::get(b))...)); + }; + + template + __STATIC_FORCEINLINE A + mac(const A &acc,const std::tuple &a,const std::tuple &b) + { + return(mac_impl(acc,a,b,std::make_index_sequence())); + }; + + template + __STATIC_FORCEINLINE A mac_impl(const A &acc,const V &a,const V &b, const B p0,std::index_sequence) + { + return(std::make_tuple(mac(std::get(acc),std::get(a),std::get(b),p0)...)); + }; + + template + __STATIC_FORCEINLINE A + mac(const A &acc,const std::tuple &a,const std::tuple &b,const B p0) + { + return(mac_impl(acc,a,b,p0,std::make_index_sequence())); + }; + +}; + + +/*! @} */ +/*! 
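+
+  Expansion sketch (illustrative): for a tuple of three accumulators,
+  as produced by an unrolled dot product, vmacc(acc,a,b) above unfolds
+  through the index_sequence into
+     std::make_tuple(vmacc(std::get<0>(acc),std::get<0>(a),std::get<0>(b)),
+                     vmacc(std::get<1>(acc),std::get<1>(a),std::get<1>(b)),
+                     vmacc(std::get<2>(acc),std::get<2>(a),std::get<2>(b)))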
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/num_features/half.hpp b/dsppp/Include/dsppp/num_features/half.hpp new file mode 100644 index 000000000..dd24fc785 --- /dev/null +++ b/dsppp/Include/dsppp/num_features/half.hpp @@ -0,0 +1,76 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_FLOAT16_SUPPORTED +#endif + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericHalfNumber Half + * \ingroup GenericNumber + * @{ + */ + +#if defined(ARM_FLOAT16_SUPPORTED) +template<> +struct number_traits +{ + static constexpr bool is_float = true; + static constexpr bool is_fixed = false; + typedef float16_t accumulator; + static constexpr float16_t one() {return ((float16_t)1.0f);}; + + typedef _Float16 compute_type; +}; + + +#if !defined(ARM_MATH_MVE_FLOAT16) +template<> +struct vector_traits { + typedef float16_t type; + typedef float16_t storage_type; + + // No vector type but must still be defined + typedef bool vector; + typedef bool temp_accumulator; + typedef uint32_t predicate_t; + + + static constexpr bool has_vector = false; + static constexpr bool is_float = true; + static constexpr bool is_fixed = false; + static constexpr bool has_predicate = false; +}; +#endif + +namespace inner { + __STATIC_FORCEINLINE float16_t from_accumulator(const float16_t a) + { + return(a); + }; + + __STATIC_FORCEINLINE float16_t mac(const float16_t acc,const float16_t a,const float16_t b) + { + return((_Float16)acc+(_Float16)a*(_Float16)b); + }; + + +__STATIC_FORCEINLINE void accumulate(float16_t &a,const float16_t &b) +{ + a += (_Float16)b; +} + +__STATIC_FORCEINLINE float16_t mult(float16_t &a,const float16_t &b) +{ + return((_Float16)a*(_Float16)b); +} + +} + +#endif + +/*! @} */ +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/num_features/q15.hpp b/dsppp/Include/dsppp/num_features/q15.hpp new file mode 100644 index 000000000..5bd5d9fc3 --- /dev/null +++ b/dsppp/Include/dsppp/num_features/q15.hpp @@ -0,0 +1,66 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericQ15Number Q15 + * \ingroup GenericNumber + * @{ + */ + +template<> +struct number_traits +{ + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + typedef Q<33,30> accumulator; + static constexpr Q15 one() {return Q15::one();}; + typedef Q15 compute_type; +}; + +template +struct vector_traits::value && + !std::is_base_of::value && + !std::is_base_of::value>::type> { + typedef Q15 type; + typedef type::value_type storage_type; + + // No vector type but must still be defined + typedef bool vector; + typedef bool temp_accumulator; + typedef uint32_t predicate_t; + + + + static constexpr bool has_vector = false; + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + static constexpr bool has_predicate = false; + +}; + +namespace inner { +#if defined(ARM_MATH_MVEI) + __STATIC_FORCEINLINE Q15 from_accumulator(const Q<33,30> a) + { + //return(saturate(toFrac<15>(a))); + return(Q15((sqrshrl_sat48(a.v, -(32-15)) >> 32) & 0xffffffff)); + }; +#else + __STATIC_FORCEINLINE Q15 from_accumulator(const Q<33,30> a) + { + return(saturate(toFrac<15>(a))); + }; +#endif + + __STATIC_FORCEINLINE Q<33,30> mac(const Q<33,30> acc,const Q15 a,const Q15 b) + { + return(accumulate(acc , mult(a,b))); + }; +} + +/*! @} */ +/*! 
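+
+  Worked example (illustrative): Q15 products are accumulated without
+  intermediate saturation in the Q<33,30> type above; from_accumulator
+  then drops 15 fractional bits (30 -> 15) and saturates, so an
+  accumulated value of 0.5 comes back as 0x4000 in Q15.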
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/num_features/q31.hpp b/dsppp/Include/dsppp/num_features/q31.hpp new file mode 100644 index 000000000..5af4f5647 --- /dev/null +++ b/dsppp/Include/dsppp/num_features/q31.hpp @@ -0,0 +1,65 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericQ31Number Q31 + * \ingroup GenericNumber + * @{ + */ + +template<> +struct number_traits +{ + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + typedef Q<15,48> accumulator; + static constexpr Q31 one() {return Q31::one();}; + typedef Q31 compute_type; +}; + +template +struct vector_traits::value && + !std::is_base_of::value>::type> { + typedef Q31 type; + typedef type::value_type storage_type; + + // No vector type but must still be defined + typedef bool vector; + typedef bool temp_accumulator; + typedef uint32_t predicate_t; + + + + static constexpr bool has_vector = false; + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + static constexpr bool has_predicate = false; +}; + +namespace inner { +#if defined(ARM_MATH_MVEI) + __STATIC_FORCEINLINE Q31 from_accumulator(const Q<15,48> a) + { + return(Q31(asrl(a.v, 17))); + }; +#else + __STATIC_FORCEINLINE Q31 from_accumulator(const Q<15,48> a) + { + return(Q31(a.v >> 17)); + }; +#endif + + +__STATIC_FORCEINLINE Q<15,48> mac(const Q<15,48> acc,const Q31 a,const Q31 b) +{ + return(accumulate(acc , toFrac<48>(mult(a,b)))); +}; + + } + +/*! @} */ +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/num_features/q7.hpp b/dsppp/Include/dsppp/num_features/q7.hpp new file mode 100644 index 000000000..e408801bd --- /dev/null +++ b/dsppp/Include/dsppp/num_features/q7.hpp @@ -0,0 +1,57 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericQ7Number Q7 + * \ingroup GenericNumber + * @{ + */ + +template<> +struct number_traits +{ + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + typedef Q<17,14> accumulator; + static constexpr Q7 one() {return Q7::one();}; + typedef Q7 compute_type; +}; + +template +struct vector_traits::value && + !std::is_base_of::value && + !std::is_base_of::value>::type> { + typedef Q7 type; + typedef type::value_type storage_type; + + // No vector type but must still be defined + typedef bool vector; + typedef bool temp_accumulator; + typedef uint32_t predicate_t; + + + + static constexpr bool has_vector = false; + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + static constexpr bool has_predicate = false; +}; + +namespace inner { + __STATIC_FORCEINLINE Q7 from_accumulator(const Q<17,14> a) + { + return(Q7(__SSAT(a.v >> 7, 8))); + }; + + __STATIC_FORCEINLINE Q<17,14> mac(const Q<17,14> acc,const Q7 a,const Q7 b) + { + return(accumulate(acc , mult(a,b))); + }; +} + +/*! @} */ +/*! 
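+
+  Worked example (illustrative): the Q<17,14> accumulator above keeps
+  the exact Q14 products of Q7 samples; from_accumulator shifts right
+  by 7 (14 -> 7 fractional bits) and saturates to 8 bits with __SSAT,
+  so an accumulated 0.5 becomes 0x40 in Q7.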
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/number.hpp b/dsppp/Include/dsppp/number.hpp new file mode 100644 index 000000000..033a65e49 --- /dev/null +++ b/dsppp/Include/dsppp/number.hpp @@ -0,0 +1,190 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include "fixed_point.hpp" +#include + +#include "arm_math_types.h" + +#if defined(ARM_FLOAT16_SUPPORTED) +#include "arm_math_types_f16.h" +#endif + +#if defined(ARM_MATH_DSP) +#include "DSP/memory.hpp" +#endif + +namespace arm_cmsis_dsp { + +/** \addtogroup NUMBER Number datatypes + * \ingroup DSPPP + * Number datatypes expressing different properties of the numbers + * according to the architecture. + * + * Those definitions are used to write architecture independents + * algorithms. + * @{ + */ + +constexpr uint32_t maskFromShift(const uint32_t shift) +{ + return ((1<>1)); +} + +/** Properties of a scalar datatype + * + * Needs to contain two static bool : is_float and is_fixed + * + * Needs to contain a static function `one` returning the value + * + * 1 for this datatype (used to write some datatype generic + * algorithms) + */ +template +struct number_traits; + + +/* + +When vector is true we have a vector datatype +A temporary accumulator datatype and an accumulator datatype. +For most types the temporary and accumulator are the same. +For float, vector instruction mac is doing a mac per lane. +So temporay is a vector and the final accumulator is a float. + +*/ + +/** @brief Properties of a vector datatype linked to a scalar datatype + * @tparam T Type of the scalar + * @tparam arch Architecture. It is defined by the + * architecture selection code and should never be + * set by the user. + */ +template +struct vector_traits { + typedef T type; //!< Scalar datatype + typedef T storage_type; //!< Storage type (for instance for Q15 scalar the storage is int16_t) + static constexpr bool has_vector = false; //!< True if scalar type has a related vector type + static constexpr bool is_float = false; //!< True if scalar type is a float (half, float or double) + static constexpr bool is_fixed = false; //!< True if scalar type is fixed point +}; + +/** @brief Scalar properties of fixed point datatype + * @tparam M Mantissa bits (not including sign bit) + * @tparam F Fractional bits + * @tparam S Signed or unsigned + * @tparam T Storage datatype + */ +template +struct number_traits> +{ + static constexpr bool is_float = false; //!< False because scalar is not a float datatype (half, float, double) + static constexpr bool is_fixed = true; //!< True because datatype is a fixed point arithmetic one + + /** @brief Return 1 for this datatype + * + * Used for writing datatype generic algorithms + */ + static constexpr Q one() {return Q::one();}; +}; + + +namespace inner { + +/** @brief Predicate (only defined for vector architectures) + * @tparam T scalar data type + * @param v Number of loops + * @return Predicate for the given architecture + */ +template +struct vctpq { +static typename vector_traits::predicate_t mk(uint32_t v); +}; + +}; + + +/* + +vconst +vconst_tail +vadd +vsub +vmul +vacc + + +vload1 +vstore1 + +// When predicate +vctpq +vload1_z +vstore1_z + +// When predicated loop +vadd_x +vsub_x +vmul_x +vmacc_p + + +*/ + + + +// Common to all architectures +#include "num_features/double.hpp" +#include "num_features/float.hpp" +#include "num_features/half.hpp" +#include "num_features/q31.hpp" +#include "num_features/q15.hpp" +#include "num_features/q7.hpp" + +// Specific for some architecture +//#include +#include 
"DSP/num_features.hpp" +#include "Helium/num_features.hpp" +//#include + + +#include "num_features/group.hpp" + +/* + +If there is the need to tune the intrinsics depending on the +Helium variant of the architecture, somehting like that could be used. +In practice, selection is done at level of of algorithms more than +instructions where it may be simple to just use a #if to use the +right intrinsics when it is available. + +*/ +#if 0 +template +__STATIC_FORCEINLINE mve_pred16_t _vctpq(uint32_t v,Helium * = nullptr); + +template<> +__STATIC_FORCEINLINE mve_pred16_t _vctpq(uint32_t v,Helium *) +{ + return(vctp32q(v)); +}; + +template +__STATIC_FORCEINLINE mve_pred16_t vctpq(uint32_t v) +{ + return(_vctpq(v,CURRENT_ARCH)); +} + +#endif + +/*! @} */ + +} // cmsis-dsp namespace \ No newline at end of file diff --git a/dsppp/Include/dsppp/unroll.hpp b/dsppp/Include/dsppp/unroll.hpp new file mode 100644 index 000000000..b6e6693f5 --- /dev/null +++ b/dsppp/Include/dsppp/unroll.hpp @@ -0,0 +1,247 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include + +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "matrix.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup UNROLLING Unrolling + * \ingroup DSPPP + * @{ + */ + +template +struct Merged +{ + + using ScalarResult = std::tuple>::Scalar...>; + using TypeOfElement = typename std::tuple_element<0,ScalarResult>::type; + + constexpr explicit Merged(const E& ... values) : vals { values ...} { } + + constexpr Merged(Merged&& other) = default; + constexpr Merged(const Merged& other) = default; + constexpr Merged& operator=(const Merged& other) = delete; + constexpr Merged& operator=(Merged&& other) = delete; + ~Merged() = default; + + constexpr vector_length_t length() const noexcept {return std::get<0>(vals).length();}; + + template + constexpr ScalarResult val_impl(const int i, const std::index_sequence) const noexcept + { + return std::tuple(std::get(vals)[i]...); + } + + constexpr ScalarResult operator[](const int i) noexcept{ + return val_impl(i,std::make_index_sequence()); + } + + constexpr ScalarResult const operator[](const int i) const noexcept{ + return val_impl(i,std::make_index_sequence()); + } + +#if defined(HAS_VECTOR) + + using Vector = std::tuple>::Scalar>::vector...>; + + template + void vector_store_impl(const index_t i,const Vector &val, const std::index_sequence) const noexcept + { + (inner::vstore1<1>((std::get(vals).ptr(i)),std::get(val)),...); + } + + void vector_store(const index_t i,const Vector &val) const noexcept + { + vector_store_impl(i,val,std::make_index_sequence()); + } + +#if defined(HAS_PREDICATED_LOOP) + template + void vector_store_tail_impl(const index_t i,const vector_length_t remaining,const Vector &val, const std::index_sequence) const noexcept + { + (inner::vstore1_z<1>((std::get(vals).ptr(i)),std::get(val),remaining,inner::vctpq::mk(remaining)),...); + } + + + void vector_store_tail(const index_t i,const vector_length_t remaining,const Vector &val) const noexcept + { + vector_store_tail_impl(i,remaining,val,std::make_index_sequence()); + } +#endif + + + template + Vector vector_op_impl(const int i, const std::index_sequence) const noexcept + { + return std::make_tuple(std::get(vals).vector_op(i)...); + } + + Vector vector_op(const index_t i) const noexcept + { + return(vector_op_impl(i,std::make_index_sequence())); + } + +#if defined(HAS_PREDICATED_LOOP) + template + Vector vector_op_tail_impl(const index_t i,const 
vector_length_t remaining, const std::index_sequence) const noexcept + { + return std::make_tuple(std::get(vals).vector_op_tail(i,remaining)...); + } + + Vector vector_op_tail(const index_t i,const vector_length_t remaining) const noexcept + { + return(vector_op_tail_impl(i,remaining,std::make_index_sequence())); + } +#endif +#endif + + template + Merged& operator=(const Merged& other) noexcept + { + eval(*this,other,std::get<0>(vals).length(),CURRENT_ARCH); + return(*this); + } + + const std::tuple vals; +}; + +template +static inline Merged<_Tp&...> +results(_Tp&... __t) noexcept {return Merged<_Tp&...>(__t...);} + + +template +struct traits> +{ + typedef std::tuple>::Scalar...> Scalar; + +#if defined(HAS_VECTOR) + typedef std::tuple>::Scalar>::vector...> Vector; +#endif +}; + +template +struct IsVector> +{ + constexpr static bool value = true; +}; + +template +struct IsDynamic> +{ + constexpr static bool value = (... && IsDynamic>::value); +}; + +template +struct ElementType> +{ + typedef std::tuple>::type...> type; +}; + +constexpr vector_length_t max_length(const vector_length_t a,const vector_length_t b) noexcept +{ + return((a>b) ? a : b); +}; + + +template +constexpr vector_length_t max_vec_length(F a,N ...b) noexcept +{ + if constexpr (sizeof...(b) == 0) + { + return(a); + } + else + { + return max_length(a,max_vec_length(b...)); + } +}; + + +template +struct StaticLength> +{ + constexpr static vector_length_t value = max_vec_length(StaticLength>::value...); +}; + + + template + auto unroll_impl(const F& func,std::index_sequence) noexcept + { + return Merged{func(Ns)...}; + }; + + template + auto unroll(const F& func) noexcept + { + return unroll_impl(func,std::make_index_sequence()); + }; + + template + constexpr static const E& constres(const E& r,const std::size_t) noexcept + { + return(r); + } + + template + auto replicate_impl(const E& expr,std::index_sequence) noexcept + { + return Merged{constres(expr,Ns)...}; + }; + + template + auto replicate(const E& expr) noexcept + { + return replicate_impl(expr,std::make_index_sequence()); + }; + + /* + + We don't want to replicate the Vector but only a reference + to the vector. So it is packed into an expr + + */ + template typename A> + auto replicate(const Vector& e) noexcept + { + //return replicate_impl(expr(e),std::make_index_sequence()); + return replicate_impl(VectorView(e),std::make_index_sequence()); + }; + + template + auto results_impl(std::array &a,std::index_sequence) noexcept + { + return std::tie(a[Ns]...); + }; + + template + auto results(std::array &a) noexcept + { + return results_impl(a,std::make_index_sequence()); + }; + + template + auto result_impl_func(const F& func,std::index_sequence) noexcept + { + return std::tie(*func(Ns)...); + }; + + template + auto results(const F& func) noexcept + { + return result_impl_func(func,std::make_index_sequence()); + }; + +/*! 
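+
+  Usage sketch (illustrative; expr_for is a placeholder for any function
+  returning an expression built from shared inputs):
+     results(ra,rb,rc) = unroll<3>([&](std::size_t k){return expr_for(k);});
+  Both sides are Merged tuples, so the three expressions are evaluated
+  in a single fused pass over the data.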
@} */ +} diff --git a/dsppp/Include/dsppp/vec.hpp b/dsppp/Include/dsppp/vec.hpp new file mode 100644 index 000000000..c388bae7d --- /dev/null +++ b/dsppp/Include/dsppp/vec.hpp @@ -0,0 +1,442 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" +#include "vector_impl.hpp" +#include "vector_view.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup VECTOR Vectors + * \ingroup DSPPP + * @{ + */ + +template +struct VecRef; + +template +struct VecRef> +{ + typedef VectorView type; + static type ref(const Vector_Base&a){ + return(type(a)); + }; +}; + +template +struct VecRef> +{ + typedef VectorView type; + static type ref(const VectorView&a){ + return(a); + }; +}; + + +template typename A> +struct VecRef,(L<0)> +{ + + typedef VectorView type; + static VectorView ref(const Vector&a,typename std::enable_if<(L<0)>::type* = nullptr){ + return(VectorView(a)); + }; + +}; + +template typename A> +struct VecRef,(L>0)> +{ + typedef const Vector& type; + static const Vector& ref(const Vector&a,typename std::enable_if<(L>0)>::type* = nullptr){ + return(a); + }; +}; + + + +template +struct VecRef<_Binary> +{ + typedef _Binary type; + static type ref(const _Binary&a){ + return(a); + }; +}; + +template +struct VecRef<_Unary> +{ + typedef _Unary type; + static type ref(const _Unary&a){ + return(a); + }; +}; + +template +struct VecRef<_Expr> +{ + typedef Derived type; + static type ref(const _Expr&a){ + return(a.derived()); + }; +}; + +template<> +struct VecRef +{ + typedef double type; + static type ref(const double a){ + return(a); + }; +}; + +template<> +struct VecRef +{ + typedef float type; + static type ref(const float a){ + return(a); + }; +}; + +#if defined(ARM_FLOAT16_SUPPORTED) +template<> +struct VecRef +{ + typedef float16_t type; + static type ref(const float16_t a){ + return(a); + }; +}; +#endif + +template<> +struct VecRef +{ + typedef Q7 type; + static type ref(const Q7 a){ + return(a); + }; +}; + +template<> +struct VecRef +{ + typedef Q15 type; + static type ref(const Q15 a){ + return(a); + }; +}; + +template<> +struct VecRef +{ + typedef Q31 type; + static type ref(const Q31 a){ + return(a); + }; +}; + + +template +struct traits> +{ + typedef T Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits::vector Vector; +#endif +}; + + +template typename Allocator> +struct traits> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits
<P>
::vector Vector; +#endif +}; + + +template typename Allocator> +struct traits&> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits
<P>
::vector Vector; +#endif +}; + + + +template +struct StaticStride +{ + constexpr static std::size_t value = 1; +}; + +template +struct StaticStride> +{ + constexpr static std::size_t value = S; +}; + + +template typename Allocator> +struct IsVector> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct IsVector&> +{ + constexpr static bool value = true; +}; + +template +struct IsVector&> +{ + constexpr static bool value = true; +}; + +template +struct IsVector> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct ElementType> +{ + typedef P type; +}; + +template typename Allocator> +struct ElementType&> +{ + typedef P type; +}; + + +template +struct ElementType> +{ + typedef P type; +}; + +template +struct ElementType&> +{ + typedef P type; +}; + +template +struct IsVector> +{ + constexpr static bool value = true; +}; + + +template typename Allocator> +struct StaticLength> +{ + constexpr static vector_length_t value = (L<0) ? 0 : L; +}; + +template typename Allocator> +struct StaticLength&> +{ + constexpr static vector_length_t value = (L<0) ? 0 : L; +}; + + +template +struct ElementType> +{ + typedef T type; +}; + +template +struct ElementType&> +{ + typedef T type; +}; + + +template typename Allocator> +struct IsDynamic> +{ + constexpr static bool value = (L<0); +}; + +template typename Allocator> +struct IsDynamic&> +{ + constexpr static bool value = (L<0); +}; + +template +struct IsDynamic> +{ + constexpr static bool value = true; +}; + + + + +// Assume one at least is static +template +using StaticType=typename std::conditional::value,VB,VA>::type; + + + +template() || + !is_scalar()) && + SameElementType::value && + same_static_length(),bool>::type = true> +inline auto operator+(const LHS &a,const RHS &b) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + using VecRHS = VecRef; + + return(_Binary>(VecLHS::ref(a),VecRHS::ref(b),_AddOp())); +}; + +template(),bool>::type = true> +inline auto operator+(const LHS &a) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + + return(_Unary>(VecLHS::ref(a),_NoOp())); +}; + + +/* + +VectorView = VectorView must be a cheap copy of reference only. +So when we want to copy a VectorView onto another we need to +write +VectorView = expr(VectorView) or copy + +we cannot rely on the copy or move constructors. 
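+
+   Sketch (illustrative, assuming two views of the same length):
+
+      auto va = a.sub();   // view on vector a
+      auto vb = b.sub();   // view on vector b
+      va = copy(vb);       // copies the samples referenced by vb
+      // va = vb;          // would not compile: view copy-assignment is deleted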
+ +*/ +template(),bool>::type = true> +inline auto expr(const LHS &a) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + return(_Unary>(VecLHS::ref(a),_NoOp())); +}; + +template(),bool>::type = true> +inline auto copy(const LHS &a) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + return(_Unary>(VecLHS::ref(a),_NoOp())); +}; + +template() || + !is_scalar()) && + SameElementType::value && + same_static_length(),bool>::type = true> +inline auto operator-(const LHS &a,const RHS &b) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + using VecRHS = VecRef; + + return(_Binary>( + VecLHS::ref(a),VecRHS::ref(b),_SubOp())); +}; + +template(),bool>::type = true> +inline auto operator-(const LHS &a) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + + return(_Unary>(VecLHS::ref(a),_NegOp())); +}; + + +template() || + !is_scalar()) && + SameElementType::value && + same_static_length(),bool>::type = true> +inline auto operator*(const LHS &a,const RHS &b) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + using VecRHS = VecRef; + + return(_Binary>( + VecLHS::ref(a),VecRHS::ref(b),_MulOp())); +}; + + + +#if 0 +template::value && + IsVector::value && + SameElementType::value && + (same_static_length(StaticLength::value , StaticLength::value)),bool>::type = true> +inline _Expr operator+(const VA &a, + const VB &b) +{ + + return(_Add(a,b)); +}; +#endif + +/* + +Core algorithms that cannot be expressed only with high level +abstractions and need intrinsincs. + +*/ +#include "Helium/matrix_multiply.hpp" +#include "DSP/matrix_multiply.hpp" +#include "Scalar/matrix_multiply.hpp" + +/*! @} */ + +} diff --git a/dsppp/Include/dsppp/vector_impl.hpp b/dsppp/Include/dsppp/vector_impl.hpp new file mode 100644 index 000000000..6978e2db6 --- /dev/null +++ b/dsppp/Include/dsppp/vector_impl.hpp @@ -0,0 +1,576 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup VECTOR + * @{ + */ + +/* + +Generic evaluators. + +*/ +#include "Scalar/basic.hpp" +#include "DSP/basic.hpp" +#include "Helium/basic.hpp" +#include "Neon/basic.hpp" + + +template +struct Vector_Base { + + typedef P element_type; + + vector_length_t length() const {return(length_);}; + + P* ptr() const {return(values_);} + P* ptr(const index_t i) const {return(&values_[i]);} + + const P* const_ptr() const {return(values_);} + const P* const_ptr(const index_t i) const {return(&values_[i]);} + + + P* begin() const {return(values_);} + P* end() const {return(values_+length_);} + + + friend std::ostream& operator<< (std::ostream& stream, const Vector_Base
<P>
& other) { + constexpr int nb = 10; + int i=0; + for(index_t k=0;k::vector; + + template::has_vector,bool>::type = true> + void vector_store(const index_t i,const Vector val) const + { + inner::vstore1<1>((typename std::remove_cv
<P>
::type*)(&values_[i]),val); + } + +#if defined(HAS_PREDICATED_LOOP) + void vector_store_tail(const index_t i,const vector_length_t remaining,const Vector val) const + { + inner::vstore1_z<1>((typename std::remove_cv
<P>
::type*)(&values_[i]),val,remaining,inner::vctpq
<P>
::mk(remaining)); + } + + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(inner::vload1_z<1>((typename std::remove_cv
<P>
::type*)(&values_[i]),remaining,inner::vctpq
<P>
::mk(remaining))); + } +#endif + + Vector const vector_op(const index_t i) const + { + return(inner::vload1<1>((typename std::remove_cv
<P>
::type*)(&values_[i]))); + } + +#endif + + + +protected: + + //Vector_Base():length_(0),values_(nullptr){}; + Vector_Base() = delete; + + explicit Vector_Base(vector_length_t length, char *val): + length_(length), + values_(reinterpret_cast(val)){}; + + explicit Vector_Base(vector_length_t length, char *val,P init_val): + length_(length), + values_(reinterpret_cast(val)){ + _Fill(*this,init_val,length,CURRENT_ARCH); + }; + + + Vector_Base& operator=(const Vector_Base& other) + { + if ((length_ == other.length_) && (this != &other)) + { + _Fill(*this,other,other.length_,CURRENT_ARCH); + //std::memcpy(values_,other.values_,sizeof(P)*length_); + } + return(*this); + } + + // Done in derivated classes since we need + // the allocator destroy + Vector_Base& operator=(Vector_Base&& other) = delete; + + + + + vector_length_t length_; + P* values_; +}; + + +template +struct traits> +{ + typedef T Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits::vector Vector; +#endif +}; + +/** @brief Vector template + * @tparam P Type of the scalar + * @tparam L Vector length in number of elements. + * Negative if length not known at build time. It is the default value + * @tparam Allocator Memory allocator to use. By default it is the macro `TMP_ALLOC` + */ +template typename Allocator = TMP_ALLOC> +struct Vector:Vector_Base
<P>
{ + + + //! Type of vector elements + using element_type = P; + + //! Length of the vector when known at build time. + constexpr static vector_length_t vector_size = sizeof(P)*L; + + /** + * @brief Allocate a buffer for this vector using the memory allocator + * + * @return A new memory buffer + */ + static char* allocate(){return(Allocator::allocate());}; + + /** + * @brief Construct a new vector + * + * The length is known at build time. + * + */ + Vector():Vector_Base
<P>
(L,Vector::allocate()){}; + + /** + * @brief Construct a new vector and initialize it + * + * The length is known at build time. + * + * @param init_val Initialization value + */ + explicit Vector(P init_val):Vector_Base
<P>
(L,Vector::allocate(),init_val){ + }; + + /** + * @brief Construct a new vector and initialize it with a list + * + * The length is known at build time. + * + * @param l Initialization list + */ + Vector(const std::initializer_list
<P>
&l) + :Vector_Base
<P>
(L,Vector::allocate()){ + std::memcpy(Vector_Base
<P>
::values_,l.data(),vector_size); + }; + + Vector(Vector&& other) = default; + + Vector(const Vector& other):Vector_Base
<P>
(L,Vector::allocate()) + { + eval(*this,+other,(vector_length_t)L,CURRENT_ARCH); + + //std::memcpy(Vector_Base
<P>
::values_,other.values_,vector_size); + }; + + template typename OtherAllocator> + explicit Vector(const Vector& other):Vector_Base
<P>
(L,Vector::allocate()) + { + eval(*this,+other,(vector_length_t)L,CURRENT_ARCH); + }; + + template typename OtherAllocator> + explicit Vector(const Vector& other):Vector_Base
<P>
(L,Vector::allocate()) + { + if (other.length() == Vector_Base
<P>
::length()) + { + eval(*this,+other,(vector_length_t)L,CURRENT_ARCH); + } + }; + + template + explicit Vector(const VectorView& other):Vector_Base
<P>
(L,Vector::allocate()) + { + eval(*this,+other,(vector_length_t)L,CURRENT_ARCH); + }; + + + + + template + Vector(const _Expr& other):Vector_Base
<P>
(L,Vector::allocate()) + { + eval(*this,other.derived(),(vector_length_t)L,CURRENT_ARCH); + }; + + + Vector& operator=(const Vector& other) = default; + + Vector& operator=(Vector&& other) + { + if (this != &other) + { + if (Vector_Base
<P>
::values_!=nullptr) + { + Allocator::destroy(reinterpret_cast(Vector_Base
<P>
::values_)); + } + + Vector_Base
<P>
::length_= other.length_; + Vector_Base
<P>
::values_ = other.values_; + other.values_=nullptr; + other.length_ = 0; + } + + return(*this); + } + + template + Vector& operator=(const _Expr&other) + { + eval(*this,other.derived(),(vector_length_t)L,CURRENT_ARCH); + return(*this); + } + + + template(),bool>::type = true> + Vector& operator=(const T other) + { + _Fill(*this,other,L,CURRENT_ARCH); + return(*this); + } + + + + + + template + Vector& operator +=(const _Expr& other) + { + eval(*this,*this + other.derived(),(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + Vector& operator +=(const Vector& other) + { + eval(*this,*this + other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + Vector& operator +=(const P other) + { + eval(*this,*this + other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + template + Vector& operator -=(const _Expr& other) + { + eval(*this,*this - other.derived(),(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + Vector& operator -=(const Vector& other) + { + eval(*this,*this - other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + Vector& operator -=(const P other) + { + eval(*this,*this - other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + template + Vector& operator *=(const _Expr& other) + { + eval(*this,*this * other.derived(),(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + Vector& operator *=(const Vector& other) + { + eval(*this,*this * other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + Vector& operator *=(const P other) + { + eval(*this,*this * other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + + + + template + VectorView sub(const index_t start=0,const index_t stop=L) + { + return(VectorView(*this,start,stop)); + } + + template + const VectorView sub(const index_t start=0,const index_t stop=L) const + { + return(VectorView(*this,start,stop)); + } + + + virtual ~Vector() { + if (Vector_Base
<P>
::values_!=nullptr) + { + Allocator::destroy(reinterpret_cast(Vector_Base
<P>
::values_)); + } + } + + + + +}; + + + +template typename Allocator> +struct Vector:Vector_Base
<P>
{ + + static char* allocate(vector_length_t length){return(Allocator::allocate(sizeof(P)*length));}; + + Vector() = delete; + + explicit Vector(vector_length_t length,P init_val): + Vector_Base
<P>
(length,Vector::allocate(length),init_val){}; + + explicit Vector(vector_length_t length): + Vector_Base
<P>
(length,Vector::allocate(length)){}; + + explicit Vector(const std::initializer_list
<P>
&l) + :Vector_Base
<P>
(l.size(),Vector::allocate(l.size())){ + std::memcpy(Vector_Base
<P>
::values_,l.data(),sizeof(P)*l.size()); + }; + + template typename OtherAllocator> + explicit Vector(const Vector& other):Vector_Base
<P>
(other.length(),Vector::allocate(other.length())) + { + eval(*this,+other,Vector_Base
<P>
::length(),CURRENT_ARCH); + }; + + + Vector(const Vector& other):Vector_Base
<P>
(other.length(),Vector::allocate(other.length())) + { + eval(*this,+other,Vector_Base
<P>
::length(),CURRENT_ARCH); + + //std::memcpy(Vector_Base
<P>
::values_,other.values_,vector_size); + }; + + template + explicit Vector(const VectorView& other):Vector_Base
<P>
(other.length(),Vector::allocate(other.length())) + { + eval(*this,+other,Vector_Base
<P>
::length(),CURRENT_ARCH); + }; + + template + Vector(const _Expr& other):Vector_Base
<P>
(other.length(),Vector::allocate(other.length())) + { + eval(*this,other.derived(),Vector_Base
<P>
::length(),CURRENT_ARCH); + }; + + Vector(Vector&& other) = default; + + + + Vector& operator=(const Vector& other) = default; + + Vector& operator=(Vector&& other) + { + if (this != &other) + { + if (Vector_Base
<P>
::values_!=nullptr) + { + Allocator::destroy(reinterpret_cast(Vector_Base
<P>
::values_)); + } + + Vector_Base
<P>
::length_= other.length_; + Vector_Base
<P>
::values_ = other.values_; + other.values_=nullptr; + other.length_ = 0; + } + + return(*this); + } + + template + Vector& operator=(const _Expr&other) + { + eval(*this,other.derived(),Vector_Base
<P>
::length(),CURRENT_ARCH); + return(*this); + } + + template(),bool>::type = true> + Vector& operator=(const T other) + { + _Fill(*this,other,Vector_Base
<P>
::length(),CURRENT_ARCH); + return(*this); + } + + template + Vector& operator +=(const _Expr& other) + { + eval(*this,*this + other.derived(),Vector_Base
<P>
::length(),CURRENT_ARCH); + return(*this); + }; + + Vector& operator +=(const Vector& other) + { + eval(*this,*this + other,Vector_Base
<P>
::length(),CURRENT_ARCH); + return(*this); + }; + + Vector& operator +=(const P other) + { + eval(*this,*this + other,Vector_Base
<P>
::length(),CURRENT_ARCH); + return(*this); + }; + + template + Vector& operator -=(const _Expr& other) + { + eval(*this,*this - other.derived(),Vector_Base
<P>
::length(),CURRENT_ARCH); + return(*this); + }; + + Vector& operator -=(const Vector& other) + { + eval(*this,*this - other,Vector_Base
<P>
::length(),CURRENT_ARCH); + return(*this); + }; + + Vector& operator -=(const P other) + { + eval(*this,*this - other,Vector_Base
<P>
::length(),CURRENT_ARCH); + return(*this); + }; + + template + Vector& operator *=(const _Expr& other) + { + eval(*this,*this * other.derived(),Vector_Base
<P>
::length(),CURRENT_ARCH); + return(*this); + }; + + Vector& operator *=(const Vector& other) + { + eval(*this,*this * other,Vector_Base
<P>
::length(),CURRENT_ARCH); + return(*this); + }; + + + Vector& operator *=(const P other) + { + eval(*this,*this * other,Vector_Base
<P>
::length(),CURRENT_ARCH); + return(*this); + }; + + template + VectorView sub(const index_t start=0,const index_t stop=-1) + { + if (stop<0) + { + return(VectorView(*this,start,Vector_Base
<P>
::length())); + } + else + { + return(VectorView(*this,start,stop)); + } + } + + template + const VectorView sub(const index_t start=0,const index_t stop=-1) const + { + if (stop<0) + { + return(VectorView(*this,start,Vector_Base
<P>
::length())); + } + else + { + return(VectorView(*this,start,stop)); + } + } + + + + virtual ~Vector() { + if (Vector_Base
<P>
::values_!=nullptr) + { + Allocator::destroy(reinterpret_cast(Vector_Base
<P>
::values_)); + } + } + +}; + +/*! @} */ + +} + diff --git a/dsppp/Include/dsppp/vector_view.hpp b/dsppp/Include/dsppp/vector_view.hpp new file mode 100644 index 000000000..b3ddc9c6f --- /dev/null +++ b/dsppp/Include/dsppp/vector_view.hpp @@ -0,0 +1,449 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" +#include "vector_impl.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup VECTOR + * @{ + */ + +template +struct VectorView +{ + + /* + + Start and stop are the position in the raw Vector_base pointer. + Stop is the first sample outside of the vector + + */ + VectorView() = delete; + + constexpr static vector_length_t compute_length(const index_t start,const index_t stop) + { + return(1+(stop-1 -start)/stride); + } + + explicit VectorView(T *v,const vector_length_t start,const vector_length_t stop): + v_(v+start),nb_samples_(compute_length(start,stop)){}; + + explicit VectorView(const Vector_Base &v): + v_(v.ptr()),nb_samples_(compute_length(0,v.length())){}; + + explicit VectorView(const Vector_Base &v,const index_t start,const index_t stop): + v_(v.ptr()+start),nb_samples_(compute_length(start,stop)){}; + + vector_length_t length() const {return(nb_samples_);}; + + + T* ptr() const {return(v_);} + T* ptr(const index_t i) const {return(&v_[i*stride]);} + + const T* const_ptr() const {return(v_);} + const T* const_ptr(const index_t i) const {return(&v_[i*stride]);} + + T& operator[](const index_t i) + { + return(v_[i*stride]); + } + + T& operator[](const index_t i) const + { + return(v_[i*stride]); + } + +#if defined(HAS_VECTOR) + using Vector = typename vector_traits::vector; + void vector_store(const index_t i,const Vector val) + { + inner::vstore1((typename std::remove_cv::type*)(&v_[i*stride]),val); + } + +#if defined(HAS_PREDICATED_LOOP) + void vector_store_tail(const index_t i,const vector_length_t remaining,const Vector val) + { + inner::vstore1_z((typename std::remove_cv::type*)(&v_[i*stride]),val,remaining,inner::vctpq::mk(remaining)); + } + + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(inner::vload1_z((typename std::remove_cv::type*)(&v_[i*stride]),remaining,inner::vctpq::mk(remaining))); + } +#endif + + Vector const vector_op(const index_t i) const + { + return(inner::vload1((typename std::remove_cv::type*)(&v_[i*stride]))); + } +#endif + + virtual ~VectorView() {}; + + VectorView(const VectorView& other): + v_(other.v_),nb_samples_(other.nb_samples_){}; + + + VectorView(VectorView&& other) : + v_(std::move(other.v_)),nb_samples_(other.nb_samples_) + { + other.v_ = nullptr; + }; + +VectorView& operator=(const VectorView& other) = delete; +VectorView& operator=(VectorView&& other) = delete; + + + + template + VectorView& operator=(const _Expr&other) + { + eval(*this,other.derived(),length(),CURRENT_ARCH); + return(*this); + } + + + VectorView& operator=(const T val) + { + _Fill(*this,val,length(),CURRENT_ARCH); + + return(*this); + } + + template + VectorView& operator +=(const _Expr& other) + { + eval(*this,*this + other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + VectorView& operator +=(const VectorView& other) + { + eval(*this,*this + other,length(),CURRENT_ARCH); + return(*this); + }; + + VectorView& operator +=(const T other) + { + eval(*this,*this + other,length(),CURRENT_ARCH); + return(*this); + 
}; + + template + VectorView& operator -=(const _Expr& other) + { + eval(*this,*this - other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + + VectorView& operator -=(const VectorView& other) + { + eval(*this,*this - other,length(),CURRENT_ARCH); + return(*this); + }; + + VectorView& operator -=(const T other) + { + eval(*this,*this - other,length(),CURRENT_ARCH); + return(*this); + }; + + template + VectorView& operator *=(const _Expr& other) + { + eval(*this,*this * other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + VectorView& operator *=(const VectorView& other) + { + eval(*this,*this * other,length(),CURRENT_ARCH); + return(*this); + }; + + VectorView& operator *=(const T other) + { + eval(*this,*this * other,length(),CURRENT_ARCH); + return(*this); + }; + + friend std::ostream& operator<< (std::ostream& stream, const VectorView& other) { + constexpr int nb = 10; + int i=0; + for(index_t k=0;k + VectorView sub(const index_t start=0,const index_t stop=-1) + { + if (stop < 0) + { + return(VectorView(v_,stride*start,stride*length())); + } + else + { + return(VectorView(v_,stride*start,stride*stop)); + } + } + + template + const VectorView sub(const index_t start=0,const index_t stop=-1) const + { + if (stop < 0) + { + return(VectorView(v_,stride*start,stride*length())); + } + else + { + return(VectorView(v_,stride*start,stride*stop)); + } + } + + +protected: + T* const v_; + const vector_length_t nb_samples_; +}; + +template +struct VectorView +{ + + /* + + Start and stop are the position in the raw Vector_base pointer. + Stop is the first sample outside of the vector + + */ + VectorView() = delete; + + vector_length_t compute_length(const index_t start,const index_t stop,const index_t stride) const + { + return(1+(stop-1 -start)/stride); + } + + explicit VectorView(T *v,const index_t start,const index_t stop,const index_t stride): + v_(v+start),nb_samples_(compute_length(start,stop,stride)),stride_(stride){}; + + explicit VectorView(const Vector_Base &v,const index_t stride): + v_(v.ptr()),nb_samples_(compute_length(0,v.length(),stride)),stride_(stride){}; + + explicit VectorView(const Vector_Base &v,const index_t start,const index_t stop,const index_t stride): + v_(v.ptr()+start),nb_samples_(compute_length(start,stop,stride)),stride_(stride){}; + + vector_length_t length() const {return(nb_samples_);}; + + + T* ptr() const {return(v_);} + T* ptr(const index_t i) const {return(&v_[i*stride_]);} + + const T* const_ptr() const {return(v_);} + const T* const_ptr(const index_t i) const {return(&v_[i*stride_]);} + + T& operator[](index_t i) + { + return(v_[i*stride_]); + } + + T& operator[](index_t i) const + { + return(v_[i*stride_]); + } + +#if defined(HAS_VECTOR) + using Vector = typename vector_traits::vector; + void vector_store(index_t i,Vector val) + { + inner::vstore1((typename std::remove_cv::type*)(&v_[i*stride_]),stride_,val); + } + +#if defined(HAS_PREDICATED_LOOP) + void vector_store_tail(index_t i,vector_length_t remaining,Vector val) + { + inner::vstore1_z((typename std::remove_cv::type*)(&v_[i*stride_]),stride_,val,remaining,inner::vctpq::mk(remaining)); + } + + Vector const vector_op_tail(index_t i,vector_length_t remaining) const + { + return(inner::vload1_z((typename std::remove_cv::type*)(&v_[i*stride_]),stride_,remaining,inner::vctpq::mk(remaining))); + } +#endif + + Vector const vector_op(index_t i) const + { + return(inner::vload1((typename std::remove_cv::type*)(&v_[i*stride_]),stride_)); + } +#endif + + virtual ~VectorView() {}; + + 
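+
+  /* Usage sketch (illustrative only; assumes the dynamic stride tag is
+     named DYNAMIC as elsewhere in the library):
+
+        Vector<float32_t> v(16);
+        VectorView<float32_t,DYNAMIC> odd(v,1,16,2); // samples 1,3,...,15
+        odd = 0.0f;                                  // writes through to v
+  */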
VectorView(const VectorView& other): + v_(other.v_),nb_samples_(other.nb_samples_),stride_(other.stride_){}; + + + VectorView(VectorView&& other) : + v_(std::move(other.v_)),nb_samples_(other.nb_samples_),stride_(other.stride_) + { + other.v_ = nullptr; + }; + +VectorView& operator=(const VectorView& other) = delete; +VectorView& operator=(VectorView&& other) = delete; + + + + template + VectorView& operator=(const _Expr&other) + { + eval(*this,other.derived(),length(),CURRENT_ARCH); + return(*this); + } + + + VectorView& operator=(const T val) + { + _Fill(*this,val,length(),CURRENT_ARCH); + + return(*this); + } + + template + VectorView& operator +=(const _Expr& other) + { + eval(*this,*this + other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + VectorView& operator +=(const VectorView& other) + { + eval(*this,*this + other,length(),CURRENT_ARCH); + return(*this); + }; + + VectorView& operator +=(const T other) + { + eval(*this,*this + other,length(),CURRENT_ARCH); + return(*this); + }; + + template + VectorView& operator -=(const _Expr& other) + { + eval(*this,*this - other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + + VectorView& operator -=(const VectorView& other) + { + eval(*this,*this - other,length(),CURRENT_ARCH); + return(*this); + }; + + VectorView& operator -=(const T other) + { + eval(*this,*this - other,length(),CURRENT_ARCH); + return(*this); + }; + + template + VectorView& operator *=(const _Expr& other) + { + eval(*this,*this * other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + VectorView& operator *=(const VectorView& other) + { + eval(*this,*this * other,length(),CURRENT_ARCH); + return(*this); + }; + + VectorView& operator *=(const T other) + { + eval(*this,*this * other,length(),CURRENT_ARCH); + return(*this); + }; + + friend std::ostream& operator<< (std::ostream& stream, const VectorView& other) { + constexpr int nb = 10; + int i=0; + for(index_t k=0;k + VectorView sub(const index_t start=0,const index_t stop=-1) + { + if (stop<0) + { + return(VectorView(v_,stride()*start,stride()*length(),stride()*S)); + } + else + { + return(VectorView(v_,stride()*start,stride()*stop,stride()*S)); + } + } + + template + const VectorView sub(const index_t start=0,const index_t stop=-1) const + { + if (stop<0) + { + return(VectorView(v_,stride()*start,stride()*length(),stride()*S)); + } + else + { + return(VectorView(v_,stride()*start,stride()*stop,stride()*S)); + } + } + +protected: + T* const v_; + const vector_length_t nb_samples_; + const index_t stride_; +}; + +/*! @} */ + +} + diff --git a/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct new file mode 100644 index 000000000..0f499b2cc --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct @@ -0,0 +1,80 @@ +#! armclang -E --target=arm-arm-none-eabi -mcpu=cortex-m0+ -xc +; command above MUST be in first line (no comment above!) 
+ +/* +;-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- +*/ + +/*--------------------- Flash Configuration ---------------------------------- +; Flash Configuration +; Flash Base Address <0x0-0xFFFFFFFF:8> +; Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __ROM_BASE 0x00000000 +#define __ROM_SIZE 0x00080000 + +/*--------------------- Embedded RAM Configuration --------------------------- +; RAM Configuration +; RAM Base Address <0x0-0xFFFFFFFF:8> +; RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __RAM_BASE 0x20000000 +#define __RAM_SIZE 0x00040000 + +/*--------------------- Stack / Heap Configuration --------------------------- +; Stack / Heap Configuration +; Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +; Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 + +/* +;------------- <<< end of configuration section >>> --------------------------- +*/ + + +/*---------------------------------------------------------------------------- + User Stack & Heap boundary definition + *----------------------------------------------------------------------------*/ +#define __STACK_TOP (__RAM_BASE + __RAM_SIZE) /* starts at end of RAM */ +#define __HEAP_BASE (AlignExpr(+0, 8)) /* starts after RW_RAM section, 8 byte aligned */ + + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ +#define __RO_BASE __ROM_BASE +#define __RO_SIZE __ROM_SIZE + +#define __RW_BASE __RAM_BASE +#define __RW_SIZE (__RAM_SIZE - __STACK_SIZE - __HEAP_SIZE) + + +LR_ROM __RO_BASE __RO_SIZE { ; load region size_region + ER_ROM __RO_BASE __RO_SIZE { ; load address = execution address + *.o (RESET, +First) + *(InRoot$$Sections) + .ANY (+RO) + .ANY (+XO) + } + + RW_NOINIT __RW_BASE UNINIT __RW_SIZE { + *(.bss.noinit) + } + + RW_RAM AlignExpr(+0, 8) (__RW_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP __HEAP_BASE EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK __STACK_TOP EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } +} diff --git a/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct.base@1.0.0 b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct.base@1.0.0 new file mode 100644 index 000000000..0f499b2cc --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct.base@1.0.0 @@ -0,0 +1,80 @@ +#! armclang -E --target=arm-arm-none-eabi -mcpu=cortex-m0+ -xc +; command above MUST be in first line (no comment above!) 
+ +/* +;-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- +*/ + +/*--------------------- Flash Configuration ---------------------------------- +; Flash Configuration +; Flash Base Address <0x0-0xFFFFFFFF:8> +; Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __ROM_BASE 0x00000000 +#define __ROM_SIZE 0x00080000 + +/*--------------------- Embedded RAM Configuration --------------------------- +; RAM Configuration +; RAM Base Address <0x0-0xFFFFFFFF:8> +; RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __RAM_BASE 0x20000000 +#define __RAM_SIZE 0x00040000 + +/*--------------------- Stack / Heap Configuration --------------------------- +; Stack / Heap Configuration +; Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +; Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 + +/* +;------------- <<< end of configuration section >>> --------------------------- +*/ + + +/*---------------------------------------------------------------------------- + User Stack & Heap boundary definition + *----------------------------------------------------------------------------*/ +#define __STACK_TOP (__RAM_BASE + __RAM_SIZE) /* starts at end of RAM */ +#define __HEAP_BASE (AlignExpr(+0, 8)) /* starts after RW_RAM section, 8 byte aligned */ + + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ +#define __RO_BASE __ROM_BASE +#define __RO_SIZE __ROM_SIZE + +#define __RW_BASE __RAM_BASE +#define __RW_SIZE (__RAM_SIZE - __STACK_SIZE - __HEAP_SIZE) + + +LR_ROM __RO_BASE __RO_SIZE { ; load region size_region + ER_ROM __RO_BASE __RO_SIZE { ; load address = execution address + *.o (RESET, +First) + *(InRoot$$Sections) + .ANY (+RO) + .ANY (+XO) + } + + RW_NOINIT __RW_BASE UNINIT __RW_SIZE { + *(.bss.noinit) + } + + RW_RAM AlignExpr(+0, 8) (__RW_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP __HEAP_BASE EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK __STACK_TOP EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } +} diff --git a/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld new file mode 100644 index 000000000..93ed813c8 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld @@ -0,0 +1,263 @@ +/* + *-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- + */ + +/*---------------------- Flash Configuration ---------------------------------- + Flash Configuration + Flash Base Address <0x0-0xFFFFFFFF:8> + Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__ROM_BASE = 0x00000000; +__ROM_SIZE = 0x00040000; + +/*--------------------- Embedded RAM Configuration ---------------------------- + RAM Configuration + RAM Base Address <0x0-0xFFFFFFFF:8> + RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__RAM_BASE = 0x20000000; +__RAM_SIZE = 0x00020000; + +/*--------------------- Stack / Heap Configuration ---------------------------- + Stack / Heap 
Configuration + Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> + Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__STACK_SIZE = 0x00000400; +__HEAP_SIZE = 0x00000C00; + +/* + *-------------------- <<< end of configuration section >>> ------------------- + */ + +MEMORY +{ + FLASH (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE + RAM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > FLASH + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. + LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > FLASH + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT > FLASH + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. 
+ */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM2 AT > FLASH +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM AT > RAM + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM2 AT > RAM2 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > RAM + + .stack (ORIGIN(RAM) + LENGTH(RAM) - __STACK_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM + PROVIDE(__stack = __StackTop); + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld.base@2.2.0 b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld.base@2.2.0 new file mode 100644 index 000000000..93ed813c8 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld.base@2.2.0 @@ -0,0 +1,263 @@ +/* + *-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- + */ + +/*---------------------- Flash Configuration ---------------------------------- + Flash Configuration + Flash Base Address <0x0-0xFFFFFFFF:8> + Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__ROM_BASE = 0x00000000; +__ROM_SIZE = 0x00040000; + +/*--------------------- Embedded RAM Configuration ---------------------------- + RAM Configuration + RAM Base Address <0x0-0xFFFFFFFF:8> + RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__RAM_BASE = 0x20000000; +__RAM_SIZE = 0x00020000; + +/*--------------------- Stack / Heap Configuration ---------------------------- + Stack / Heap Configuration + Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> + Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__STACK_SIZE = 0x00000400; +__HEAP_SIZE = 0x00000C00; + +/* + *-------------------- <<< end of configuration section >>> ------------------- + */ + +MEMORY +{ + FLASH (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE + RAM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > FLASH + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. + LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > FLASH + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT > FLASH + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM2 AT > FLASH +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM AT > RAM + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . 
= ALIGN(4); + __bss2_end__ = .; + } > RAM2 AT > RAM2 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > RAM + + .stack (ORIGIN(RAM) + LENGTH(RAM) - __STACK_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM + PROVIDE(__stack = __StackTop); + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/ARMCM0P/ac6_linker_script.sct b/dsppp/RTE/Device/ARMCM0P/ac6_linker_script.sct new file mode 100644 index 000000000..4d6e579d0 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/ac6_linker_script.sct @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ + +LR_ROM0 __ROM0_BASE __ROM0_SIZE { + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + ER_CMSE_VENEER __ROM0_BASE+__ROM0_SIZE -__ROM0_SIZE { + *(Veneer$$CMSE) + } + #define ER_CMSE_VENEER_SIZE AlignExpr(ImageLength(ER_CMSE_VENEER), 8) +#else + #define ER_CMSE_VENEER_SIZE 0 +#endif + + ER_ROM0 __ROM0_BASE (__ROM0_SIZE - ER_CMSE_VENEER_SIZE) { + *.o (RESET, +First) + *(InRoot$$Sections) + *(+RO +XO) + } + + RW_NOINIT __RAM0_BASE UNINIT (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE) { + *(.bss.noinit) + } + + RW_RAM0 AlignExpr(+0, 8) (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP (AlignExpr(+0, 8)) EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK (__RAM0_BASE + __RAM0_SIZE - __STACKSEAL_SIZE) EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } + +#if __STACKSEAL_SIZE > 0 + STACKSEAL +0 EMPTY 8 { ; Reserve empty region for stack seal immediately after stack + } +#endif + +#if __RAM1_SIZE > 0 + RW_RAM1 __RAM1_BASE __RAM1_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM2_SIZE > 0 + RW_RAM2 __RAM2_BASE __RAM2_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM3_SIZE > 0 + RW_RAM3 __RAM3_BASE __RAM3_SIZE { + .ANY (+RW +ZI) + } +#endif +} + +#if __ROM1_SIZE > 0 +LR_ROM1 __ROM1_BASE __ROM1_SIZE { + ER_ROM1 +0 __ROM1_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM2_SIZE > 0 +LR_ROM2 __ROM2_BASE __ROM2_SIZE { + ER_ROM2 +0 __ROM2_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM3_SIZE > 0 +LR_ROM3 
__ROM3_BASE __ROM3_SIZE { + ER_ROM3 +0 __ROM3_SIZE { + .ANY (+RO +XO) + } +} +#endif diff --git a/dsppp/RTE/Device/ARMCM0P/clang_linker_script.ld b/dsppp/RTE/Device/ARMCM0P/clang_linker_script.ld new file mode 100644 index 000000000..40f955c16 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/clang_linker_script.ld @@ -0,0 +1,353 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) + } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + *(.gnu.linkonce.r.*) + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . = ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . 
+ 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . ); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . = ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef __HEAP_SIZE + PROVIDE (__heap_end = __heap_start + __HEAP_SIZE); + PROVIDE (__heap_size = __HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - __STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - __STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += __STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack) + .stackseal (__stack) (NOLOAD) : + { + . 
+= __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. */ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. + */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/RTE/Device/ARMCM0P/gcc_linker_script.ld b/dsppp/RTE/Device/ARMCM0P/gcc_linker_script.ld new file mode 100644 index 000000000..a018e5d4e --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/gcc_linker_script.ld @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rwx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rwx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rwx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rwx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . = ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. + LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. 
+ */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - __STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . = ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/ARMCM0P/regions_ARMCM0P.h b/dsppp/RTE/Device/ARMCM0P/regions_ARMCM0P.h new file mode 100644 index 000000000..c9b457cbc --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/regions_ARMCM0P.h @@ -0,0 +1,60 @@ +#ifndef REGIONS_ARMCM0P_H +#define REGIONS_ARMCM0P_H + + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// Device pack: ARM::Cortex_DFP@1.0.0 +// Device pack used to generate this file + +// ROM Configuration +// ======================= +// ROM=<__ROM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x00000000 +#define __ROM0_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00040000 +#define __ROM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __ROM0_DEFAULT 1 +// Startup +// Selects region to be used for startup code. +#define __ROM0_STARTUP 1 +// + +// + +// RAM Configuration +// ======================= +// RAM=<__RAM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20000000 +#define __RAM0_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. 
+// Default: 0x00020000 +#define __RAM0_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM0_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM0_NOINIT 0 +// + +// + +// Stack / Heap Configuration +// Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +// Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 +// + + +#endif /* REGIONS_ARMCM0P_H */ diff --git a/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c b/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c new file mode 100644 index 000000000..25b202457 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c @@ -0,0 +1,146 @@ +/****************************************************************************** + * @file startup_ARMCM0plus.c + * @brief CMSIS-Core(M) Device Startup File for a Cortex-M0+ Device + * @version V3.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM0P) + #include "ARMCM0plus.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; + +extern __NO_RETURN void __PROGRAM_START(void); + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler (void); + void Default_Handler(void); + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +/* Exceptions */ +void NMI_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void HardFault_Handler (void) __attribute__ ((weak)); +void SVC_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void PendSV_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void SysTick_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + +void Interrupt0_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt1_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt2_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt3_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt4_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt5_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt6_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt7_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt8_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt9_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[48]; + const VECTOR_TABLE_Type __VECTOR_TABLE[48] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* Reset Handler */ + NMI_Handler, /* -14 NMI Handler */ + HardFault_Handler, /* -13 Hard Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5 SVCall Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + PendSV_Handler, /* -2 PendSV Handler */ + SysTick_Handler, /* -1 SysTick Handler */ + + /* Interrupts */ + Interrupt0_Handler, /* 0 Interrupt 0 */ + Interrupt1_Handler, /* 1 Interrupt 1 */ + Interrupt2_Handler, /* 2 Interrupt 2 */ + Interrupt3_Handler, /* 3 Interrupt 3 */ + Interrupt4_Handler, /* 4 Interrupt 4 */ + Interrupt5_Handler, /* 5 Interrupt 5 */ + Interrupt6_Handler, /* 6 Interrupt 6 */ + Interrupt7_Handler, /* 7 Interrupt 7 */ + Interrupt8_Handler, /* 8 Interrupt 8 */ + Interrupt9_Handler /* 9 Interrupt 9 */ + /* Interrupts 10..31 are left out */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + Reset Handler called on controller reset + 
*----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler(void) +{ + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} + + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wmissing-noreturn" +#endif + +/*---------------------------------------------------------------------------- + Hard Fault Handler + *----------------------------------------------------------------------------*/ +void HardFault_Handler(void) +{ + while(1); +} + +/*---------------------------------------------------------------------------- + Default Handler for Exceptions / Interrupts + *----------------------------------------------------------------------------*/ +void Default_Handler(void) +{ + while(1); +} + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic pop +#endif + diff --git a/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c.base@3.0.0 b/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c.base@3.0.0 new file mode 100644 index 000000000..25b202457 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c.base@3.0.0 @@ -0,0 +1,146 @@ +/****************************************************************************** + * @file startup_ARMCM0plus.c + * @brief CMSIS-Core(M) Device Startup File for a Cortex-M0+ Device + * @version V3.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM0P) + #include "ARMCM0plus.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; + +extern __NO_RETURN void __PROGRAM_START(void); + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler (void); + void Default_Handler(void); + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +/* Exceptions */ +void NMI_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void HardFault_Handler (void) __attribute__ ((weak)); +void SVC_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void PendSV_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void SysTick_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + +void Interrupt0_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt1_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt2_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt3_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt4_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt5_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt6_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt7_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt8_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt9_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[48]; + const VECTOR_TABLE_Type __VECTOR_TABLE[48] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* Reset Handler */ + NMI_Handler, /* -14 NMI Handler */ + HardFault_Handler, /* -13 Hard Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5 SVCall Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + PendSV_Handler, /* -2 PendSV Handler */ + SysTick_Handler, /* -1 SysTick Handler */ + + /* Interrupts */ + Interrupt0_Handler, /* 0 Interrupt 0 */ + Interrupt1_Handler, /* 1 Interrupt 1 */ + Interrupt2_Handler, /* 2 Interrupt 2 */ + Interrupt3_Handler, /* 3 Interrupt 3 */ + Interrupt4_Handler, /* 4 Interrupt 4 */ + Interrupt5_Handler, /* 5 Interrupt 5 */ + Interrupt6_Handler, /* 6 Interrupt 6 */ + Interrupt7_Handler, /* 7 Interrupt 7 */ + Interrupt8_Handler, /* 8 Interrupt 8 */ + Interrupt9_Handler /* 9 Interrupt 9 */ + /* Interrupts 10..31 are left out */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + Reset Handler called on controller reset + 
*----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler(void) +{ + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} + + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wmissing-noreturn" +#endif + +/*---------------------------------------------------------------------------- + Hard Fault Handler + *----------------------------------------------------------------------------*/ +void HardFault_Handler(void) +{ + while(1); +} + +/*---------------------------------------------------------------------------- + Default Handler for Exceptions / Interrupts + *----------------------------------------------------------------------------*/ +void Default_Handler(void) +{ + while(1); +} + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic pop +#endif + diff --git a/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c b/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c new file mode 100644 index 000000000..164d16da0 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c @@ -0,0 +1,69 @@ +/**************************************************************************//** + * @file system_ARMCM0plus.c + * @brief CMSIS Device System Source File for + * ARMCM0plus Device + * @version V2.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM0P) + #include "ARMCM0plus.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ +#define XTAL (50000000UL) /* Oscillator frequency */ + +#define SYSTEM_CLOCK (XTAL / 2U) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[48]; + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; /* System Core Clock Frequency */ + + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate (void) +{ + SystemCoreClock = SYSTEM_CLOCK; +} + +/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ + +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t) &(__VECTOR_TABLE[0]); +#endif + + SystemCoreClock = SYSTEM_CLOCK; +} diff --git a/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c.base@2.0.0 b/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c.base@2.0.0 new file mode 100644 index 000000000..164d16da0 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c.base@2.0.0 @@ -0,0 +1,69 @@ +/**************************************************************************//** + * @file system_ARMCM0plus.c + * @brief CMSIS Device System Source File for + * ARMCM0plus Device + * @version V2.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM0P) + #include "ARMCM0plus.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ +#define XTAL (50000000UL) /* Oscillator frequency */ + +#define SYSTEM_CLOCK (XTAL / 2U) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[48]; + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; /* System Core Clock Frequency */ + + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate (void) +{ + SystemCoreClock = SYSTEM_CLOCK; +} + +/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ + +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t) &(__VECTOR_TABLE[0]); +#endif + + SystemCoreClock = SYSTEM_CLOCK; +} diff --git a/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct b/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct new file mode 100644 index 000000000..eb67b5fe6 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct @@ -0,0 +1,80 @@ +#! armclang -E --target=arm-arm-none-eabi -mcpu=cortex-m4 -xc +; command above MUST be in first line (no comment above!) 
+ +/* +;-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- +*/ + +/*--------------------- Flash Configuration ---------------------------------- +; Flash Configuration +; Flash Base Address <0x0-0xFFFFFFFF:8> +; Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __ROM_BASE 0x00000000 +#define __ROM_SIZE 0x00080000 + +/*--------------------- Embedded RAM Configuration --------------------------- +; RAM Configuration +; RAM Base Address <0x0-0xFFFFFFFF:8> +; RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __RAM_BASE 0x20000000 +#define __RAM_SIZE 0x00040000 + +/*--------------------- Stack / Heap Configuration --------------------------- +; Stack / Heap Configuration +; Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +; Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 + +/* +;------------- <<< end of configuration section >>> --------------------------- +*/ + + +/*---------------------------------------------------------------------------- + User Stack & Heap boundary definition + *----------------------------------------------------------------------------*/ +#define __STACK_TOP (__RAM_BASE + __RAM_SIZE) /* starts at end of RAM */ +#define __HEAP_BASE (AlignExpr(+0, 8)) /* starts after RW_RAM section, 8 byte aligned */ + + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ +#define __RO_BASE __ROM_BASE +#define __RO_SIZE __ROM_SIZE + +#define __RW_BASE __RAM_BASE +#define __RW_SIZE (__RAM_SIZE - __STACK_SIZE - __HEAP_SIZE) + + +LR_ROM __RO_BASE __RO_SIZE { ; load region size_region + ER_ROM __RO_BASE __RO_SIZE { ; load address = execution address + *.o (RESET, +First) + *(InRoot$$Sections) + .ANY (+RO) + .ANY (+XO) + } + + RW_NOINIT __RW_BASE UNINIT __RW_SIZE { + *(.bss.noinit) + } + + RW_RAM AlignExpr(+0, 8) (__RW_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP __HEAP_BASE EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK __STACK_TOP EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } +} diff --git a/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct.base@1.0.0 b/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct.base@1.0.0 new file mode 100644 index 000000000..eb67b5fe6 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct.base@1.0.0 @@ -0,0 +1,80 @@ +#! armclang -E --target=arm-arm-none-eabi -mcpu=cortex-m4 -xc +; command above MUST be in first line (no comment above!) 
+ +/* +;-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- +*/ + +/*--------------------- Flash Configuration ---------------------------------- +; Flash Configuration +; Flash Base Address <0x0-0xFFFFFFFF:8> +; Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __ROM_BASE 0x00000000 +#define __ROM_SIZE 0x00080000 + +/*--------------------- Embedded RAM Configuration --------------------------- +; RAM Configuration +; RAM Base Address <0x0-0xFFFFFFFF:8> +; RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __RAM_BASE 0x20000000 +#define __RAM_SIZE 0x00040000 + +/*--------------------- Stack / Heap Configuration --------------------------- +; Stack / Heap Configuration +; Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +; Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 + +/* +;------------- <<< end of configuration section >>> --------------------------- +*/ + + +/*---------------------------------------------------------------------------- + User Stack & Heap boundary definition + *----------------------------------------------------------------------------*/ +#define __STACK_TOP (__RAM_BASE + __RAM_SIZE) /* starts at end of RAM */ +#define __HEAP_BASE (AlignExpr(+0, 8)) /* starts after RW_RAM section, 8 byte aligned */ + + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ +#define __RO_BASE __ROM_BASE +#define __RO_SIZE __ROM_SIZE + +#define __RW_BASE __RAM_BASE +#define __RW_SIZE (__RAM_SIZE - __STACK_SIZE - __HEAP_SIZE) + + +LR_ROM __RO_BASE __RO_SIZE { ; load region size_region + ER_ROM __RO_BASE __RO_SIZE { ; load address = execution address + *.o (RESET, +First) + *(InRoot$$Sections) + .ANY (+RO) + .ANY (+XO) + } + + RW_NOINIT __RW_BASE UNINIT __RW_SIZE { + *(.bss.noinit) + } + + RW_RAM AlignExpr(+0, 8) (__RW_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP __HEAP_BASE EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK __STACK_TOP EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } +} diff --git a/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld b/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld new file mode 100644 index 000000000..93ed813c8 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld @@ -0,0 +1,263 @@ +/* + *-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- + */ + +/*---------------------- Flash Configuration ---------------------------------- + Flash Configuration + Flash Base Address <0x0-0xFFFFFFFF:8> + Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__ROM_BASE = 0x00000000; +__ROM_SIZE = 0x00040000; + +/*--------------------- Embedded RAM Configuration ---------------------------- + RAM Configuration + RAM Base Address <0x0-0xFFFFFFFF:8> + RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__RAM_BASE = 0x20000000; +__RAM_SIZE = 0x00020000; + +/*--------------------- Stack / Heap Configuration ---------------------------- + Stack / Heap Configuration + 
Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> + Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__STACK_SIZE = 0x00000400; +__HEAP_SIZE = 0x00000C00; + +/* + *-------------------- <<< end of configuration section >>> ------------------- + */ + +MEMORY +{ + FLASH (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE + RAM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > FLASH + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. + LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > FLASH + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT > FLASH + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. 
+ */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM2 AT > FLASH +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM AT > RAM + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM2 AT > RAM2 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > RAM + + .stack (ORIGIN(RAM) + LENGTH(RAM) - __STACK_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM + PROVIDE(__stack = __StackTop); + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld.base@2.2.0 b/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld.base@2.2.0 new file mode 100644 index 000000000..93ed813c8 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld.base@2.2.0 @@ -0,0 +1,263 @@ +/* + *-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- + */ + +/*---------------------- Flash Configuration ---------------------------------- + Flash Configuration + Flash Base Address <0x0-0xFFFFFFFF:8> + Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__ROM_BASE = 0x00000000; +__ROM_SIZE = 0x00040000; + +/*--------------------- Embedded RAM Configuration ---------------------------- + RAM Configuration + RAM Base Address <0x0-0xFFFFFFFF:8> + RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__RAM_BASE = 0x20000000; +__RAM_SIZE = 0x00020000; + +/*--------------------- Stack / Heap Configuration ---------------------------- + Stack / Heap Configuration + Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> + Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__STACK_SIZE = 0x00000400; +__HEAP_SIZE = 0x00000C00; + +/* + *-------------------- <<< end of configuration section >>> ------------------- + */ + +MEMORY +{ + FLASH (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE + RAM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > FLASH + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. + LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > FLASH + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT > FLASH + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM2 AT > FLASH +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM AT > RAM + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . 
= ALIGN(4); + __bss2_end__ = .; + } > RAM2 AT > RAM2 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > RAM + + .stack (ORIGIN(RAM) + LENGTH(RAM) - __STACK_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM + PROVIDE(__stack = __StackTop); + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/ARMCM4/clang_linker_script.ld b/dsppp/RTE/Device/ARMCM4/clang_linker_script.ld new file mode 100644 index 000000000..40f955c16 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/clang_linker_script.ld @@ -0,0 +1,353 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) + } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + *(.gnu.linkonce.r.*) + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . = ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . 
+ 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . ); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . = ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef __HEAP_SIZE + PROVIDE (__heap_end = __heap_start + __HEAP_SIZE); + PROVIDE (__heap_size = __HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - __STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - __STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += __STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack) + .stackseal (__stack) (NOLOAD) : + { + . 
+= __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. */ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. + */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/RTE/Device/ARMCM4/regions_ARMCM4.h b/dsppp/RTE/Device/ARMCM4/regions_ARMCM4.h new file mode 100644 index 000000000..3ee4d4228 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/regions_ARMCM4.h @@ -0,0 +1,60 @@ +#ifndef REGIONS_ARMCM4_H +#define REGIONS_ARMCM4_H + + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// Device pack: ARM::Cortex_DFP@1.0.0 +// Device pack used to generate this file + +// ROM Configuration +// ======================= +// ROM=<__ROM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x00000000 +#define __ROM0_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00040000 +#define __ROM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __ROM0_DEFAULT 1 +// Startup +// Selects region to be used for startup code. +#define __ROM0_STARTUP 1 +// + +// + +// RAM Configuration +// ======================= +// RAM=<__RAM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. 
+// Default: 0x20000000 +#define __RAM0_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM0_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM0_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM0_NOINIT 0 +// + +// + +// Stack / Heap Configuration +// Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +// Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 +// + + +#endif /* REGIONS_ARMCM4_H */ diff --git a/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c b/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c new file mode 100644 index 000000000..9d5777366 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c @@ -0,0 +1,150 @@ +/****************************************************************************** + * @file startup_ARMCM4.c + * @brief CMSIS-Core(M) Device Startup File for a Cortex-M4 Device + * @version V3.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM4) + #include "ARMCM4.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; + +extern __NO_RETURN void __PROGRAM_START(void); + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler (void); + void Default_Handler(void); + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +/* Exceptions */ +void NMI_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void HardFault_Handler (void) __attribute__ ((weak)); +void MemManage_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void BusFault_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void UsageFault_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void SVC_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void DebugMon_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void PendSV_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void SysTick_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + +void Interrupt0_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt1_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt2_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt3_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt4_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt5_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt6_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt7_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt8_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt9_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[240]; + const VECTOR_TABLE_Type __VECTOR_TABLE[240] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* Reset Handler */ + NMI_Handler, /* -14 NMI Handler */ + HardFault_Handler, /* -13 Hard Fault Handler */ + MemManage_Handler, /* -12 MPU Fault Handler */ + BusFault_Handler, /* -11 Bus Fault Handler */ + UsageFault_Handler, /* -10 Usage Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5 SVC Handler */ + DebugMon_Handler, /* -4 Debug Monitor Handler */ + 0, /* Reserved */ + PendSV_Handler, /* -2 PendSV Handler */ + SysTick_Handler, /* -1 SysTick Handler */ + + /* Interrupts */ + Interrupt0_Handler, /* 0 Interrupt 0 */ + Interrupt1_Handler, /* 1 Interrupt 1 */ + Interrupt2_Handler, /* 2 Interrupt 2 */ + Interrupt3_Handler, /* 3 Interrupt 3 */ + Interrupt4_Handler, /* 4 Interrupt 4 */ + 
Interrupt5_Handler, /* 5 Interrupt 5 */ + Interrupt6_Handler, /* 6 Interrupt 6 */ + Interrupt7_Handler, /* 7 Interrupt 7 */ + Interrupt8_Handler, /* 8 Interrupt 8 */ + Interrupt9_Handler /* 9 Interrupt 9 */ + /* Interrupts 10 .. 223 are left out */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + Reset Handler called on controller reset + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler(void) +{ + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} + + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wmissing-noreturn" +#endif + +/*---------------------------------------------------------------------------- + Hard Fault Handler + *----------------------------------------------------------------------------*/ +void HardFault_Handler(void) +{ + while(1); +} + +/*---------------------------------------------------------------------------- + Default Handler for Exceptions / Interrupts + *----------------------------------------------------------------------------*/ +void Default_Handler(void) +{ + while(1); +} + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic pop +#endif + diff --git a/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c.base@3.0.0 b/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c.base@3.0.0 new file mode 100644 index 000000000..9d5777366 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c.base@3.0.0 @@ -0,0 +1,150 @@ +/****************************************************************************** + * @file startup_ARMCM4.c + * @brief CMSIS-Core(M) Device Startup File for a Cortex-M4 Device + * @version V3.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM4) + #include "ARMCM4.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; + +extern __NO_RETURN void __PROGRAM_START(void); + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler (void); + void Default_Handler(void); + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +/* Exceptions */ +void NMI_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void HardFault_Handler (void) __attribute__ ((weak)); +void MemManage_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void BusFault_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void UsageFault_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void SVC_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void DebugMon_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void PendSV_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void SysTick_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + +void Interrupt0_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt1_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt2_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt3_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt4_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt5_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt6_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt7_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt8_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt9_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[240]; + const VECTOR_TABLE_Type __VECTOR_TABLE[240] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* Reset Handler */ + NMI_Handler, /* -14 NMI Handler */ + HardFault_Handler, /* -13 Hard Fault Handler */ + MemManage_Handler, /* -12 MPU Fault Handler */ + BusFault_Handler, /* -11 Bus Fault Handler */ + UsageFault_Handler, /* -10 Usage Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5 SVC Handler */ + DebugMon_Handler, /* -4 Debug Monitor Handler */ + 0, /* Reserved */ + PendSV_Handler, /* -2 PendSV Handler */ + SysTick_Handler, /* -1 SysTick Handler */ + + /* Interrupts */ + Interrupt0_Handler, /* 0 Interrupt 0 */ + Interrupt1_Handler, /* 1 Interrupt 1 */ + Interrupt2_Handler, /* 2 Interrupt 2 */ + Interrupt3_Handler, /* 3 Interrupt 3 */ + Interrupt4_Handler, /* 4 Interrupt 4 */ + 
Interrupt5_Handler, /* 5 Interrupt 5 */ + Interrupt6_Handler, /* 6 Interrupt 6 */ + Interrupt7_Handler, /* 7 Interrupt 7 */ + Interrupt8_Handler, /* 8 Interrupt 8 */ + Interrupt9_Handler /* 9 Interrupt 9 */ + /* Interrupts 10 .. 223 are left out */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + Reset Handler called on controller reset + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler(void) +{ + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} + + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wmissing-noreturn" +#endif + +/*---------------------------------------------------------------------------- + Hard Fault Handler + *----------------------------------------------------------------------------*/ +void HardFault_Handler(void) +{ + while(1); +} + +/*---------------------------------------------------------------------------- + Default Handler for Exceptions / Interrupts + *----------------------------------------------------------------------------*/ +void Default_Handler(void) +{ + while(1); +} + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic pop +#endif + diff --git a/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c b/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c new file mode 100644 index 000000000..803d4fc3e --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c @@ -0,0 +1,79 @@ +/**************************************************************************//** + * @file system_ARMCM4.c + * @brief CMSIS Device System Source File for + * ARMCM4 Device + * @version V2.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM4) + #include "ARMCM4.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ +#define XTAL (50000000UL) /* Oscillator frequency */ + +#define SYSTEM_CLOCK (XTAL / 2U) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[240]; + + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; /* System Core Clock Frequency */ + + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate (void) +{ + SystemCoreClock = SYSTEM_CLOCK; +} + +/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ + +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t) &(__VECTOR_TABLE[0]); +#endif + +#if defined (__FPU_USED) && (__FPU_USED == 1U) + SCB->CPACR |= ((3U << 10U*2U) | /* enable CP10 Full Access */ + (3U << 11U*2U) ); /* enable CP11 Full Access */ +#endif + +#ifdef UNALIGNED_SUPPORT_DISABLE + SCB->CCR |= SCB_CCR_UNALIGN_TRP_Msk; +#endif + + SystemCoreClock = SYSTEM_CLOCK; +} diff --git a/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c.base@2.0.0 b/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c.base@2.0.0 new file mode 100644 index 000000000..803d4fc3e --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c.base@2.0.0 @@ -0,0 +1,79 @@ +/**************************************************************************//** + * @file system_ARMCM4.c + * @brief CMSIS Device System Source File for + * ARMCM4 Device + * @version V2.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM4) + #include "ARMCM4.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ +#define XTAL (50000000UL) /* Oscillator frequency */ + +#define SYSTEM_CLOCK (XTAL / 2U) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[240]; + + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; /* System Core Clock Frequency */ + + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate (void) +{ + SystemCoreClock = SYSTEM_CLOCK; +} + +/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ + +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t) &(__VECTOR_TABLE[0]); +#endif + +#if defined (__FPU_USED) && (__FPU_USED == 1U) + SCB->CPACR |= ((3U << 10U*2U) | /* enable CP10 Full Access */ + (3U << 11U*2U) ); /* enable CP11 Full Access */ +#endif + +#ifdef UNALIGNED_SUPPORT_DISABLE + SCB->CCR |= SCB_CCR_UNALIGN_TRP_Msk; +#endif + + SystemCoreClock = SYSTEM_CLOCK; +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h b/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h new file mode 100644 index 000000000..31255472f --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019-2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __RTE_DEVICE_H +#define __RTE_DEVICE_H + +// USART (Universal synchronous - asynchronous receiver transmitter) [Driver_USART0] +// Configuration settings for Driver_USART0 in component ::Drivers:USART +#define RTE_USART0 1 + +// USART (Universal synchronous - asynchronous receiver transmitter) [Driver_USART1] +// Configuration settings for Driver_USART1 in component ::Drivers:USART +#define RTE_USART1 1 + +// MPC (Memory Protection Controller) [Driver_ISRAM0_MPC] +// Configuration settings for Driver_ISRAM0_MPC in component ::Drivers:MPC +#define RTE_ISRAM0_MPC 0 + +// MPC (Memory Protection Controller) [Driver_ISRAM1_MPC] +// Configuration settings for Driver_ISRAM1_MPC in component ::Drivers:MPC +#define RTE_ISRAM1_MPC 0 + +// MPC (Memory Protection Controller) [Driver_SRAM_MPC] +// Configuration settings for Driver_SRAM_MPC in component ::Drivers:MPC +#define RTE_SRAM_MPC 0 + +// MPC (Memory Protection Controller) [Driver_QSPI_MPC] +// Configuration settings for Driver_QSPI_MPC in component ::Drivers:MPC +#define RTE_QSPI_MPC 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN0] +// Configuration settings for Driver_PPC_SSE300_MAIN0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_MAIN0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN_EXP0] +// Configuration settings for Driver_PPC_SSE300_MAIN_EXP0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_MAIN_EXP0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN_EXP1] +// Configuration settings for Driver_PPC_SSE300_MAIN_EXP1 in component ::Drivers:PPC +#define RTE_PPC_SSE300_MAIN_EXP1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH0] +// Configuration settings for Driver_PPC_SSE300_PERIPH0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH1] +// Configuration settings for Driver_PPC_SSE300_PERIPH1 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP0] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP1] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP1 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP2] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP2 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP2 0 + +// Flash device emulated by SRAM [Driver_Flash0] +// Configuration settings for Driver_Flash0 in component ::Drivers:Flash +#define RTE_FLASH0 1 + +// I2C SBCon [Driver_I2C0] +// Configuration settings for Driver_I2C0 in component ::Drivers:I2C +#define RTE_I2C0 1 + +#endif /* __RTE_DEVICE_H */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h.base@1.1.0 b/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h.base@1.1.0 new file mode 100644 index 000000000..31255472f --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h.base@1.1.0 @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019-2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __RTE_DEVICE_H +#define __RTE_DEVICE_H + +// USART (Universal synchronous - asynchronous receiver transmitter) [Driver_USART0] +// Configuration settings for Driver_USART0 in component ::Drivers:USART +#define RTE_USART0 1 + +// USART (Universal synchronous - asynchronous receiver transmitter) [Driver_USART1] +// Configuration settings for Driver_USART1 in component ::Drivers:USART +#define RTE_USART1 1 + +// MPC (Memory Protection Controller) [Driver_ISRAM0_MPC] +// Configuration settings for Driver_ISRAM0_MPC in component ::Drivers:MPC +#define RTE_ISRAM0_MPC 0 + +// MPC (Memory Protection Controller) [Driver_ISRAM1_MPC] +// Configuration settings for Driver_ISRAM1_MPC in component ::Drivers:MPC +#define RTE_ISRAM1_MPC 0 + +// MPC (Memory Protection Controller) [Driver_SRAM_MPC] +// Configuration settings for Driver_SRAM_MPC in component ::Drivers:MPC +#define RTE_SRAM_MPC 0 + +// MPC (Memory Protection Controller) [Driver_QSPI_MPC] +// Configuration settings for Driver_QSPI_MPC in component ::Drivers:MPC +#define RTE_QSPI_MPC 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN0] +// Configuration settings for Driver_PPC_SSE300_MAIN0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_MAIN0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN_EXP0] +// Configuration settings for Driver_PPC_SSE300_MAIN_EXP0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_MAIN_EXP0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN_EXP1] +// Configuration settings for Driver_PPC_SSE300_MAIN_EXP1 in component ::Drivers:PPC +#define RTE_PPC_SSE300_MAIN_EXP1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH0] +// Configuration settings for Driver_PPC_SSE300_PERIPH0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH1] +// Configuration settings for Driver_PPC_SSE300_PERIPH1 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP0] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP1] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP1 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP2] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP2 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP2 0 + +// Flash device emulated by SRAM [Driver_Flash0] +// Configuration settings for Driver_Flash0 in component ::Drivers:Flash +#define RTE_FLASH0 1 + +// I2C SBCon [Driver_I2C0] +// Configuration settings for Driver_I2C0 in component ::Drivers:I2C +#define RTE_I2C0 1 + +#endif /* __RTE_DEVICE_H */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h b/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h new file mode 100644 index 000000000..bfc348f47 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h @@ -0,0 +1,25 
@@ +/* + * Copyright (c) 2019-2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CMSIS_DRIVER_CONFIG_H__ +#define __CMSIS_DRIVER_CONFIG_H__ + +#include "system_SSE300MPS3.h" +#include "device_cfg.h" +#include "device_definition.h" +#include "platform_base_address.h" + +#endif /* __CMSIS_DRIVER_CONFIG_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h.base@1.1.1 b/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h.base@1.1.1 new file mode 100644 index 000000000..bfc348f47 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h.base@1.1.1 @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2019-2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CMSIS_DRIVER_CONFIG_H__ +#define __CMSIS_DRIVER_CONFIG_H__ + +#include "system_SSE300MPS3.h" +#include "device_cfg.h" +#include "device_definition.h" +#include "platform_base_address.h" + +#endif /* __CMSIS_DRIVER_CONFIG_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h b/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h new file mode 100644 index 000000000..2ff3eaa77 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020-2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DEVICE_CFG_H__ +#define __DEVICE_CFG_H__ + +/** + * \file device_cfg.h + * \brief Configuration file native driver re-targeting + * + * \details This file can be used to add native driver specific macro + * definitions to select which peripherals are available in the build. + * + * This is a default device configuration file with all peripherals enabled. 
+ */ + +/* Secure only peripheral configuration */ + +/* ARM MPS3 IO SCC */ +#define MPS3_IO_S +#define MPS3_IO_DEV MPS3_IO_DEV_S + +/* I2C_SBCon */ +#define I2C0_SBCON_S +#define I2C0_SBCON_DEV I2C0_SBCON_DEV_S + +/* I2S */ +#define MPS3_I2S_S +#define MPS3_I2S_DEV MPS3_I2S_DEV_S + +/* ARM UART Controller PL011 */ +#define UART0_CMSDK_S +#define UART0_CMSDK_DEV UART0_CMSDK_DEV_S +#define UART1_CMSDK_S +#define UART1_CMSDK_DEV UART1_CMSDK_DEV_S + +#define DEFAULT_UART_BAUDRATE 115200U + +/* To be used as CODE and DATA sram */ +#define MPC_ISRAM0_S +#define MPC_ISRAM0_DEV MPC_ISRAM0_DEV_S + +#define MPC_ISRAM1_S +#define MPC_ISRAM1_DEV MPC_ISRAM0_DEV_S + +#define MPC_SRAM_S +#define MPC_SRAM_DEV MPC_SRAM_DEV_S + +#define MPC_QSPI_S +#define MPC_QSPI_DEV MPC_QSPI_DEV_S + +/** System Counter Armv8-M */ +#define SYSCOUNTER_CNTRL_ARMV8_M_S +#define SYSCOUNTER_CNTRL_ARMV8_M_DEV SYSCOUNTER_CNTRL_ARMV8_M_DEV_S + +#define SYSCOUNTER_READ_ARMV8_M_S +#define SYSCOUNTER_READ_ARMV8_M_DEV SYSCOUNTER_READ_ARMV8_M_DEV_S +/** + * Arbitrary scaling values for test purposes + */ +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE0_INT 1u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE0_FRACT 0u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE1_INT 1u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE1_FRACT 0u + +/* System timer */ +#define SYSTIMER0_ARMV8_M_S +#define SYSTIMER0_ARMV8_M_DEV SYSTIMER0_ARMV8_M_DEV_S +#define SYSTIMER1_ARMV8_M_S +#define SYSTIMER1_ARMV8_M_DEV SYSTIMER1_ARMV8_M_DEV_S +#define SYSTIMER2_ARMV8_M_S +#define SYSTIMER2_ARMV8_M_DEV SYSTIMER2_ARMV8_M_DEV_S +#define SYSTIMER3_ARMV8_M_S +#define SYSTIMER3_ARMV8_M_DEV SYSTIMER3_ARMV8_M_DEV_S + +#define SYSTIMER0_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER1_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER2_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER3_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) + +/* CMSDK GPIO driver structures */ +#define GPIO0_CMSDK_S +#define GPIO0_CMSDK_DEV GPIO0_CMSDK_DEV_S +#define GPIO1_CMSDK_S +#define GPIO1_CMSDK_DEV GPIO1_CMSDK_DEV_S +#define GPIO2_CMSDK_S +#define GPIO2_CMSDK_DEV GPIO2_CMSDK_DEV_S +#define GPIO3_CMSDK_S +#define GPIO3_CMSDK_DEV GPIO3_CMSDK_DEV_S + +/* System Watchdogs */ +#define SYSWDOG_ARMV8_M_S +#define SYSWDOG_ARMV8_M_DEV SYSWDOG_ARMV8_M_DEV_S + +/* ARM MPC SIE 300 driver structures */ +#define MPC_VM0_S +#define MPC_VM0_DEV MPC_VM0_DEV_S +#define MPC_VM1_S +#define MPC_VM1_DEV MPC_VM1_DEV_S +#define MPC_SSRAM2_S +#define MPC_SSRAM2_DEV MPC_SSRAM2_DEV_S +#define MPC_SSRAM3_S +#define MPC_SSRAM3_DEV MPC_SSRAM3_DEV_S + +/* ARM PPC driver structures */ +#define PPC_SSE300_MAIN0_S +#define PPC_SSE300_MAIN0_DEV PPC_SSE300_MAIN0_DEV_S +#define PPC_SSE300_MAIN_EXP0_S +#define PPC_SSE300_MAIN_EXP0_DEV PPC_SSE300_MAIN_EXP0_DEV_S +#define PPC_SSE300_MAIN_EXP1_S +#define PPC_SSE300_MAIN_EXP1_DEV PPC_SSE300_MAIN_EXP1_DEV_S +#define PPC_SSE300_MAIN_EXP2_S +#define PPC_SSE300_MAIN_EXP2_DEV PPC_SSE300_MAIN_EXP2_DEV_S +#define PPC_SSE300_MAIN_EXP3_S +#define PPC_SSE300_MAIN_EXP3_DEV PPC_SSE300_MAIN_EXP3_DEV_S +#define PPC_SSE300_PERIPH0_S +#define PPC_SSE300_PERIPH0_DEV PPC_SSE300_PERIPH0_DEV_S +#define PPC_SSE300_PERIPH1_S +#define PPC_SSE300_PERIPH1_DEV PPC_SSE300_PERIPH1_DEV_S +#define PPC_SSE300_PERIPH_EXP0_S +#define PPC_SSE300_PERIPH_EXP0_DEV PPC_SSE300_PERIPH_EXP0_DEV_S +#define PPC_SSE300_PERIPH_EXP1_S +#define PPC_SSE300_PERIPH_EXP1_DEV PPC_SSE300_PERIPH_EXP1_DEV_S +#define PPC_SSE300_PERIPH_EXP2_S +#define PPC_SSE300_PERIPH_EXP2_DEV PPC_SSE300_PERIPH_EXP2_DEV_S +#define PPC_SSE300_PERIPH_EXP3_S +#define 
PPC_SSE300_PERIPH_EXP3_DEV PPC_SSE300_PERIPH_EXP3_DEV_S + +/* ARM SPI PL022 */ +/* Invalid device stubs are not defined */ +#define DEFAULT_SPI_SPEED_HZ 4000000U /* 4MHz */ +#define SPI1_PL022_S +#define SPI1_PL022_DEV SPI1_PL022_DEV_S + + +#endif /* __DEVICE_CFG_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h.base@1.1.3 b/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h.base@1.1.3 new file mode 100644 index 000000000..2ff3eaa77 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h.base@1.1.3 @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020-2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DEVICE_CFG_H__ +#define __DEVICE_CFG_H__ + +/** + * \file device_cfg.h + * \brief Configuration file native driver re-targeting + * + * \details This file can be used to add native driver specific macro + * definitions to select which peripherals are available in the build. + * + * This is a default device configuration file with all peripherals enabled. + */ + +/* Secure only peripheral configuration */ + +/* ARM MPS3 IO SCC */ +#define MPS3_IO_S +#define MPS3_IO_DEV MPS3_IO_DEV_S + +/* I2C_SBCon */ +#define I2C0_SBCON_S +#define I2C0_SBCON_DEV I2C0_SBCON_DEV_S + +/* I2S */ +#define MPS3_I2S_S +#define MPS3_I2S_DEV MPS3_I2S_DEV_S + +/* ARM UART Controller PL011 */ +#define UART0_CMSDK_S +#define UART0_CMSDK_DEV UART0_CMSDK_DEV_S +#define UART1_CMSDK_S +#define UART1_CMSDK_DEV UART1_CMSDK_DEV_S + +#define DEFAULT_UART_BAUDRATE 115200U + +/* To be used as CODE and DATA sram */ +#define MPC_ISRAM0_S +#define MPC_ISRAM0_DEV MPC_ISRAM0_DEV_S + +#define MPC_ISRAM1_S +#define MPC_ISRAM1_DEV MPC_ISRAM0_DEV_S + +#define MPC_SRAM_S +#define MPC_SRAM_DEV MPC_SRAM_DEV_S + +#define MPC_QSPI_S +#define MPC_QSPI_DEV MPC_QSPI_DEV_S + +/** System Counter Armv8-M */ +#define SYSCOUNTER_CNTRL_ARMV8_M_S +#define SYSCOUNTER_CNTRL_ARMV8_M_DEV SYSCOUNTER_CNTRL_ARMV8_M_DEV_S + +#define SYSCOUNTER_READ_ARMV8_M_S +#define SYSCOUNTER_READ_ARMV8_M_DEV SYSCOUNTER_READ_ARMV8_M_DEV_S +/** + * Arbitrary scaling values for test purposes + */ +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE0_INT 1u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE0_FRACT 0u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE1_INT 1u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE1_FRACT 0u + +/* System timer */ +#define SYSTIMER0_ARMV8_M_S +#define SYSTIMER0_ARMV8_M_DEV SYSTIMER0_ARMV8_M_DEV_S +#define SYSTIMER1_ARMV8_M_S +#define SYSTIMER1_ARMV8_M_DEV SYSTIMER1_ARMV8_M_DEV_S +#define SYSTIMER2_ARMV8_M_S +#define SYSTIMER2_ARMV8_M_DEV SYSTIMER2_ARMV8_M_DEV_S +#define SYSTIMER3_ARMV8_M_S +#define SYSTIMER3_ARMV8_M_DEV SYSTIMER3_ARMV8_M_DEV_S + +#define SYSTIMER0_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER1_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER2_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER3_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) + +/* CMSDK GPIO driver structures */ +#define GPIO0_CMSDK_S +#define GPIO0_CMSDK_DEV GPIO0_CMSDK_DEV_S +#define GPIO1_CMSDK_S +#define GPIO1_CMSDK_DEV 
GPIO1_CMSDK_DEV_S +#define GPIO2_CMSDK_S +#define GPIO2_CMSDK_DEV GPIO2_CMSDK_DEV_S +#define GPIO3_CMSDK_S +#define GPIO3_CMSDK_DEV GPIO3_CMSDK_DEV_S + +/* System Watchdogs */ +#define SYSWDOG_ARMV8_M_S +#define SYSWDOG_ARMV8_M_DEV SYSWDOG_ARMV8_M_DEV_S + +/* ARM MPC SIE 300 driver structures */ +#define MPC_VM0_S +#define MPC_VM0_DEV MPC_VM0_DEV_S +#define MPC_VM1_S +#define MPC_VM1_DEV MPC_VM1_DEV_S +#define MPC_SSRAM2_S +#define MPC_SSRAM2_DEV MPC_SSRAM2_DEV_S +#define MPC_SSRAM3_S +#define MPC_SSRAM3_DEV MPC_SSRAM3_DEV_S + +/* ARM PPC driver structures */ +#define PPC_SSE300_MAIN0_S +#define PPC_SSE300_MAIN0_DEV PPC_SSE300_MAIN0_DEV_S +#define PPC_SSE300_MAIN_EXP0_S +#define PPC_SSE300_MAIN_EXP0_DEV PPC_SSE300_MAIN_EXP0_DEV_S +#define PPC_SSE300_MAIN_EXP1_S +#define PPC_SSE300_MAIN_EXP1_DEV PPC_SSE300_MAIN_EXP1_DEV_S +#define PPC_SSE300_MAIN_EXP2_S +#define PPC_SSE300_MAIN_EXP2_DEV PPC_SSE300_MAIN_EXP2_DEV_S +#define PPC_SSE300_MAIN_EXP3_S +#define PPC_SSE300_MAIN_EXP3_DEV PPC_SSE300_MAIN_EXP3_DEV_S +#define PPC_SSE300_PERIPH0_S +#define PPC_SSE300_PERIPH0_DEV PPC_SSE300_PERIPH0_DEV_S +#define PPC_SSE300_PERIPH1_S +#define PPC_SSE300_PERIPH1_DEV PPC_SSE300_PERIPH1_DEV_S +#define PPC_SSE300_PERIPH_EXP0_S +#define PPC_SSE300_PERIPH_EXP0_DEV PPC_SSE300_PERIPH_EXP0_DEV_S +#define PPC_SSE300_PERIPH_EXP1_S +#define PPC_SSE300_PERIPH_EXP1_DEV PPC_SSE300_PERIPH_EXP1_DEV_S +#define PPC_SSE300_PERIPH_EXP2_S +#define PPC_SSE300_PERIPH_EXP2_DEV PPC_SSE300_PERIPH_EXP2_DEV_S +#define PPC_SSE300_PERIPH_EXP3_S +#define PPC_SSE300_PERIPH_EXP3_DEV PPC_SSE300_PERIPH_EXP3_DEV_S + +/* ARM SPI PL022 */ +/* Invalid device stubs are not defined */ +#define DEFAULT_SPI_SPEED_HZ 4000000U /* 4MHz */ +#define SPI1_PL022_S +#define SPI1_PL022_DEV SPI1_PL022_DEV_S + + +#endif /* __DEVICE_CFG_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld new file mode 100644 index 000000000..5c64ad4f1 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld @@ -0,0 +1,242 @@ +;/* +; * Copyright (c) 2009-2023 Arm Limited +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; */ + +/* Linker script to configure memory regions. */ +/* This file will be run trough the pre-processor. */ + +#include "region_defs.h" + +MEMORY +{ + FLASH (rx) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE + RAM (rw) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE +} + +__heap_size__ = HEAP_SIZE; +__stack_size__ = STACK_SIZE; + +/* Library configurations */ +GROUP(libgcc.a libc.a libm.a libnosys.a) + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapBase + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + * __Vectors_End + * __Vectors_Size + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + __Vectors_End = .; + __Vectors_Size = __Vectors_End - __Vectors; + __end__ = .; + + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + /* + * Place the CMSE Veneers (containing the SG instruction) after the code, in a + * separate 32 bytes aligned region so that the SAU can programmed to just set + * this region as Non-Secure Callable. The maximum size of this executable + * region makes it only used the space left over by the ER_CODE region + * so that you can rely on code+veneer size combined will not exceed the + * S_CODE_SIZE value. We also substract from the available space the + * area used to align this section on 32 bytes boundary (for SAU conf). + */ + .gnu.sgstubs : ALIGN(32) + { + *(.gnu.sgstubs*) + } > FLASH + . = ALIGN(32); + Image$$ER_CODE_CMSE_VENEER$$Base = ADDR(.gnu.sgstubs); + Image$$ER_CODE_CMSE_VENEER$$Limit = .; + Image$$ER_CODE_CMSE_VENEER$$Length = Image$$ER_CODE_CMSE_VENEER$$Limit - Image$$ER_CODE_CMSE_VENEER$$Base; + + /* Make sure veneers fit into code memory */ + ASSERT(((S_CODE_START + S_CODE_SIZE) > Image$$ER_CODE_CMSE_VENEER$$Limit), "Veneer region does not fit into code memory") + + .ARM.extab : ALIGN(32) + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + /* To copy multiple ROM to RAM sections, + * define etext2/data2_start/data2_end and + * define __STARTUP_COPY_MULTIPLE in startup_cmsdk_mps2_sse_200.S */ + .copy.table : ALIGN(4) + { + __copy_table_start__ = .; + LONG (__etext) + LONG (__data_start__) + LONG ((__data_end__ - __data_start__) / 4) + LONG (DEFINED(__etext2) ? __etext2 : 0) + LONG (DEFINED(__data2_start__) ? __data2_start__ : 0) + LONG (DEFINED(__data2_start__) ? ((__data2_end__ - __data2_start__) / 4) : 0) + __copy_table_end__ = .; + } > FLASH + + /* To clear multiple BSS sections, + * uncomment .zero.table section and, + * define __STARTUP_CLEAR_BSS_MULTIPLE in startup_ARMCMx.S */ + .zero.table : ALIGN(4) + { + __zero_table_start__ = .; + LONG (__bss_start__) + LONG ((__bss_end__ - __bss_start__) / 4) + LONG (DEFINED(__bss2_start__) ? __bss2_start__ : 0) + LONG (DEFINED(__bss2_start__) ? ((__bss2_end__ - __bss2_start__) / 4) : 0) + __zero_table_end__ = .; + } > FLASH + + __etext = ALIGN(4); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data*) + + . 
= ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT> FLASH + + .bss : ALIGN(4) + { + __bss_start__ = .; + *(.bss*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM + + bss_size = __bss_end__ - __bss_start__; + + .stack : ALIGN(8) + { + __StackLimit = .; + KEEP(*(.stack*)) + . += __stack_size__ - 0x8; + __StackTop = .; + } > RAM + + .msp_stack_seal_res : + { + . += 0x8; + } > RAM + __StackSeal = ADDR(.msp_stack_seal_res); + + .heap : ALIGN(8) + { + __end__ = .; + PROVIDE(end = .); + __HeapBase = .; + . += __heap_size__; + __HeapLimit = .; + __heap_limit = .; /* Add for _sbrk */ + } > RAM + + /* Set stack top to end of the used RAM section, and stack limit move down by + * size of stack_dummy section */ + PROVIDE(__stack = __StackTop); + + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackTop <= (S_DATA_START + S_DATA_SIZE), "Secure RAM region overflowed") +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld.base@1.0.0 b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld.base@1.0.0 new file mode 100644 index 000000000..ff09e8e31 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld.base@1.0.0 @@ -0,0 +1,242 @@ +;/* +; * Copyright (c) 2009-2023 Arm Limited +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; */ + +/* Linker script to configure memory regions. */ +/* This file will be run trough the pre-processor. */ + +#include "region_defs.h" + +MEMORY +{ + FLASH (rx) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE + RAM (rwx) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE +} + +__heap_size__ = HEAP_SIZE; +__stack_size__ = STACK_SIZE; + +/* Library configurations */ +GROUP(libgcc.a libc.a libm.a libnosys.a) + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapBase + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + * __Vectors_End + * __Vectors_Size + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + __Vectors_End = .; + __Vectors_Size = __Vectors_End - __Vectors; + __end__ = .; + + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + /* + * Place the CMSE Veneers (containing the SG instruction) after the code, in a + * separate 32 bytes aligned region so that the SAU can programmed to just set + * this region as Non-Secure Callable. The maximum size of this executable + * region makes it only used the space left over by the ER_CODE region + * so that you can rely on code+veneer size combined will not exceed the + * S_CODE_SIZE value. We also substract from the available space the + * area used to align this section on 32 bytes boundary (for SAU conf). + */ + .gnu.sgstubs : ALIGN(32) + { + *(.gnu.sgstubs*) + } > FLASH + . = ALIGN(32); + Image$$ER_CODE_CMSE_VENEER$$Base = ADDR(.gnu.sgstubs); + Image$$ER_CODE_CMSE_VENEER$$Limit = .; + Image$$ER_CODE_CMSE_VENEER$$Length = Image$$ER_CODE_CMSE_VENEER$$Limit - Image$$ER_CODE_CMSE_VENEER$$Base; + + /* Make sure veneers fit into code memory */ + ASSERT(((S_CODE_START + S_CODE_SIZE) > Image$$ER_CODE_CMSE_VENEER$$Limit), "Veneer region does not fit into code memory") + + .ARM.extab : ALIGN(32) + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + /* To copy multiple ROM to RAM sections, + * define etext2/data2_start/data2_end and + * define __STARTUP_COPY_MULTIPLE in startup_cmsdk_mps2_sse_200.S */ + .copy.table : ALIGN(4) + { + __copy_table_start__ = .; + LONG (__etext) + LONG (__data_start__) + LONG ((__data_end__ - __data_start__) / 4) + LONG (DEFINED(__etext2) ? __etext2 : 0) + LONG (DEFINED(__data2_start__) ? __data2_start__ : 0) + LONG (DEFINED(__data2_start__) ? ((__data2_end__ - __data2_start__) / 4) : 0) + __copy_table_end__ = .; + } > FLASH + + /* To clear multiple BSS sections, + * uncomment .zero.table section and, + * define __STARTUP_CLEAR_BSS_MULTIPLE in startup_ARMCMx.S */ + .zero.table : ALIGN(4) + { + __zero_table_start__ = .; + LONG (__bss_start__) + LONG ((__bss_end__ - __bss_start__) / 4) + LONG (DEFINED(__bss2_start__) ? __bss2_start__ : 0) + LONG (DEFINED(__bss2_start__) ? ((__bss2_end__ - __bss2_start__) / 4) : 0) + __zero_table_end__ = .; + } > FLASH + + __etext = ALIGN(4); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data*) + + . 
= ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT> FLASH + + .bss : ALIGN(4) + { + __bss_start__ = .; + *(.bss*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM + + bss_size = __bss_end__ - __bss_start__; + + .stack : ALIGN(8) + { + __StackLimit = .; + KEEP(*(.stack*)) + . += __stack_size__ - 0x8; + __StackTop = .; + } > RAM + + .msp_stack_seal_res : + { + . += 0x8; + } > RAM + __StackSeal = ADDR(.msp_stack_seal_res); + + .heap : ALIGN(8) + { + __end__ = .; + PROVIDE(end = .); + __HeapBase = .; + . += __heap_size__; + __HeapLimit = .; + __heap_limit = .; /* Add for _sbrk */ + } > RAM + + /* Set stack top to end of the used RAM section, and stack limit move down by + * size of stack_dummy section */ + PROVIDE(__stack = __StackTop); + + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackTop <= (S_DATA_START + S_DATA_SIZE), "Secure RAM region overflowed") +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct new file mode 100644 index 000000000..8b95c189d --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct @@ -0,0 +1,62 @@ +#! armclang --target=arm-arm-none-eabi -march=armv8.1-m.main -E -xc + +;/* +; * Copyright (c) 2018-2023 Arm Limited +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; * +; */ + +#include "region_defs.h" + +LR_CODE S_CODE_START { + ER_CODE S_CODE_START { + *.o (RESET +First) + .ANY (+RO) + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + CODE_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(CODE_WATERMARK) <= S_CODE_START + S_CODE_SIZE) + + ER_DATA S_DATA_START { + .ANY (+ZI +RW) + } + + #if HEAP_SIZE > 0 + ARM_LIB_HEAP +0 ALIGN 8 EMPTY HEAP_SIZE { ; Reserve empty region for heap + } + #endif + + ARM_LIB_STACK +0 ALIGN 32 EMPTY STACK_SIZE - 0x8 { ; Reserve empty region for stack + } + + STACKSEAL +0 EMPTY 0x8 { + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + SRAM_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. 
+ */ + ScatterAssert(ImageLimit(SRAM_WATERMARK) <= S_DATA_START + S_DATA_SIZE) +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct.base@1.1.0 b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct.base@1.1.0 new file mode 100644 index 000000000..8b95c189d --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct.base@1.1.0 @@ -0,0 +1,62 @@ +#! armclang --target=arm-arm-none-eabi -march=armv8.1-m.main -E -xc + +;/* +; * Copyright (c) 2018-2023 Arm Limited +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; * +; */ + +#include "region_defs.h" + +LR_CODE S_CODE_START { + ER_CODE S_CODE_START { + *.o (RESET +First) + .ANY (+RO) + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + CODE_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(CODE_WATERMARK) <= S_CODE_START + S_CODE_SIZE) + + ER_DATA S_DATA_START { + .ANY (+ZI +RW) + } + + #if HEAP_SIZE > 0 + ARM_LIB_HEAP +0 ALIGN 8 EMPTY HEAP_SIZE { ; Reserve empty region for heap + } + #endif + + ARM_LIB_STACK +0 ALIGN 32 EMPTY STACK_SIZE - 0x8 { ; Reserve empty region for stack + } + + STACKSEAL +0 EMPTY 0x8 { + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + SRAM_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(SRAM_WATERMARK) <= S_DATA_START + S_DATA_SIZE) +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h b/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h new file mode 100644 index 000000000..32ac16b37 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __REGION_DEFS_H__ +#define __REGION_DEFS_H__ + +#include "region_limits.h" + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. + * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. 
+ * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure regions */ +#define S_CODE_START ( S_ROM_ALIAS ) +#define S_CODE_SIZE ( TOTAL_S_ROM_SIZE ) +#define S_CODE_LIMIT ( S_CODE_START + S_CODE_SIZE ) + +#define S_DATA_START ( S_RAM_ALIAS ) +#define S_DATA_SIZE ( TOTAL_S_RAM_SIZE ) +#define S_DATA_LIMIT ( S_DATA_START + S_DATA_SIZE ) + +#define S_DDR4_START ( S_DDR4_ALIAS ) +#define S_DDR4_SIZE ( TOTAL_S_DDR4_SIZE ) +#define S_DDR4_LIMIT ( S_DDR4_START + S_DDR4_SIZE ) + +#endif /* __REGION_DEFS_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h.base@1.0.0 b/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h.base@1.0.0 new file mode 100644 index 000000000..32ac16b37 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h.base@1.0.0 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __REGION_DEFS_H__ +#define __REGION_DEFS_H__ + +#include "region_limits.h" + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. + * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. + * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure regions */ +#define S_CODE_START ( S_ROM_ALIAS ) +#define S_CODE_SIZE ( TOTAL_S_ROM_SIZE ) +#define S_CODE_LIMIT ( S_CODE_START + S_CODE_SIZE ) + +#define S_DATA_START ( S_RAM_ALIAS ) +#define S_DATA_SIZE ( TOTAL_S_RAM_SIZE ) +#define S_DATA_LIMIT ( S_DATA_START + S_DATA_SIZE ) + +#define S_DDR4_START ( S_DDR4_ALIAS ) +#define S_DDR4_SIZE ( TOTAL_S_DDR4_SIZE ) +#define S_DDR4_LIMIT ( S_DDR4_START + S_DDR4_SIZE ) + +#endif /* __REGION_DEFS_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h b/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h new file mode 100644 index 000000000..0d600a363 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __REGION_LIMITS_H__ +#define __REGION_LIMITS_H__ + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. + * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. + * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure Code */ +#define S_ROM_ALIAS (0x10000000) /* ITCM_BASE_S */ +#define TOTAL_S_ROM_SIZE (0x00080000) /* 512 kB */ + +/* Secure Data */ +#define S_RAM_ALIAS (0x30000000) /* DTCM_BASE_S */ +#define TOTAL_S_RAM_SIZE (0x00080000) /* 512 kB */ + +/* Secure DDR4 */ +#define S_DDR4_ALIAS (0x70000000) /* DDR4_BLK1_BASE_S */ +#define TOTAL_S_DDR4_SIZE (0x10000000) /* 256 MB */ + +/* Heap and Stack sizes for secure and nonsecure applications */ +#define HEAP_SIZE (0x00038000) /* 224 KiB */ +#define STACK_SIZE (0x00002000) /* 8 KiB */ + +#endif /* __REGION_LIMITS_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h.base@1.0.0 b/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h.base@1.0.0 new file mode 100644 index 000000000..e7897866a --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h.base@1.0.0 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __REGION_LIMITS_H__ +#define __REGION_LIMITS_H__ + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. + * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. + * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure Code */ +#define S_ROM_ALIAS (0x10000000) /* ITCM_BASE_S */ +#define TOTAL_S_ROM_SIZE (0x00080000) /* 512 kB */ + +/* Secure Data */ +#define S_RAM_ALIAS (0x30000000) /* DTCM_BASE_S */ +#define TOTAL_S_RAM_SIZE (0x00080000) /* 512 kB */ + +/* Secure DDR4 */ +#define S_DDR4_ALIAS (0x70000000) /* DDR4_BLK1_BASE_S */ +#define TOTAL_S_DDR4_SIZE (0x10000000) /* 256 MB */ + +/* Heap and Stack sizes for secure and nonsecure applications */ +#define HEAP_SIZE (0x00000400) /* 1 KiB */ +#define STACK_SIZE (0x00000400) /* 1 KiB */ + +#endif /* __REGION_LIMITS_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c b/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c new file mode 100644 index 000000000..72b39ca55 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2022-2023 Arm Limited. All rights reserved.
+ * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file is derivative of CMSIS V5.9.0 startup_ARMCM55.c + * Git SHA: 2b7495b8535bdcb306dac29b9ded4cfb679d7e5c + */ + +#include "SSE300MPS3.h" + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; +extern uint32_t __STACK_LIMIT; +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +extern uint64_t __STACK_SEAL; +#endif + +extern void __PROGRAM_START(void) __NO_RETURN; + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +void Reset_Handler (void) __NO_RETURN; + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +#define DEFAULT_IRQ_HANDLER(handler_name) \ +void __WEAK handler_name(void) __NO_RETURN; \ +void handler_name(void) { \ + while(1); \ +} + +/* Exceptions */ +DEFAULT_IRQ_HANDLER(NMI_Handler) +DEFAULT_IRQ_HANDLER(HardFault_Handler) +DEFAULT_IRQ_HANDLER(MemManage_Handler) +DEFAULT_IRQ_HANDLER(BusFault_Handler) +DEFAULT_IRQ_HANDLER(UsageFault_Handler) +DEFAULT_IRQ_HANDLER(SecureFault_Handler) +DEFAULT_IRQ_HANDLER(SVC_Handler) +DEFAULT_IRQ_HANDLER(DebugMon_Handler) +DEFAULT_IRQ_HANDLER(PendSV_Handler) +DEFAULT_IRQ_HANDLER(SysTick_Handler) + +DEFAULT_IRQ_HANDLER(NONSEC_WATCHDOG_RESET_REQ_Handler) +DEFAULT_IRQ_HANDLER(NONSEC_WATCHDOG_Handler) +DEFAULT_IRQ_HANDLER(SLOWCLK_Timer_Handler) +DEFAULT_IRQ_HANDLER(TFM_TIMER0_IRQ_Handler) +DEFAULT_IRQ_HANDLER(TIMER1_Handler) +DEFAULT_IRQ_HANDLER(TIMER2_Handler) +DEFAULT_IRQ_HANDLER(MPC_Handler) +DEFAULT_IRQ_HANDLER(PPC_Handler) +DEFAULT_IRQ_HANDLER(MSC_Handler) +DEFAULT_IRQ_HANDLER(BRIDGE_ERROR_Handler) +DEFAULT_IRQ_HANDLER(MGMT_PPU_Handler) +DEFAULT_IRQ_HANDLER(SYS_PPU_Handler) +DEFAULT_IRQ_HANDLER(CPU0_PPU_Handler) +DEFAULT_IRQ_HANDLER(DEBUG_PPU_Handler) +DEFAULT_IRQ_HANDLER(TIMER3_AON_Handler) +DEFAULT_IRQ_HANDLER(CPU0_CTI_0_Handler) +DEFAULT_IRQ_HANDLER(CPU0_CTI_1_Handler) + +DEFAULT_IRQ_HANDLER(System_Timestamp_Counter_Handler) +DEFAULT_IRQ_HANDLER(UARTRX0_Handler) +DEFAULT_IRQ_HANDLER(UARTTX0_Handler) +DEFAULT_IRQ_HANDLER(UARTRX1_Handler) +DEFAULT_IRQ_HANDLER(UARTTX1_Handler) +DEFAULT_IRQ_HANDLER(UARTRX2_Handler) +DEFAULT_IRQ_HANDLER(UARTTX2_Handler) +DEFAULT_IRQ_HANDLER(UARTRX3_Handler) +DEFAULT_IRQ_HANDLER(UARTTX3_Handler) +DEFAULT_IRQ_HANDLER(UARTRX4_Handler) +DEFAULT_IRQ_HANDLER(UARTTX4_Handler) +DEFAULT_IRQ_HANDLER(UART0_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART1_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART2_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART3_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART4_Combined_Handler) +DEFAULT_IRQ_HANDLER(UARTOVF_Handler) +DEFAULT_IRQ_HANDLER(ETHERNET_Handler) 
+DEFAULT_IRQ_HANDLER(I2S_Handler) +DEFAULT_IRQ_HANDLER(TOUCH_SCREEN_Handler) +DEFAULT_IRQ_HANDLER(USB_Handler) +DEFAULT_IRQ_HANDLER(SPI_ADC_Handler) +DEFAULT_IRQ_HANDLER(SPI_SHIELD0_Handler) +DEFAULT_IRQ_HANDLER(SPI_SHIELD1_Handler) +DEFAULT_IRQ_HANDLER(ETHOS_U55_Handler) +#ifdef CORSTONE300_AN547 +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Combined_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Combined_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_3_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_3_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_3_Combined_Handler) +#endif +DEFAULT_IRQ_HANDLER(GPIO0_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_3_Handler) +DEFAULT_IRQ_HANDLER(UARTRX5_Handler) +DEFAULT_IRQ_HANDLER(UARTTX5_Handler) +DEFAULT_IRQ_HANDLER(UART5_Handler) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[]; + const VECTOR_TABLE_Type __VECTOR_TABLE[] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* 
Reset Handler */ + NMI_Handler, /* -14: NMI Handler */ + HardFault_Handler, /* -13: Hard Fault Handler */ + MemManage_Handler, /* -12: MPU Fault Handler */ + BusFault_Handler, /* -11: Bus Fault Handler */ + UsageFault_Handler, /* -10: Usage Fault Handler */ + SecureFault_Handler, /* -9: Secure Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5: SVCall Handler */ + DebugMon_Handler, /* -4: Debug Monitor Handler */ + 0, /* Reserved */ + PendSV_Handler, /* -2: PendSV Handler */ + SysTick_Handler, /* -1: SysTick Handler */ + + NONSEC_WATCHDOG_RESET_REQ_Handler, /* 0: Non-Secure Watchdog Reset Request Handler */ + NONSEC_WATCHDOG_Handler, /* 1: Non-Secure Watchdog Handler */ + SLOWCLK_Timer_Handler, /* 2: SLOWCLK Timer Handler */ + TFM_TIMER0_IRQ_Handler, /* 3: TIMER 0 Handler */ + TIMER1_Handler, /* 4: TIMER 1 Handler */ + TIMER2_Handler, /* 5: TIMER 2 Handler */ + 0, /* 6: Reserved */ + 0, /* 7: Reserved */ + 0, /* 8: Reserved */ + MPC_Handler, /* 9: MPC Combined (Secure) Handler */ + PPC_Handler, /* 10: PPC Combined (Secure) Handler */ + MSC_Handler, /* 11: MSC Combined (Secure) Handler */ + BRIDGE_ERROR_Handler, /* 12: Bridge Error (Secure) Handler */ + 0, /* 13: Reserved */ + MGMT_PPU_Handler, /* 14: MGMT PPU Handler */ + SYS_PPU_Handler, /* 15: SYS PPU Handler */ + CPU0_PPU_Handler, /* 16: CPU0 PPU Handler */ + 0, /* 17: Reserved */ + 0, /* 18: Reserved */ + 0, /* 19: Reserved */ + 0, /* 20: Reserved */ + 0, /* 21: Reserved */ + 0, /* 22: Reserved */ + 0, /* 23: Reserved */ + 0, /* 24: Reserved */ + 0, /* 25: Reserved */ + DEBUG_PPU_Handler, /* 26: DEBUG PPU Handler */ + TIMER3_AON_Handler, /* 27: TIMER 3 AON Handler */ + CPU0_CTI_0_Handler, /* 28: CPU0 CTI IRQ 0 Handler */ + CPU0_CTI_1_Handler, /* 29: CPU0 CTI IRQ 1 Handler */ + 0, /* 30: Reserved */ + 0, /* 31: Reserved */ + + /* External interrupts */ + System_Timestamp_Counter_Handler, /* 32: System timestamp counter Handler */ + UARTRX0_Handler, /* 33: UART 0 RX Handler */ + UARTTX0_Handler, /* 34: UART 0 TX Handler */ + UARTRX1_Handler, /* 35: UART 1 RX Handler */ + UARTTX1_Handler, /* 36: UART 1 TX Handler */ + UARTRX2_Handler, /* 37: UART 2 RX Handler */ + UARTTX2_Handler, /* 38: UART 2 TX Handler */ + UARTRX3_Handler, /* 39: UART 3 RX Handler */ + UARTTX3_Handler, /* 40: UART 3 TX Handler */ + UARTRX4_Handler, /* 41: UART 4 RX Handler */ + UARTTX4_Handler, /* 42: UART 4 TX Handler */ + UART0_Combined_Handler, /* 43: UART 0 Combined Handler */ + UART1_Combined_Handler, /* 44: UART 1 Combined Handler */ + UART2_Combined_Handler, /* 45: UART 2 Combined Handler */ + UART3_Combined_Handler, /* 46: UART 3 Combined Handler */ + UART4_Combined_Handler, /* 47: UART 4 Combined Handler */ + UARTOVF_Handler, /* 48: UART 0, 1, 2, 3, 4 & 5 Overflow Handler */ + ETHERNET_Handler, /* 49: Ethernet Handler */ + I2S_Handler, /* 50: Audio I2S Handler */ + TOUCH_SCREEN_Handler, /* 51: Touch Screen Handler */ + USB_Handler, /* 52: USB Handler */ + SPI_ADC_Handler, /* 53: SPI ADC Handler */ + SPI_SHIELD0_Handler, /* 54: SPI (Shield 0) Handler */ + SPI_SHIELD1_Handler, /* 55: SPI (Shield 0) Handler */ + ETHOS_U55_Handler, /* 56: Ethos-U55 Handler */ +#ifdef CORSTONE300_AN547 + 0, /* 57: Reserved */ + 0, /* 58: Reserved */ + 0, /* 59: Reserved */ + DMA_Ch_1_Error_Handler, /* 60: DMA Ch1 Error Handler */ + DMA_Ch_1_Terminal_Count_Handler, /* 61: DMA Ch1 Terminal Count Handler */ + DMA_Ch_1_Combined_Handler, /* 62: DMA Ch1 Combined Handler */ + DMA_Ch_2_Error_Handler, /* 63: DMA Ch2 Error Handler */ + 
DMA_Ch_2_Terminal_Count_Handler, /* 64: DMA Ch2 Terminal Count Handler */ + DMA_Ch_2_Combined_Handler, /* 65: DMA Ch2 Combined Handler */ + DMA_Ch_3_Error_Handler, /* 66: DMA Ch3 Error Handler */ + DMA_Ch_3_Terminal_Count_Handler, /* 67: DMA Ch3 Terminal Count Handler */ + DMA_Ch_3_Combined_Handler, /* 68: DMA Ch3 Combined Handler */ +#else + 0, /* 57: Reserved */ + 0, /* 58: Reserved */ + 0, /* 59: Reserved */ + 0, /* 60: Reserved */ + 0, /* 61: Reserved */ + 0, /* 62: Reserved */ + 0, /* 63: Reserved */ + 0, /* 64: Reserved */ + 0, /* 65: Reserved */ + 0, /* 66: Reserved */ + 0, /* 67: Reserved */ + 0, /* 68: Reserved */ +#endif + GPIO0_Combined_Handler, /* 69: GPIO 0 Combined Handler */ + GPIO1_Combined_Handler, /* 70: GPIO 1 Combined Handler */ + GPIO2_Combined_Handler, /* 71: GPIO 2 Combined Handler */ + GPIO3_Combined_Handler, /* 72: GPIO 3 Combined Handler */ + GPIO0_0_Handler, /* 73: GPIO0 Pin 0 Handler */ + GPIO0_1_Handler, /* 74: GPIO0 Pin 1 Handler */ + GPIO0_2_Handler, /* 75: GPIO0 Pin 2 Handler */ + GPIO0_3_Handler, /* 76: GPIO0 Pin 3 Handler */ + GPIO0_4_Handler, /* 77: GPIO0 Pin 4 Handler */ + GPIO0_5_Handler, /* 78: GPIO0 Pin 5 Handler */ + GPIO0_6_Handler, /* 79: GPIO0 Pin 6 Handler */ + GPIO0_7_Handler, /* 80: GPIO0 Pin 7 Handler */ + GPIO0_8_Handler, /* 81: GPIO0 Pin 8 Handler */ + GPIO0_9_Handler, /* 82: GPIO0 Pin 9 Handler */ + GPIO0_10_Handler, /* 83: GPIO0 Pin 10 Handler */ + GPIO0_11_Handler, /* 84: GPIO0 Pin 11 Handler */ + GPIO0_12_Handler, /* 85: GPIO0 Pin 12 Handler */ + GPIO0_13_Handler, /* 86: GPIO0 Pin 13 Handler */ + GPIO0_14_Handler, /* 87: GPIO0 Pin 14 Handler */ + GPIO0_15_Handler, /* 88: GPIO0 Pin 15 Handler */ + GPIO1_0_Handler, /* 89: GPIO1 Pin 0 Handler */ + GPIO1_1_Handler, /* 90: GPIO1 Pin 1 Handler */ + GPIO1_2_Handler, /* 91: GPIO1 Pin 2 Handler */ + GPIO1_3_Handler, /* 92: GPIO1 Pin 3 Handler */ + GPIO1_4_Handler, /* 93: GPIO1 Pin 4 Handler */ + GPIO1_5_Handler, /* 94: GPIO1 Pin 5 Handler */ + GPIO1_6_Handler, /* 95: GPIO1 Pin 6 Handler */ + GPIO1_7_Handler, /* 96: GPIO1 Pin 7 Handler */ + GPIO1_8_Handler, /* 97: GPIO1 Pin 8 Handler */ + GPIO1_9_Handler, /* 98: GPIO1 Pin 9 Handler */ + GPIO1_10_Handler, /* 99: GPIO1 Pin 10 Handler */ + GPIO1_11_Handler, /* 100: GPIO1 Pin 11 Handler */ + GPIO1_12_Handler, /* 101: GPIO1 Pin 12 Handler */ + GPIO1_13_Handler, /* 102: GPIO1 Pin 13 Handler */ + GPIO1_14_Handler, /* 103: GPIO1 Pin 14 Handler */ + GPIO1_15_Handler, /* 104: GPIO1 Pin 15 Handler */ + GPIO2_0_Handler, /* 105: GPIO2 Pin 0 Handler */ + GPIO2_1_Handler, /* 106: GPIO2 Pin 1 Handler */ + GPIO2_2_Handler, /* 107: GPIO2 Pin 2 Handler */ + GPIO2_3_Handler, /* 108: GPIO2 Pin 3 Handler */ + GPIO2_4_Handler, /* 109: GPIO2 Pin 4 Handler */ + GPIO2_5_Handler, /* 110: GPIO2 Pin 5 Handler */ + GPIO2_6_Handler, /* 111: GPIO2 Pin 6 Handler */ + GPIO2_7_Handler, /* 112: GPIO2 Pin 7 Handler */ + GPIO2_8_Handler, /* 113: GPIO2 Pin 8 Handler */ + GPIO2_9_Handler, /* 114: GPIO2 Pin 9 Handler */ + GPIO2_10_Handler, /* 115: GPIO2 Pin 10 Handler */ + GPIO2_11_Handler, /* 116: GPIO2 Pin 11 Handler */ + GPIO2_12_Handler, /* 117: GPIO2 Pin 12 Handler */ + GPIO2_13_Handler, /* 118: GPIO2 Pin 13 Handler */ + GPIO2_14_Handler, /* 119: GPIO2 Pin 14 Handler */ + GPIO2_15_Handler, /* 120: GPIO2 Pin 15 Handler */ + GPIO3_0_Handler, /* 121: GPIO3 Pin 0 Handler */ + GPIO3_1_Handler, /* 122: GPIO3 Pin 1 Handler */ + GPIO3_2_Handler, /* 123: GPIO3 Pin 2 Handler */ + GPIO3_3_Handler, /* 124: GPIO3 Pin 3 Handler */ + UARTRX5_Handler, /* 125: UART 5 RX Interrupt */ + 
UARTTX5_Handler, /* 126: UART 5 TX Interrupt */ + UART5_Handler, /* 127: UART 5 combined Interrupt */ + 0, /* 128: Reserved */ + 0, /* 129: Reserved */ + 0, /* 130: Reserved */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + Reset Handler called on controller reset + *----------------------------------------------------------------------------*/ +void Reset_Handler(void) +{ + __set_PSP((uint32_t)(&__INITIAL_SP)); + + __set_MSPLIM((uint32_t)(&__STACK_LIMIT)); + __set_PSPLIM((uint32_t)(&__STACK_LIMIT)); + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + __TZ_set_STACKSEAL_S((uint32_t *)(&__STACK_SEAL)); +#endif + + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c.base@1.1.1 b/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c.base@1.1.1 new file mode 100644 index 000000000..72b39ca55 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c.base@1.1.1 @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2022-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * This file is derivative of CMSIS V5.9.0 startup_ARMCM55.c + * Git SHA: 2b7495b8535bdcb306dac29b9ded4cfb679d7e5c + */ + +#include "SSE300MPS3.h" + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; +extern uint32_t __STACK_LIMIT; +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +extern uint64_t __STACK_SEAL; +#endif + +extern void __PROGRAM_START(void) __NO_RETURN; + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +void Reset_Handler (void) __NO_RETURN; + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +#define DEFAULT_IRQ_HANDLER(handler_name) \ +void __WEAK handler_name(void) __NO_RETURN; \ +void handler_name(void) { \ + while(1); \ +} + +/* Exceptions */ +DEFAULT_IRQ_HANDLER(NMI_Handler) +DEFAULT_IRQ_HANDLER(HardFault_Handler) +DEFAULT_IRQ_HANDLER(MemManage_Handler) +DEFAULT_IRQ_HANDLER(BusFault_Handler) +DEFAULT_IRQ_HANDLER(UsageFault_Handler) +DEFAULT_IRQ_HANDLER(SecureFault_Handler) +DEFAULT_IRQ_HANDLER(SVC_Handler) +DEFAULT_IRQ_HANDLER(DebugMon_Handler) +DEFAULT_IRQ_HANDLER(PendSV_Handler) +DEFAULT_IRQ_HANDLER(SysTick_Handler) + +DEFAULT_IRQ_HANDLER(NONSEC_WATCHDOG_RESET_REQ_Handler) +DEFAULT_IRQ_HANDLER(NONSEC_WATCHDOG_Handler) +DEFAULT_IRQ_HANDLER(SLOWCLK_Timer_Handler) +DEFAULT_IRQ_HANDLER(TFM_TIMER0_IRQ_Handler) +DEFAULT_IRQ_HANDLER(TIMER1_Handler) +DEFAULT_IRQ_HANDLER(TIMER2_Handler) +DEFAULT_IRQ_HANDLER(MPC_Handler) +DEFAULT_IRQ_HANDLER(PPC_Handler) +DEFAULT_IRQ_HANDLER(MSC_Handler) +DEFAULT_IRQ_HANDLER(BRIDGE_ERROR_Handler) +DEFAULT_IRQ_HANDLER(MGMT_PPU_Handler) +DEFAULT_IRQ_HANDLER(SYS_PPU_Handler) +DEFAULT_IRQ_HANDLER(CPU0_PPU_Handler) +DEFAULT_IRQ_HANDLER(DEBUG_PPU_Handler) +DEFAULT_IRQ_HANDLER(TIMER3_AON_Handler) +DEFAULT_IRQ_HANDLER(CPU0_CTI_0_Handler) +DEFAULT_IRQ_HANDLER(CPU0_CTI_1_Handler) + +DEFAULT_IRQ_HANDLER(System_Timestamp_Counter_Handler) +DEFAULT_IRQ_HANDLER(UARTRX0_Handler) +DEFAULT_IRQ_HANDLER(UARTTX0_Handler) +DEFAULT_IRQ_HANDLER(UARTRX1_Handler) +DEFAULT_IRQ_HANDLER(UARTTX1_Handler) +DEFAULT_IRQ_HANDLER(UARTRX2_Handler) +DEFAULT_IRQ_HANDLER(UARTTX2_Handler) +DEFAULT_IRQ_HANDLER(UARTRX3_Handler) +DEFAULT_IRQ_HANDLER(UARTTX3_Handler) +DEFAULT_IRQ_HANDLER(UARTRX4_Handler) +DEFAULT_IRQ_HANDLER(UARTTX4_Handler) +DEFAULT_IRQ_HANDLER(UART0_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART1_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART2_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART3_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART4_Combined_Handler) +DEFAULT_IRQ_HANDLER(UARTOVF_Handler) +DEFAULT_IRQ_HANDLER(ETHERNET_Handler) +DEFAULT_IRQ_HANDLER(I2S_Handler) +DEFAULT_IRQ_HANDLER(TOUCH_SCREEN_Handler) +DEFAULT_IRQ_HANDLER(USB_Handler) +DEFAULT_IRQ_HANDLER(SPI_ADC_Handler) +DEFAULT_IRQ_HANDLER(SPI_SHIELD0_Handler) +DEFAULT_IRQ_HANDLER(SPI_SHIELD1_Handler) +DEFAULT_IRQ_HANDLER(ETHOS_U55_Handler) +#ifdef CORSTONE300_AN547 +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Combined_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Combined_Handler) 
+DEFAULT_IRQ_HANDLER(DMA_Ch_3_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_3_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_3_Combined_Handler) +#endif +DEFAULT_IRQ_HANDLER(GPIO0_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_3_Handler) +DEFAULT_IRQ_HANDLER(UARTRX5_Handler) +DEFAULT_IRQ_HANDLER(UARTTX5_Handler) +DEFAULT_IRQ_HANDLER(UART5_Handler) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[]; + const VECTOR_TABLE_Type __VECTOR_TABLE[] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* Reset Handler */ + NMI_Handler, /* -14: NMI Handler */ + HardFault_Handler, /* -13: Hard Fault Handler */ + MemManage_Handler, /* -12: MPU Fault Handler */ + BusFault_Handler, /* -11: Bus Fault Handler */ + UsageFault_Handler, /* -10: Usage Fault Handler */ + SecureFault_Handler, /* -9: Secure Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5: SVCall Handler */ + DebugMon_Handler, /* -4: Debug Monitor Handler */ + 0, /* Reserved */ + PendSV_Handler, /* -2: PendSV Handler */ + SysTick_Handler, /* -1: SysTick Handler */ + + 
NONSEC_WATCHDOG_RESET_REQ_Handler, /* 0: Non-Secure Watchdog Reset Request Handler */ + NONSEC_WATCHDOG_Handler, /* 1: Non-Secure Watchdog Handler */ + SLOWCLK_Timer_Handler, /* 2: SLOWCLK Timer Handler */ + TFM_TIMER0_IRQ_Handler, /* 3: TIMER 0 Handler */ + TIMER1_Handler, /* 4: TIMER 1 Handler */ + TIMER2_Handler, /* 5: TIMER 2 Handler */ + 0, /* 6: Reserved */ + 0, /* 7: Reserved */ + 0, /* 8: Reserved */ + MPC_Handler, /* 9: MPC Combined (Secure) Handler */ + PPC_Handler, /* 10: PPC Combined (Secure) Handler */ + MSC_Handler, /* 11: MSC Combined (Secure) Handler */ + BRIDGE_ERROR_Handler, /* 12: Bridge Error (Secure) Handler */ + 0, /* 13: Reserved */ + MGMT_PPU_Handler, /* 14: MGMT PPU Handler */ + SYS_PPU_Handler, /* 15: SYS PPU Handler */ + CPU0_PPU_Handler, /* 16: CPU0 PPU Handler */ + 0, /* 17: Reserved */ + 0, /* 18: Reserved */ + 0, /* 19: Reserved */ + 0, /* 20: Reserved */ + 0, /* 21: Reserved */ + 0, /* 22: Reserved */ + 0, /* 23: Reserved */ + 0, /* 24: Reserved */ + 0, /* 25: Reserved */ + DEBUG_PPU_Handler, /* 26: DEBUG PPU Handler */ + TIMER3_AON_Handler, /* 27: TIMER 3 AON Handler */ + CPU0_CTI_0_Handler, /* 28: CPU0 CTI IRQ 0 Handler */ + CPU0_CTI_1_Handler, /* 29: CPU0 CTI IRQ 1 Handler */ + 0, /* 30: Reserved */ + 0, /* 31: Reserved */ + + /* External interrupts */ + System_Timestamp_Counter_Handler, /* 32: System timestamp counter Handler */ + UARTRX0_Handler, /* 33: UART 0 RX Handler */ + UARTTX0_Handler, /* 34: UART 0 TX Handler */ + UARTRX1_Handler, /* 35: UART 1 RX Handler */ + UARTTX1_Handler, /* 36: UART 1 TX Handler */ + UARTRX2_Handler, /* 37: UART 2 RX Handler */ + UARTTX2_Handler, /* 38: UART 2 TX Handler */ + UARTRX3_Handler, /* 39: UART 3 RX Handler */ + UARTTX3_Handler, /* 40: UART 3 TX Handler */ + UARTRX4_Handler, /* 41: UART 4 RX Handler */ + UARTTX4_Handler, /* 42: UART 4 TX Handler */ + UART0_Combined_Handler, /* 43: UART 0 Combined Handler */ + UART1_Combined_Handler, /* 44: UART 1 Combined Handler */ + UART2_Combined_Handler, /* 45: UART 2 Combined Handler */ + UART3_Combined_Handler, /* 46: UART 3 Combined Handler */ + UART4_Combined_Handler, /* 47: UART 4 Combined Handler */ + UARTOVF_Handler, /* 48: UART 0, 1, 2, 3, 4 & 5 Overflow Handler */ + ETHERNET_Handler, /* 49: Ethernet Handler */ + I2S_Handler, /* 50: Audio I2S Handler */ + TOUCH_SCREEN_Handler, /* 51: Touch Screen Handler */ + USB_Handler, /* 52: USB Handler */ + SPI_ADC_Handler, /* 53: SPI ADC Handler */ + SPI_SHIELD0_Handler, /* 54: SPI (Shield 0) Handler */ + SPI_SHIELD1_Handler, /* 55: SPI (Shield 0) Handler */ + ETHOS_U55_Handler, /* 56: Ethos-U55 Handler */ +#ifdef CORSTONE300_AN547 + 0, /* 57: Reserved */ + 0, /* 58: Reserved */ + 0, /* 59: Reserved */ + DMA_Ch_1_Error_Handler, /* 60: DMA Ch1 Error Handler */ + DMA_Ch_1_Terminal_Count_Handler, /* 61: DMA Ch1 Terminal Count Handler */ + DMA_Ch_1_Combined_Handler, /* 62: DMA Ch1 Combined Handler */ + DMA_Ch_2_Error_Handler, /* 63: DMA Ch2 Error Handler */ + DMA_Ch_2_Terminal_Count_Handler, /* 64: DMA Ch2 Terminal Count Handler */ + DMA_Ch_2_Combined_Handler, /* 65: DMA Ch2 Combined Handler */ + DMA_Ch_3_Error_Handler, /* 66: DMA Ch3 Error Handler */ + DMA_Ch_3_Terminal_Count_Handler, /* 67: DMA Ch3 Terminal Count Handler */ + DMA_Ch_3_Combined_Handler, /* 68: DMA Ch3 Combined Handler */ +#else + 0, /* 57: Reserved */ + 0, /* 58: Reserved */ + 0, /* 59: Reserved */ + 0, /* 60: Reserved */ + 0, /* 61: Reserved */ + 0, /* 62: Reserved */ + 0, /* 63: Reserved */ + 0, /* 64: Reserved */ + 0, /* 65: Reserved */ + 0, /* 66: Reserved */ + 
0, /* 67: Reserved */ + 0, /* 68: Reserved */ +#endif + GPIO0_Combined_Handler, /* 69: GPIO 0 Combined Handler */ + GPIO1_Combined_Handler, /* 70: GPIO 1 Combined Handler */ + GPIO2_Combined_Handler, /* 71: GPIO 2 Combined Handler */ + GPIO3_Combined_Handler, /* 72: GPIO 3 Combined Handler */ + GPIO0_0_Handler, /* 73: GPIO0 Pin 0 Handler */ + GPIO0_1_Handler, /* 74: GPIO0 Pin 1 Handler */ + GPIO0_2_Handler, /* 75: GPIO0 Pin 2 Handler */ + GPIO0_3_Handler, /* 76: GPIO0 Pin 3 Handler */ + GPIO0_4_Handler, /* 77: GPIO0 Pin 4 Handler */ + GPIO0_5_Handler, /* 78: GPIO0 Pin 5 Handler */ + GPIO0_6_Handler, /* 79: GPIO0 Pin 6 Handler */ + GPIO0_7_Handler, /* 80: GPIO0 Pin 7 Handler */ + GPIO0_8_Handler, /* 81: GPIO0 Pin 8 Handler */ + GPIO0_9_Handler, /* 82: GPIO0 Pin 9 Handler */ + GPIO0_10_Handler, /* 83: GPIO0 Pin 10 Handler */ + GPIO0_11_Handler, /* 84: GPIO0 Pin 11 Handler */ + GPIO0_12_Handler, /* 85: GPIO0 Pin 12 Handler */ + GPIO0_13_Handler, /* 86: GPIO0 Pin 13 Handler */ + GPIO0_14_Handler, /* 87: GPIO0 Pin 14 Handler */ + GPIO0_15_Handler, /* 88: GPIO0 Pin 15 Handler */ + GPIO1_0_Handler, /* 89: GPIO1 Pin 0 Handler */ + GPIO1_1_Handler, /* 90: GPIO1 Pin 1 Handler */ + GPIO1_2_Handler, /* 91: GPIO1 Pin 2 Handler */ + GPIO1_3_Handler, /* 92: GPIO1 Pin 3 Handler */ + GPIO1_4_Handler, /* 93: GPIO1 Pin 4 Handler */ + GPIO1_5_Handler, /* 94: GPIO1 Pin 5 Handler */ + GPIO1_6_Handler, /* 95: GPIO1 Pin 6 Handler */ + GPIO1_7_Handler, /* 96: GPIO1 Pin 7 Handler */ + GPIO1_8_Handler, /* 97: GPIO1 Pin 8 Handler */ + GPIO1_9_Handler, /* 98: GPIO1 Pin 9 Handler */ + GPIO1_10_Handler, /* 99: GPIO1 Pin 10 Handler */ + GPIO1_11_Handler, /* 100: GPIO1 Pin 11 Handler */ + GPIO1_12_Handler, /* 101: GPIO1 Pin 12 Handler */ + GPIO1_13_Handler, /* 102: GPIO1 Pin 13 Handler */ + GPIO1_14_Handler, /* 103: GPIO1 Pin 14 Handler */ + GPIO1_15_Handler, /* 104: GPIO1 Pin 15 Handler */ + GPIO2_0_Handler, /* 105: GPIO2 Pin 0 Handler */ + GPIO2_1_Handler, /* 106: GPIO2 Pin 1 Handler */ + GPIO2_2_Handler, /* 107: GPIO2 Pin 2 Handler */ + GPIO2_3_Handler, /* 108: GPIO2 Pin 3 Handler */ + GPIO2_4_Handler, /* 109: GPIO2 Pin 4 Handler */ + GPIO2_5_Handler, /* 110: GPIO2 Pin 5 Handler */ + GPIO2_6_Handler, /* 111: GPIO2 Pin 6 Handler */ + GPIO2_7_Handler, /* 112: GPIO2 Pin 7 Handler */ + GPIO2_8_Handler, /* 113: GPIO2 Pin 8 Handler */ + GPIO2_9_Handler, /* 114: GPIO2 Pin 9 Handler */ + GPIO2_10_Handler, /* 115: GPIO2 Pin 10 Handler */ + GPIO2_11_Handler, /* 116: GPIO2 Pin 11 Handler */ + GPIO2_12_Handler, /* 117: GPIO2 Pin 12 Handler */ + GPIO2_13_Handler, /* 118: GPIO2 Pin 13 Handler */ + GPIO2_14_Handler, /* 119: GPIO2 Pin 14 Handler */ + GPIO2_15_Handler, /* 120: GPIO2 Pin 15 Handler */ + GPIO3_0_Handler, /* 121: GPIO3 Pin 0 Handler */ + GPIO3_1_Handler, /* 122: GPIO3 Pin 1 Handler */ + GPIO3_2_Handler, /* 123: GPIO3 Pin 2 Handler */ + GPIO3_3_Handler, /* 124: GPIO3 Pin 3 Handler */ + UARTRX5_Handler, /* 125: UART 5 RX Interrupt */ + UARTTX5_Handler, /* 126: UART 5 TX Interrupt */ + UART5_Handler, /* 127: UART 5 combined Interrupt */ + 0, /* 128: Reserved */ + 0, /* 129: Reserved */ + 0, /* 130: Reserved */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + Reset Handler called on controller reset + *----------------------------------------------------------------------------*/ +void Reset_Handler(void) +{ + __set_PSP((uint32_t)(&__INITIAL_SP)); + + __set_MSPLIM((uint32_t)(&__STACK_LIMIT)); + 
__set_PSPLIM((uint32_t)(&__STACK_LIMIT)); + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + __TZ_set_STACKSEAL_S((uint32_t *)(&__STACK_SEAL)); +#endif + + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c b/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c new file mode 100644 index 000000000..4e67d536d --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2009-2022 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file is derivative of CMSIS V5.9.0 system_ARMCM55.c + * Git SHA: 2b7495b8535bdcb306dac29b9ded4cfb679d7e5c + */ + +#include "SSE300MPS3.h" + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ + #define XTAL (32000000UL) + #define SYSTEM_CLOCK (XTAL) + #define PERIPHERAL_CLOCK (25000000UL) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[496]; + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; +uint32_t PeripheralClock = PERIPHERAL_CLOCK; + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate (void) +{ + SystemCoreClock = SYSTEM_CLOCK; + PeripheralClock = PERIPHERAL_CLOCK; +} + +/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t)(&__VECTOR_TABLE[0]); +#endif + +#if (defined (__FPU_USED) && (__FPU_USED == 1U)) || \ + (defined (__ARM_FEATURE_MVE) && (__ARM_FEATURE_MVE > 0U)) + SCB->CPACR |= ((3U << 10U*2U) | /* enable CP10 Full Access */ + (3U << 11U*2U) ); /* enable CP11 Full Access */ + + /* Set low-power state for PDEPU */ + /* 0b00 | ON, PDEPU is not in low-power state */ + /* 0b01 | ON, but the clock is off */ + /* 0b10 | RET(ention) */ + /* 0b11 | OFF */ + + /* Clear ELPSTATE, value is 0b11 on Cold reset */ + PWRMODCTL->CPDLPSTATE &= ~(PWRMODCTL_CPDLPSTATE_ELPSTATE_Msk); + + /* Favor best FP/MVE performance by default, avoid EPU switch-ON delays */ + /* PDEPU ON, Clock OFF */ + PWRMODCTL->CPDLPSTATE |= 0x1 << PWRMODCTL_CPDLPSTATE_ELPSTATE_Pos; +#endif + +#ifdef 
UNALIGNED_SUPPORT_DISABLE + SCB->CCR |= SCB_CCR_UNALIGN_TRP_Msk; +#endif + + /* Enable Loop and branch info cache */ + SCB->CCR |= SCB_CCR_LOB_Msk; + __DSB(); + __ISB(); + + + SystemCoreClock = SYSTEM_CLOCK; + PeripheralClock = PERIPHERAL_CLOCK; +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c.base@1.1.1 b/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c.base@1.1.1 new file mode 100644 index 000000000..4e67d536d --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c.base@1.1.1 @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2009-2022 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file is derivative of CMSIS V5.9.0 system_ARMCM55.c + * Git SHA: 2b7495b8535bdcb306dac29b9ded4cfb679d7e5c + */ + +#include "SSE300MPS3.h" + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ + #define XTAL (32000000UL) + #define SYSTEM_CLOCK (XTAL) + #define PERIPHERAL_CLOCK (25000000UL) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[496]; + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; +uint32_t PeripheralClock = PERIPHERAL_CLOCK; + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate (void) +{ + SystemCoreClock = SYSTEM_CLOCK; + PeripheralClock = PERIPHERAL_CLOCK; +} + +/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t)(&__VECTOR_TABLE[0]); +#endif + +#if (defined (__FPU_USED) && (__FPU_USED == 1U)) || \ + (defined (__ARM_FEATURE_MVE) && (__ARM_FEATURE_MVE > 0U)) + SCB->CPACR |= ((3U << 10U*2U) | /* enable CP10 Full Access */ + (3U << 11U*2U) ); /* enable CP11 Full Access */ + + /* Set low-power state for PDEPU */ + /* 0b00 | ON, PDEPU is not in low-power state */ + /* 0b01 | ON, but the clock is off */ + /* 0b10 | RET(ention) */ + /* 0b11 | OFF */ + + /* Clear ELPSTATE, value is 0b11 on Cold reset */ + PWRMODCTL->CPDLPSTATE &= ~(PWRMODCTL_CPDLPSTATE_ELPSTATE_Msk); + + /* Favor best FP/MVE performance by default, avoid EPU switch-ON delays */ + /* PDEPU ON, Clock OFF */ + PWRMODCTL->CPDLPSTATE |= 0x1 << PWRMODCTL_CPDLPSTATE_ELPSTATE_Pos; +#endif + +#ifdef 
UNALIGNED_SUPPORT_DISABLE + SCB->CCR |= SCB_CCR_UNALIGN_TRP_Msk; +#endif + + /* Enable Loop and branch info cache */ + SCB->CCR |= SCB_CCR_LOB_Msk; + __DSB(); + __ISB(); + + + SystemCoreClock = SYSTEM_CLOCK; + PeripheralClock = PERIPHERAL_CLOCK; +} diff --git a/dsppp/RTE/Device/SSE_300_MPS3/ac6_linker_script.sct b/dsppp/RTE/Device/SSE_300_MPS3/ac6_linker_script.sct new file mode 100644 index 000000000..4d6e579d0 --- /dev/null +++ b/dsppp/RTE/Device/SSE_300_MPS3/ac6_linker_script.sct @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ + +LR_ROM0 __ROM0_BASE __ROM0_SIZE { + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + ER_CMSE_VENEER __ROM0_BASE+__ROM0_SIZE -__ROM0_SIZE { + *(Veneer$$CMSE) + } + #define ER_CMSE_VENEER_SIZE AlignExpr(ImageLength(ER_CMSE_VENEER), 8) +#else + #define ER_CMSE_VENEER_SIZE 0 +#endif + + ER_ROM0 __ROM0_BASE (__ROM0_SIZE - ER_CMSE_VENEER_SIZE) { + *.o (RESET, +First) + *(InRoot$$Sections) + *(+RO +XO) + } + + RW_NOINIT __RAM0_BASE UNINIT (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE) { + *(.bss.noinit) + } + + RW_RAM0 AlignExpr(+0, 8) (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP (AlignExpr(+0, 8)) EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK (__RAM0_BASE + __RAM0_SIZE - __STACKSEAL_SIZE) EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } + +#if __STACKSEAL_SIZE > 0 + STACKSEAL +0 EMPTY 8 { ; Reserve empty region for stack seal immediately after stack + } +#endif + +#if __RAM1_SIZE > 0 + RW_RAM1 __RAM1_BASE __RAM1_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM2_SIZE > 0 + RW_RAM2 __RAM2_BASE __RAM2_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM3_SIZE > 0 + RW_RAM3 __RAM3_BASE __RAM3_SIZE { + .ANY (+RW +ZI) + } +#endif +} + +#if __ROM1_SIZE > 0 +LR_ROM1 __ROM1_BASE __ROM1_SIZE { + ER_ROM1 +0 __ROM1_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM2_SIZE > 0 +LR_ROM2 __ROM2_BASE __ROM2_SIZE { + ER_ROM2 +0 __ROM2_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM3_SIZE > 0 +LR_ROM3 __ROM3_BASE __ROM3_SIZE { + ER_ROM3 +0 __ROM3_SIZE { + .ANY (+RO +XO) + } +} +#endif diff --git a/dsppp/RTE/Device/SSE_300_MPS3/clang_linker_script.ld b/dsppp/RTE/Device/SSE_300_MPS3/clang_linker_script.ld new file mode 100644 index 000000000..40f955c16 --- /dev/null +++ 
b/dsppp/RTE/Device/SSE_300_MPS3/clang_linker_script.ld @@ -0,0 +1,353 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) + } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + 
*(.gnu.linkonce.r.*) + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . = ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . + 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . 
); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . = ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef __HEAP_SIZE + PROVIDE (__heap_end = __heap_start + __HEAP_SIZE); + PROVIDE (__heap_size = __HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - __STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - __STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += __STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack) + .stackseal (__stack) (NOLOAD) : + { + . += __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. 
*/ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. */ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. + */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/RTE/Device/SSE_300_MPS3/gcc_linker_script.ld b/dsppp/RTE/Device/SSE_300_MPS3/gcc_linker_script.ld new file mode 100644 index 000000000..a018e5d4e --- /dev/null +++ b/dsppp/RTE/Device/SSE_300_MPS3/gcc_linker_script.ld @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rwx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rwx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rwx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rwx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . = ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. + LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. 
+ */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - __STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . = ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/SSE_300_MPS3/regions_V2M_MPS3_SSE_300_FVP.h b/dsppp/RTE/Device/SSE_300_MPS3/regions_V2M_MPS3_SSE_300_FVP.h new file mode 100644 index 000000000..cf6b28cc9 --- /dev/null +++ b/dsppp/RTE/Device/SSE_300_MPS3/regions_V2M_MPS3_SSE_300_FVP.h @@ -0,0 +1,400 @@ +#ifndef REGIONS_V2M_MPS3_SSE_300_FVP_H +#define REGIONS_V2M_MPS3_SSE_300_FVP_H + + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// Device pack: ARM::V2M_MPS3_SSE_300_BSP@1.4.0 +// Device pack used to generate this file + +// ROM Configuration +// ======================= +// + +// RAM Configuration +// ======================= +// IROM1=<__RAM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x10000000 +#define __RAM0_BASE 0x10000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00200000 +#define __RAM0_SIZE 0x00200000 +// Default region +// Enables memory region globally for the application. +#define __RAM0_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM0_NOINIT 0 +// + +// IROM2=<__RAM1> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x00000000 +#define __RAM1_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00200000 +#define __RAM1_SIZE 0x00200000 +// Default region +// Enables memory region globally for the application. +#define __RAM1_DEFAULT 0 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM1_NOINIT 0 +// + +// IRAM1=<__RAM2> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x30000000 +#define __RAM2_BASE 0x30000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM2_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM2_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM2_NOINIT 0 +// + +// IRAM2=<__RAM3> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20000000 +#define __RAM3_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM3_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM3_DEFAULT 0 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM3_NOINIT 0 +// + +// ITCM_NS=<__RAM4> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. 
+// Default: 0x00000000 +#define __RAM4_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00080000 +#define __RAM4_SIZE 0x00080000 +// Default region +// Enables memory region globally for the application. +#define __RAM4_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM4_NOINIT 0 +// + +// SRAM_NS=<__RAM5> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x01000000 +#define __RAM5_BASE 0x01000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00100000 +#define __RAM5_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM5_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM5_NOINIT 0 +// + +// DTCM0_NS=<__RAM6> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20000000 +#define __RAM6_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM6_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM6_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM6_NOINIT 0 +// + +// DTCM1_NS=<__RAM7> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20020000 +#define __RAM7_BASE 0x20020000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM7_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM7_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM7_NOINIT 0 +// + +// DTCM2_NS=<__RAM8> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20040000 +#define __RAM8_BASE 0x20040000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM8_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM8_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM8_NOINIT 0 +// + +// DTCM3_NS=<__RAM9> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20060000 +#define __RAM9_BASE 0x20060000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM9_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM9_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM9_NOINIT 0 +// + +// ISRAM0_NS=<__RAM10> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x21000000 +#define __RAM10_BASE 0x21000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00100000 +#define __RAM10_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM10_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM10_NOINIT 0 +// + +// ISRAM1_NS=<__RAM11> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. 
+// Default: 0x21100000 +#define __RAM11_BASE 0x21100000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00100000 +#define __RAM11_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM11_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM11_NOINIT 0 +// + +// QSPI_SRAM_NS=<__RAM12> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x28000000 +#define __RAM12_BASE 0x28000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00800000 +#define __RAM12_SIZE 0x00800000 +// Default region +// Enables memory region globally for the application. +#define __RAM12_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM12_NOINIT 0 +// + +// ITCM_S=<__RAM13> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x10000000 +#define __RAM13_BASE 0x10000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00080000 +#define __RAM13_SIZE 0x00080000 +// Default region +// Enables memory region globally for the application. +#define __RAM13_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM13_NOINIT 0 +// + +// SRAM_S=<__RAM14> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x11000000 +#define __RAM14_BASE 0x11000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00100000 +#define __RAM14_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM14_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM14_NOINIT 0 +// + +// DTCM0_S=<__RAM15> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x30000000 +#define __RAM15_BASE 0x30000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM15_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM15_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM15_NOINIT 0 +// + +// DTCM1_S=<__RAM16> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x30020000 +#define __RAM16_BASE 0x30020000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM16_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM16_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM16_NOINIT 0 +// + +// DTCM2_S=<__RAM17> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x30040000 +#define __RAM17_BASE 0x30040000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM17_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM17_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM17_NOINIT 0 +// + +// DTCM3_S=<__RAM18> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. 
+// Default: 0x30060000 +#define __RAM18_BASE 0x30060000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM18_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM18_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM18_NOINIT 0 +// + +// ISRAM0_S=<__RAM19> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x31000000 +#define __RAM19_BASE 0x31000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00100000 +#define __RAM19_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM19_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM19_NOINIT 0 +// + +// ISRAM1_S=<__RAM20> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x31100000 +#define __RAM20_BASE 0x31100000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00100000 +#define __RAM20_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM20_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM20_NOINIT 0 +// + +// QSPI_SRAM_S=<__RAM21> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x38000000 +#define __RAM21_BASE 0x38000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00800000 +#define __RAM21_SIZE 0x00800000 +// Default region +// Enables memory region globally for the application. +#define __RAM21_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM21_NOINIT 0 +// + +// + +// Stack / Heap Configuration +// Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +// Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00038000 +// + + +#endif /* REGIONS_V2M_MPS3_SSE_300_FVP_H */ diff --git a/dsppp/RTE/_Release_IPSS_M0P/RTE_Components.h b/dsppp/RTE/_Release_IPSS_M0P/RTE_Components.h new file mode 100644 index 000000000..332d6e45f --- /dev/null +++ b/dsppp/RTE/_Release_IPSS_M0P/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+IPSS_M0P' + * Target: 'Release+IPSS_M0P' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM0plus.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_IPSS_M4/RTE_Components.h b/dsppp/RTE/_Release_IPSS_M4/RTE_Components.h new file mode 100644 index 000000000..747232d25 --- /dev/null +++ b/dsppp/RTE/_Release_IPSS_M4/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! 
+ * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+IPSS_M4' + * Target: 'Release+IPSS_M4' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM4.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_LLVM-Corstone-300/RTE_Components.h b/dsppp/RTE/_Release_LLVM-Corstone-300/RTE_Components.h new file mode 100644 index 000000000..cd99d204e --- /dev/null +++ b/dsppp/RTE/_Release_LLVM-Corstone-300/RTE_Components.h @@ -0,0 +1,23 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+LLVM-Corstone-300' + * Target: 'Release+LLVM-Corstone-300' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "SSE300MPS3.h" + +/* ARM::CMSIS-Compiler:STDOUT:Custom@1.0.0 */ +#define RTE_CMSIS_Compiler_STDOUT /* CMSIS-Compiler STDOUT */ + #define RTE_CMSIS_Compiler_STDOUT_Custom /* CMSIS-Compiler STDOUT: Custom */ + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_MPS3-Corstone-300/RTE_Components.h b/dsppp/RTE/_Release_MPS3-Corstone-300/RTE_Components.h new file mode 100644 index 000000000..d7da60e1c --- /dev/null +++ b/dsppp/RTE/_Release_MPS3-Corstone-300/RTE_Components.h @@ -0,0 +1,25 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+MPS3-Corstone-300' + * Target: 'Release+MPS3-Corstone-300' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "SSE300MPS3.h" + +/* ARM::CMSIS Driver:USART@1.0.0 */ +#define RTE_Drivers_USART +/* ARM::CMSIS-Compiler:STDOUT:Custom@1.0.0 */ +#define RTE_CMSIS_Compiler_STDOUT /* CMSIS-Compiler STDOUT */ + #define RTE_CMSIS_Compiler_STDOUT_Custom /* CMSIS-Compiler STDOUT: Custom */ + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_VHT-Corstone-300/RTE_Components.h b/dsppp/RTE/_Release_VHT-Corstone-300/RTE_Components.h new file mode 100644 index 000000000..c326941f9 --- /dev/null +++ b/dsppp/RTE/_Release_VHT-Corstone-300/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+VHT-Corstone-300' + * Target: 'Release+VHT-Corstone-300' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "SSE300MPS3.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_VHT-M0P/RTE_Components.h b/dsppp/RTE/_Release_VHT-M0P/RTE_Components.h new file mode 100644 index 000000000..8a0db96bc --- /dev/null +++ b/dsppp/RTE/_Release_VHT-M0P/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+VHT-M0P' + * Target: 'Release+VHT-M0P' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM0plus.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_VHT-M4/RTE_Components.h b/dsppp/RTE/_Release_VHT-M4/RTE_Components.h new file mode 100644 index 000000000..4c34863c1 --- /dev/null +++ b/dsppp/RTE/_Release_VHT-M4/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! 
+ * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+VHT-M4' + * Target: 'Release+VHT-M4' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM4.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_VHT_M0P/RTE_Components.h b/dsppp/RTE/_Release_VHT_M0P/RTE_Components.h new file mode 100644 index 000000000..768bae446 --- /dev/null +++ b/dsppp/RTE/_Release_VHT_M0P/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+VHT_M0P' + * Target: 'Release+VHT_M0P' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM0plus.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_VHT_M4/RTE_Components.h b/dsppp/RTE/_Release_VHT_M4/RTE_Components.h new file mode 100644 index 000000000..44e1e938d --- /dev/null +++ b/dsppp/RTE/_Release_VHT_M4/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+VHT_M4' + * Target: 'Release+VHT_M4' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM4.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/allocator.cpp b/dsppp/allocator.cpp new file mode 100644 index 000000000..aaf61cc50 --- /dev/null +++ b/dsppp/allocator.cpp @@ -0,0 +1,98 @@ +#include "allocator.h" + +#define ALLOC_POOL(BYTES,NB) \ +MemoryPool vecPool_##BYTES(NB); + +#if defined(POOL_ALLOCATOR) +#include "allocation/all.cpp" +#endif + +std::map current_stats; +std::map max_stats; +std::map current_dyn_stats; + +void print_map(std::string comment) +{ + + std::cout << comment << "\r\n"; +#if !defined(POOL_ALLOCATOR) + std::size_t total_static=0; + std::size_t total_dynamic=0; + + for (const auto v : max_stats) + { + // Only count allocations with size known at build time + if (v.first > 0) + { + std::cout << "ALLOC_POOL(" << v.first << "," << v.second << "); \r\n"; + total_static += v.first * v.second; + } + } + + for (const auto v : max_stats) + { + // Only count allocations with size known at build time + if (v.first > 0) + { + std::cout << "POOL(" << v.first << "); \r\n"; + } + } + + std::cout << "\r\n"; + + std::cout << "Total static bytes: " << total_static << std::hex << " (0x" << total_static << ")\r\n"; + + total_dynamic = 0; + std::cout << "\r\nDynamic allocations\r\n"; + for (const auto v : max_stats) + { + // Only count dynamic allocations (size not known at build time) + if (v.first < 0) + { + // The count is meaningless for dynamic allocation: + // destroy has no length argument (contrary to allocate) + // and so can only get the length from the static value.
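+ // Here v.first holds the negative of the requested size: stat_allocator::allocate(sz)
+ // records dynamic allocations under the key -sz, while fixed-size allocations use their
+ // positive compile-time length L, so -v.first recovers the size in bytes printed below.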
+ std::cout << std::dec << -v.first << " : " << v.second << "\r\n"; + total_dynamic += (-v.first) * v.second; + } + } + std::cout << "Total dynamic bytes: " << total_dynamic << std::hex << " (0x" << total_dynamic << ")\r\n"; + std::cout << "Total bytes: " << (total_static+total_dynamic) << std::hex << " (0x" << (total_static+total_dynamic) << ")\r\n"; + + +#endif +} + +void reset_current_stats() +{ +#if !defined(POOL_ALLOCATOR) + for (auto v : current_stats) + { + v.second = 0; + } +#endif +} + +void check_current_stats() +{ +#if !defined(POOL_ALLOCATOR) + for (const auto v : current_stats) + { + if (v.second > 0) + { + if (v.first>0) + { + std::cout << "Error memory pool " << v.first << " not empty = " << v.second << "\r\n"; + } + else + { + std::cout << "Error dynamic alloc " << -v.first << " not empty = " << v.second << "\r\n"; + } + } + } + + reset_current_stats(); +#endif +} + diff --git a/dsppp/allocator.h b/dsppp/allocator.h new file mode 100644 index 000000000..61e95006e --- /dev/null +++ b/dsppp/allocator.h @@ -0,0 +1,124 @@ +#pragma once + +#include +#include +#include +#include +#include "test_config.h" + + +// Allocator for temporaries +#if defined(POOL_ALLOCATOR) +#define TMP_ALLOC pool_allocator +#else +#define TMP_ALLOC stat_allocator +#endif + +#include + + +using namespace arm_cmsis_dsp; + + +constexpr int NBVEC_2 = 2; +constexpr int NBVEC_3 = 3; +constexpr int NBVEC_4 = 4; +constexpr int NBVEC_8 = 8; +constexpr int NBVEC_9 = 9; +constexpr int NBVEC_16 = 16; +constexpr int NBVEC_32 = 32; +constexpr int NBVEC_44 = 44; +constexpr int NBVEC_47 = 47; +constexpr int NBVEC_64 = 64; +constexpr int NBVEC_128 = 128; +constexpr int NBVEC_256 = 256; +constexpr int NBVEC_258 = 258; +constexpr int NBVEC_512 = 512; +constexpr int NBVEC_1024 = 1024; +constexpr int NBVEC_2048 = 2048; + + +template +struct pool_allocator; + +#define POOL(BYTES) \ +constexpr int POOL_BLOCK_##BYTES = BYTES; \ +extern MemoryPool vecPool_##BYTES;\ +template<> \ +struct pool_allocator { \ + static char* allocate () noexcept{ \ + return(vecPool_##BYTES.get_new_buffer()); \ + } \ + \ + static void destroy ( char* ptr ) noexcept { \ + vecPool_##BYTES.recycle_buffer(ptr); \ + } \ + \ +}; + + +#if defined(POOL_ALLOCATOR) +#include "allocation/all.h" +#endif + +template<> +struct pool_allocator { + /* Dynamic size allocations */ + static char* allocate ( std::size_t sz) noexcept{ + return(reinterpret_cast(std::malloc(sz))); + } + + static void destroy ( char* ptr ) noexcept { + std::free(ptr); + } + +}; + +extern std::map current_stats; +extern std::map max_stats; +extern std::map current_dyn_stats; + + +template +struct stat_allocator { + + /* Dynamic allocations */ + static char* allocate ( std::size_t sz) noexcept{ + current_stats[-sz]++; + if (current_stats[-sz]>max_stats[-sz]) + { + max_stats[-sz] = current_stats[-sz]; + } + void *ptr = std::malloc(sz); + current_dyn_stats[ptr]=sz; + return(reinterpret_cast(ptr)); + } + + /* Size known at build time */ + static char* allocate () noexcept{ + current_stats[L]++; + if (current_stats[L]>max_stats[L]) + { + max_stats[L] = current_stats[L]; + } + return(reinterpret_cast(std::malloc(L))); + } + + static void destroy ( char* ptr ) noexcept { + if (L<0) + { + std::size_t sz = current_dyn_stats[ptr]; + current_stats[-sz]--; + } + else + { + current_stats[L]--; + } + std::free(ptr); + } + +}; + +extern void print_map(std::string comment); +extern void check_current_stats(); +extern void reset_current_stats(); diff --git a/dsppp/cdefault.yml b/dsppp/cdefault.yml new file 
mode 100644 index 000000000..0ede69afd --- /dev/null +++ b/dsppp/cdefault.yml @@ -0,0 +1,142 @@ +default: + + compiler: AC6 + + misc: + - for-compiler: AC6 + C: + - -Wsign-compare + - -Wdouble-promotion + - -DNDEBUG + - -Wall + - -Wextra + - -Werror + - -std=c11 + - -Ofast + - -ffast-math + - -Wno-packed + - -Wno-missing-variable-declarations + - -Wno-missing-prototypes + - -Wno-missing-noreturn + - -Wno-sign-conversion + - -Wno-nonportable-include-path + - -Wno-reserved-id-macro + - -Wno-unused-macros + - -Wno-documentation-unknown-command + - -Wno-documentation + - -Wno-license-management + - -Wno-parentheses-equality + - -Wno-reserved-identifier + - -ffunction-sections + - -Wno-nan-infinity-disabled + - -DARM_MATH_LOOPUNROLL + CPP: + - -fno-rtti + - -fno-exceptions + - -DNDEBUG + - -Wall + - -Wextra + - -std=c++17 + - -Ofast + - -ffast-math + - -Wno-unused-function + - -ffunction-sections + - -mllvm -disable-vector-combine + ASM: + - -masm=auto + Link: + - --entry=Reset_Handler + - --info=summarysizes + - --info=sizes + - --info=totals + - --info=unused + - --info=veneers + + - for-compiler: GCC + C: + - -Wsign-compare + - -Wdouble-promotion + - -DNDEBUG + - -Wall + - -Wextra + - -Werror + - -std=c11 + - -Ofast + - -ffast-math + - -Wno-packed + - -Wno-missing-prototypes + - -Wno-missing-noreturn + - -Wno-sign-conversion + - -Wno-unused-macros + - -ffunction-sections + - -DARM_MATH_LOOPUNROLL + - -flax-vector-conversions + - -Wno-maybe-uninitialized + - -fdata-sections + - -fno-unroll-loops + CPP: + - -fno-rtti + - -fno-exceptions + - -DNDEBUG + - -Wall + - -Wextra + - -std=c++17 + - -Ofast + - -ffast-math + - -Wno-unused-function + - -ffunction-sections + - -fdata-sections + - -Wno-psabi + - -fno-unroll-loops + ASM: + - -masm=auto + Link: + - --specs=nano.specs + - -Wl,-Map=$elf()$.map + - -lm + - -Wl,--wrap=SysTick_Handler + - -Wl,--gc-sections + Library: + - -lm + + - for-compiler: CLANG + C: + - -Wsign-compare + - -Wdouble-promotion + - -DNDEBUG + - -Wall + - -Wextra + - -Werror + - -std=c11 + - -Ofast + - -ffast-math + - -Wno-packed + - -Wno-missing-variable-declarations + - -Wno-missing-prototypes + - -Wno-missing-noreturn + - -Wno-sign-conversion + - -Wno-nonportable-include-path + - -Wno-reserved-id-macro + - -Wno-unused-macros + - -Wno-documentation-unknown-command + - -Wno-documentation + - -Wno-parentheses-equality + - -Wno-reserved-identifier + - -ffunction-sections + - -DARM_MATH_LOOPUNROLL + CPP: + - -fno-rtti + - -fno-exceptions + - -DNDEBUG + - -Wall + - -Wextra + - -std=c++17 + - -Ofast + - -ffast-math + - -Wno-unused-function + - -ffunction-sections + ASM: + - -masm=auto + Link: + - -Wl,-Map=$elf()$.map + - -Wl,--gc-sections diff --git a/dsppp/clang_sse300.c b/dsppp/clang_sse300.c new file mode 100644 index 000000000..c6470905d --- /dev/null +++ b/dsppp/clang_sse300.c @@ -0,0 +1,65 @@ +#include "RTE_Components.h" +#include + +#include "Driver_USART.h" +#include "stdout_USART.h" + + + + +static int stdin_getc(FILE *file) { + (void)file; + return(0); +} + + +// iostream has references to stdin and stderr and there is a link +// error if not defined. 
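+// FDEV_SETUP_STREAM (picolibc-style stdio) builds a static FILE from put/get/flush callbacks
+// plus a mode flag; the stubs wired in here only exist to satisfy the stdin/stderr link-time
+// references mentioned above, so they ignore their arguments and return 0.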
+static FILE __stdin = FDEV_SETUP_STREAM(NULL, + stdin_getc, + NULL, + _FDEV_SETUP_READ); +FILE *const stdin = &__stdin; + +static int stderr_putc(char c, FILE *file) { + (void)file; + return(0); +} + +static FILE __stderr = FDEV_SETUP_STREAM(stderr_putc, + NULL, + NULL, + _FDEV_SETUP_WRITE); +FILE *const stderr = &__stderr; + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// STDOUT USART Interface + +// Connect to hardware via Driver_USART# <0-255> +// Select driver control block for USART interface +#define USART_DRV_NUM 0 + +// Baudrate +#define USART_BAUDRATE 115200 + +// + + +#define _USART_Driver_(n) Driver_USART##n +#define USART_Driver_(n) _USART_Driver_(n) + +extern ARM_DRIVER_USART USART_Driver_(USART_DRV_NUM); +#define ptrUSART (&USART_Driver_(USART_DRV_NUM)) + +int stdout_putchar(const unsigned char ch) { + uint8_t buf[1]; + + buf[0] = ch; + if (ptrUSART->Send(buf, 1) != ARM_DRIVER_OK) { + return (-1); + } + while (ptrUSART->GetTxCount() != 1); + return (ch); +} + diff --git a/dsppp/example.cproject.yml b/dsppp/example.cproject.yml new file mode 100644 index 000000000..0e41fef75 --- /dev/null +++ b/dsppp/example.cproject.yml @@ -0,0 +1,120 @@ +project: + groups: + - group: Examples + files: + #- file: Examples/dot_product.cpp + #- file: Examples/vector_op.cpp + - file: Examples/matrix_op.cpp + - file: clang_sse300.c + for-context: + - +MPS3-Corstone-300 + for-compiler: + - CLANG + add-path: + - Include + - Examples + + components: + - component: ARM::CMSIS:CORE + - component: ARM::CMSIS:DSP@1.15.0 + - component: ARM::Device:Startup&C Startup + for-context: + - +VHT-Corstone-300 + - +VHT-M0P + - +VHT-M4 + - +MPS3-Corstone-300 + - component: ARM::Device:Definition + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: CMSIS-Compiler:CORE + for-context: + - +MPS3-Corstone-300 + - component: CMSIS-Compiler:STDOUT:Custom@1.0.0 + for-context: + - +MPS3-Corstone-300 + - component: ARM::Device:USART STDOUT + for-context: + - +MPS3-Corstone-300 + - component: ARM::CMSIS Driver:USART + for-context: + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:SysCounter + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:SysTimer + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:Timeout + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:UART + for-context: + - +MPS3-Corstone-300 + + linker: + - script: linker_scripts/gcc_sse300_mps3.ld + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: GCC + + - script: linker_scripts/clang_sse300_mps3.sct + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: CLANG + + - script: linker_scripts/ac6_sse300_mps3_s.sct + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: AC6 + + - regions: linker_scripts/SSE-300-MPS3/region_defs.h + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + + - script: linker_scripts/gcc_m0p_mps3.ld + for-context: + - +VHT-M0P + for-compiler: GCC + + - script: linker_scripts/clang_m0p_mps3.ld + for-context: + - +VHT-M0P + for-compiler: CLANG + + - script: linker_scripts/ac6_m0p_mps3_s.sct + for-context: + - +VHT-M0P + for-compiler: AC6 + + - regions: linker_scripts/ARMCM0P/region_defs.h + for-context: + - +VHT-M0P + + - script: linker_scripts/gcc_m4_mps3.ld + for-context: + - +VHT-M4 + for-compiler: GCC + + - script: linker_scripts/clang_m4_mps3.ld 
+ for-context: + - +VHT-M4 + for-compiler: CLANG + + - script: linker_scripts/ac6_m4_mps3_s.sct + for-context: + - +VHT-M4 + for-compiler: AC6 + + - regions: linker_scripts/ARMCM4/region_defs.h + for-context: + - +VHT-M4 + + diff --git a/dsppp/fvp_configs/VHT-Corstone-300.txt b/dsppp/fvp_configs/VHT-Corstone-300.txt new file mode 100644 index 000000000..e352bec1e --- /dev/null +++ b/dsppp/fvp_configs/VHT-Corstone-300.txt @@ -0,0 +1,9 @@ +core_clk.mul=100000000 +cpu0.semihosting-enable=1 +cpu0.semihosting-heap_base=0x0 +cpu0.semihosting-heap_limit=0x0 +cpu0.semihosting-stack_base=0x0 +cpu0.semihosting-stack_limit=0x0 +cpu0.FPU=1 +cpu0.MVE=2 +mps3_board.visualisation.disable-visualisation=1 diff --git a/dsppp/fvp_configs/VHT-M0P.txt b/dsppp/fvp_configs/VHT-M0P.txt new file mode 100644 index 000000000..4892c1e4e --- /dev/null +++ b/dsppp/fvp_configs/VHT-M0P.txt @@ -0,0 +1,3 @@ +fvp_mps2.mps2_visualisation.disable-visualisation=1 +armcortexm0plusct.semihosting-enable=1 +armcortexm0plusct.NUM_MPU_REGION=0x8 diff --git a/dsppp/fvp_configs/VHT-M4.txt b/dsppp/fvp_configs/VHT-M4.txt new file mode 100644 index 000000000..fda8c0249 --- /dev/null +++ b/dsppp/fvp_configs/VHT-M4.txt @@ -0,0 +1,3 @@ +fvp_mps2.mps2_visualisation.disable-visualisation=1 +armcortexm4ct.semihosting-enable=1 +armcortexm4ct.vfp-present=1 diff --git a/dsppp/getserial.py b/dsppp/getserial.py new file mode 100644 index 000000000..d3f7e6781 --- /dev/null +++ b/dsppp/getserial.py @@ -0,0 +1,28 @@ +import serial +import re +import io +from pyocd.core.target import Target + +lines = [] + +def read_stdout(target): + print("Waiting for serial") + lines = [] + + with serial.Serial('COM6', 115200, timeout=1,parity=serial.PARITY_NONE) as ser: + sio = io.TextIOWrapper(ser) + DONE = False + target.reset() + while not DONE: + line = sio.readline() + if len(line)==0: + raise Exception('Timeout error') + if re.match(r'Stats',line): + DONE=True + else: + #print(line) + lines.append(line) + + + return(lines) + \ No newline at end of file diff --git a/dsppp/linker_scripts/ARMCM0P/region_defs.h b/dsppp/linker_scripts/ARMCM0P/region_defs.h new file mode 100644 index 000000000..b66150bbb --- /dev/null +++ b/dsppp/linker_scripts/ARMCM0P/region_defs.h @@ -0,0 +1,60 @@ +#ifndef REGIONS_ARMCM0P_H +#define REGIONS_ARMCM0P_H + + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// Device pack: ARM::Cortex_DFP@1.0.0 +// Device pack used to generate this file + +// ROM Configuration +// ======================= +// ROM=<__ROM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x00000000 +#define __ROM0_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00040000 +#define __ROM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __ROM0_DEFAULT 1 +// Startup +// Selects region to be used for startup code. +#define __ROM0_STARTUP 1 +// + +// + +// RAM Configuration +// ======================= +// RAM=<__RAM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20000000 +#define __RAM0_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __RAM0_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. 
+#define __RAM0_NOINIT 0 +// + +// + +// Stack / Heap Configuration +// Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +// Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +#define __STACK_SIZE 0x00002000 +#define __HEAP_SIZE 0x00038000 +// + + +#endif /* REGIONS_ARMCM0P_H */ diff --git a/dsppp/linker_scripts/ARMCM4/region_defs.h b/dsppp/linker_scripts/ARMCM4/region_defs.h new file mode 100644 index 000000000..dc63f5bb6 --- /dev/null +++ b/dsppp/linker_scripts/ARMCM4/region_defs.h @@ -0,0 +1,60 @@ +#ifndef REGIONS_ARMCM4_H +#define REGIONS_ARMCM4_H + + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// Device pack: ARM::Cortex_DFP@1.0.0 +// Device pack used to generate this file + +// ROM Configuration +// ======================= +// ROM=<__ROM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x00000000 +#define __ROM0_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00040000 +#define __ROM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __ROM0_DEFAULT 1 +// Startup +// Selects region to be used for startup code. +#define __ROM0_STARTUP 1 +// + +// + +// RAM Configuration +// ======================= +// RAM=<__RAM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20000000 +#define __RAM0_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __RAM0_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM0_NOINIT 0 +// + +// + +// Stack / Heap Configuration +// Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +// Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +#define __STACK_SIZE 0x00002000 +#define __HEAP_SIZE 0x00038000 +// + + +#endif /* REGIONS_ARMCM4_H */ diff --git a/dsppp/linker_scripts/SSE-300-MPS3/region_defs.h b/dsppp/linker_scripts/SSE-300-MPS3/region_defs.h new file mode 100644 index 000000000..32ac16b37 --- /dev/null +++ b/dsppp/linker_scripts/SSE-300-MPS3/region_defs.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __REGION_DEFS_H__ +#define __REGION_DEFS_H__ + +#include "region_limits.h" + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. + * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. 
+ * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure regions */ +#define S_CODE_START ( S_ROM_ALIAS ) +#define S_CODE_SIZE ( TOTAL_S_ROM_SIZE ) +#define S_CODE_LIMIT ( S_CODE_START + S_CODE_SIZE ) + +#define S_DATA_START ( S_RAM_ALIAS ) +#define S_DATA_SIZE ( TOTAL_S_RAM_SIZE ) +#define S_DATA_LIMIT ( S_DATA_START + S_DATA_SIZE ) + +#define S_DDR4_START ( S_DDR4_ALIAS ) +#define S_DDR4_SIZE ( TOTAL_S_DDR4_SIZE ) +#define S_DDR4_LIMIT ( S_DDR4_START + S_DDR4_SIZE ) + +#endif /* __REGION_DEFS_H__ */ diff --git a/dsppp/linker_scripts/SSE-300-MPS3/region_limits.h b/dsppp/linker_scripts/SSE-300-MPS3/region_limits.h new file mode 100644 index 000000000..0d600a363 --- /dev/null +++ b/dsppp/linker_scripts/SSE-300-MPS3/region_limits.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __REGION_LIMITS_H__ +#define __REGION_LIMITS_H__ + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. + * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. + * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure Code */ +#define S_ROM_ALIAS (0x10000000) /* ITCM_BASE_S */ +#define TOTAL_S_ROM_SIZE (0x00080000) /* 512 kB */ + +/* Secure Data */ +#define S_RAM_ALIAS (0x30000000) /* DTCM_BASE_S */ +#define TOTAL_S_RAM_SIZE (0x00080000) /* 512 kB */ + +/* Secure DDR4 */ +#define S_DDR4_ALIAS (0x70000000) /* DDR4_BLK1_BASE_S */ +#define TOTAL_S_DDR4_SIZE (0x10000000) /* 256 MB */ + +/* Heap and Stack sizes for secure and nonsecure applications */ +#define HEAP_SIZE (0x00038000) /* 1 KiB */ +#define STACK_SIZE (0x00002000) /* 1 KiB */ + +#endif /* __REGION_LIMITS_H__ */ diff --git a/dsppp/linker_scripts/ac6_m0p_mps3_s.sct b/dsppp/linker_scripts/ac6_m0p_mps3_s.sct new file mode 100644 index 000000000..4d6e579d0 --- /dev/null +++ b/dsppp/linker_scripts/ac6_m0p_mps3_s.sct @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ + +LR_ROM0 __ROM0_BASE __ROM0_SIZE { + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + ER_CMSE_VENEER __ROM0_BASE+__ROM0_SIZE -__ROM0_SIZE { + *(Veneer$$CMSE) + } + #define ER_CMSE_VENEER_SIZE AlignExpr(ImageLength(ER_CMSE_VENEER), 8) +#else + #define ER_CMSE_VENEER_SIZE 0 +#endif + + ER_ROM0 __ROM0_BASE (__ROM0_SIZE - ER_CMSE_VENEER_SIZE) { + *.o (RESET, +First) + *(InRoot$$Sections) + *(+RO +XO) + } + + RW_NOINIT __RAM0_BASE UNINIT (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE) { + *(.bss.noinit) + } + + RW_RAM0 AlignExpr(+0, 8) (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP (AlignExpr(+0, 8)) EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK (__RAM0_BASE + __RAM0_SIZE - __STACKSEAL_SIZE) EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } + +#if __STACKSEAL_SIZE > 0 + STACKSEAL +0 EMPTY 8 { ; Reserve empty region for stack seal immediately after stack + } +#endif + +#if __RAM1_SIZE > 0 + RW_RAM1 __RAM1_BASE __RAM1_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM2_SIZE > 0 + RW_RAM2 __RAM2_BASE __RAM2_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM3_SIZE > 0 + RW_RAM3 __RAM3_BASE __RAM3_SIZE { + .ANY (+RW +ZI) + } +#endif +} + +#if __ROM1_SIZE > 0 +LR_ROM1 __ROM1_BASE __ROM1_SIZE { + ER_ROM1 +0 __ROM1_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM2_SIZE > 0 +LR_ROM2 __ROM2_BASE __ROM2_SIZE { + ER_ROM2 +0 __ROM2_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM3_SIZE > 0 +LR_ROM3 __ROM3_BASE __ROM3_SIZE { + ER_ROM3 +0 __ROM3_SIZE { + .ANY (+RO +XO) + } +} +#endif diff --git a/dsppp/linker_scripts/ac6_m4_mps3_s.sct b/dsppp/linker_scripts/ac6_m4_mps3_s.sct new file mode 100644 index 000000000..4d6e579d0 --- /dev/null +++ b/dsppp/linker_scripts/ac6_m4_mps3_s.sct @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ + +LR_ROM0 __ROM0_BASE __ROM0_SIZE { + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + ER_CMSE_VENEER __ROM0_BASE+__ROM0_SIZE -__ROM0_SIZE { + *(Veneer$$CMSE) + } + #define ER_CMSE_VENEER_SIZE AlignExpr(ImageLength(ER_CMSE_VENEER), 8) +#else + #define ER_CMSE_VENEER_SIZE 0 +#endif + + ER_ROM0 __ROM0_BASE (__ROM0_SIZE - ER_CMSE_VENEER_SIZE) { + *.o (RESET, +First) + *(InRoot$$Sections) + *(+RO +XO) + } + + RW_NOINIT __RAM0_BASE UNINIT (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE) { + *(.bss.noinit) + } + + RW_RAM0 AlignExpr(+0, 8) (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP (AlignExpr(+0, 8)) EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK (__RAM0_BASE + __RAM0_SIZE - __STACKSEAL_SIZE) EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } + +#if __STACKSEAL_SIZE > 0 + STACKSEAL +0 EMPTY 8 { ; Reserve empty region for stack seal immediately after stack + } +#endif + +#if __RAM1_SIZE > 0 + RW_RAM1 __RAM1_BASE __RAM1_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM2_SIZE > 0 + RW_RAM2 __RAM2_BASE __RAM2_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM3_SIZE > 0 + RW_RAM3 __RAM3_BASE __RAM3_SIZE { + .ANY (+RW +ZI) + } +#endif +} + +#if __ROM1_SIZE > 0 +LR_ROM1 __ROM1_BASE __ROM1_SIZE { + ER_ROM1 +0 __ROM1_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM2_SIZE > 0 +LR_ROM2 __ROM2_BASE __ROM2_SIZE { + ER_ROM2 +0 __ROM2_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM3_SIZE > 0 +LR_ROM3 __ROM3_BASE __ROM3_SIZE { + ER_ROM3 +0 __ROM3_SIZE { + .ANY (+RO +XO) + } +} +#endif diff --git a/dsppp/linker_scripts/ac6_sse300_mps3_s.sct b/dsppp/linker_scripts/ac6_sse300_mps3_s.sct new file mode 100644 index 000000000..6712e5cc0 --- /dev/null +++ b/dsppp/linker_scripts/ac6_sse300_mps3_s.sct @@ -0,0 +1,79 @@ + +;/* +; * Copyright (c) 2018-2021 Arm Limited. All rights reserved. +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; * +; */ + +;#include "region_defs.h" + +LR_CODE S_CODE_START { + ER_CODE S_CODE_START { + *.o (RESET +First) + .ANY (+RO) + /* different test vectors */ + * (InRoot$$Sections) + } + + /* + * Place the CMSE Veneers (containing the SG instruction) after the code, in + * a separate 32 bytes aligned region so that the SAU can programmed to just + * set this region as Non-Secure Callable. 
The maximum size of this + * executable region makes it only used the space left over by the ER_CODE + * region so that you can rely on code+veneer size combined will not exceed + * the S_CODE_SIZE value. We also substract from the available space the + * area used to align this section on 32 bytes boundary (for SAU conf). + */ + ER_CODE_CMSE_VENEER +0 ALIGN 32 { + *(Veneer$$CMSE) + } + /* + * This dummy region ensures that the next one will be aligned on a 32 bytes + * boundary, so that the following region will not be mistakenly configured + * as Non-Secure Callable by the SAU. + */ + ER_CODE_CMSE_VENEER_DUMMY +0 ALIGN 32 EMPTY 0 {} + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + CODE_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(CODE_WATERMARK) <= S_CODE_START + S_CODE_SIZE) + + ER_DATA S_DATA_START { + .ANY (+ZI +RW +RO-DATA) + } + + #if HEAP_SIZE > 0 + ARM_LIB_HEAP +0 ALIGN 8 EMPTY HEAP_SIZE { ; Reserve empty region for heap + } + #endif + + ARM_LIB_STACK +0 ALIGN 32 EMPTY STACK_SIZE { ; Reserve empty region for stack + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + SRAM_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(SRAM_WATERMARK) <= S_DATA_START + S_DATA_SIZE) +} diff --git a/dsppp/linker_scripts/ac6_sse310_mps3_s.sct b/dsppp/linker_scripts/ac6_sse310_mps3_s.sct new file mode 100644 index 000000000..0650639f8 --- /dev/null +++ b/dsppp/linker_scripts/ac6_sse310_mps3_s.sct @@ -0,0 +1,60 @@ + +;/* +; * Copyright (c) 2018-2021 Arm Limited +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; * +; */ + + + +LR_CODE S_CODE_START { + ER_CODE S_CODE_START { + *.o (RESET +First) + .ANY (+RO) + /* different test vectors */ + * (InRoot$$Sections) + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + CODE_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(CODE_WATERMARK) <= S_CODE_START + S_CODE_SIZE) + + ER_DATA S_DATA_START { + .ANY (+ZI +RW +RO-DATA) + } + + #if HEAP_SIZE > 0 + ARM_LIB_HEAP +0 ALIGN 8 EMPTY HEAP_SIZE { ; Reserve empty region for heap + } + #endif + + ARM_LIB_STACK +0 ALIGN 32 EMPTY STACK_SIZE { ; Reserve empty region for stack + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. 
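+ * (The ScatterAssert just below compares ImageLimit(SRAM_WATERMARK) against
+ * S_DATA_START + S_DATA_SIZE, so the link fails if the data sections
+ * overflow the available SRAM.)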
+ */ + SRAM_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(SRAM_WATERMARK) <= S_DATA_START + S_DATA_SIZE) +} diff --git a/dsppp/linker_scripts/clang_m0p_mps3.ld b/dsppp/linker_scripts/clang_m0p_mps3.ld new file mode 100644 index 000000000..40f955c16 --- /dev/null +++ b/dsppp/linker_scripts/clang_m0p_mps3.ld @@ -0,0 +1,353 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) + } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + *(.gnu.linkonce.r.*) + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . = ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . 
+ 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . ); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . = ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef __HEAP_SIZE + PROVIDE (__heap_end = __heap_start + __HEAP_SIZE); + PROVIDE (__heap_size = __HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - __STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - __STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += __STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack) + .stackseal (__stack) (NOLOAD) : + { + . 
+= __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. */ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. + */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/linker_scripts/clang_m4_mps3.ld b/dsppp/linker_scripts/clang_m4_mps3.ld new file mode 100644 index 000000000..40f955c16 --- /dev/null +++ b/dsppp/linker_scripts/clang_m4_mps3.ld @@ -0,0 +1,353 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) + } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + *(.gnu.linkonce.r.*) + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . 
); + + } >ROM0 AT>ROM0 :text + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . = ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . + 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . ); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . 
= ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef __HEAP_SIZE + PROVIDE (__heap_end = __heap_start + __HEAP_SIZE); + PROVIDE (__heap_size = __HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - __STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - __STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += __STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack) + .stackseal (__stack) (NOLOAD) : + { + . += __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. */ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. 
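+ * (__data_size is the run-time VMA extent of .data/.tdata and
+ * __data_source_size the matching load-time LMA extent in ROM0; the ASSERT
+ * below makes the link fail if the two ever differ.)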
+ */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/linker_scripts/clang_sse300_mps3.sct b/dsppp/linker_scripts/clang_sse300_mps3.sct new file mode 100644 index 000000000..62352193b --- /dev/null +++ b/dsppp/linker_scripts/clang_sse300_mps3.sct @@ -0,0 +1,364 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE + 0x000000 +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE + 0x000000 +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) + } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + *(.gnu.linkonce.r.*) + + + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .veneers : + { + . = ALIGN(32); + KEEP(*(.gnu.sgstubs)) + } > ROM0 AT>ROM0 :text +#endif + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . 
= ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . + 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . ); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . 
= ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef HEAP_SIZE + PROVIDE (__heap_end = __heap_start + HEAP_SIZE); + PROVIDE (__heap_size = HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack); + .stackseal (__stack) (NOLOAD) : + { + . += __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. */ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. 
+ */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/linker_scripts/clang_sse310_mps3.sct b/dsppp/linker_scripts/clang_sse310_mps3.sct new file mode 100644 index 000000000..3f4877162 --- /dev/null +++ b/dsppp/linker_scripts/clang_sse310_mps3.sct @@ -0,0 +1,363 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE + 0x000000 +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE + 0x000000 +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) + } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + *(.gnu.linkonce.r.*) + + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .veneers : + { + . = ALIGN(32); + KEEP(*(.gnu.sgstubs)) + } > ROM0 AT>ROM0 :text +#endif + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . 
= ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . + 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . ); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . 
= ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef HEAP_SIZE + PROVIDE (__heap_end = __heap_start + HEAP_SIZE); + PROVIDE (__heap_size = HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack); + .stackseal (__stack) (NOLOAD) : + { + . += __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. */ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. 
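For reference, a minimal sketch of the picolibc-style startup copy that the assertion just below protects; the __data_* / __bss_* symbols are the ones PROVIDEd by this script, and the helper name is purely illustrative:

    #include <string.h>
    #include <stdint.h>

    /* Linker-defined symbols: their addresses carry the values computed above. */
    extern char __data_start[];    /* ADDR(.data)                */
    extern char __data_source[];   /* LOADADDR(.data)            */
    extern char __data_size[];     /* __data_end - __data_start  */
    extern char __bss_start[];     /* ADDR(.tbss)                */
    extern char __bss_size[];      /* __bss_end - __bss_start    */

    static void sketch_data_init(void)  /* illustrative, not the real crt0 entry */
    {
        /* One copy moves .data and the .tdata template together, which is why the
           flash-side and RAM-side padding of those sections must match. */
        memcpy(__data_start, __data_source, (uintptr_t)__data_size);

        /* .tbss/.bss are cleared rather than copied. */
        memset(__bss_start, 0, (uintptr_t)__bss_size);
    }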
+ */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/linker_scripts/gcc_m0p_mps3.ld b/dsppp/linker_scripts/gcc_m0p_mps3.ld new file mode 100644 index 000000000..a018e5d4e --- /dev/null +++ b/dsppp/linker_scripts/gcc_m0p_mps3.ld @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rwx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rwx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rwx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rwx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . 
= ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. + LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - __STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . = ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/linker_scripts/gcc_m4_mps3.ld b/dsppp/linker_scripts/gcc_m4_mps3.ld new file mode 100644 index 000000000..a018e5d4e --- /dev/null +++ b/dsppp/linker_scripts/gcc_m4_mps3.ld @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rwx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rwx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rwx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rwx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . = ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . 
= ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. + LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - __STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . = ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/linker_scripts/gcc_sse300_mps3.ld b/dsppp/linker_scripts/gcc_sse300_mps3.ld new file mode 100644 index 000000000..e00625ea6 --- /dev/null +++ b/dsppp/linker_scripts/gcc_sse300_mps3.ld @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rw) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rw) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rw) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rw) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + /* *(.rodata*) */ + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . = ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . 
= ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. + LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + *(.rodata*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . = ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/linker_scripts/gcc_sse310_mps3_s.ld b/dsppp/linker_scripts/gcc_sse310_mps3_s.ld new file mode 100644 index 000000000..7bea37e1a --- /dev/null +++ b/dsppp/linker_scripts/gcc_sse310_mps3_s.ld @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
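Each entry in the .copy.table / .zero.table sections of the CMSIS-style scripts above is a (LOADADDR, ADDR, SIZEOF/4) triplet or an (ADDR, SIZEOF/4) pair of words; the CMSIS C startup (__cmsis_start in cmsis_gcc.h) walks whatever entries are present roughly as in this sketch:

    #include <stdint.h>

    typedef struct { uint32_t const *src; uint32_t *dest; uint32_t wlen; } copy_table_t;
    typedef struct { uint32_t *dest; uint32_t wlen; } zero_table_t;

    extern const copy_table_t __copy_table_start__, __copy_table_end__;
    extern const zero_table_t __zero_table_start__, __zero_table_end__;

    static void sketch_cmsis_start(void)   /* simplified stand-in for __cmsis_start */
    {
        for (const copy_table_t *t = &__copy_table_start__; t < &__copy_table_end__; ++t)
            for (uint32_t i = 0U; i < t->wlen; ++i)
                t->dest[i] = t->src[i];        /* copy .data (and any .data2, ...)  */

        for (const zero_table_t *t = &__zero_table_start__; t < &__zero_table_end__; ++t)
            for (uint32_t i = 0U; i < t->wlen; ++i)
                t->dest[i] = 0U;               /* clear any additional bss sections */
    }

The SIZEOF(...)/4 in the scripts is what makes wlen a word count rather than a byte count.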
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rwx) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rwx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rwx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rwx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + /* *(.rodata*) */ + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . = ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. + LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. 
+ */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + *(.rodata*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . 
= ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/main.c b/dsppp/main.c new file mode 100644 index 000000000..3a7cdcefd --- /dev/null +++ b/dsppp/main.c @@ -0,0 +1,93 @@ +#include "test_config.h" +#include "RTE_Components.h" +#include CMSIS_device_header +#include "stdio.h" + +#if defined(MPS3) +#include "cmsis_driver_config.h" +#include "stdout_USART.h" +#endif + +#if defined(RTE_Compiler_EventRecorder) +#include "EventRecorder.h" +#endif + +#include "test.h" + + +int main(void) +{ +#if defined(MPS3) + stdout_init(); +#endif + +#if defined(RTE_Compiler_EventRecorder) && !defined(MPS3) + uint32_t res = EventRecorderInitialize (EventRecordAll, 1); + if (!res) + { + printf("Error enabling event recorder\n"); + goto endThread; + } +#endif + + #if !defined(SERIAL_DUMP) + printf("\033c\r\n\r\n"); + #endif + printf("\r\n\r\n\r\n----------------------\r\n"); + printf(__TIME__"\r\n"); + #if defined(ARMCM55) + printf("M55\r\n"); + #endif + #if defined(ARMCM4_FP) + printf("ARMCM4_FP\r\n"); + #endif + #if defined(ARMCM0P) + printf("ARMCM0P\r\n"); + #endif + + #if defined(MPS3) + printf("MPS3\r\n"); + #endif + #if defined(VHT) + printf("VHT\r\n"); + #endif + #if defined(IPSS) + printf("IPSS\r\n"); + #endif + + #if defined(DOT_TEST) + dot_test(); + #endif + #if defined(VECTOR_TEST) + vector_test(); + #endif + #if defined(ROW_TEST) + row_test(); + #endif + #if defined(COL_TEST) + col_test(); + #endif + #if defined(MATRIX_TEST) + matrix_test(); + #endif + #if 0 + filter_test(); + #endif + #if defined(FUSION_TEST) + fusion_test(); + #endif + //debug_test(); + + memory_pool_stats(); + +#if defined(MPS3) + while(1); +#else +#if defined(RTE_Compiler_EventRecorder) +endThread: +#endif + while(0); +#endif +} + + diff --git a/dsppp/mps3run.py b/dsppp/mps3run.py new file mode 100644 index 000000000..799e7145f --- /dev/null +++ b/dsppp/mps3run.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +from pyocd.core.helpers import ConnectHelper +from pyocd.flash.file_programmer import FileProgrammer +from pyocd.debug.elf.symbols import ELFSymbolProvider +from pyocd.core.target import Target +from pyocd.debug.elf.elf import ELFBinaryFile +from pyocd.flash.loader import MemoryLoader +import getserial + +import time +import os.path + +import serial +import re +import io + +import logging +logging.basicConfig(level=logging.ERROR) + + + +def run_out(exe_path,uuid): + lines= "" + + with ConnectHelper.session_with_chosen_probe(unique_id = uuid) as session: + print("Connecting") + board = session.board + target = board.target + #flash = target.memory_map.get_boot_memory() + + # Load firmware into device. + FileProgrammer(session).program(exe_path) + + #target.elf = elf_path + + + #provider = ELFSymbolProvider(target.elf) + #main_addr = provider.get_symbol_value("main") + #print("main() address: 0x%X" % main_addr) + + ## Set breakpoint. + #target.set_breakpoint(main_addr) + + #target.reset() + lines = getserial.read_stdout(target) + return("".join(lines)) + #target.resume() + ## + ## + #target.reset() + ## + ### Wait until breakpoint is hit. + #while target.get_state() != Target.State.HALTED: + # pass + ## + #pc = target.read_core_register("pc") + #print("pc: 0x%X" % pc) + # + #target.remove_breakpoint() + # + #target.resume() + +if __name__ == "__main__": + path = "." 
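Stepping back to the secure (CMSE) builds: each linker script above reserves an 8-byte .stackseal region at the top of RAM0 when __ARM_FEATURE_CMSE == 3 and exports its start as __StackSeal (or __stack_seal in the picolibc-style script). A sketch of what the secure startup is expected to do with it, using the 0xFEF5EDA5 seal pattern written by CMSIS-Core's __TZ_set_STACKSEAL_S helper:

    #include <stdint.h>

    #if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
    extern uint64_t __StackSeal;          /* start of the 8-byte .stackseal region */

    static void sketch_seal_stack(void)   /* illustrative helper name */
    {
        /* Two words of 0xFEF5EDA5 directly above the secure stack top. */
        *(volatile uint64_t *)&__StackSeal = 0xFEF5EDA5FEF5EDA5ULL;
    }
    #endif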
+ out = "cprj/out/test/MPS3-Corstone-300" + bin = "Release/test.axf" + + axf_path = os.path.join(path,out,bin) + + #axf=ELFBinaryFile(axf_path) + #axf.close() + + lines = run_out(axf_path,"L85986697A") + + + print(lines) + + \ No newline at end of file diff --git a/dsppp/process.py b/dsppp/process.py new file mode 100644 index 000000000..be7aff9fd --- /dev/null +++ b/dsppp/process.py @@ -0,0 +1,137 @@ +import re +import xlsxwriter + +START = 0 +IN_TEST = 1 +MEASURE = 2 +CYCLE_CPP = 3 +CYCLE_C = 4 +ERROR = 5 + +line_nb = 0 +state = START +dimensions = "?" + +cpp = 0 +c = 0 + +stats = {} + +with open("result.txt","r") as f: + lines = f.readlines() + for l in lines: + if line_nb >= 3: + if re.match('Error',l): + state = ERROR + continue + if state == ERROR: + state = IN_TEST + continue + if state == START: + if re.match(r'^[a-zA-Z]+.*$',l): + #print(l) + test_name = l.strip("\n") + state = IN_TEST + stats[test_name]=[] + continue + if state == IN_TEST: + if re.match(r'----',l): + state = MEASURE + continue + if re.match(r'^[a-zA-Z]+.*$',l): + state = IN_TEST + test_name = l.strip("\n") + stats[test_name]=[] + continue + if state == MEASURE: + dimensions = l.strip("\n") + state = CYCLE_CPP + continue + if state == CYCLE_CPP: + m = re.match(r'Cycle count = ([0-9]+)',l) + if m: + cpp = m.group(1) + state = CYCLE_C + continue + if state == CYCLE_C: + if re.match(r'----',l): + state = MEASURE + stats[test_name].append({"dim":dimensions,"cpp":cpp}) + continue + m = re.match(r'Cycle count = ([0-9]+)',l) + if m: + c = m.group(1) + state = IN_TEST + stats[test_name].append({"dim":dimensions,"cpp":cpp,"c":c}) + continue + else: + stats[test_name].append({"dim":dimensions,"cpp":cpp}) + state = IN_TEST + continue + + + + + + line_nb = line_nb + 1 + +dst="C:/Users/CHRFAV01/OneDrive - ARM/Documents/Presentations/CMSIS_Compute" + +def pos(row,col): + return(f"{chr(ord('A')+col)}{row}") + +for s in stats: + ns = re.sub(r'[ ]',"_",s) + ".xlsx" + print(ns) + workbook = xlsxwriter.Workbook(dst+"/"+ns) + worksheet = workbook.add_worksheet("Results") + line_nb = 0 + + title = workbook.add_format({'bold': True,'font_size':24}) + sub_title = workbook.add_format({'bold': True, + 'font_size':14, + 'align':"center", + 'bg_color':"#CCCCCC"}) + percent = workbook.add_format({'num_format': '0.00%'}) + dimEven = workbook.add_format({'bold': True,'bg_color':"#CCCCCC"}) + dimOdd = workbook.add_format({'bold': True,'bg_color':"#EEEEEE"}) + + worksheet.write(line_nb,0, s,title) + line_nb = line_nb + 1 + + worksheet.set_row(line_nb, 30) + worksheet.set_column("D:D", 30) + + if len(stats[s])==2: + worksheet.write(line_nb,0, 'dims',sub_title) + worksheet.write(line_nb,1, 'cpp',sub_title) + worksheet.write(line_nb, 2, 'CPP Improvement',sub_title) + + else: + worksheet.write(line_nb,0, 'dims',sub_title) + worksheet.write(line_nb,1, 'cpp',sub_title) + worksheet.write(line_nb,2, 'c',sub_title) + worksheet.write(line_nb, 3, 'CPP Improvement',sub_title) + + line_nb = line_nb + 1 + for x in stats[s]: + if (line_nb % 2 == 0): + dim = dimOdd + else: + dim = dimEven + if "c" in x: + worksheet.write(line_nb,0, x["dim"],dim) + worksheet.write(line_nb,1, float(x["cpp"])) + worksheet.write(line_nb,2, float(x["c"])) + worksheet.write(line_nb, 3, f"=(C{line_nb+1}-B{line_nb+1})/C{line_nb+1}",percent) + else: + worksheet.write(line_nb,0, x["dim"],dim) + worksheet.write(line_nb,1, float(x["cpp"])) + worksheet.write(line_nb, 2, f"=(C{line_nb+1}-B{line_nb+1})/C{line_nb+1}",percent) + + line_nb = line_nb + 1 + + + + workbook.close() + \ No newline at 
end of file diff --git a/dsppp/run_all.py b/dsppp/run_all.py new file mode 100644 index 000000000..9c15f71b8 --- /dev/null +++ b/dsppp/run_all.py @@ -0,0 +1,390 @@ +import re +import argparse +import os.path +import itertools +import subprocess +import sys +import mps3run + +from colorama import init,Fore, Back, Style + +try: + os.mkdir("ac6_results") +except: + pass + +try: + os.mkdir("gcc_results") +except: + pass + +try: + os.mkdir("clang_results") +except: + pass + +DEBUG = False +ERROR_OCCURED = False + +all_errors = [] + +def printTitle(s): + print("\n" + Fore.GREEN + Style.BRIGHT + s + Style.RESET_ALL) + +def printSubTitle(s): + print(Fore.YELLOW + Style.BRIGHT + s + Style.RESET_ALL) + +def printError(s): + print(Fore.RED + Style.BRIGHT + s + Style.RESET_ALL+"\n") + +class Result: + def __init__(self,msg,error=False): + self._error = error + self._msg = msg + + @property + def error(self): + return self._error + + @property + def msg(self): + return self._msg + +def is_error(res,test_name,err): + if res.error: + printError("Error") + all_errors.append(test_name) + print(test_name,file=err) + print(res.msg,file=err) + print("--------------",file=err) + return(True) + return(False) + +def run(args,mustPrint=False,dumpStdErr=True,timeout=20,printCmd=False): + global ERROR_OCCURED + global DEBUG + try: + if DEBUG or printCmd: + print(" ".join(args)) + result=subprocess.run(args,text=True,capture_output=True,timeout=timeout) + if result.returncode !=0 : + ERROR_OCCURED = True + if dumpStdErr: + return(Result(result.stderr + "\n\nSTDOUT:\n\n" + result.stdout,error=True)) + else: + return(Result(result.stdout,error=True)) + + if mustPrint: + print(result.stdout) + return(Result(result.stdout)) + except Exception as e: + printError("Exception occured") + ERROR_OCCURED = True + return(Result(str(e),error=True)) + +parser = argparse.ArgumentParser(description='Parse test description') +parser.add_argument('-c', nargs='?',type = str, default="M55",help="M55/M4/M0") +parser.add_argument('-p', nargs='?',type = str, default="VHT",help="VHT/MPS3") +parser.add_argument('-a', action='store_true', help="Generate allocator definitions") +parser.add_argument('-i', action='store_true', help="Refresh global allocator index") +parser.add_argument('-b', action='store_true', help="Only benchmarks") +parser.add_argument('-d', action='store_true', help="Dry run") +parser.add_argument('-g', nargs='?',type = str, default="AC6",help="AC6 / CLANG / GCC") +parser.add_argument('-u', nargs='?',type = str, default="L85986697A",help="Debug UUID") + +args = parser.parse_args() + +init() + +if args.a: + printTitle("Mode allocator generations") + +if args.i: + printTitle("Allocator test index refresh") + +NAME_TO_BOARD = { + "M55": "Corstone-300", + "Corstone-300": "Corstone-300", + "M4": "M4", + "M0" : "M0P" +} + +def results(): + if args.g == "AC6": + return("ac6_results") + + if args.g == "GCC": + return("gcc_results") + + if args.g == "CLANG": + return("clang_results") + + print(f"Compiler {args.g} not known") + exit(1) + +def target_name(): + return(f"{args.p}-{NAME_TO_BOARD[args.c]}") + +def cmd_args(): + # cbuild -O cprj test.csolution.yml -r --toolchain AC6 -c test.Release+MPS3-Corstone-300 + toolchain = args.g + target = f"test.Release+{target_name()}" + + command = ["-O", "cprj", + "test.csolution.yml", + "--toolchain", toolchain, + "-c", target] + + return(command) + + + +if args.g == "AC6": + ext = ".axf" +else: + ext = ".elf" + +fvp = {"M55":"C:\\Keil_v5\\ARM\\VHT\\VHT_Corstone_SSE-300_Ethos-U55.exe", + 
"M4":"C:\\Keil_v5\\ARM\\VHT\\VHT_MPS2_Cortex-M4.exe", + "M0":"C:\\Keil_v5\\ARM\\VHT\\VHT_MPS2_Cortex-M0plus.exe"} + +TESTS=["DOT_TEST", + "VECTOR_TEST", + "ROW_TEST", + "COL_TEST", + "MATRIX_TEST", + "FUSION_TEST" + ] + +# Some tests are too big (code size) and needs to be decomposed +# They contain SUBTEST1, SUBTEST2 ... #if in the code +# This script must know how many subtests are defined in each test +# suite +# No need to define an entry in this dictionary when no +# subtest is defined +SUBTESTS = {"MATRIX_TEST":19} +# Subtests that are only for testing and not benchmarks +ONLY_TESTS = {"MATRIX_TEST":[3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19]} + +def is_only_test(n,i): + if n[0] in ONLY_TESTS: + return(i in ONLY_TESTS[n[0]]) + return False + +DATATYPES = ["F64_DT", + "F32_DT", + "F16_DT", + "Q31_DT", + "Q15_DT", + "Q7_DT" + ] + +MODE = ["STATIC_TEST", + "DYNAMIC_TEST" + ] + +TESTS=["DOT_TEST","VECTOR_TEST"] +DATATYPES=["F32_DT"] +MODE = ["STATIC_TEST"] + +all_tests = list(itertools.product(TESTS,DATATYPES,MODE)) + + + +ALLOC = "#define POOL_ALLOCATOR" +if args.a: + # Stat allocator enabled and we do stats on VHT CS300 only + ALLOC = "//#define POOL_ALLOCATOR" + args.c = "M55" + args.p = "VHT" + +BENCH = "//#define ONLY_BENCHMARKS" +if args.b: + BENCH = "#define ONLY_BENCHMARKS" + +HEADER = f"""#ifndef TEST_CONFIG_H +#define TEST_CONFIG_H + +{ALLOC} +{BENCH} + +#define %s +#define %s +#define %s +%s + +#endif +""" + + + +def out_path(): + return(os.path.join("cprj","out","test",target_name(),"Release","test"+ ext)) + +def configure_and_build_test(test_name,test,err,subtest,first): + if subtest is not None: + subteststr = f"#define SUBTEST{subtest}" + else: + subteststr = "" + with open("test_config.h","w") as c: + print(HEADER % (test + (subteststr,)),file=c) + if first: + res = run(["cbuild"] + cmd_args() + ["-r","--update-rte"],timeout=600,printCmd=True) + else: + res = run(["cbuild"] +cmd_args(),timeout=600,printCmd=True) + if not is_error(res,test_name,err): + if DEBUG: + print(res.msg) + return(True) + return(False) + +def process_allocator_data(test_name,test,msg,subtest): + lines = msg.splitlines() + state = 0 + alloc_cpp = [] + alloc_h = [] + for l in lines: + if re.match(r"^ALLOC_POOL.*$",l): + alloc_cpp.append(l.strip()) + if re.match(r"^POOL.*$",l): + alloc_h.append(l.strip()) + if subtest is not None: + HEADER=f"#if defined({test[0]}) && defined({test[1]}) && defined({test[2]}) && defined(SUBTEST{subtest})" + else: + HEADER=f"#if defined({test[0]}) && defined({test[1]}) && defined({test[2]})" + # Gen h + with open(os.path.join("allocation",test_name)+".h","w") as h: + print(HEADER,file=h) + for l in alloc_h: + print(l,file=h) + print("#endif",file=h) + + # Gen cpp + with open(os.path.join("allocation",test_name)+".cpp","w") as h: + print(HEADER,file=h) + for l in alloc_cpp: + print(l,file=h) + print("#endif",file=h) + +def process_bench(test_name,test,msg,subtest): + global DEBUG + lines = msg.splitlines() + test_name = args.p +"_" + args.c + "_" + test_name + if DEBUG: + print(os.path.join(results(),test_name)+".txt") + with open(os.path.join(results(),test_name)+".txt","w") as h: + for l in lines: + print(l.rstrip(),file=h) + + +def process_result(test_name,test,msg,subtest): + printSubTitle("Process result") + if args.a: + process_allocator_data(test_name,test,msg,subtest) + else: + process_bench(test_name,test,msg,subtest) + +def runVHT(test_name,test,err,subtest): + core = args.c + target = target_name() + config = os.path.join("fvp_configs",target) + ".txt" + 
#print(target) + #print(config) + if core == "M55": + exe = "cpu0=" + out_path() + else: + exe = out_path() + res=run([fvp[core],"-f",config,"-a",exe]) + if not is_error(res,test_name,err): + process_result(test_name,test,res.msg,subtest) + +def runMPS3(test_name,test,err,subtest): + lines="" + res = None + try: + exe = out_path() + lines = mps3run.run_out(exe,args.u) + res = Result(lines) + except Exception as e: + res = Result(str(e),error = True) + if not is_error(res,test_name,err): + process_result(test_name,test,res.msg,subtest) + +def runATest(test,file_err,nb,NB_MAX,current_nb_axf,nb_axf,first=True,subtest=None): + global DEBUG + if subtest is not None: + maxsub = SUBTESTS[test[0]] + test_name=f"{test[0]}_{test[1]}_{test[2]}_{subtest}" + printTitle(test_name + f" : AXF {current_nb_axf} / {nb_axf}, TEST {nb}/{NB_MAX} (subtest {subtest}/{maxsub})") + else: + test_name=f"{test[0]}_{test[1]}_{test[2]}" + printTitle(test_name + f" : AXF {current_nb_axf} / {nb_axf}, TEST {nb}/{NB_MAX}") + if args.d: + return + printSubTitle("Configure and build") + if configure_and_build_test(test_name,test,file_err,subtest,first): + printSubTitle("Run") + if args.p == "VHT": + runVHT(test_name,test,file_err,subtest) + if args.p == "MPS3" and args.c == "M55": + runMPS3(test_name,test,file_err,subtest) + +nb_axf = 0 +for test in all_tests: + if test[0] in SUBTESTS: + for subtestnbb in range(SUBTESTS[test[0]]): + if not args.b or not is_only_test(test,subtestnbb+1): + nb_axf = nb_axf + 1 + else: + nb_axf = nb_axf + 1 +print(f"Number of axf to test = {nb_axf}") + +with open(os.path.join(results(),"errors.txt"),"w") as err: + # Generate include for allocations + if args.a or args.i: + with open(os.path.join("allocation","all.h"),"w") as fh: + for test in all_tests: + if test[0] in SUBTESTS: + for subtestnbb in range(SUBTESTS[test[0]]): + test_name=f"{test[0]}_{test[1]}_{test[2]}_{subtestnbb+1}" + print(f"#include \"{test_name}.h\"",file=fh) + else: + test_name=f"{test[0]}_{test[1]}_{test[2]}" + print(f"#include \"{test_name}.h\"",file=fh) + + with open(os.path.join("allocation","all.cpp"),"w") as fc: + for test in all_tests: + if test[0] in SUBTESTS: + for subtestnbb in range(SUBTESTS[test[0]]): + test_name=f"{test[0]}_{test[1]}_{test[2]}_{subtestnbb+1}" + print(f"#include \"{test_name}.cpp\"",file=fc) + else: + test_name=f"{test[0]}_{test[1]}_{test[2]}" + print(f"#include \"{test_name}.cpp\"",file=fc) + + if not args.i: + NB_MAX = len(all_tests) + nb = 1 # test cases + current_axf = 1 + first = True + for test in all_tests: + if test[0] in SUBTESTS: + for subtestnbb in range(SUBTESTS[test[0]]): + if not args.b or not is_only_test(test,subtestnbb+1): + runATest(test,err,nb,NB_MAX,current_axf,nb_axf,first,subtestnbb+1) + current_axf = current_axf + 1 + first = False + else: + runATest(test,err,nb,NB_MAX,current_axf,nb_axf,first) + current_axf = current_axf + 1 + first = False + nb = nb + 1 + + +if ERROR_OCCURED: + printError("Error in tests:") + for n in all_errors: + printError(n) + sys.exit("Error occurred") +else: + sys.exit(0) diff --git a/dsppp/test.cbuild-pack.yml b/dsppp/test.cbuild-pack.yml new file mode 100644 index 000000000..0f3c7dcfb --- /dev/null +++ b/dsppp/test.cbuild-pack.yml @@ -0,0 +1,17 @@ +cbuild-pack: + resolved-packs: + - resolved-pack: ARM::CMSIS@6.0.0 + selected-by: + - ARM::CMSIS@6.0.0 + - resolved-pack: ARM::CMSIS-Compiler@2.0.0 + selected-by: + - ARM::CMSIS-Compiler@2.0.0 + - resolved-pack: ARM::CMSIS-DSP@1.15.0 + selected-by: + - ARM::CMSIS-DSP@1.15.0 + - resolved-pack: 
ARM::Cortex_DFP@1.0.0 + selected-by: + - ARM::Cortex_DFP@1.0.0 + - resolved-pack: ARM::V2M_MPS3_SSE_300_BSP@1.4.0 + selected-by: + - ARM::V2M_MPS3_SSE_300_BSP@1.4.0 diff --git a/dsppp/test.cproject.yml b/dsppp/test.cproject.yml new file mode 100644 index 000000000..bdb280636 --- /dev/null +++ b/dsppp/test.cproject.yml @@ -0,0 +1,146 @@ +project: + groups: + - group: Tests + files: + - file: tests/matrix_test.cpp + - file: tests/dot_test.cpp + - file: tests/vector_test.cpp + - file: tests/row_test.cpp + - file: tests/col_test.cpp + #- file: tests/filter_test.cpp + - file: tests/fusion_test.cpp + #- file: tests/debug_test.cpp + #- file: tests/debug_test_external.cpp + - file: tests/common_tests.cpp + - file: tests/bench.c + - file: tests/cmsisdsp.cpp + - file: clang_sse300.c + for-context: + - +MPS3-Corstone-300 + for-compiler: + - CLANG + - group: App + files: + - file: main.c + - file: allocator.cpp + - group: IPSS + for-context: + - +IPSS-M0P + - +IPSS-M4 + files: + - file: IPSS/retarget_m0.c + for-context: + - +IPSS-M0P + - file: IPSS/retarget_m4.c + for-context: + - +IPSS-M4 + add-path: + - Include + - ../../../boost_1_84_0 + - . + - tests + + components: + - component: ARM::CMSIS:CORE + - component: ARM::CMSIS:DSP@1.15.0 + - component: ARM::Device:Startup&C Startup + for-context: + - +VHT-Corstone-300 + - +VHT-M0P + - +VHT-M4 + - +MPS3-Corstone-300 + - component: ARM::Device:Definition + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: CMSIS-Compiler:CORE + for-context: + - +MPS3-Corstone-300 + - component: CMSIS-Compiler:STDOUT:Custom@1.0.0 + for-context: + - +MPS3-Corstone-300 + - component: ARM::Device:USART STDOUT + for-context: + - +MPS3-Corstone-300 + - component: ARM::CMSIS Driver:USART + for-context: + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:SysCounter + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:SysTimer + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:Timeout + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:UART + for-context: + - +MPS3-Corstone-300 + + linker: + - script: linker_scripts/gcc_sse300_mps3.ld + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: GCC + + - script: linker_scripts/clang_sse300_mps3.sct + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: CLANG + + - script: linker_scripts/ac6_sse300_mps3_s.sct + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: AC6 + + - regions: linker_scripts/SSE-300-MPS3/region_defs.h + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + + - script: linker_scripts/gcc_m0p_mps3.ld + for-context: + - +VHT-M0P + for-compiler: GCC + + - script: linker_scripts/clang_m0p_mps3.ld + for-context: + - +VHT-M0P + for-compiler: CLANG + + - script: linker_scripts/ac6_m0p_mps3_s.sct + for-context: + - +VHT-M0P + for-compiler: AC6 + + - regions: linker_scripts/ARMCM0P/region_defs.h + for-context: + - +VHT-M0P + + - script: linker_scripts/gcc_m4_mps3.ld + for-context: + - +VHT-M4 + for-compiler: GCC + + - script: linker_scripts/clang_m4_mps3.ld + for-context: + - +VHT-M4 + for-compiler: CLANG + + - script: linker_scripts/ac6_m4_mps3_s.sct + for-context: + - +VHT-M4 + for-compiler: AC6 + + - regions: linker_scripts/ARMCM4/region_defs.h + for-context: + - +VHT-M4 + + diff --git a/dsppp/test.csolution.yml b/dsppp/test.csolution.yml new file mode 100644 index 
000000000..028759a73 --- /dev/null +++ b/dsppp/test.csolution.yml @@ -0,0 +1,108 @@ +solution: + compiler: AC6@6.22.0 + + language-C: c11 + language-CPP: c++17 + cdefault: + + packs: + - pack: ARM::CMSIS@6.0.0 + - pack: ARM::CMSIS-DSP@1.15.0 + - pack: ARM::V2M_MPS3_SSE_300_BSP@1.4.0 + - pack: ARM::CMSIS-Compiler@2.0.0 + - pack: ARM::Cortex_DFP@1.0.0 + + target-types: + - type: MPS3-Corstone-300 + device: ARM::SSE-300-MPS3 + board: ARM::V2M-MPS3-SSE-300-FVP + define: + - CORTEXM + - SSE300MPS3 + - MPS3 + - ARMCM55 + misc: + - for-compiler: GCC + C: + - -Wno-sign-compare + - -Wno-unused-parameter + CPP: + - -Wno-sign-compare + - -Wno-unused-parameter + Link: + - --specs=nosys.specs + - for-compiler: CLANG + C: + - -Wno-sign-compare + - -Wno-unused-parameter + CPP: + - -Wno-sign-compare + - -Wno-unused-parameter + Link: + - -lcrt0 + + - type: VHT-Corstone-300 + device: ARM::SSE-300-MPS3 + board: ARM::V2M-MPS3-SSE-300-FVP + define: + - CORTEXM + - ARMCM55 + - VHT + misc: + - for-compiler: GCC + Link: + - --specs=rdimon.specs + Library: + - -lrdimon + - for-compiler: CLANG + Link: + - -lcrt0-semihost + - -lsemihost + + - type: VHT-M0P + device: ARMCM0P + #board: uVision Simulator + define: + - CORTEXM + - ARMCM0P + - DISABLEFLOAT16 + - VHT + misc: + - for-compiler: GCC + Link: + - --specs=rdimon.specs + Library: + - -lrdimon + - for-compiler: CLANG + Link: + - -lcrt0-semihost + - -lsemihost + + - type: VHT-M4 + device: ARMCM4 + #board: uVision Simulator + define: + - CORTEXM + - ARMCM4_FP + - DISABLEFLOAT16 + - VHT + misc: + - for-compiler: GCC + Link: + - --specs=rdimon.specs + Library: + - -lrdimon + - for-compiler: CLANG + Link: + - -lcrt0-semihost + - -lsemihost + + build-types: + - type: Release + debug: on + + + projects: + - project: ./test.cproject.yml + - project: ./example.cproject.yml + \ No newline at end of file diff --git a/dsppp/test_config.h b/dsppp/test_config.h new file mode 100644 index 000000000..9349fbb83 --- /dev/null +++ b/dsppp/test_config.h @@ -0,0 +1,13 @@ +#ifndef TEST_CONFIG_H +#define TEST_CONFIG_H + +#define POOL_ALLOCATOR +//#define ONLY_BENCHMARKS + +#define VECTOR_TEST +#define F32_DT +#define STATIC_TEST + + +#endif + diff --git a/dsppp/tests/bench.c b/dsppp/tests/bench.c new file mode 100644 index 000000000..d4055c846 --- /dev/null +++ b/dsppp/tests/bench.c @@ -0,0 +1,3 @@ +#include "bench.h" + +uint32_t start_time, stop_time, cycle_count; diff --git a/dsppp/tests/bench.h b/dsppp/tests/bench.h new file mode 100644 index 000000000..b045ddeea --- /dev/null +++ b/dsppp/tests/bench.h @@ -0,0 +1,60 @@ +#if !defined(HOST) +#if !defined(NORTE) + #include "RTE_Components.h" + #include CMSIS_device_header +#endif +#endif + +#ifdef __cplusplus + +#include +#else +#include +#endif + +#ifdef __cplusplus + + +extern "C" +{ +#endif + +extern uint32_t start_time; +extern uint32_t stop_time; +extern uint32_t cycle_count; + +#if defined(HOST) +#define INIT_SYSTICK +#define START_CYCLE_MEASUREMENT +#define STOP_CYCLE_MEASUREMENT +#else +#define INIT_SYSTICK \ + SysTick->CTRL=0; \ + SysTick->LOAD=0xFFFFFFUL;\ + SysTick->VAL=0; \ + SysTick->CTRL=5; \ + while (SysTick->VAL==0)\ + ; + +#define START_CYCLE_MEASUREMENT \ + start_time= SysTick->VAL; + +#define STOP_CYCLE_MEASUREMENT \ + stop_time= SysTick->VAL; \ + SysTick->CTRL=0; \ + cycle_count = start_time - stop_time; \ + printf ("Cycle count = %d\r\n",(int)cycle_count); +#endif + +#if !defined(HOST) && (__ARM_ARCH > 6) +#define dbgInst(imm) __asm volatile("DBG %0\n\t" : :"Ir" ((imm)) ) +#define startSectionNB(num) dbgInst(((num) 
& 0x7) | 0x0) +#define stopSectionNB(num) dbgInst(((num) & 0x7) | 0x8) +#else +#define startSectionNB(num) +#define stopSectionNB(num) +#endif + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/dsppp/tests/cmsis_tests.h b/dsppp/tests/cmsis_tests.h new file mode 100644 index 000000000..ed5e4486b --- /dev/null +++ b/dsppp/tests/cmsis_tests.h @@ -0,0 +1,699 @@ +#pragma once + + +extern "C" { +#include "arm_math_types.h" +#include "arm_math_types_f16.h" +#include "dsp/filtering_functions.h" +#include "dsp/matrix_functions.h" +#include "dsp/matrix_functions_f16.h" + +} + +template +struct NameOfType; + +template +struct TailForTests; + +template<> +struct NameOfType +{ + constexpr static const char* v="float64_t"; + constexpr static const char* xls="f64"; +}; + +template<> +struct NameOfType +{ + constexpr static const char* v="float32_t"; + constexpr static const char* xls="f32"; + +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct NameOfType +{ + constexpr static const char* v="float16_t"; + constexpr static const char* xls="f16"; + +}; +#endif + +template<> +struct NameOfType +{ + constexpr static const char* v="q31"; + constexpr static const char* xls="q31"; + +}; + +template<> +struct NameOfType +{ + constexpr static const char* v="q15"; + constexpr static const char* xls="q15"; + +}; + +template<> +struct NameOfType +{ + constexpr static const char* v="q7"; + constexpr static const char* xls="q7"; + +}; + +template<> +struct TailForTests +{ + constexpr static const int tail = 1; + constexpr static const int loop = 2; + +}; + +template<> +struct TailForTests +{ + constexpr static const int tail = 3; + constexpr static const int loop = 2*4; +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct TailForTests +{ + constexpr static const int tail = 7; + constexpr static const int loop = 2*8; + +}; +#endif + +template<> +struct TailForTests +{ + constexpr static const int tail = 3; + constexpr static const int loop = 2*4; +}; + +template<> +struct TailForTests +{ + constexpr static const int tail = 7; + constexpr static const int loop = 2*8; +}; + +template<> +struct TailForTests +{ + constexpr static const int tail = 15; + constexpr static const int loop = 2*16; +}; + +#include "common_tests.h" + +#if !defined(DISABLEFLOAT16) +extern void cmsisdsp_add(const float16_t* a, + const float16_t* b, + float16_t* c, + uint32_t l); +#endif + +extern void cmsisdsp_add(const float64_t* a, + const float64_t* b, + float64_t* c, + uint32_t l); + +extern void cmsisdsp_add(const float32_t* a, + const float32_t* b, + float32_t* c, + uint32_t l); + +extern void cmsisdsp_add(const Q31* a, + const Q31* b, + Q31* c, + uint32_t l); + +extern void cmsisdsp_add(const Q15* a, + const Q15* b, + Q15* c, + uint32_t l); + +extern void cmsisdsp_add(const Q7* a, + const Q7* b, + Q7* c, + uint32_t l); + +extern void cmsisdsp_mat_add(const float32_t* a, + const float32_t* b, + float32_t* c, + uint32_t row,uint32_t col); + +#if !defined(DISABLEFLOAT16) +extern void cmsisdsp_mat_add(const float16_t* a, + const float16_t* b, + float16_t* c, + uint32_t row,uint32_t col); +#endif + +extern void cmsisdsp_mat_add(const Q31* a, + const Q31* b, + Q31* c, + uint32_t row,uint32_t col); + +extern void cmsisdsp_mat_add(const Q15* a, + const Q15* b, + Q15* c, + uint32_t row,uint32_t col); + +extern void cmsisdsp_mat_add(const Q7* a, + const Q7* b, + Q7* c, + uint32_t row,uint32_t col); + +#if !defined(DISABLEFLOAT16) +extern void cmsisdsp_dot(const float16_t* a, + const float16_t* b, + float16_t &c, + uint32_t 
l); +#endif + +extern void cmsisdsp_dot(const float64_t* a, + const float64_t* b, + float64_t &c, + uint32_t l); + +extern void cmsisdsp_dot(const float32_t* a, + const float32_t* b, + float32_t &c, + uint32_t l); + +extern void cmsisdsp_dot(const Q31* a, + const Q31* b, + Q<15,48> &c, + uint32_t l); + +extern void cmsisdsp_dot(const Q15* a, + const Q15* b, + Q<33,30> &c, + uint32_t l); + +extern void cmsisdsp_dot(const Q7* a, + const Q7* b, + Q<17,14> &c, + uint32_t l); + +extern void cmsisdsp_dot_expr(const double* a, + const double* b, + const double* c, + const double* d, + double* tmp1, + double* tmp2, + const double scale, + double &r, + uint32_t l); + +extern void cmsisdsp_dot_expr(const float32_t* a, + const float32_t* b, + const float32_t* c, + const float32_t* d, + float32_t* tmp1, + float32_t* tmp2, + const float32_t scale, + float32_t &r, + uint32_t l); + +#if !defined(DISABLEFLOAT16) +extern void cmsisdsp_dot_expr(const float16_t* a, + const float16_t* b, + const float16_t* c, + const float16_t* d, + float16_t* tmp1, + float16_t* tmp2, + const float16_t scale, + float16_t &r, + uint32_t l); +#endif + +extern void cmsisdsp_dot_expr(const Q7* a, + const Q7* b, + const Q7* c, + const Q7* d, + Q7* tmp1, + Q7* tmp2, + const Q7 scale, + Q<17,14> &r, + uint32_t l); + +extern void cmsisdsp_dot_expr(const Q15* a, + const Q15* b, + const Q15* c, + const Q15* d, + Q15* tmp1, + Q15* tmp2, + const Q15 scale, + Q<33,30> &r, + uint32_t l); + +extern void cmsisdsp_dot_expr(const Q31* a, + const Q31* b, + const Q31* c, + const Q31* d, + Q31* tmp1, + Q31* tmp2, + const Q31 scale, + Q<15,48> &r, + uint32_t l); + +extern void cmsisdsp_fir(const arm_fir_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + +extern void cmsisdsp_fir(const arm_fir_instance_q7 * S, + const Q7 * pSrc, + Q7 * pDst, + uint32_t blockSize); + +extern void cmsisdsp_fir(const arm_fir_instance_q15 * S, + const Q15 * pSrc, + Q15 * pDst, + uint32_t blockSize); + +extern void cmsisdsp_fir(const arm_fir_instance_q31 * S, + const Q31 * pSrc, + Q31 * pDst, + uint32_t blockSize); + +template +struct CMSISOuter { + static void run(const T *a, + const T *b, + T *res, + const uint32_t r,const uint32_t c) + { + DISABLE_LOOP_UNROLL + for(unsigned int row=0; row +struct CMSISOuter { + static void run(const float32_t *a, + const float32_t *b, + float32_t *res, + const uint32_t r,const uint32_t c) + { + _cmsis_outer(a,b,res,r,c); + } +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct CMSISOuter { + static void run(const float16_t *a, + const float16_t *b, + float16_t *res, + const uint32_t r,const uint32_t c) + { + _cmsis_outer(a,b,res,r,c); + } +}; +#endif + +template<> +struct CMSISOuter { + static void run(const Q31 *a, + const Q31 *b, + Q31 *res, + const uint32_t r,const uint32_t c) + { + _cmsis_outer(a,b,res,r,c); + } +}; + +template<> +struct CMSISOuter { + static void run(const Q15 *a, + const Q15 *b, + Q15 *res, + const uint32_t r,const uint32_t c) + { + _cmsis_outer(a,b,res,r,c); + } +}; + +template<> +struct CMSISOuter { + static void run(const Q7 *a, + const Q7 *b, + Q7 *res, + const uint32_t r,const uint32_t c) + { + _cmsis_outer(a,b,res,r,c); + } +}; + +#endif + +extern void cmsis_init_householder(double *f,const int nb); +extern void cmsis_init_householder(float32_t *f,const int nb); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_init_householder(float16_t *f,const int nb); +#endif + +extern void cmsis_init_qr(double *f,const int r,const int c); +extern void cmsis_init_qr(float32_t 
*f,const int r,const int c); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_init_qr(float16_t *f,const int r,const int c); +#endif + +extern void cmsis_init_cholesky(double *f,const int r,const int c); +extern void cmsis_init_cholesky(float32_t *f,const int r,const int c); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_init_cholesky(float16_t *f,const int r,const int c); +#endif + +extern void cmsis_mat_mult(const arm_matrix_instance_f64* a, + const arm_matrix_instance_f64* b, + arm_matrix_instance_f64 *c, + double *pState); + +extern void cmsis_mat_mult(const arm_matrix_instance_f32* a, + const arm_matrix_instance_f32* b, + arm_matrix_instance_f32 *c, + float32_t *pState); +#if !defined(DISABLEFLOAT16) +extern void cmsis_mat_mult(const arm_matrix_instance_f16* a, + const arm_matrix_instance_f16* b, + arm_matrix_instance_f16 *c, + float16_t *pState); +#endif + +extern void cmsis_mat_mult(const arm_matrix_instance_q7* a, + const arm_matrix_instance_q7* b, + arm_matrix_instance_q7 *c, + q7_t *pState); + +extern void cmsis_mat_mult(const arm_matrix_instance_q15* a, + const arm_matrix_instance_q15* b, + arm_matrix_instance_q15 *c, + q15_t *pState); + +extern void cmsis_mat_mult(const arm_matrix_instance_q31* a, + const arm_matrix_instance_q31* b, + arm_matrix_instance_q31 *c, + q31_t *pState); + +extern void cmsis_mat_trans(const arm_matrix_instance_q7* a, + arm_matrix_instance_q7* b); + +extern void cmsis_mat_trans(const arm_matrix_instance_q15* a, + arm_matrix_instance_q15* b); + +extern void cmsis_mat_trans(const arm_matrix_instance_q31* a, + arm_matrix_instance_q31* b); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_mat_trans(const arm_matrix_instance_f16* a, + arm_matrix_instance_f16* b); +#endif + +extern void cmsis_mat_trans(const arm_matrix_instance_f64* a, + arm_matrix_instance_f64* b); + +extern void cmsis_mat_trans(const arm_matrix_instance_f32* a, + arm_matrix_instance_f32* b); + +extern double cmsis_householder(const double *,double* ,uint32_t); + +extern float32_t cmsis_householder(const float32_t *,float32_t* ,uint32_t); + +#if !defined(DISABLEFLOAT16) +extern float16_t cmsis_householder(const float16_t *,float16_t* ,uint32_t); +#endif + +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_f64 *pSrcMat, + const double *pVec, + double *pDst); + +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_f32 *pSrcMat, + const float32_t *pVec, + float32_t *pDst); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_f16 *pSrcMat, + const float16_t *pVec, + float16_t *pDst); +#endif + +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_q31 *pSrcMat, + const Q31 *pVec, + Q31 *pDst); + +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_q15 *pSrcMat, + const Q15 *pVec, + Q15 *pDst); + +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_q7 *pSrcMat, + const Q7 *pVec, + Q7 *pDst); + +extern arm_status cmsis_qr( + const arm_matrix_instance_f64 * pSrc, + const double threshold, + arm_matrix_instance_f64 * pOutR, + arm_matrix_instance_f64 * pOutQ, + double * pOutTau, + double *pTmpA, + double *pTmpB + ); + +extern arm_status cmsis_qr( + const arm_matrix_instance_f32 * pSrc, + const float32_t threshold, + arm_matrix_instance_f32 * pOutR, + arm_matrix_instance_f32 * pOutQ, + float32_t * pOutTau, + float32_t *pTmpA, + float32_t *pTmpB + ); + +#if !defined(DISABLEFLOAT16) +extern arm_status cmsis_qr( + const arm_matrix_instance_f16 * pSrc, + const float16_t threshold, + arm_matrix_instance_f16 * pOutR, + 
arm_matrix_instance_f16 * pOutQ, + float16_t * pOutTau, + float16_t *pTmpA, + float16_t *pTmpB + ); +#endif + +extern arm_status cmsis_cholesky( + const arm_matrix_instance_f64 * src, + arm_matrix_instance_f64 * dst); + +extern arm_status cmsis_cholesky( + const arm_matrix_instance_f32 * src, + arm_matrix_instance_f32 * dst); + +#if !defined(DISABLEFLOAT16) +extern arm_status cmsis_cholesky( + const arm_matrix_instance_f16 * src, + arm_matrix_instance_f16 * dst); +#endif + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_f64 * src, + const double * a, + const double * b, + const double scalar, + double * tmp, + double * dst); + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_f32 * src, + const float32_t * a, + const float32_t * b, + const float32_t scalar, + float32_t * tmp, + float32_t * dst); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_f16 * src, + const float16_t * a, + const float16_t * b, + const float16_t scalar, + float16_t * tmp, + float16_t * dst); +#endif + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q31 * src, + const Q31 * a, + const Q31 * b, + const Q31 scalar, + Q31 * tmp, + Q31 * dst); + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q15 * src, + const Q15 * a, + const Q15 * b, + const Q15 scalar, + Q15 * tmp, + Q15 * dst); + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q7 * src, + const Q7 * a, + const Q7 * b, + const Q7 scalar, + Q7 * tmp, + Q7 * dst); + +template +struct CMSISMatrixType; + +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_f64 type; + typedef double scalar; +}; + +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_f32 type; + typedef float32_t scalar; +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_f16 type; + typedef float16_t scalar; +}; +#endif + +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_q7 type; + typedef q7_t scalar; + +}; + +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_q15 type; + typedef q15_t scalar; + +}; + +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_q31 type; + typedef q31_t scalar; + +}; + +template +struct TestConstant; + +template<> +struct TestConstant +{ + constexpr static double v = 0.2; + constexpr static double small = 0.001; +}; + +template<> +struct TestConstant +{ + constexpr static float v = 0.2f; + constexpr static float small = 0.001f; +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct TestConstant +{ + constexpr static float16_t v = 0.2f; + constexpr static float16_t small = 0.001f; + +}; +#endif + +template<> +struct TestConstant +{ + constexpr static Q7 v = 0.2_q7; + constexpr static Q7 small = 0.001_q7; +}; + + +template<> +struct TestConstant +{ + constexpr static Q15 v = 0.2_q15; + constexpr static Q15 small = 0.001_q15; +}; + +template<> +struct TestConstant +{ + constexpr static Q31 v = 0.2_q31; + constexpr static Q31 small = 0.001_q31; +}; \ No newline at end of file diff --git a/dsppp/tests/cmsisdsp.cpp b/dsppp/tests/cmsisdsp.cpp new file mode 100644 index 000000000..7c6ad0c65 --- /dev/null +++ b/dsppp/tests/cmsisdsp.cpp @@ -0,0 +1,1146 @@ +#include "allocator.h" + +#include +#include +#include + +using namespace arm_cmsis_dsp; + + +#include "dsp/basic_math_functions.h" +#include "dsp/basic_math_functions_f16.h" +#include "dsp/filtering_functions.h" +#include "dsp/matrix_functions.h" +#include 
"dsp/matrix_functions_f16.h" + + +#include "bench.h" + +#if !defined(DISABLEFLOAT16) +void cmsisdsp_add(const float16_t* a, + const float16_t* b, + float16_t* c, + uint32_t l) +{ + + arm_add_f16(a,b,c,l); +}; +#endif + + +void cmsisdsp_add(const float64_t* a, + const float64_t* b, + float64_t* c, + uint32_t l) +{ + + arm_add_f64(a,b,c,l); +}; + + +void cmsisdsp_add(const float32_t* a, + const float32_t* b, + float32_t* c, + uint32_t l) +{ + arm_add_f32(a,b,c,l); +}; + + + + +void cmsisdsp_add(const Q31* a, + const Q31* b, + Q31* c, + uint32_t l) +{ + + arm_add_q31(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(c),l); +}; + + +void cmsisdsp_add(const Q15* a, + const Q15* b, + Q15* c, + uint32_t l) +{ + + arm_add_q15(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(c),l); +}; + +void cmsisdsp_add(const Q7* a, + const Q7* b, + Q7* c, + uint32_t l) +{ + + arm_add_q7(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(c),l); +}; + +#if !defined(DISABLEFLOAT16) +void cmsisdsp_dot(const float16_t* a, + const float16_t* b, + float16_t &c, + uint32_t l) +{ + arm_dot_prod_f16(a,b,l,&c); +}; +#endif + + +void cmsisdsp_dot(const float64_t* a, + const float64_t* b, + float64_t &c, + uint32_t l) +{ + arm_dot_prod_f64(a,b,l,&c); +}; + +void cmsisdsp_dot(const float32_t* a, + const float32_t* b, + float32_t &c, + uint32_t l) +{ + arm_dot_prod_f32(a,b,l,&c); +}; + + + + +void cmsisdsp_dot(const Q31* a, + const Q31* b, + Q<15,48> &c, + uint32_t l) +{ + arm_dot_prod_q31(reinterpret_cast(a), + reinterpret_cast(b),l, + reinterpret_cast(&c)); +}; + + +void cmsisdsp_dot(const Q15* a, + const Q15* b, + Q<33,30> &c, + uint32_t l) +{ + arm_dot_prod_q15(reinterpret_cast(a), + reinterpret_cast(b),l, + reinterpret_cast(&c)); +}; + + +void cmsisdsp_dot(const Q7* a, + const Q7* b, + Q<17,14> &c, + uint32_t l) +{ + arm_dot_prod_q7(reinterpret_cast(a), + reinterpret_cast(b),l, + reinterpret_cast(&c)); +}; + +void cmsisdsp_dot_expr(const double* a, + const double* b, + const double* c, + const double* d, + double* tmp1, + double* tmp2, + const double scale, + double &r, + uint32_t l) +{ + arm_add_f64(a,b,tmp1,l); + arm_scale_f64(tmp1,scale,tmp1,l); + arm_mult_f64(c,d,tmp2,l); + arm_dot_prod_f64(tmp1,tmp2,l,&r); +}; + +void cmsisdsp_dot_expr(const float32_t* a, + const float32_t* b, + const float32_t* c, + const float32_t* d, + float32_t* tmp1, + float32_t* tmp2, + const float32_t scale, + float32_t &r, + uint32_t l) +{ + arm_add_f32(a,b,tmp1,l); + arm_scale_f32(tmp1,scale,tmp1,l); + arm_mult_f32(c,d,tmp2,l); + arm_dot_prod_f32(tmp1,tmp2,l,&r); +}; + +#if !defined(DISABLEFLOAT16) +void cmsisdsp_dot_expr(const float16_t* a, + const float16_t* b, + const float16_t* c, + const float16_t* d, + float16_t* tmp1, + float16_t* tmp2, + const float16_t scale, + float16_t &r, + uint32_t l) +{ + arm_add_f16(a,b,tmp1,l); + arm_scale_f16(tmp1,scale,tmp1,l); + arm_mult_f16(c,d,tmp2,l); + arm_dot_prod_f16(tmp1,tmp2,l,&r); +}; +#endif + +void cmsisdsp_fir(const arm_fir_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + arm_fir_f32(S,pSrc,pDst,blockSize); +}; + +void cmsisdsp_fir(const arm_fir_instance_q7 * S, + const Q7 * pSrc, + Q7 * pDst, + uint32_t blockSize) +{ + arm_fir_q7(S,reinterpret_cast(pSrc), + reinterpret_cast(pDst),blockSize); +}; + +void cmsisdsp_fir(const arm_fir_instance_q15 * S, + const Q15 * pSrc, + Q15 * pDst, + uint32_t blockSize) +{ + arm_fir_q15(S,reinterpret_cast(pSrc), + reinterpret_cast(pDst),blockSize); +}; + +void cmsisdsp_fir(const 
arm_fir_instance_q31 * S, + const Q31 * pSrc, + Q31 * pDst, + uint32_t blockSize) +{ + arm_fir_q31(S,reinterpret_cast(pSrc), + reinterpret_cast(pDst),blockSize); +}; + + +void cmsisdsp_dot_expr(const Q7* a, + const Q7* b, + const Q7* c, + const Q7* d, + Q7* tmp1, + Q7* tmp2, + const Q7 scale, + Q<17,14> &r, + uint32_t l) +{ + arm_add_q7(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(tmp1),l); + arm_scale_q7(reinterpret_cast(tmp1),scale.v,0, + reinterpret_cast(tmp1),l); + + + arm_mult_q7(reinterpret_cast(c), + reinterpret_cast(d), + reinterpret_cast(tmp2),l); + + + arm_dot_prod_q7(reinterpret_cast(tmp1), + reinterpret_cast(tmp2),l,&r.v); +}; + +void cmsisdsp_dot_expr(const Q15* a, + const Q15* b, + const Q15* c, + const Q15* d, + Q15* tmp1, + Q15* tmp2, + const Q15 scale, + Q<33,30> &r, + uint32_t l) +{ + arm_add_q15(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(tmp1),l); + arm_scale_q15(reinterpret_cast(tmp1),scale.v,0, + reinterpret_cast(tmp1),l); + arm_mult_q15(reinterpret_cast(c), + reinterpret_cast(d), + reinterpret_cast(tmp2),l); + arm_dot_prod_q15(reinterpret_cast(tmp1), + reinterpret_cast(tmp2),l,&r.v); +}; + +void cmsisdsp_dot_expr(const Q31* a, + const Q31* b, + const Q31* c, + const Q31* d, + Q31* tmp1, + Q31* tmp2, + const Q31 scale, + Q<15,48> &r, + uint32_t l) +{ + arm_add_q31(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(tmp1),l); + arm_scale_q31(reinterpret_cast(tmp1),scale.v,0, + reinterpret_cast(tmp1),l); + arm_mult_q31(reinterpret_cast(c), + reinterpret_cast(d), + reinterpret_cast(tmp2),l); + arm_dot_prod_q31(reinterpret_cast(tmp1), + reinterpret_cast(tmp2),l,&r.v); +}; + + +void cmsisdsp_mat_add(const float32_t* a, + const float32_t* b, + float32_t* c, + uint32_t row,uint32_t col) +{ + arm_matrix_instance_f32 srca; + arm_matrix_instance_f32 srcb; + + arm_matrix_instance_f32 dst; + + + srca.numRows = row; + srca.numCols = col; + srca.pData = (float32_t*)a; + + srcb.numRows = row; + srcb.numCols = col; + srcb.pData = (float32_t*)b; + + dst.numRows = row; + dst.numCols = col; + dst.pData = c; + arm_mat_add_f32(&srca,&srcb,&dst); + +} + +#if !defined(DISABLEFLOAT16) +void cmsisdsp_mat_add(const float16_t* a, + const float16_t* b, + float16_t* c, + uint32_t row,uint32_t col) +{ + arm_matrix_instance_f16 srca; + arm_matrix_instance_f16 srcb; + + arm_matrix_instance_f16 dst; + + + srca.numRows = row; + srca.numCols = col; + srca.pData = (float16_t*)a; + + srcb.numRows = row; + srcb.numCols = col; + srcb.pData = (float16_t*)b; + + dst.numRows = row; + dst.numCols = col; + dst.pData = c; + arm_mat_add_f16(&srca,&srcb,&dst); + +} +#endif + +void cmsisdsp_mat_add(const Q31* a, + const Q31* b, + Q31* c, + uint32_t row,uint32_t col) +{ + arm_matrix_instance_q31 srca; + arm_matrix_instance_q31 srcb; + + arm_matrix_instance_q31 dst; + + + srca.numRows = row; + srca.numCols = col; + srca.pData = reinterpret_cast(const_cast(a)); + + srcb.numRows = row; + srcb.numCols = col; + srcb.pData = reinterpret_cast(const_cast(b)); + + dst.numRows = row; + dst.numCols = col; + dst.pData = reinterpret_cast(c); + arm_mat_add_q31(&srca,&srcb,&dst); + +} + +void cmsisdsp_mat_add(const Q15* a, + const Q15* b, + Q15* c, + uint32_t row,uint32_t col) +{ + arm_matrix_instance_q15 srca; + arm_matrix_instance_q15 srcb; + + arm_matrix_instance_q15 dst; + + + srca.numRows = row; + srca.numCols = col; + srca.pData = reinterpret_cast(const_cast(a)); + + srcb.numRows = row; + srcb.numCols = col; + srcb.pData = reinterpret_cast(const_cast(b)); + + dst.numRows = row; 
+ dst.numCols = col; + dst.pData = reinterpret_cast(c); + arm_mat_add_q15(&srca,&srcb,&dst); + +} + + +void cmsisdsp_mat_add(const Q7* a, + const Q7* b, + Q7* c, + uint32_t row,uint32_t col) +{ + (void)a; + (void)b; + (void)c; + (void)row; + (void)col; + // Doing nothing since there is no equivalent CMSIS-DSP + // function + // Required to enable the build + + /* + arm_matrix_instance_q7 srca; + arm_matrix_instance_q7 srcb; + + arm_matrix_instance_q7 dst; + + + srca.numRows = row; + srca.numCols = col; + srca.pData = reinterpret_cast(const_cast(a)); + + srcb.numRows = row; + srcb.numCols = col; + srcb.pData = reinterpret_cast(const_cast(b)); + + dst.numRows = row; + dst.numCols = col; + dst.pData = reinterpret_cast(c); + arm_mat_add_q7(&srca,&srcb,&dst); +*/ +} + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) +void _cmsis_outer(const float32_t *a, + const float32_t *b, + float32_t *res, + const uint32_t r,const uint32_t c) +{ + for(unsigned int row=0; row(a); + const q31_t *pb = reinterpret_cast(b); + q31_t *pr = reinterpret_cast(res); + for(unsigned int row=0; row(a); + const q15_t *pb = reinterpret_cast(b); + q15_t *pr = reinterpret_cast(res); + for(unsigned int row=0; row(a); + const q7_t *pb = reinterpret_cast(b); + q7_t *pr = reinterpret_cast(res); + for(unsigned int row=0; row(pVec), + reinterpret_cast(pDst)); +} + +void cmsis_mat_vec_mult( + const arm_matrix_instance_q15 *pSrcMat, + const Q15 *pVec, + Q15 *pDst) +{ +arm_mat_vec_mult_q15(pSrcMat, + reinterpret_cast(pVec), + reinterpret_cast(pDst)); +} + +void cmsis_mat_vec_mult( + const arm_matrix_instance_q7 *pSrcMat, + const Q7 *pVec, + Q7 *pDst) +{ +arm_mat_vec_mult_q7(pSrcMat, + reinterpret_cast(pVec), + reinterpret_cast(pDst)); +} + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_f32 * src, + const float32_t * a, + const float32_t * b, + const float32_t scalar, + float32_t * tmp, + float32_t * dst) +{ + arm_scale_f32(b,scalar,tmp,src->numCols); + arm_add_f32(a,tmp,tmp,src->numCols); + arm_mat_vec_mult_f32(src, tmp, dst); +} + +#if !defined(DISABLEFLOAT16) +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_f16 * src, + const float16_t * a, + const float16_t * b, + const float16_t scalar, + float16_t * tmp, + float16_t * dst) +{ + arm_scale_f16(b,scalar,tmp,src->numCols); + arm_add_f16(a,tmp,tmp,src->numCols); + arm_mat_vec_mult_f16(src, tmp, dst); +} +#endif + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q31 * src, + const Q31 * a, + const Q31 * b, + const Q31 scalar, + Q31 * tmp, + Q31 * dst) +{ + arm_scale_q31(reinterpret_cast(b), + scalar.v,0, + reinterpret_cast(tmp),src->numCols); + arm_add_q31(reinterpret_cast(a), + reinterpret_cast(tmp), + reinterpret_cast(tmp),src->numCols); + arm_mat_vec_mult_q31(src, + reinterpret_cast(tmp), + reinterpret_cast(dst)); +} + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q15 * src, + const Q15 * a, + const Q15 * b, + const Q15 scalar, + Q15 * tmp, + Q15 * dst) +{ + arm_scale_q15(reinterpret_cast(b), + scalar.v,0, + reinterpret_cast(tmp),src->numCols); + arm_add_q15(reinterpret_cast(a), + reinterpret_cast(tmp), + reinterpret_cast(tmp),src->numCols); + arm_mat_vec_mult_q15(src, + reinterpret_cast(tmp), + reinterpret_cast(dst)); +} + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q7 * src, + const Q7 * a, + const Q7 * b, + const Q7 scalar, + Q7 * tmp, + Q7 * dst) +{ + arm_scale_q7(reinterpret_cast(b), + scalar.v,0, + reinterpret_cast(tmp),src->numCols); + arm_add_q7(reinterpret_cast(a), + 
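+    /* The cmsis_complex_mat_vec reference routines (f64/f32/f16/q31/q15/q7)
+       all follow the same pattern, sketched here under the usual CMSIS-DSP
+       semantics of arm_scale / arm_add / arm_mat_vec_mult:
+
+           tmp = a + scalar * b;   // element-wise, length src->numCols
+           dst = src * tmp;        // matrix-vector product
+
+       i.e. presumably the hand-written counterpart of a
+       "matrix times vector expression" test on the C++ side. */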
reinterpret_cast(tmp), + reinterpret_cast(tmp),src->numCols); + arm_mat_vec_mult_q7(src, + reinterpret_cast(tmp), + reinterpret_cast(dst)); +} \ No newline at end of file diff --git a/dsppp/tests/col_test.cpp b/dsppp/tests/col_test.cpp new file mode 100644 index 000000000..f7c2bde49 --- /dev/null +++ b/dsppp/tests/col_test.cpp @@ -0,0 +1,112 @@ +extern "C" { + extern void col_test(); +} + +#include "allocator.h" + +#include +#include +#include + +#include + +#include + + + +#include "dsp/matrix_functions.h" +#include "matrix_utils.h" + +template +static void test() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PMat a; + PVector ref; + #else + PMat a(R,C); + PVector ref(R); + #endif + + init_array(a,R*C); + + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PVector res = copy(a.col(4)); + #else + PVector res = copy(a.col(4)); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + for(int i=0;i +void all_col_test() +{ + const int nb_tails = TailForTests::tail; + const int nb_loops = TailForTests::loop; + + title("Col test"); + + test(); + test(); + test(); + test(); + + test(); + test(); + test(); + test(); + test(); + +} + +void col_test() +{ +#if defined(COL_TEST) + #if defined(F64_DT) + all_col_test(); + #endif + #if defined(F32_DT) + all_col_test(); + #endif + #if defined(F16_DT) && !defined(DISABLEFLOAT16) + all_col_test(); + #endif + #if defined(Q31_DT) + all_col_test(); + #endif + #if defined(Q15_DT) + all_col_test(); + #endif + #if defined(Q7_DT) + all_col_test(); + #endif +#endif +} \ No newline at end of file diff --git a/dsppp/tests/common_tests.cpp b/dsppp/tests/common_tests.cpp new file mode 100644 index 000000000..999eb3cf0 --- /dev/null +++ b/dsppp/tests/common_tests.cpp @@ -0,0 +1,48 @@ +#include "allocator.h" +#include +#include +#include + +#include "cmsis_tests.h" + +extern "C" { + void memory_pool_stats(); +} + +#if 0 +template<> +void init_array(Vector_Base &pDst,std::size_t nb) +{ + for(std::size_t i=0;i +bool validate(const float32_t* a, + const float32_t* b, + std::size_t nb, + float abser, + float reler) +{ + for(std::size_t i=0;i +#include "allocator.h" + +using namespace arm_cmsis_dsp; + +#define REL_ERROR (1.0e-6) +#define ABS_ERROR (1.0e-6) +#define ERROR(A,B,AE,RE) ((fabs((A) - (B)) > (AE + RE * fabs((B))))) +#define ERRVAL(VAL,REF,AE,RE) \ + std::cout << "Error = " << fabs(VAL-REF) << "\r\n"; \ + std::cout << "compared to " << (AE + RE * abs((REF))) << "\r\n"; + +/************ + * + * Data types + * + */ + + +#if defined(POOL_ALLOCATOR) + + template + using PVector = Vector; + + template + using PMat = Matrix; + +#else + + template + using PVector = Vector; + + template + using PMat = Matrix; + +#endif + +template +using PView = VectorView; + +template typename A> +void init_array(Vector &pDst,std::size_t nb) +{ + for(std::size_t i=0;i typename A> +void init_array(Vector &pDst,std::size_t nb) +{ + for(std::size_t i=0;i +void init_array(Vector_Base &pDst,std::size_t nb) +{ + for(std::size_t i=0;i typename A> +//void init_array(Vector &pDst,std::size_t nb); + + +//extern template void init_array<>(Vector_Base &pDst,std::size_t nb); + + +template::value,bool>::type = true> +bool validate(const T a, const T b, std::size_t nb,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + for(std::size_t i=0;i>>::is_float) + { + if (ERROR(a[i],b[i],abser,reler) ) + { + std::cout << "Error at:" << i << " ; res=" << a[i] << " ; 
ref=" << b[i] << "\r\n"; + ERRVAL(a[i],b[i],abser,reler); + return(false); + } + } + else + { + if (a[i]!=b[i]) + { + std::cout << "Error at:" << i << " ; res=" << a[i] << " ; ref=" << b[i] << "\r\n"; + return(false); + } + } + } + return(true); +} + +template::value && + !HasMatrixIndexing::value && + IsVector::value && + !HasMatrixIndexing::value,bool>::type = true> +bool validate(const TA &a, const TB &b,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + for(index_t i=0;i::type>::is_float) + { + if (ERROR(a[i],b[i],abser,reler) ) + { + std::cout << "Error at:" << i << " ; res=" << a[i] << " ; ref=" << b[i] << "\r\n"; + ERRVAL(a[i],b[i],abser,reler); + return(false); + } + } + else + { + if (a[i]!=b[i]) + { + std::cout << "Error at:" << i << " ; res=" << a[i] << " ; ref=" << b[i] << "\r\n"; + return(false); + } + } + } + return(true); +} + + +template::value + && !IsVector::value && !HasMatrixIndexing::value,bool>::type = true> +bool validate(const T a, const T b,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + + if constexpr (number_traits>::is_float) + { + if (ERROR(a,b,abser,reler)) + { + std::cout << "Error: res=" << a << " ; ref=" << b << "\r\n"; + ERRVAL(a,b,abser,reler); + return(false); + } + } + else + { + if (a != b ) + { + std::cout << "Error : res=" << a << " ; ref=" << b << "\r\n"; + return(false); + } + } + + return(true); +} + +template::value && + HasMatrixIndexing::value && + number_traits::type>::is_float,bool>::type = true> +bool validate(const MA& a, const MB& b,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + for(index_t row=0;row < a.rows() ; row++) + { + for(index_t col=0;col < a.columns() ; col++) + { + if (ERROR(a(row,col),b(row,col),abser,reler) ) + { + //std::cout << fabs(a(row,col)-b(row,col)) << "\r\n"; + //std::cout << REL_ERROR*fabsf(a(row,col)) << "\r\n"; + //std::cout << a(row,col) << "\r\n"; + //std::cout << b(row,col) << "\r\n"; + + std::cout << "Error at : (" << row << "," << col << ") ; res=" << a(row,col) << " ; ref=" << b(row,col) << "\r\n"; + ERRVAL(a(row,col),b(row,col),abser,reler); + return(false); + } + } + } + return(true); +} + +template::value && + HasMatrixIndexing::value && + number_traits::type>::is_float,bool>::type = true> +bool validateLT(const MA& a, const MB& b,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + for(index_t row=0;row < a.rows() ; row++) + { + for(index_t col=0;col <= row ; col++) + { + if (ERROR(a(row,col),b(row,col),abser,reler) ) + { + //std::cout << fabs(a(row,col)-b(row,col)) << "\r\n"; + //std::cout << REL_ERROR*fabsf(a(row,col)) << "\r\n"; + //std::cout << a(row,col) << "\r\n"; + //std::cout << b(row,col) << "\r\n"; + + std::cout << "Error at : (" << row << "," << col << ") ; res=" << a(row,col) << " ; ref=" << b(row,col) << "\r\n"; + ERRVAL(a(row,col),b(row,col),abser,reler); + return(false); + } + } + } + return(true); +} + +template::value && + HasMatrixIndexing::value && + number_traits::type>::is_fixed,bool>::type = true> +bool validate(const MA& a, const MB& b,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + (void)abser; + (void)reler; + for(index_t row=0;row < a.rows() ; row++) + { + for(index_t col=0;col < a.columns() ; col++) + { + if (a(row,col).v != b(row,col).v) + { + std::cout << "Error at : (" << row << "," << col << ") ; res=" << a(row,col) << " ; ref=" << b(row,col) << "\r\n"; + std::cout << "Error = " << abs(a(row,col).v - b(row,col).v) << "\r\n"; + return(false); + } + } + } + return(true); +} + +template<> +bool validate(const float32_t* a, const float32_t* b, 
std::size_t nb,float abser , float reler ); + + +extern template +bool validate<>(const float32_t* a, const float32_t* b, std::size_t nb,float abser , float reler ); + + + + + +template +void title(const std::string &s) +{ +#if !defined(SERIAL_DUMP) +#if defined(STATIC_TEST) + std::cout<<"\r\n\033[31;1;4m" << s << " " << NameOfType::xls << "\033[0m\r\n"; +#else + std::cout<<"\r\n\033[31;1;4m" << s << " dynamic " << NameOfType::xls << "\033[0m\r\n"; +#endif +#else +#if defined(STATIC_TEST) + std::cout << "\r\n" << s << " " << NameOfType::xls << "\r\n"; +#else + std::cout << "\r\n" << s << " dynamic " << NameOfType::xls << "\r\n"; +#endif +#endif +}; \ No newline at end of file diff --git a/dsppp/tests/debug_mat.h b/dsppp/tests/debug_mat.h new file mode 100644 index 000000000..a0d7c707e --- /dev/null +++ b/dsppp/tests/debug_mat.h @@ -0,0 +1,738 @@ +void pmat(float32_t *p,int nbrows,int nbcols) +{ + for(int r=0;rnumRows < pSrc->numCols) + { + return(ARM_MATH_SIZE_MISMATCH); + } + + memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float32_t)); + pOutR->numCols = pSrc->numCols; + pOutR->numRows = pSrc->numRows; + + p = pOutR->pData; + + pc = pOutTau; + for(col=0 ; col < pSrc->numCols; col++) + { + int32_t j,k,blkCnt,blkCnt2; + float32_t *pa0,*pa1,*pa2,*pa3,*ptemp; + float32_t temp; + float32x4_t v1,v2,vtemp; + + + COPY_COL_F32(pOutR,col,col,pTmpA); + + beta = arm_householder_f32(pTmpA,threshold,pSrc->numRows - col,pTmpA); + *pc++ = beta; + + //pvec(pTmpA,pSrc->numRows-col); + //pmat(p,pSrc->numRows-col,pSrc->numCols-col); + + pdst = pTmpB; + + /* v.T A(col:,col:) -> tmpb */ + pv = pTmpA; + pa = p; + + temp = *pv; + blkCnt = (pSrc->numCols-col) >> 2; + while (blkCnt > 0) + { + v1 = vld1q_f32(pa); + v2 = vmulq_n_f32(v1,temp); + vst1q_f32(pdst,v2); + + pa += 4; + pdst += 4; + blkCnt--; + } + blkCnt = (pSrc->numCols-col) & 3; + if (blkCnt > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt); + v1 = vld1q_f32(pa); + v2 = vmulq_n_f32(v1,temp); + vst1q_p_f32(pdst,v2,p0); + + pa += blkCnt; + } + + + + pa += col; + pv++; + pdst = pTmpB; + + pa0 = pa; + pa1 = pa0 + pSrc->numCols; + pa2 = pa1 + pSrc->numCols; + pa3 = pa2 + pSrc->numCols; + + /* Unrolled loop */ + blkCnt = (pSrc->numRows-col - 1) >> 2; + k=1; + while(blkCnt > 0) + { + vtemp=vld1q_f32(pv); + + blkCnt2 = (pSrc->numCols-col) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pdst); + + v2 = vld1q_f32(pa0); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,0)); + + v2 = vld1q_f32(pa1); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,1)); + + v2 = vld1q_f32(pa2); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,2)); + + v2 = vld1q_f32(pa3); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,3)); + + vst1q_f32(pdst,v1); + + pdst += 4; + pa0 += 4; + pa1 += 4; + pa2 += 4; + pa3 += 4; + blkCnt2--; + } + blkCnt2 = (pSrc->numCols-col) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + + v1 = vld1q_f32(pdst); + + v2 = vld1q_f32(pa0); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,0)); + + v2 = vld1q_f32(pa1); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,1)); + + v2 = vld1q_f32(pa2); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,2)); + + v2 = vld1q_f32(pa3); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,3)); + + vst1q_p_f32(pdst,v1,p0); + + pa0 += blkCnt2; + pa1 += blkCnt2; + pa2 += blkCnt2; + pa3 += blkCnt2; + } + + pa0 += col + 3*pSrc->numCols; + pa1 += col + 3*pSrc->numCols; + pa2 += col + 3*pSrc->numCols; + pa3 += col + 3*pSrc->numCols; + pv += 4; + pdst = pTmpB; + k += 4; + blkCnt--; + } + + pa = pa0; + for(;knumRows-col; k++) + { + temp = *pv; + blkCnt2 = 
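+         /* Reading aid: each column iteration applies the Householder
+            reflector (v, beta) returned by arm_householder_f32 to the
+            trailing sub-matrix,
+
+                A(col:, col:) <- (I - beta*v*v^T) * A(col:, col:)
+                              =  A(col:, col:) - beta * v * (v^T A(col:, col:))
+
+            pTmpB holds v^T A, and the predicated loads/stores
+            (vctp32q + vld1q_f32 / vst1q_p_f32) cover the last 1..3
+            columns without a scalar tail loop. */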
(pSrc->numCols-col) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pa); + v2 = vld1q_f32(pdst); + v2 = vfmaq_n_f32(v2,v1,temp); + vst1q_f32(pdst,v2); + + pa += 4; + pdst += 4; + blkCnt2--; + } + blkCnt2 = (pSrc->numCols-col) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + v1 = vld1q_f32(pa); + v2 = vld1q_f32(pdst); + v2 = vfmaq_n_f32(v2,v1,temp); + vst1q_p_f32(pdst,v2,p0); + + pa += blkCnt2; + } + + pa += col; + pv++; + pdst = pTmpB; + } + + //pvec(pTmpB,pSrc->numCols-col); + //printf("--\r\n"); + + /* A(col:,col:) - beta v tmpb */ + pa = p; + for(j=0;jnumRows-col; j++) + { + float32_t f = -beta * pTmpA[j]; + ptemp = pTmpB; + + blkCnt2 = (pSrc->numCols-col) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pa); + v2 = vld1q_f32(ptemp); + v1 = vfmaq_n_f32(v1,v2,f); + vst1q_f32(pa,v1); + + pa += 4; + ptemp += 4; + + blkCnt2--; + } + blkCnt2 = (pSrc->numCols-col) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + + v1 = vld1q_f32(pa); + v2 = vld1q_f32(ptemp); + v1 = vfmaq_n_f32(v1,v2,f); + vst1q_p_f32(pa,v1,p0); + + pa += blkCnt2; + } + + pa += col; + } + + /* Copy Householder reflectors into R matrix */ + pa = p + pOutR->numCols; + for(k=0;knumRows-col-1; k++) + { + *pa = pTmpA[k+1]; + pa += pOutR->numCols; + } + + p += 1 + pOutR->numCols; + } + + /* Generate Q if requested by user matrix */ + + if (pOutQ != NULL) + { + /* Initialize Q matrix to identity */ + memset(pOutQ->pData,0,sizeof(float32_t)*pOutQ->numRows*pOutQ->numRows); + + pa = pOutQ->pData; + for(col=0 ; col < pOutQ->numCols; col++) + { + *pa = 1.0f; + pa += pOutQ->numCols+1; + } + + nb = pOutQ->numRows - pOutQ->numCols + 1; + + pc = pOutTau + pOutQ->numCols - 1; + for(col=0 ; col < pOutQ->numCols; col++) + { + int32_t j,k, blkCnt, blkCnt2; + float32_t *pa0,*pa1,*pa2,*pa3,*ptemp; + float32_t temp; + float32x4_t v1,v2,vtemp; + + pos = pSrc->numRows - nb; + p = pOutQ->pData + pos + pOutQ->numCols*pos ; + + + COPY_COL_F32(pOutR,pos,pos,pTmpA); + pTmpA[0] = 1.0f; + pdst = pTmpB; + + /* v.T A(col:,col:) -> tmpb */ + + pv = pTmpA; + pa = p; + + temp = *pv; + blkCnt2 = (pOutQ->numRows-pos) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pa); + v1 = vmulq_n_f32(v1, temp); + vst1q_f32(pdst,v1); + + pa += 4; + pdst += 4; + + blkCnt2--; + } + blkCnt2 = (pOutQ->numRows-pos) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + + v1 = vld1q_f32(pa); + v1 = vmulq_n_f32(v1, temp); + vst1q_p_f32(pdst,v1,p0); + + pa += blkCnt2; + } + + pa += pos; + pv++; + pdst = pTmpB; + pa0 = pa; + pa1 = pa0 + pOutQ->numRows; + pa2 = pa1 + pOutQ->numRows; + pa3 = pa2 + pOutQ->numRows; + + /* Unrolled loop */ + blkCnt = (pOutQ->numRows-pos - 1) >> 2; + k=1; + while(blkCnt > 0) + { + + vtemp = vld1q_f32(pv); + blkCnt2 = (pOutQ->numRows-pos) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pdst); + + v2 = vld1q_f32(pa0); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,0)); + + v2 = vld1q_f32(pa1); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,1)); + + v2 = vld1q_f32(pa2); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,2)); + + v2 = vld1q_f32(pa3); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,3)); + + vst1q_f32(pdst,v1); + + pa0 += 4; + pa1 += 4; + pa2 += 4; + pa3 += 4; + pdst += 4; + + blkCnt2--; + } + blkCnt2 = (pOutQ->numRows-pos) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + + v1 = vld1q_f32(pdst); + + v2 = vld1q_f32(pa0); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,0)); + + v2 = vld1q_f32(pa1); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,1)); + + v2 = vld1q_f32(pa2); + v1 = vfmaq_n_f32(v1, v2, 
vgetq_lane(vtemp,2)); + + v2 = vld1q_f32(pa3); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,3)); + + vst1q_p_f32(pdst,v1,p0); + + pa0 += blkCnt2; + pa1 += blkCnt2; + pa2 += blkCnt2; + pa3 += blkCnt2; + + } + + pa0 += pos + 3*pOutQ->numRows; + pa1 += pos + 3*pOutQ->numRows; + pa2 += pos + 3*pOutQ->numRows; + pa3 += pos + 3*pOutQ->numRows; + pv += 4; + pdst = pTmpB; + k += 4; + blkCnt--; + } + + pa = pa0; + for(;knumRows-pos; k++) + { + temp = *pv; + blkCnt2 = (pOutQ->numRows-pos) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pdst); + v2 = vld1q_f32(pa); + v1 = vfmaq_n_f32(v1, v2, temp); + vst1q_f32(pdst,v1); + + pdst += 4; + pa += 4; + + blkCnt2--; + } + blkCnt2 = (pOutQ->numRows-pos) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + v1 = vld1q_f32(pdst); + v2 = vld1q_f32(pa); + v1 = vfmaq_n_f32(v1, v2, temp); + vst1q_p_f32(pdst,v1,p0); + + pa += blkCnt2; + } + + pa += pos; + pv++; + pdst = pTmpB; + } + + pa = p; + beta = *pc--; + for(j=0;jnumRows-pos; j++) + { + float32_t f = -beta * pTmpA[j]; + ptemp = pTmpB; + + blkCnt2 = (pOutQ->numCols-pos) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pa); + v2 = vld1q_f32(ptemp); + v1 = vfmaq_n_f32(v1,v2,f); + vst1q_f32(pa,v1); + + pa += 4; + ptemp += 4; + + blkCnt2--; + } + blkCnt2 = (pOutQ->numCols-pos) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + + v1 = vld1q_f32(pa); + v2 = vld1q_f32(ptemp); + v1 = vfmaq_n_f32(v1,v2,f); + vst1q_p_f32(pa,v1,p0); + + pa += blkCnt2; + } + + pa += pos; + } + + + nb++; + } + } + + arm_status status = ARM_MATH_SUCCESS; + /* Return to application */ + return (status); +} + +#endif /*#if !defined(ARM_MATH_MVEF)*/ + + +#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/ + + + +#if (!defined(ARM_MATH_MVEF)) || defined(ARM_MATH_AUTOVECTORIZE) + +arm_status _arm_mat_qr_f32( + const arm_matrix_instance_f32 * pSrc, + const float32_t threshold, + arm_matrix_instance_f32 * pOutR, + arm_matrix_instance_f32 * pOutQ, + float32_t * pOutTau, + float32_t *pTmpA, + float32_t *pTmpB + ) + +{ + int32_t col=0; + int32_t nb,pos; + float32_t *pa,*pc; + float32_t beta; + float32_t *pv; + float32_t *pdst; + float32_t *p; + + if (pSrc->numRows < pSrc->numCols) + { + return(ARM_MATH_SIZE_MISMATCH); + } + + memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float32_t)); + pOutR->numCols = pSrc->numCols; + pOutR->numRows = pSrc->numRows; + + p = pOutR->pData; + + pc = pOutTau; + for(col=0 ; col < pSrc->numCols; col++) + { + int32_t i,j,k,blkCnt; + float32_t *pa0,*pa1,*pa2,*pa3; + COPY_COL_F32(pOutR,col,col,pTmpA); + + beta = arm_householder_f32(pTmpA,threshold,pSrc->numRows - col,pTmpA); + *pc++ = beta; + + pdst = pTmpB; + + /* v.T A(col:,col:) -> tmpb */ + pv = pTmpA; + pa = p; + for(j=0;jnumCols-col; j++) + { + *pdst++ = *pv * *pa++; + } + pa += col; + pv++; + pdst = pTmpB; + + pa0 = pa; + pa1 = pa0 + pSrc->numCols; + pa2 = pa1 + pSrc->numCols; + pa3 = pa2 + pSrc->numCols; + + /* Unrolled loop */ + blkCnt = (pSrc->numRows-col - 1) >> 2; + k=1; + while(blkCnt > 0) + { + float32_t sum; + + for(j=0;jnumCols-col; j++) + { + sum = *pdst; + + sum += pv[0] * *pa0++; + sum += pv[1] * *pa1++; + sum += pv[2] * *pa2++; + sum += pv[3] * *pa3++; + + *pdst++ = sum; + } + pa0 += col + 3*pSrc->numCols; + pa1 += col + 3*pSrc->numCols; + pa2 += col + 3*pSrc->numCols; + pa3 += col + 3*pSrc->numCols; + pv += 4; + pdst = pTmpB; + k += 4; + blkCnt--; + } + + pa = pa0; + for(;knumRows-col; k++) + { + for(j=0;jnumCols-col; j++) + { + *pdst++ += *pv * *pa++; + } + pa += col; + pv++; + pdst = pTmpB; + } + + /* 
A(col:,col:) - beta v tmpb */ + pa = p; + for(j=0;jnumRows-col; j++) + { + float32_t f = beta * pTmpA[j]; + + for(i=0;inumCols-col; i++) + { + *pa = *pa - f * pTmpB[i] ; + pa++; + } + pa += col; + } + + /* Copy Householder reflectors into R matrix */ + pa = p + pOutR->numCols; + for(k=0;knumRows-col-1; k++) + { + *pa = pTmpA[k+1]; + pa += pOutR->numCols; + } + + p += 1 + pOutR->numCols; + } + + /* Generate Q if requested by user matrix */ + + if (pOutQ != NULL) + { + /* Initialize Q matrix to identity */ + memset(pOutQ->pData,0,sizeof(float32_t)*pOutQ->numRows*pOutQ->numRows); + + pa = pOutQ->pData; + for(col=0 ; col < pOutQ->numCols; col++) + { + *pa = 1.0f; + pa += pOutQ->numCols+1; + } + + nb = pOutQ->numRows - pOutQ->numCols + 1; + + pc = pOutTau + pOutQ->numCols - 1; + for(col=0 ; col < pOutQ->numCols; col++) + { + int32_t i,j,k, blkCnt; + float32_t *pa0,*pa1,*pa2,*pa3; + pos = pSrc->numRows - nb; + p = pOutQ->pData + pos + pOutQ->numCols*pos ; + + + COPY_COL_F32(pOutR,pos,pos,pTmpA); + pTmpA[0] = 1.0f; + pdst = pTmpB; + + /* v.T A(col:,col:) -> tmpb */ + + pv = pTmpA; + pa = p; + for(j=0;jnumRows-pos; j++) + { + *pdst++ = *pv * *pa++; + } + pa += pos; + pv++; + pdst = pTmpB; + pa0 = pa; + pa1 = pa0 + pOutQ->numRows; + pa2 = pa1 + pOutQ->numRows; + pa3 = pa2 + pOutQ->numRows; + + /* Unrolled loop */ + blkCnt = (pOutQ->numRows-pos - 1) >> 2; + k=1; + while(blkCnt > 0) + { + float32_t sum; + + for(j=0;jnumRows-pos; j++) + { + sum = *pdst; + + sum += pv[0] * *pa0++; + sum += pv[1] * *pa1++; + sum += pv[2] * *pa2++; + sum += pv[3] * *pa3++; + + *pdst++ = sum; + } + pa0 += pos + 3*pOutQ->numRows; + pa1 += pos + 3*pOutQ->numRows; + pa2 += pos + 3*pOutQ->numRows; + pa3 += pos + 3*pOutQ->numRows; + pv += 4; + pdst = pTmpB; + k += 4; + blkCnt--; + } + + pa = pa0; + for(;knumRows-pos; k++) + { + for(j=0;jnumRows-pos; j++) + { + *pdst++ += *pv * *pa++; + } + pa += pos; + pv++; + pdst = pTmpB; + } + + pa = p; + beta = *pc--; + for(j=0;jnumRows-pos; j++) + { + float32_t f = beta * pTmpA[j]; + + for(i=0;inumCols-pos; i++) + { + *pa = *pa - f * pTmpB[i] ; + pa++; + } + pa += pos; + } + + + nb++; + } + } + + arm_status status = ARM_MATH_SUCCESS; + /* Return to application */ + return (status); +} + +#endif /* end of test for Helium or Neon availability */ diff --git a/dsppp/tests/debug_test.cpp b/dsppp/tests/debug_test.cpp new file mode 100644 index 000000000..ed0101528 --- /dev/null +++ b/dsppp/tests/debug_test.cpp @@ -0,0 +1,45 @@ +extern "C" { + extern void debug_test(); +} + +#include "allocator.h" + +#include +#include + +#include + +#include +#include "dsp/basic_math_functions.h" + + +using namespace arm_cmsis_dsp; + + + +extern Q15 external_debug(const PVector &a0, + const PVector &a1, + const PVector &a2, + const PVector &a3, + const PVector &b, + int l); + +template +static void test() +{ + + PrintType>(); +} + +void debug_test() +{ + title("Debug test"); + + + + test(); + + + + +} \ No newline at end of file diff --git a/dsppp/tests/debug_test_external.cpp b/dsppp/tests/debug_test_external.cpp new file mode 100644 index 000000000..795aaa53f --- /dev/null +++ b/dsppp/tests/debug_test_external.cpp @@ -0,0 +1,56 @@ +#include "allocator.h" + +#include +#include + +#include +#include + + + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) +Q15 external_debug(const PVector &a0, + const PVector &a1, + const PVector &a2, + const PVector &a3, + const PVector &b, + int l) +{ + int nb = l; + Q<33,30> acc0; + Q<33,30> acc1; + Q<33,30> acc2; + Q<33,30> acc3; + for(index_t i=0; i::mk(nb-i)); + acc1 = 
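+              /* All four accumulators consume the same slice of b: each pass
+                 multiply-accumulates one vector (or, on the last pass, a
+                 partial vector) of a0..a3 against b, the vctpq::mk(nb-i)
+                 predicate masking the lanes past the end of the data so no
+                 scalar tail loop is needed. from_accumulator() then narrows
+                 the wide Q<33,30> sums back to Q15. */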
inner::vmacc(acc1,a1.vector_op_tail(i,nb-i),b.vector_op_tail(i,nb-i),inner::vctpq::mk(nb-i)); + acc2 = inner::vmacc(acc2,a2.vector_op_tail(i,nb-i),b.vector_op_tail(i,nb-i),inner::vctpq::mk(nb-i)); + acc3 = inner::vmacc(acc3,a3.vector_op_tail(i,nb-i),b.vector_op_tail(i,nb-i),inner::vctpq::mk(nb-i)); + } + Q15 r0,r1,r2,r3; + + r0 = inner::from_accumulator(acc0); + r1 = inner::from_accumulator(acc1); + r2 = inner::from_accumulator(acc2); + r3 = inner::from_accumulator(acc3); + + return(r0+r1+r2+r3); +} +#else +Q15 external_debug(const PVector &a0, + const PVector &a1, + const PVector &a2, + const PVector &a3, + const PVector &b, + int l) +{ + (void)a0; + (void)a1; + (void)a2; + (void)a3; + (void)b; + (void)l; + return(a0[0]); +} +#endif \ No newline at end of file diff --git a/dsppp/tests/dot_test.cpp b/dsppp/tests/dot_test.cpp new file mode 100644 index 000000000..53d878c7b --- /dev/null +++ b/dsppp/tests/dot_test.cpp @@ -0,0 +1,213 @@ +extern "C" { + extern void dot_test(); +} + +#include "allocator.h" + +#include +#include +#include + +#include + +#include + +#include "dsp/basic_math_functions.h" +#include "dsp/basic_math_functions_f16.h" + + + + + +template +static void complex_test(const T scale) +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + #if defined(STATIC_TEST) + PVector a; + PVector b; + PVector c; + PVector d; + + PVector res; + #else + PVector a(NB); + PVector b(NB); + PVector c(NB); + PVector d(NB); + + PVector res(NB); + #endif + + + init_array(a,NB); + init_array(b,NB); + init_array(c,NB); + init_array(d,NB); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + O result = dot(scale*(a+b),c*d); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + O ref; + PVector tmp1; + PVector tmp2; + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + cmsisdsp_dot_expr(a.const_ptr(), + b.const_ptr(), + c.const_ptr(), + d.const_ptr(), + tmp1.ptr(), + tmp2.ptr(), + scale, + ref,NB); + STOP_CYCLE_MEASUREMENT; + + if (!validate(result,ref)) + { + printf("dot expr failed \r\n"); + + } + + std::cout << "=====\r\n"; + +} + + +template +static void test() +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + #if defined(STATIC_TEST) + PVector a; + PVector b; + + PVector res; + #else + PVector a(NB); + PVector b(NB); + + PVector res(NB); + #endif + + init_array(a,NB); + init_array(b,NB); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + O result = dot(a,b); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + O ref; + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + cmsisdsp_dot(a.const_ptr(),b.const_ptr(),ref,NB); + STOP_CYCLE_MEASUREMENT; + + if (!validate(result,ref)) + { + printf("dot failed \r\n"); + + } + + std::cout << "=====\r\n"; + +} + + +template +void all_dot_test() +{ + + const int nb_tails = TailForTests::tail; + const int nb_loops = TailForTests::loop; + + using ACC = typename number_traits::accumulator; + constexpr auto v = TestConstant::v; + + title("Dot product"); + + + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + if constexpr (!std::is_same::value) + { + test(); + } + + test(); + test(); + test(); + test(); + test(); + + + title("Dot product with expressions"); + + + complex_test(v); + complex_test(v); + complex_test(v); + complex_test(v); + complex_test(v); + complex_test(v); + + complex_test(v); + + complex_test(v); + complex_test(v); + complex_test(v); + if constexpr (!std::is_same::value) + { + complex_test(v); + } + + complex_test(v); + complex_test(v); + complex_test(v); + 
complex_test(v); + complex_test(v); + + //print_map("Stats",max_stats); + +} + +void dot_test() +{ +#if defined(DOT_TEST) + #if defined(F64_DT) + all_dot_test(); + #endif + #if defined(F32_DT) + all_dot_test(); + #endif + #if defined(F16_DT) && !defined(DISABLEFLOAT16) + all_dot_test(); + #endif + #if defined(Q31_DT) + all_dot_test(); + #endif + #if defined(Q15_DT) + all_dot_test(); + #endif + #if defined(Q7_DT) + all_dot_test(); + #endif +#endif +} diff --git a/dsppp/tests/filter_test.cpp b/dsppp/tests/filter_test.cpp new file mode 100644 index 000000000..da5bea2a4 --- /dev/null +++ b/dsppp/tests/filter_test.cpp @@ -0,0 +1,657 @@ +extern "C" { + extern void filter_test(); +} + +#include "allocator.h" + +#include +#include +#include +#include + +#include + +#include + + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) + +#define MVE_ASRL_SAT16(acc, shift) ((sqrshrl_sat48(acc, -(32-shift)) >> 32) & 0xffffffff) + + +#define FIR_Q15_CORE(pOutput, nbAcc, nbVecTaps, pSample, vecCoeffs) \ + for (int j = 0; j < nbAcc; j++) { \ + const q15_t *pSmp = &pSample[j]; \ + q63_t acc[4]; \ + \ + acc[j] = 0; \ + for (int i = 0; i < nbVecTaps; i++) { \ + vecIn0 = vld1q(pSmp + 8 * i); \ + acc[j] = vmlaldavaq(acc[j], vecIn0, vecCoeffs[i]); \ + } \ + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc[j], 15); \ + } + +#define FIR_Q15_MAIN_CORE() \ +{ \ + q15_t *pState = S->pState; /* State pointer */ \ + const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ \ + q15_t *pStateCur; /* Points to the current sample of the state */ \ + const q15_t *pSamples; /* Temporary pointer to the sample buffer */ \ + q15_t *pOutput; /* Temporary pointer to the output buffer */ \ + const q15_t *pTempSrc; /* Temporary pointer to the source data */ \ + q15_t *pTempDest; /* Temporary pointer to the destination buffer */\ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */\ + int32_t blkCnt; \ + q15x8_t vecIn0; \ + \ + /* \ + * load coefs \ + */ \ + q15x8_t vecCoeffs[NBVECTAPS]; \ + \ + for (int i = 0; i < NBVECTAPS; i++) \ + vecCoeffs[i] = vldrhq_s16(pCoeffs + 8 * i); \ + \ + /* \ + * pState points to state array which contains previous frame (numTaps - 1) samples \ + * pStateCur points to the location where the new input data should be written \ + */ \ + pStateCur = &(pState[(numTaps - 1u)]); \ + pTempSrc = pSrc; \ + pSamples = pState; \ + pOutput = pDst; \ + \ + blkCnt = blockSize >> 2; \ + while (blkCnt > 0) { \ + /* \ + * Save 4 input samples in the history buffer \ + */ \ + vstrhq_s32(pStateCur, vldrhq_s32(pTempSrc)); \ + pStateCur += 4; \ + pTempSrc += 4; \ + \ + FIR_Q15_CORE(pOutput, 4, NBVECTAPS, pSamples, vecCoeffs); \ + pSamples += 4; \ + \ + blkCnt--; \ + } \ + \ + /* tail */ \ + int32_t residual = blockSize & 3; \ + \ + for (int i = 0; i < residual; i++) \ + *pStateCur++ = *pTempSrc++; \ + \ + FIR_Q15_CORE(pOutput, residual, NBVECTAPS, pSamples, vecCoeffs); \ + \ + /* \ + * Copy the samples back into the history buffer start \ + */ \ + pTempSrc = &pState[blockSize]; \ + pTempDest = pState; \ + \ + /* current compiler limitation */ \ + blkCnt = (numTaps - 1) >> 3; \ + while (blkCnt > 0) \ + { \ + vstrhq_s16(pTempDest, vldrhq_s16(pTempSrc)); \ + pTempSrc += 8; \ + pTempDest += 8; \ + blkCnt--; \ + } \ + blkCnt = (numTaps - 1) & 7; \ + if (blkCnt > 0) \ + { \ + mve_pred16_t p = vctp16q(blkCnt); \ + vstrhq_p_s16(pTempDest, vldrhq_z_s16(pTempSrc, p), p); \ + } \ +} + +static void arm_fir_q15_25_32_mve(const arm_fir_instance_q15 * S, + const q15_t * __restrict pSrc, + q15_t * __restrict pDst, 
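+/* Shape of the fixed-size kernels below: NBTAPS is known at compile time and
+   NBVECTAPS = NBTAPS/8 coefficient vectors are kept live, so FIR_Q15_CORE
+   computes each output roughly as
+
+       acc = sum_i vmlaldavaq(samples[8*i .. 8*i+7], vecCoeffs[i])  // 64-bit
+       out = saturate_q15(acc >> 15)                                // MVE_ASRL_SAT16
+
+   i.e. a widening multiply-accumulate across lanes followed by a saturating
+   right shift back to q15. */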
uint32_t blockSize) +{ + #define NBTAPS 32 + #define NBVECTAPS (NBTAPS / 8) + FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS + #undef NBTAPS +} + +static void arm_fir_q15_17_24_mve(const arm_fir_instance_q15 * S, + const q15_t * __restrict pSrc, + q15_t * __restrict pDst, uint32_t blockSize) +{ + #define NBTAPS 24 + #define NBVECTAPS (NBTAPS / 8) + FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS + #undef NBTAPS +} + + +static void arm_fir_q15_9_16_mve(const arm_fir_instance_q15 * S, + const q15_t * __restrict pSrc, + q15_t * __restrict pDst, uint32_t blockSize) +{ + #define NBTAPS 16 + #define NBVECTAPS (NBTAPS / 8) + FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS + #undef NBTAPS +} + +static void arm_fir_q15_1_8_mve(const arm_fir_instance_q15 * S, + const q15_t * __restrict pSrc, + q15_t * __restrict pDst, uint32_t blockSize) +{ + #define NBTAPS 8 + #define NBVECTAPS (NBTAPS / 8) + FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS + #undef NBTAPS +} + + +void debug_arm_fir_q15( + const arm_fir_instance_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize) +{ + q15_t *pState = S->pState; /* State pointer */ + const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q15_t *pStateCur; /* Points to the current sample of the state */ + const q15_t *pSamples; /* Temporary pointer to the sample buffer */ + q15_t *pOutput; /* Temporary pointer to the output buffer */ + const q15_t *pTempSrc; /* Temporary pointer to the source data */ + q15_t *pTempDest; /* Temporary pointer to the destination buffer */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t blkCnt; + q15x8_t vecIn0; + uint32_t tapsBlkCnt = (numTaps + 7) / 8; + q63_t acc0, acc1, acc2, acc3; + + +int32_t nbTaps = (numTaps + 7) >> 3; + +switch(nbTaps) { + + case 1: + arm_fir_q15_1_8_mve(S, pSrc, pDst, blockSize); + return; + case 2: + arm_fir_q15_9_16_mve(S, pSrc, pDst, blockSize); + return; + case 3: + arm_fir_q15_17_24_mve(S, pSrc, pDst, blockSize); + return; + case 4: + arm_fir_q15_25_32_mve(S, pSrc, pDst, blockSize); + return; + } + /* + * pState points to state array which contains previous frame (numTaps - 1) samples + * pStateCur points to the location where the new input data should be written + */ + pStateCur = &(pState[(numTaps - 1u)]); + pTempSrc = pSrc; + pSamples = pState; + pOutput = pDst; + blkCnt = blockSize >> 2; + + while (blkCnt > 0U) + { + const q15_t *pCoeffsTmp = pCoeffs; + const q15_t *pSamplesTmp = pSamples; + + acc0 = 0LL; + acc1 = 0LL; + acc2 = 0LL; + acc3 = 0LL; + + /* + * Save 8 input samples in the history buffer + */ + vst1q(pStateCur, vld1q(pTempSrc)); + pStateCur += 8; + pTempSrc += 8; + + //INIT_SYSTICK; + //START_CYCLE_MEASUREMENT; + int i = tapsBlkCnt; + //startSectionNB(3); + while (i > 0) + { + /* + * load 8 coefs + */ + q15x8_t vecCoeffs = *(q15x8_t *) pCoeffsTmp; + + vecIn0 = vld1q(pSamplesTmp); + acc0 = vmlaldavaq(acc0, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[1]); + acc1 = vmlaldavaq(acc1, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[2]); + acc2 = vmlaldavaq(acc2, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[3]); + acc3 = vmlaldavaq(acc3, vecIn0, vecCoeffs); + + pSamplesTmp += 8; + pCoeffsTmp += 8; + /* + * Decrement the taps block loop counter + */ + i--; + } + //stopSectionNB(3); + //STOP_CYCLE_MEASUREMENT; + + + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc0, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc1, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc2, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc3, 15); + + pSamples += 4; + /* + * 
Decrement the sample block loop counter + */ + blkCnt--; + } + + uint32_t residual = blockSize & 3; + switch (residual) + { + case 3: + { + const q15_t *pCoeffsTmp = pCoeffs; + const q15_t *pSamplesTmp = pSamples; + + acc0 = 0LL; + acc1 = 0LL; + acc2 = 0LL; + + /* + * Save 8 input samples in the history buffer + */ + *(q15x8_t *) pStateCur = *(q15x8_t *) pTempSrc; + pStateCur += 8; + pTempSrc += 8; + + int i = tapsBlkCnt; + while (i > 0) + { + /* + * load 8 coefs + */ + q15x8_t vecCoeffs = *(q15x8_t *) pCoeffsTmp; + + vecIn0 = vld1q(pSamplesTmp); + acc0 = vmlaldavaq(acc0, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[2]); + acc1 = vmlaldavaq(acc1, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[4]); + acc2 = vmlaldavaq(acc2, vecIn0, vecCoeffs); + + pSamplesTmp += 8; + pCoeffsTmp += 8; + /* + * Decrement the taps block loop counter + */ + i--; + } + + acc0 = asrl(acc0, 15); + acc1 = asrl(acc1, 15); + acc2 = asrl(acc2, 15); + + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc0, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc1, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc2, 15); + } + break; + + case 2: + { + const q15_t *pCoeffsTmp = pCoeffs; + const q15_t *pSamplesTmp = pSamples; + + acc0 = 0LL; + acc1 = 0LL; + /* + * Save 8 input samples in the history buffer + */ + vst1q(pStateCur, vld1q(pTempSrc)); + pStateCur += 8; + pTempSrc += 8; + + int i = tapsBlkCnt; + while (i > 0) + { + /* + * load 8 coefs + */ + q15x8_t vecCoeffs = *(q15x8_t *) pCoeffsTmp; + + vecIn0 = vld1q(pSamplesTmp); + acc0 = vmlaldavaq(acc0, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[2]); + acc1 = vmlaldavaq(acc1, vecIn0, vecCoeffs); + + pSamplesTmp += 8; + pCoeffsTmp += 8; + /* + * Decrement the taps block loop counter + */ + i--; + } + + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc0, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc1, 15); + } + break; + + case 1: + { + const q15_t *pCoeffsTmp = pCoeffs; + const q15_t *pSamplesTmp = pSamples; + + acc0 = 0LL; + + /* + * Save 8 input samples in the history buffer + */ + vst1q(pStateCur, vld1q(pTempSrc)); + pStateCur += 8; + pTempSrc += 8; + + int i = tapsBlkCnt; + while (i > 0) + { + /* + * load 8 coefs + */ + q15x8_t vecCoeffs = *(q15x8_t *) pCoeffsTmp; + + vecIn0 = vld1q(pSamplesTmp); + acc0 = vmlaldavaq(acc0, vecIn0, vecCoeffs); + + pSamplesTmp += 8; + pCoeffsTmp += 8; + /* + * Decrement the taps block loop counter + */ + i--; + } + + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc0, 15); + } + break; + + } + + /* + * Copy the samples back into the history buffer start + */ + pTempSrc = &pState[blockSize]; + pTempDest = pState; + + blkCnt = numTaps >> 3; + while (blkCnt > 0U) + { + vst1q(pTempDest, vld1q(pTempSrc)); + pTempSrc += 8; + pTempDest += 8; + blkCnt--; + } + blkCnt = numTaps & 7; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp16q(blkCnt); + vstrhq_p_s16(pTempDest, vld1q(pTempSrc), p0); + } +} +#endif + +template +struct FirType; + +template<> +struct FirType +{ + typedef arm_fir_instance_f32 type; + static void init_state(type * S, + uint16_t numTaps, + const float32_t * pCoeffs, + float32_t * pState, + uint32_t blockSize) + { + arm_fir_init_f32(S,numTaps,pCoeffs,pState,blockSize); + }; + + static void init_coef(float32_t *coefs,uint16_t numTaps) + { + for(int i=0;i +struct FirType +{ + typedef arm_fir_instance_q15 type; + static void init_state(type * S, + uint16_t numTaps, + const Q15 * pCoeffs, + Q15 * pState, + uint32_t blockSize) + { + arm_fir_init_q15(S,numTaps, + reinterpret_cast(pCoeffs), + reinterpret_cast(pState),blockSize); + }; + + static void 
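+    /* The FIR<T, BLOCK, TAPS> wrapper further below keeps a state vector of
+       TAPS-1+BLOCK samples. For each group of UNROLL_FACTOR (4) outputs it
+       copies the 4 new input samples into the state, then evaluates four
+       sliding-window dot products against the same coefficient vector in one
+       expression,
+
+           results(accu) = dot(unroll([i,this](index_t k)
+                                      { return state_.sub(i+k, i+k+TAPS); }),
+                               replicate(coef_));
+
+       and the result is checked against arm_fir_q15 as reference. */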
init_coef(Q15 *coefs,uint16_t numTaps) + { + for(int i=0;i +struct FIR { + + FIR(const PVector &coefs):coef_(coefs),state_(T{}) + {}; + + + PVector filter(const PVector &signal) + { + constexpr int UNROLL_FACTOR = 4; + PVector res(T{}); + using acc_type = typename number_traits::accumulator; + std::array accu; + index_t i=0; + +#if defined(ARM_COMPUTE_DISABLE_UNROLL) + #pragma clang loop unroll(disable) +#endif + for(;i<=BLOCK-UNROLL_FACTOR;i+=UNROLL_FACTOR) + { + + state_.sub(TAPS-1+i,TAPS-1+i+UNROLL_FACTOR) = copy(signal.sub(i,i+UNROLL_FACTOR)); + + //INIT_SYSTICK; + //START_CYCLE_MEASUREMENT; + //startSectionNB(2); + results(accu) = + dot(unroll( + [i,this](index_t k){return state_.sub(i+k,i+k+TAPS);}), + replicate(coef_) + ); + //stopSectionNB(2); + //STOP_CYCLE_MEASUREMENT; + + for(index_t k=0;k coef_; + PVector state_; +}; + +template +static void test() +{ + constexpr int NB = BLOCK; + std::cout << "----\r\n(" << BLOCK << "," << TAPS << ")\r\n"; + + typename FirType::type S; + PVector signal; + PVector coefs; + + FirType::init_coef(coefs.ptr(),TAPS); + + init_array(signal,NB); + FIR fir(coefs); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + PVector res = fir.filter(signal); + //PVector res; + //fir.purec(signal.const_ptr(),res.ptr()); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + T* state; + T* coefsb; + state=(T*)malloc(sizeof(T)*(TAPS+BLOCK+BLOCK)); + coefsb=(T*)malloc(sizeof(T)*(TAPS+32)); + memset(coefsb,0,sizeof(T)*(TAPS+32)); + for(int i =0;i::init_state(&S,TAPS,coefsb,state,BLOCK); + PVector ref; + //std::cout << "---\r\n"; + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + arm_fir_q15(&S, + reinterpret_cast(signal.const_ptr()), + reinterpret_cast(ref.ptr()),BLOCK); + STOP_CYCLE_MEASUREMENT; + + + if (!validate(res.const_ptr(),ref.const_ptr(),BLOCK)) + { + printf("fir failed \r\n"); + } + + free(state); + free(coefsb); + + +} + + + + +template +void all_filter_test() +{ + + title("FIR test"); + + + test(); + test(); + test(); + test(); + test(); + + test(); + test(); + test(); + test(); + test(); + + test(); + test(); + test(); + test(); + test(); + + + test(); + test(); + test(); + test(); + test(); + + +} + +void filter_test() +{ + //all_filter_test(); +} \ No newline at end of file diff --git a/dsppp/tests/fusion_test.cpp b/dsppp/tests/fusion_test.cpp new file mode 100644 index 000000000..0711cd1b6 --- /dev/null +++ b/dsppp/tests/fusion_test.cpp @@ -0,0 +1,247 @@ +extern "C" { + extern void fusion_test(); +} + +#include "allocator.h" + +#include + +#include +#include +#include +#include + +#include + +#include + +template +static void test() +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + + #if defined(STATIC_TEST) + PVector a; + PVector b; + PVector c; + #else + PVector a(NB); + PVector b(NB); + PVector c(NB); + #endif + + + init_array(a,NB); + init_array(b,NB); + init_array(c,NB); + + #if defined(STATIC_TEST) + PVector resa; + PVector resb; + #else + PVector resa(NB); + PVector resb(NB); + #endif + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + results(resa,resb) = Merged{a + b,a + c}; + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + PVector refa; + PVector refb; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + cmsisdsp_add(a.const_ptr(),b.const_ptr(),refa.ptr(),NB); + cmsisdsp_add(a.const_ptr(),c.const_ptr(),refb.ptr(),NB); + STOP_CYCLE_MEASUREMENT; + + if (!validate(resa.const_ptr(),refa.const_ptr(),NB)) + { + printf("add a failed \r\n"); + + } + + if (!validate(resb.const_ptr(),refb.const_ptr(),NB)) + { + 
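+      /* Context for this check: results(resa,resb) = Merged{a + b, a + c}
+         asks the library to evaluate both expressions together (the operand
+         a is shared and, presumably, loaded once per fused pass), while the
+         two cmsisdsp_add reference calls recompute the same sums separately
+         with arm_add. */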
printf("add b failed \r\n"); + + } + + std::cout << "=====\r\n"; +} + + +template +static void test2() +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + #if defined(STATIC_TEST) + PVector a; + PVector b; + PVector c; + #else + PVector a(NB); + PVector b(NB); + PVector c(NB); + #endif + using Acc = typename number_traits::accumulator; + + + init_array(a,NB); + init_array(b,NB); + init_array(c,NB); + + Acc resa,resb,refa,refb; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(2); + std::tie(resa,resb) = dot(Merged{expr(a),expr(a)}, + Merged{expr(b),expr(c)}); + stopSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + cmsisdsp_dot(a.const_ptr(),b.const_ptr(),refa,NB); + cmsisdsp_dot(a.const_ptr(),c.const_ptr(),refb,NB); + STOP_CYCLE_MEASUREMENT; + + if (!validate(resa,refa)) + { + printf("dot a failed \r\n"); + + } + + if (!validate(resb,refb)) + { + printf("dot b failed \r\n"); + + } + + std::cout << "=====\r\n"; + + +} + +template +static void test3() +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + + constexpr int U = 2; + #if defined(STATIC_TEST) + PVector a[U]; + PVector b[U]; + #else + PVector a[U]={PVector(NB),PVector(NB)}; + PVector b[U]={PVector(NB),PVector(NB)}; + #endif + + using Acc = typename number_traits::accumulator; + + for(int i=0;i res; + Acc ref[U]; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(3); + results(res) = dot(unroll( + [&a](index_t k){return expr(a[k]);}), + unroll( + [&b](index_t k){return expr(b[k]);}) + ); + stopSectionNB(3); + STOP_CYCLE_MEASUREMENT; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + for(int i=0;i +void all_fusion_test() +{ + + const int nb_tails = TailForTests::tail; + const int nb_loops = TailForTests::loop; + + title("Vector Fusion"); + + test(); + test(); + test(); + test(); + test(); + test(); + + title("Dot Product Fusion"); + + test2(); + test2(); + test2(); + test2(); + test2(); + test2(); + + title("Unroll Fusion"); + + test3(); + test3(); + test3(); + test3(); + test3(); + test3(); +} + +void fusion_test() +{ +#if defined(FUSION_TEST) + #if defined(F64_DT) + all_fusion_test(); + #endif + #if defined(F32_DT) + all_fusion_test(); + #endif + #if defined(F16_DT) && !defined(DISABLEFLOAT16) + all_fusion_test(); + #endif + #if defined(Q31_DT) + all_fusion_test(); + #endif + #if defined(Q15_DT) + all_fusion_test(); + #endif + #if defined(Q7_DT) + all_fusion_test(); + #endif +#endif + +} \ No newline at end of file diff --git a/dsppp/tests/matrix_test.cpp b/dsppp/tests/matrix_test.cpp new file mode 100644 index 000000000..e18a827ae --- /dev/null +++ b/dsppp/tests/matrix_test.cpp @@ -0,0 +1,1863 @@ +extern "C" { + extern void matrix_test(); +} + +#include "allocator.h" + +#include +#include +#include +#include + +#include + +#include +#include "boost/mp11.hpp" +using namespace boost::mp11; + + +extern "C" { +#include "dsp/matrix_functions.h" +#include "dsp/matrix_utils.h" +} + +template +struct MatTestConstant; + +template<> +struct MatTestConstant +{ + constexpr static double value = 0.001; + constexpr static double half = 0.5; +}; + +template<> +struct MatTestConstant +{ + constexpr static float value = 0.001f; + constexpr static float half = 0.5f; +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct MatTestConstant +{ + constexpr static float16_t value = (float16_t)0.001f; + constexpr static float16_t half = (float16_t)0.5f; + +}; +#endif + +template<> +struct MatTestConstant +{ + constexpr static Q7 value = 0.001_q7; + constexpr static Q7 half = 0.5_q7; +}; + 
+template<> +struct MatTestConstant +{ + constexpr static Q15 value = 0.001_q15; + constexpr static Q15 half = 0.5_q15; +}; + +template<> +struct MatTestConstant +{ + constexpr static Q31 value = 0.001_q31; + constexpr static Q31 half = 0.5_q31; +}; + + +template +struct ErrThreshold +{ + constexpr static float abserr = 0; + constexpr static float relerr = 0; + constexpr static float abserr_cholesky = 0; + constexpr static float relerr_cholesky = 0; + constexpr static float abserr_householder = 0; + constexpr static float relerr_householder = 0; + constexpr static float abserr_qr = 0; + constexpr static float relerr_qr = 0; + constexpr static float abserr_inv = 0; + constexpr static float relerr_inv = 0; +}; + +// Should be more accurate than F32 but right know +// we only check there is no regression compared to f32 +template<> +struct ErrThreshold +{ + constexpr static float abserr = ABS_ERROR; + constexpr static float relerr = REL_ERROR; + constexpr static float abserr_cholesky = 3e-4; + constexpr static float relerr_cholesky = 1e-4; + + constexpr static float abserr_householder = ABS_ERROR; + constexpr static float relerr_householder = REL_ERROR; + constexpr static float abserr_qr = ABS_ERROR; + constexpr static float relerr_qr = REL_ERROR; + + constexpr static float abserr_inv = ABS_ERROR; + constexpr static float relerr_inv = REL_ERROR; +}; + +template<> +struct ErrThreshold +{ + constexpr static float abserr = ABS_ERROR; + constexpr static float relerr = REL_ERROR; + constexpr static float abserr_cholesky = 3e-4; + constexpr static float relerr_cholesky = 1e-4; + + constexpr static float abserr_householder = ABS_ERROR; + constexpr static float relerr_householder = REL_ERROR; + constexpr static float abserr_qr = ABS_ERROR; + constexpr static float relerr_qr = REL_ERROR; + + constexpr static float abserr_inv = 4.0e-6; + constexpr static float relerr_inv = 5.0e-6; +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct ErrThreshold +{ + constexpr static float abserr = ABS_ERROR; + constexpr static float relerr = REL_ERROR; + constexpr static float abserr_cholesky = 2e-1; + constexpr static float relerr_cholesky = 2e-1; + + constexpr static float abserr_householder = 2e-4; + constexpr static float relerr_householder = 2e-3; + // 32x32 is not numerically behaving well with + // the matrix used as input + constexpr static float abserr_qr = 2.0; + constexpr static float relerr_qr = 1e-2; + + constexpr static float abserr_inv = 3e-2; + constexpr static float relerr_inv = 3e-2; +}; +#endif + +void cmsisdsp_mat_inv(float64_t *amod, + float64_t* b, + uint32_t r,uint32_t c) +{ + arm_matrix_instance_f64 src; + arm_matrix_instance_f64 dst; + + + src.numRows = r; + src.numCols = c; + src.pData = amod; + + dst.numRows = r; + dst.numCols = c; + dst.pData = b; + + arm_status status = arm_mat_inverse_f64(&src,&dst); + (void)status; +}; + +void cmsisdsp_mat_inv(float32_t *amod, + float32_t* b, + uint32_t r,uint32_t c) +{ + arm_matrix_instance_f32 src; + arm_matrix_instance_f32 dst; + + + src.numRows = r; + src.numCols = c; + src.pData = amod; + + dst.numRows = r; + dst.numCols = c; + dst.pData = b; + + arm_status status = arm_mat_inverse_f32(&src,&dst); + (void)status; +}; + +#if !defined(DISABLEFLOAT16) +void cmsisdsp_mat_inv(float16_t *amod, + float16_t* b, + uint32_t r,uint32_t c) +{ + arm_matrix_instance_f16 src; + arm_matrix_instance_f16 dst; + + + src.numRows = r; + src.numCols = c; + src.pData = amod; + + dst.numRows = r; + dst.numCols = c; + dst.pData = b; + + arm_status status = 
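// The ErrThreshold tables above feed the validate() helpers used throughout this
// file: a computed element is accepted when it is close enough to the reference in
// either an absolute or a relative sense. validate() itself is provided elsewhere by
// the test harness; one common form of such a combined check, sketched with an
// illustrative name, is:
#include <cmath>

static bool within_tolerance(float computed, float reference,
                             float abserr, float relerr)
{
    const float delta = std::fabs(computed - reference);
    if (delta <= abserr) {
        return true;                                 // absolute tolerance
    }
    return delta <= relerr * std::fabs(reference);   // relative tolerance
}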
arm_mat_inverse_f16(&src,&dst); + (void)status; +}; +#endif + +const float32_t mat64[64] = {0.395744, 0.623798, 0.885422, 0.95415, 0.310384, 0.257541, + 0.631426, 0.424491, 0.130945, 0.799959, 0.133693, 0.479455, + 0.519254, 0.381039, 0.617455, 0.748273, 0.146944, 0.928945, + 0.430936, 0.508207, 0.829023, 0.358027, 0.999501, 0.851953, + 0.273895, 0.685898, 0.0436612, 0.295212, 0.467651, 0.0515567, + 0.21037, 0.607475, 0.570295, 0.281109, 0.979219, 0.0947969, + 0.319016, 0.398405, 0.349953, 0.710002, 0.431597, 0.447659, + 0.0747669, 0.057063, 0.165648, 0.773106, 0.135765, 0.709327, + 0.873836, 0.292361, 0.00202529, 0.392942, 0.520183, 0.0528055, + 0.797982, 0.613497, 0.509682, 0.0435791, 0.780526, 0.960582, + 0.535914, 0.216113, 0.134108, 0.225859}; + +const float32_t mat16[16] = {1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 5.0, 6.0, + 3.0, 5.0, 9.0, 10.0, 4.0, 6.0, 10.0, 16.0}; + +const float32_t mat256[256] = {0.97936, 0.498105, 0.452618, 0.299761, 0.688624, 0.247212, \ + 0.228337, 0.22905, 0.563815, 0.251998, 0.5238, 0.141223, 0.0980689, \ + 0.79112, 0.771182, 0.890995, 0.0256181, 0.0377277, 0.575629, \ + 0.648138, 0.926218, 0.803878, 0.620333, 0.325635, 0.587355, 0.041795, \ + 0.934271, 0.0690131, 0.0240136, 0.800828, 0.522999, 0.374706, \ + 0.266977, 0.208028, 0.112878, 0.0389899, 0.658311, 0.205067, \ + 0.244172, 0.0762778, 0.190575, 0.677312, 0.0682093, 0.367328, \ + 0.0191464, 0.988968, 0.437477, 0.130622, 0.907823, 0.0116559, \ + 0.614526, 0.447443, 0.0126975, 0.995496, 0.947676, 0.659996, \ + 0.321547, 0.725415, 0.658426, 0.0243924, 0.0843519, 0.351748, \ + 0.974332, 0.673381, 0.375012, 0.719626, 0.721219, 0.766905, \ + 0.17065, 0.648905, 0.770983, 0.360008, 0.344226, 0.179633, 0.347905, \ + 0.555561, 0.742615, 0.908389, 0.806959, 0.176078, 0.872167, \ + 0.321839, 0.098607, 0.954515, 0.627286, 0.235082, 0.746179, 0.163606, \ + 0.899323, 0.871471, 0.712448, 0.956971, 0.736687, 0.750702, 0.843348, \ + 0.302435, 0.444862, 0.0644597, 0.765519, 0.518397, 0.765541, \ + 0.900375, 0.201853, 0.490325, 0.721786, 0.893647, 0.774724, \ + 0.0983631, 0.339887, 0.526084, 0.0786152, 0.515697, 0.438801, \ + 0.226628, 0.125093, 0.886642, 0.617766, 0.71696, 0.473172, 0.640949, \ + 0.67688, 0.676214, 0.453662, 0.345796, 0.608999, 0.904448, 0.0965741, \ + 0.00461771, 0.467399, 0.292235, 0.0418646, 0.116632, 0.0766192, \ + 0.269051, 0.411649, 0.0538381, 0.973959, 0.667106, 0.301662, \ + 0.977206, 0.891751, 0.420267, 0.441334, 0.0896179, 0.249969, \ + 0.672614, 0.623966, 0.609733, 0.320772, 0.39723, 0.845196, 0.653877, \ + 0.0599186, 0.340188, 0.199787, 0.598104, 0.45664, 0.920485, 0.969439, \ + 0.446555, 0.0932837, 0.0247635, 0.747644, 0.438759, 0.639154, \ + 0.754049, 0.379433, 0.968655, 0.0452146, 0.208123, 0.252654, \ + 0.261898, 0.608665, 0.145211, 0.395368, 0.799111, 0.697823, \ + 0.382906, 0.456515, 0.262579, 0.284169, 0.881488, 0.860877, 0.155548, \ + 0.537387, 0.804235, 0.311383, 0.183216, 0.677692, 0.829542, 0.406049, \ + 0.860392, 0.467668, 0.385633, 0.654692, 0.841125, 0.178406, \ + 0.668945, 0.369609, 0.809711, 0.454593, 0.632028, 0.605791, 0.643851, \ + 0.787023, 0.285633, 0.832216, 0.30892, 0.303559, 0.704898, 0.61118, \ + 0.435547, 0.173678, 0.788689, 0.319511, 0.648378, 0.635417, 0.125127, \ + 0.310251, 0.800819, 0.4863, 0.924361, 0.308059, 0.952175, 0.449844, \ + 0.215496, 0.257826, 0.556383, 0.259735, 0.197234, 0.0509903, 0.21474, \ + 0.145085, 0.41288, 0.876758, 0.096721, 0.228955, 0.0152248, 0.126501, \ + 0.28899, 0.336668, 0.580015, 0.932761, 0.989783, 0.667379, \ + 0.798751, 0.587173, 0.445902, 
0.041448, 0.311878, 0.0332857, \ + 0.401984, 0.795049, 0.8222, 0.678648, 0.807558}; + +template typename A> +void init_mat(Matrix &pDst,std::size_t r,std::size_t c) +{ + const float32_t *p; + if ((r==4) && (r==c)) + { + p = mat16; + } + + if ((r==8) && (r==c)) + { + p = mat64; + } + + if ((r==16) && (r==c)) + { + p = mat256; + } + + + for(std::size_t i=0;i typename A, + typename M> +void _matinv(const Matrix &a,M && res) +{ + + Matrix b = a; + + const vector_length_t nb_rows = a.rows(); + const vector_length_t nb_cols = a.columns(); + + + for(index_t r=0;r < nb_rows ; r++) + { + res.row(r) = T{}; + res(r,r) = number_traits::one(); + } + + + for(index_t c=0;c < nb_cols ; c++) + { + T pivot = b(c,c); + index_t selectedRow = c; + + + for(index_t r=c+1;r < nb_rows ; r++) + { + T newPivot = b(r,c); + if (_abs(newPivot)>_abs(pivot)) + { + pivot = newPivot; + selectedRow = r; + } + } + + if ((pivot!=T{}) && (selectedRow != c)) + { + swap(b.row(c,c),b.row(selectedRow,c)); + swap(res.row(c),res.row(selectedRow)); + } + else if (pivot == T{}) + { + break; + } + + pivot = number_traits::one() / pivot; + + b.row(c,c) *= pivot; + res.row(c) *= pivot; + + index_t r=0; + + for(;r < c ; r++) + { + const T tmp = b(r,c); + b.row(r,c) -= b.row(c,c)*tmp; + res.row(r) -= res.row(c)*tmp; + } + + for(r=c+1;r < nb_rows ; r++) + { + const T tmp = b(r,c); + b.row(r,c) -= b.row(c,c)*tmp; + res.row(r) -= res.row(c)*tmp; + } + + } + + +} + +template typename A, + typename std::enable_if<(NB>0),bool>::type = true> +Matrix matinv(const Matrix &a) +{ + Matrix res; + _matinv(a,res); + return(res); +} + +template typename A, + typename std::enable_if<(NB<0),bool>::type = true> +Matrix matinv(const Matrix &a) +{ + Matrix res(a.rows(),a.columns()); + return (_matinv(a,res)); + return(res); +} + +template typename A, + typename std::enable_if<(NB<0),bool>::type = true> +void matinv(Matrix &res, const Matrix &a) +{ + (void)_matinv(a,res); +} + + +template +void testinv() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PMat a; + #else + PMat a(R,C); + #endif + + init_mat(a,R,C); + + #if !defined(STATIC_TEST) + PMat res(R,C); + #endif + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PMat res = matinv(a); + #else + matinv(res,a); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + PMat amod(a); + PMat cmsis_res(R,C); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + cmsisdsp_mat_inv(amod.ptr(),cmsis_res.ptr(),R,C); + STOP_CYCLE_MEASUREMENT; + + + + if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C, + ErrThreshold::abserr_inv,ErrThreshold::relerr_inv)) + { + printf("inv failed \r\n"); + + } + + std::cout << "=====\r\n"; + +} + +template +void testadd() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PMat a; + PMat b; + #else + PMat a(R,C); + PMat b(R,C); + #endif + + init_array(a,R*C); + init_array(b,R*C); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PMat res = a+b; + #else + PMat res = a+b; + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //PrintType(); + //PrintType(); +// + //std::cout << "a: " << IsVector::value << "\r\n"; + //std::cout << "b: " << IsVector::value << "\r\n"; + //std::cout << "a+b: " << IsVector::value << "\r\n"; + //std::cout << "res: " << IsVector::value << "\r\n"; + //std::cout << "same: " << SameElementType::value << "\r\n"; +// + //std::cout << "vec inst: " << has_vector_inst() << 
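// The _matinv() routine above is a Gauss-Jordan elimination with partial pivoting:
// for each column it selects the row with the largest pivot, swaps it in, normalizes
// that row, and eliminates the column from all other rows, applying the same row
// operations to an identity matrix that ends up holding the inverse. A compact
// scalar sketch on raw row-major storage (illustrative name, no handling of the
// singular case that _matinv() exits on via break):
#include <algorithm>
#include <cmath>
#include <utility>

static void gauss_jordan_inverse(float *a, float *inv, int n)
{
    for (int r = 0; r < n; r++)                       // start from the identity
        for (int c = 0; c < n; c++)
            inv[r * n + c] = (r == c) ? 1.0f : 0.0f;

    for (int c = 0; c < n; c++) {
        int sel = c;                                  // partial pivoting
        for (int r = c + 1; r < n; r++)
            if (std::fabs(a[r * n + c]) > std::fabs(a[sel * n + c]))
                sel = r;
        if (sel != c)
            for (int k = 0; k < n; k++) {
                std::swap(a[c * n + k], a[sel * n + k]);
                std::swap(inv[c * n + k], inv[sel * n + k]);
            }

        const float pivot = 1.0f / a[c * n + c];      // normalize the pivot row
        for (int k = 0; k < n; k++) {
            a[c * n + k] *= pivot;
            inv[c * n + k] *= pivot;
        }
        for (int r = 0; r < n; r++) {                 // eliminate the other rows
            if (r == c) continue;
            const float f = a[r * n + c];
            for (int k = 0; k < n; k++) {
                a[r * n + k] -= f * a[c * n + k];
                inv[r * n + k] -= f * inv[c * n + k];
            }
        }
    }
}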
"\r\n"; + //std::cout << "vec index pair: " << vector_idx_pair() << "\r\n"; + //std::cout << "must use mat idx: " << must_use_matrix_idx_pair() << "\r\n"; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PMat cmsis_res; + #else + PMat cmsis_res(R,C); + #endif + cmsisdsp_mat_add(a.const_ptr(),b.const_ptr(),cmsis_res.ptr(),R,C); + STOP_CYCLE_MEASUREMENT; + + + + if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("add failed \r\n"); + } + + std::cout << "=====\r\n"; + +} + + +template +void testdiag() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + #if defined(STATIC_TEST) + PVector a; + #else + PVector a(R); + #endif + init_array(a,R); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PMat res=PMat::diagonal(a); + #else + PMat res=PMat::diagonal(a); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + const T* ap = a.const_ptr(); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PMat cmsis_res; + #else + PMat cmsis_res(R,C); + #endif + T* refp = cmsis_res.ptr(); + + UNROLL_LOOP + for(index_t row=0;row < R; row++) + { + UNROLL_LOOP + for(index_t col=0;col < C; col++) + { + if (row != col) + { + refp[row*C+col] = T{}; + } + else + { + refp[row*C+col] = ap[row]; + } + } + } + STOP_CYCLE_MEASUREMENT; + + + + if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("diag failed \r\n"); + } + + std::cout << "=====\r\n"; +} + + + +template +void testouter() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + PVector a; + PVector b; + init_array(a,R); + init_array(b,C); + + b = b + b; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + PMat res = outer(a,b); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(2); + #if defined(STATIC_TEST) + PMat cmsis_res; + #else + PMat cmsis_res(R,C); + #endif + CMSISOuter::run(a.const_ptr(),b.const_ptr(),cmsis_res.ptr(),R,C); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + //std::cout<::abserr,ErrThreshold::relerr)) + { + printf("outer failed \r\n"); + } + + std::cout << "=====\r\n"; + +} + +template +void testview() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PVector a; + #else + PVector a(R); + #endif + init_array(a,R); + + #if defined(STATIC_TEST) + PMat res=PMat::diagonal(a); + #else + PMat res=PMat::diagonal(a); + #endif + //std::cout << res; + constexpr int subsize = 8; + constexpr int subpos = 8; + auto r = res.sub(Slice(subpos,subpos+subsize),Slice(subpos,subpos+subsize)); + + #if defined(STATIC_TEST) + PMat resb; + #else + PMat resb(subsize,subsize); + #endif + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + resb = r+r; + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //std::cout << IsMatrix::value << "\r\n"; + + PMat cmsis_res; + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(2); + DISABLE_LOOP_UNROLL + for(index_t row=0;row < subsize ; row++) + { + DISABLE_LOOP_UNROLL + for(index_t col=0;col < subsize ; col++) + { + cmsis_res(row,col) = r(row,col)+r(row,col); + } + } + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + //std::cout<::abserr,ErrThreshold::relerr)) + { + printf("sub matrix failed \r\n"); + } + + std::cout << "=====\r\n"; + +} + + + +template +void testmatvec() +{ + + using STO = typename 
vector_traits::storage_type; + + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PVector a; + #else + PVector a(C); + #endif + init_array(a,C); + + #if defined(STATIC_TEST) + PMat m; + #else + PMat m(R,C); + #endif + init_array(m,R*C); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PVector res = dot(m,a); + #else + PVector res = dot(m,a); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //std::cout << IsMatrix::value << "\r\n"; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PVector cmsis_res; + #else + PVector cmsis_res(R); + #endif + typename CMSISMatrixType::type S; + S.numRows = R; + S.numCols = C; + S.pData = reinterpret_cast(const_cast(m.ptr())); + + + startSectionNB(2); + cmsis_mat_vec_mult(&S, a.const_ptr(), cmsis_res.ptr()); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + //std::cout << cmsis_res; + + if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("matrix times vector failed \r\n"); + } + std::cout << "=====\r\n"; + +} + +template +void testcomplexmatvec() +{ + const T scalar = MatTestConstant::half; + using STO = typename vector_traits::storage_type; + + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PVector a; + PVector b; + #else + PVector a(C); + PVector b(C); + #endif + init_array(a,C); + init_array(b,C); + + #if defined(STATIC_TEST) + PMat m; + #else + PMat m(R,C); + #endif + init_array(m,R*C); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PVector tmpv = a + b * scalar; + PVector res = dot(m,tmpv); + #else + PVector tmpv = a + b * scalar; + PVector res = dot(m,tmpv); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //std::cout << IsMatrix::value << "\r\n"; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PVector cmsis_res; + PVector tmp; + #else + PVector cmsis_res(R); + PVector tmp(C); + #endif + typename CMSISMatrixType::type S; + S.numRows = R; + S.numCols = C; + S.pData = reinterpret_cast(const_cast(m.ptr())); + + + startSectionNB(2); + cmsis_complex_mat_vec(&S, + a.const_ptr(), + b.const_ptr(), + scalar, + tmp.ptr(), + cmsis_res.ptr()); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + + + //std::cout << cmsis_res; + + if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("matrix times vector expression failed \r\n"); + } + + std::cout << "=====\r\n"; + +} + + +template +void testmatmult() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << K << " x " << C << "\r\n"; + + using S = typename CMSISMatrixType::scalar; + + #if defined(STATIC_TEST) + PMat ma; + #else + PMat ma(R,K); + #endif + init_array(ma,R*K); + + #if defined(STATIC_TEST) + PMat mb; + #else + PMat mb(K,C); + #endif + init_array(mb,K*C); + + + + mb += TestConstant::small; + + //std::cout << ma; + //std::cout << mb; + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PMat res = dot(ma,mb); + #else + PMat res = dot(ma,mb); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //PrintType(); + //PrintType(); + //std::cout << ma; + //std::cout << mb; + //std::cout << res; + + + //std::cout << IsMatrix::value << "\r\n"; + + PMat tmp(C,K); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PMat cmsis_res; + #else + PMat cmsis_res(R,C); + 
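// The quantity checked by testcomplexmatvec() above is res = m * (a + scalar * b):
// the vector expression is evaluated first (tmpv / tmp) and then multiplied by the
// matrix, on both the dsppp and the CMSIS-DSP side. A plain row-major reference for
// that computation, with illustrative names:
#include <cstddef>
#include <vector>

static void mat_vec_expr(const std::vector<float> &m,   // R x C, row-major
                         const std::vector<float> &a,   // length C
                         const std::vector<float> &b,   // length C
                         float scalar,
                         std::vector<float> &res,       // length R
                         std::size_t R, std::size_t C)
{
    for (std::size_t r = 0; r < R; r++) {
        float acc = 0.0f;
        for (std::size_t c = 0; c < C; c++)
            acc += m[r * C + c] * (a[c] + scalar * b[c]);
        res[r] = acc;
    }
}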
#endif + + + typename CMSISMatrixType::type SA; + SA.numRows = R; + SA.numCols = K; + SA.pData = reinterpret_cast(ma.ptr()); + + typename CMSISMatrixType::type SB; + SB.numRows = K; + SB.numCols = C; + SB.pData = reinterpret_cast(mb.ptr()); + + typename CMSISMatrixType::type RES; + RES.numRows = R; + RES.numCols = C; + RES.pData = reinterpret_cast(cmsis_res.ptr()); + + + startSectionNB(2); + cmsis_mat_mult(&SA, &SB, &RES,reinterpret_cast(tmp.ptr())); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + + //std::cout << cmsis_res; + + if (!validate(res,cmsis_res, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("matrix times matrix expression failed \r\n"); + } + + std::cout << "=====\r\n"; + +} + +template +void testsubmatmult() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << K << " x " << C << "\r\n"; + + using S = typename CMSISMatrixType::scalar; + constexpr int TOTALA = 4 + 2*K + 2*R + K*R; + constexpr int TOTALB = 4 + 2*C + 2*K + C*K; + + #if defined(STATIC_TEST) + PMat ma; + #else + PMat ma(R+2,K+2); + #endif + init_array(ma,TOTALA); + + #if defined(STATIC_TEST) + PMat mb; + #else + PMat mb(K+2,C+2); + #endif + init_array(mb,TOTALB); + + + + mb += MatTestConstant::value; + + //std::cout << ma; + //std::cout << mb; + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PMat res(T{}); + #else + PMat res(R,C,T{}); + #endif + startSectionNB(1); + res.sub(Slice(0,R),Slice(0,C)) = copy(dot(ma.sub(Slice(0,R),Slice(0,K)),mb.sub(Slice(0,K),Slice(0,C)))); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //PrintType(); + //PrintType(); + //std::cout << ma; + //std::cout << mb; + //std::cout << res; + + + //std::cout << IsMatrix::value << "\r\n"; + + PMat cmsis_res(R,C); + PMat cmsis_ma(R,K); + PMat cmsis_mb(K,C); + PMat tmp(C,K); + + typename CMSISMatrixType::type SA; + SA.numRows = R; + SA.numCols = K; + SA.pData = reinterpret_cast(cmsis_ma.ptr()); + + typename CMSISMatrixType::type SB; + SB.numRows = K; + SB.numCols = C; + SB.pData = reinterpret_cast(cmsis_mb.ptr()); + + typename CMSISMatrixType::type RES; + RES.numRows = R; + RES.numCols = C; + RES.pData = reinterpret_cast(cmsis_res.ptr()); + + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(2); + cmsis_ma = copy(ma.sub(Slice(0,R),Slice(0,K))); + cmsis_mb = copy(mb.sub(Slice(0,K),Slice(0,C))); + cmsis_mat_mult(&SA, &SB, &RES,reinterpret_cast(tmp.ptr())); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + + //std::cout << cmsis_res; + + if (!validate(res.sub(Slice(0,R),Slice(0,C)),cmsis_res, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("matrix times matrix expression failed \r\n"); + } + + + std::cout << "=====\r\n"; +} + + +template +void testmattranspose() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PMat ma; + #else + PMat ma(R,C); + #endif + init_array(ma,R*C); + + + //PrintType(); + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PMat res = ma.transpose(); + #else + PMat res = ma.transpose(); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //std::cout << IsMatrix::value << "\r\n"; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PMat cmsis_res; + #else + PMat cmsis_res(C,R); + #endif + + typename CMSISMatrixType::type SA; + SA.numRows = R; + SA.numCols = C; + SA.pData = reinterpret_cast::scalar*>(ma.ptr()); + + typename CMSISMatrixType::type RES; + RES.numRows = C; + RES.numCols = R; + RES.pData = 
reinterpret_cast::scalar*>(cmsis_res.ptr()); + + + startSectionNB(2); + cmsis_mat_trans(&SA, &RES); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + //std::cout << cmsis_res; + + if (!validate(res,cmsis_res, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("matrix transpose failed \r\n"); + } + + + std::cout << "=====\r\n"; +} + + +#if !defined(DISABLEFLOAT16) +static float16_t _gen_sqrt(const float16_t v) +{ + return((float16_t)sqrtf(v)); +} +#endif + +static float32_t _gen_sqrt(const float32_t v) +{ + return(sqrtf(v)); +} + +static float64_t _gen_sqrt(const float64_t v) +{ + return(sqrt(v)); +} + +template typename A, + typename V,typename T> +inline T _householder(Vector &res,const V&v,const T eps) +{ + T alpha = v[0]; + T tau; + T beta; + if (v.length()==1) + { + res[0] = T{}; + return(T{}); + } + T xnorm2 = dot(v.sub(1),v.sub(1)); + + //std::cout << xnorm2 << "\r\n"; + if (xnorm2 <= eps) + { + tau = T{}; + res = T{}; + } + else + { + if (alpha<=0) + { + beta = _gen_sqrt(alpha*alpha+xnorm2); + } + else + { + beta = -_gen_sqrt(alpha*alpha+xnorm2); + } + T r = number_traits::one() / (alpha - beta); + res = v * r; + tau = (beta - alpha)/beta; + res[0] = number_traits::one(); + } + return(tau); +} + +template::value && + SameElementType::value && + IsVector::value,bool>::type = true> +auto householder(const V&v,const T threshold) +{ + constexpr int NB = StaticLength::value; + Vector res; + T beta = _householder(res,v,threshold); + return std::tuple>(beta,res); +} + +template::value && + SameElementType::value && + IsVector::value,bool>::type = true> +auto householder(const V&v,const T threshold) +{ + Vector res(v.length()); + T beta = _householder(res,v,threshold); + return std::tuple>(beta,res); +} + +template::value && + SameElementType::value && + IsVector::value,bool>::type = true> +auto householder(const V&v,const T threshold,TMP &res) +{ + T beta = _householder(res,v,threshold); + return beta; +} + +template +struct HouseholderThreshold; + +#if !defined(DISABLEFLOAT16) +template<> +struct HouseholderThreshold +{ + static constexpr float16_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F16; +}; +#endif + +template<> +struct HouseholderThreshold +{ + static constexpr float64_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F64; +}; + + +template<> +struct HouseholderThreshold +{ + static constexpr float32_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F32; +}; + + +template +static void testHouseholder() +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + #if defined(STATIC_TEST) + PVector a; + #else + PVector a(NB); + #endif + + cmsis_init_householder(a.ptr(),NB); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + auto res = householder(a,HouseholderThreshold::value); + //PVector res;// = a + b; + //float res_beta=0; + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PVector ref; + #else + PVector ref(NB); + #endif + T ref_beta = cmsis_householder(a.const_ptr(),ref.ptr(),NB); + STOP_CYCLE_MEASUREMENT; + + if (!validate(std::get<1>(res).const_ptr(),ref.const_ptr(),NB, + ErrThreshold::abserr_householder,ErrThreshold::relerr_householder)) + { + printf("householder vector failed \r\n"); + } + + + if (!validate(std::get<0>(res),ref_beta, + ErrThreshold::abserr_householder,ErrThreshold::relerr_householder)) + { + printf("householder beta failed \r\n"); + } + std::cout << "=====\r\n"; +} + +#include "debug_mat.h" + +#if 1 +// R >= C +template typename A> +auto QR(const Matrix&m,const T eps,bool wantQ) +{ 
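// _householder() above builds a Householder reflector: it returns tau and a vector v
// with v[0] == 1 such that (I - tau * v * v^T) maps the input onto (beta, 0, ..., 0),
// choosing the sign of beta opposite to the leading element to avoid cancellation in
// alpha - beta. A scalar sketch of the same construction (std::vector<float> standing
// in for the Vector type, illustrative name, length-1 special case omitted):
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

static float householder_reflector(std::vector<float> &v, float eps)
{
    const float alpha = v[0];
    float xnorm2 = 0.0f;
    for (std::size_t i = 1; i < v.size(); i++)      // squared norm of the tail
        xnorm2 += v[i] * v[i];

    if (xnorm2 <= eps) {                            // already of the form (alpha, 0, ..., 0)
        std::fill(v.begin(), v.end(), 0.0f);
        return 0.0f;                                // tau == 0 means "no reflection"
    }
    const float beta = (alpha <= 0.0f) ?  std::sqrt(alpha * alpha + xnorm2)
                                       : -std::sqrt(alpha * alpha + xnorm2);
    const float r = 1.0f / (alpha - beta);
    for (std::size_t i = 1; i < v.size(); i++)
        v[i] *= r;                                  // scale the tail of the reflector
    v[0] = 1.0f;                                    // conventional leading one
    return (beta - alpha) / beta;                   // tau
}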
+ #if defined(STATIC_TEST) + Vector tau; + Matrix RM = m; + Matrix Q = Matrix::identity(); + + + // Temporaries + Vector tmpvec; + Matrix tmpmat; + #else + Vector tau(m.columns()); + Matrix RM = m; + Matrix Q = Matrix::identity(m.rows()); + + + // Temporaries + Vector tmpvec(m.rows()); + Matrix tmpmat(1,m.rows()); + #endif + + const int NBC = m.columns(); + const int NBR = m.rows(); + + + for(index_t c=0;c vt(tmpvec,1,NBR-c); + dot(tmpmat.sub(0,1,0,NBC-c),vt,RM.sub(c,c)); + + RM.sub(c,c) = + RM.sub(c,c) - beta * outer(tmpvec.sub(0,NBR-c),tmpmat.row(0,0,NBC-c)); + + // Copy householder reflector + // Not valid when c == C-1 + // We don't want to use a test since CMSIS-DSP is not using + // one and introducing a test would give worse performance + RM.col(c,c+1) = copy(tmpvec.sub(1,NBR-c)); + + } + + + auto beta = householder(RM.col(NBC-1,NBC-1),eps,tmpvec); + tau[NBC-1] = beta; + + MatrixView vt(tmpvec,1,NBR-(NBC-1)); + dot(tmpmat.sub(0,1,0,NBC-(NBC-1)),vt,RM.sub(NBC-1,NBC-1)); + + RM.sub(NBC-1,NBC-1) = + RM.sub(NBC-1,NBC-1) - beta * outer(tmpvec.sub(0,NBR-(NBC-1)),tmpmat.row(0,0,NBC-(NBC-1))); + + + + + if (wantQ) + { + for(index_t c=NBC-1;c>=0;c--) + { + tmpvec.sub(1) = copy(RM.col(c,c+1)); + tmpvec[0] = number_traits::one(); + + MatrixView vt(tmpvec,1,NBR-c); + dot(tmpmat.sub(0,1,0,NBR-c),vt,Q.sub(c,c)); + + Q.sub(c,c) = + Q.sub(c,c) - tau[c] * outer(tmpvec.sub(0,NBR-c),tmpmat.row(0,0,NBR-c)); + + } + } + + return std::make_tuple(RM,Q,tau); + +} + +template +static void testQR() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + #if defined(STATIC_TEST) + PMat a; + #else + PMat a(R,C); + #endif + + cmsis_init_qr(a.ptr(),R,C); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + auto res = QR(a,HouseholderThreshold::value,true); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //std::cout << "next\r\n"; + + //std::cout << std::get<0>(res); + //std::cout << std::get<1>(res); + //std::cout << std::get<2>(res); + + // For fair comparison, in dynamic mode we must take into + // account the memory allocations since they are made + // by the QR algorithms + #if !defined(STATIC_TEST) + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #endif + + #if 0 //defined(STATIC_TEST) + PMat cmsis_res; + PMat cmsis_outRp; + PMat cmsis_outQp; + PVector cmsis_tau; + PVector cmsis_tmpa; + PVector cmsis_tmpb; + #else + PMat cmsis_res(R,C); + PMat cmsis_outRp(R,C); + PMat cmsis_outQp(R,R); + PVector cmsis_tau(C); + PVector cmsis_tmpa(R); + PVector cmsis_tmpb(C); + #endif + + typename CMSISMatrixType::type RP; + RP.numRows = R; + RP.numCols = C; + RP.pData = cmsis_outRp.ptr(); + + typename CMSISMatrixType::type QP; + QP.numRows = R; + QP.numCols = R; + QP.pData = cmsis_outQp.ptr(); + + typename CMSISMatrixType::type IN; + IN.numRows = R; + IN.numCols = C; + IN.pData = a.ptr(); + + //std::cout << "-------\r\n"; + + #if defined(STATIC_TEST) + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #endif + arm_status status=cmsis_qr(&IN,HouseholderThreshold::value, + &RP,&QP, + cmsis_tau.ptr(), + cmsis_tmpa.ptr(), + cmsis_tmpb.ptr()); + (void)status; + STOP_CYCLE_MEASUREMENT; + + //std::cout << cmsis_outRp; + //std::cout << cmsis_outQp; + //std::cout << cmsis_tau; + + if (!validate(std::get<0>(res),cmsis_outRp, + ErrThreshold::abserr_qr,ErrThreshold::relerr_qr)) + { + printf("QR Rp matrix failed \r\n"); + } + + + if (!validate(std::get<1>(res),cmsis_outQp, + ErrThreshold::abserr_qr,ErrThreshold::relerr_qr)) + { + printf("QR Qp matrix failed \r\n"); + } + + if 
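// Each step of the QR loop above applies the current reflector to the trailing
// block: it forms w = v^T * A (stored in tmpmat) and then performs the rank-1
// update A <- A - tau * v * w, first on RM and later, in reverse order, on Q.
// The same update in plain row-major form, with illustrative names:
#include <cstddef>
#include <vector>

static void apply_reflector(std::vector<float> &A, std::size_t m, std::size_t n,
                            const std::vector<float> &v, float tau)
{
    std::vector<float> w(n, 0.0f);                 // w = v^T * A
    for (std::size_t i = 0; i < m; i++)
        for (std::size_t j = 0; j < n; j++)
            w[j] += v[i] * A[i * n + j];

    for (std::size_t i = 0; i < m; i++)            // rank-1 update A -= tau * v * w
        for (std::size_t j = 0; j < n; j++)
            A[i * n + j] -= tau * v[i] * w[j];
}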
(!validate(std::get<2>(res),cmsis_tau, + ErrThreshold::abserr_qr,ErrThreshold::relerr_qr)) + { + printf("QR tau failed \r\n"); + } + std::cout << "=====\r\n"; +} + +#endif + + +template typename A> +auto cholesky(const Matrix&a) +{ + // Temporaries + #if defined(STATIC_TEST) + Matrix g = a; + Vector tmp; + #else + Matrix g = a; + Vector tmp(a.rows()); + #endif + + const int NBR = a.rows(); + + g.col(0,0) = g.col(0,0) * (T)(number_traits::one() / _gen_sqrt(g(0,0))); + + for(int j=1;j::one() / _gen_sqrt(g(j,j)- tmp[j])); + + } + return(g); +} + + +template +static void testCholesky() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << R << "\r\n"; + #if defined(STATIC_TEST) + PMat a; + #else + PMat a(R,R); + #endif + + cmsis_init_cholesky(a.ptr(),R,R); + + //std::cout << a; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + // Not totally equivalent to CMSIS implementation + // It should be possible to rewrite it to avoid use of + // temporary buffer like CMSIS-DSP + auto res = cholesky(a); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + //std::cout << res; + + PMat cmsis_res(T{}); + + typename CMSISMatrixType::type OUT; + OUT.numRows = R; + OUT.numCols = R; + OUT.pData = cmsis_res.ptr(); + + + typename CMSISMatrixType::type IN; + IN.numRows = R; + IN.numCols = R; + IN.pData = a.ptr(); + + //std::cout << "-------\r\n"; + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + arm_status status=cmsis_cholesky(&IN,&OUT); + (void)status; + STOP_CYCLE_MEASUREMENT; + + //std::cout << cmsis_res; + + + if (!validateLT(res,cmsis_res, + ErrThreshold::abserr_cholesky,ErrThreshold::relerr_cholesky)) + { + printf("cholesky failed \r\n"); + } + std::cout << "=====\r\n"; +} + +template +struct TESTINV +{ + static void all() + { + testinv(); + } +}; + +template +struct TESTOUTER +{ + static void all() + { + testouter(); + } +}; + +template +struct TESTMATVEC +{ + static void all() + { + testmatvec(); + } +}; + +template +struct TESTCOMPLEXMATVEC +{ + static void all() + { + testcomplexmatvec(); + } +}; + +template +struct TESTADD +{ + static void all() + { + testadd (); + } +}; + +template +struct TESTMATTRANSPOSE +{ + static void all() + { + testmattranspose(); + } +}; + +template +struct TESTMATMULT +{ + static void all() + { + testmatmult(); + } +}; + +template +struct TESTSUBMATMULT +{ + static void all() + { + testsubmatmult(); + } +}; + + +template +struct TEST_CASES +{ + static void all() + { + (mp_push_front::all(),...); + } +}; + +template
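// cholesky() above (and the cmsis_cholesky reference it is compared against) computes
// a lower-triangular G with G * G^T = A for a symmetric positive-definite A; only the
// lower triangle is validated (validateLT). A plain row-major scalar sketch of the
// same factorization, with illustrative names and no check for a non-positive pivot:
#include <cmath>
#include <vector>

static void cholesky_lower(const std::vector<float> &a,   // n x n, row-major, SPD
                           std::vector<float> &g,         // n x n, upper part not written
                           int n)
{
    for (int j = 0; j < n; j++) {
        for (int i = j; i < n; i++) {
            float sum = a[i * n + j];
            for (int k = 0; k < j; k++)            // remove contributions of earlier columns
                sum -= g[i * n + k] * g[j * n + k];
            g[i * n + j] = (i == j) ? std::sqrt(sum)
                                    : sum / g[j * n + j];
        }
    }
}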